cmn_UTF8Converter Class Reference
[Common, basic classes, functions and types]

#include <cmn_utf8.h>

Collaboration diagram for cmn_UTF8Converter:

Collaboration graph
[legend]

List of all members.


Detailed Description

Converts from a specified code page to UTF-8 and back.

Uses cmn_UTF8 as input/output.

Definition at line 41 of file cmn_utf8.h.


Public Member Functions

 cmn_UTF8Converter (const string &a_codeSet)
virtual ~cmn_UTF8Converter ()
string ConvertToUTF8 (const string &a_source)
string ConvertFromUTF8 (const string &a_utf8)

Static Public Member Functions

static const char * InitCodeSet ()

Private Attributes

 log_CLASSID_m
string m_codeset
cmn_Mutex m_converterToUTF8_x
cmn_Mutex m_converterFromUTF8_x
iconv_t m_fromUTFiconvh
iconv_t m_toUTFiconvh

Constructor & Destructor Documentation

cmn_UTF8Converter::cmn_UTF8Converter ( const string &  a_codeSet  ) 

Definition at line 72 of file cmn_utf8.cpp.

References errno, log_ERR_m, log_FUNC_m, m_codeset, m_fromUTFiconvh, m_toUTFiconvh, and UTF8_c().

00073   : m_codeset(a_codeSet) {
00074 
00075   if (a_codeSet.empty()) {
00076       log_FUNC_m(cmn_UTF8Converter);
00077       log_ERR_m(
00078           "Codeset wasn't detected properly. Please check system sesttings.");
00079   }
00080 
00081 #if IVD_POSIX_OS
00082     // Initialization of conversion
00083 
00084     if (m_codeset == UTF8_c) {
00085     return;
00086     }
00087 
00088     m_toUTFiconvh = iconv_open(UTF8_c, a_codeSet.c_str());
00089 
00090     if (m_toUTFiconvh == iconv_t(-1)) {
00091         throw ivd_SysError(
00092             errno, string("iconv_open(): source code set") + a_codeSet );
00093     }
00094 
00095     m_fromUTFiconvh = iconv_open(a_codeSet.c_str(), UTF8_c);
00096 
00097     if (m_fromUTFiconvh == iconv_t(-1)) {
00098         throw ivd_SysError(
00099             errno, string("iconv_open(): target code set") + a_codeSet );
00100     }
00101 #endif
00102 }

Here is the call graph for this function:

cmn_UTF8Converter::~cmn_UTF8Converter (  )  [virtual]

Definition at line 104 of file cmn_utf8.cpp.

References m_codeset, m_fromUTFiconvh, m_toUTFiconvh, and UTF8_c().

00104                                       {
00105 #if IVD_POSIX_OS
00106    if (m_codeset == UTF8_c) {
00107        return;
00108    }
00109    iconv_close(m_fromUTFiconvh);
00110    iconv_close(m_toUTFiconvh);
00111 #endif
00112 }

Here is the call graph for this function:


Member Function Documentation

const char * cmn_UTF8Converter::InitCodeSet (  )  [static]

Definition at line 114 of file cmn_utf8.cpp.

References NULL.

Referenced by cmn_SysInfo::GetSystemData().

00114                                            {
00115 
00116 #if IVD_POSIX_OS
00117     const char* locale = setlocale(LC_CTYPE, "");
00118 
00119     if (locale == NULL) {
00120         return "";
00121     }
00122 
00123     const char* codeset = nl_langinfo(CODESET);
00124 
00125     if (codeset == NULL) {
00126         return "";
00127     }
00128 
00129     return codeset;
00130 
00131 #elif TGT_OS_windows
00132     return "UTF-16";
00133 #else
00134     #error "Unknown/unsupported platform."
00135 #endif
00136 }

Here is the caller graph for this function:

string cmn_UTF8Converter::ConvertToUTF8 ( const string &  a_source  ) 

Definition at line 138 of file cmn_utf8.cpp.

References cmn_HexDump(), cmn_MAX_UTF8_CHAR_SIZE_c, errno, ie_FATAL_ERROR, ie_NYI, log_FUNC_m, log_WRN_m, m_codeset, m_converterToUTF8_x, m_toUTFiconvh, NULL, and UTF8_c().

Referenced by cmn_LocaleToUTF8().

00138                                                               {
00139 
00140  #if IVD_POSIX_OS
00141 
00142     if (m_codeset == UTF8_c) {
00143         return a_source;
00144     }
00145 
00146     string dest((a_source.length()+1)*cmn_MAX_UTF8_CHAR_SIZE_c, '\0');
00147 
00148     char *src_p = const_cast<char*>(a_source.c_str());
00149     char *dest_p = const_cast<char*>(dest.data());
00150     size_t inbytesleft = a_source.length();
00151     size_t outbytesleft = dest.length();
00152 
00153     size_t conversions;
00154     {  // iconv must be under mutex
00155         cmn_MutexLock l(m_converterToUTF8_x);
00156         conversions = iconv(m_toUTFiconvh,
00157             &src_p, &inbytesleft,
00158             &dest_p, &outbytesleft
00159         );
00160 
00161         if (conversions == size_t(-1)) {
00162             log_FUNC_m(ConvertToUTF8);
00163             int convError = errno;
00164             // Resets conversion handle to the initial state so
00165             // that next conversion won't have problems.
00166             iconv(m_toUTFiconvh, NULL, NULL, NULL, NULL);
00167             throw ivd_SysError(
00168                 convError, string("iconv(...): \'") + a_source + "\'");
00169         }
00170     }
00171     if (inbytesleft > 0) {
00172         log_FUNC_m(ConvertToUTF8);
00173         if (outbytesleft == 0) {
00174             throw ivd_InternalError(ie_NYI, "dest is too small.");
00175             // Buffer will be expanded and we retry
00176         }
00177         else {
00178             log_WRN_m(
00179                 "Incorrect input buffer when converting to UTF-8: " <<
00180                 a_source
00181             );
00182         }
00183     };
00184 
00185     UInt32_t tgtSize = dest.length() - outbytesleft;
00186     if (dest.length() >= outbytesleft) {
00187         dest.resize(tgtSize);
00188     }
00189     else {
00190         log_FUNC_m(ConvertToUTF8);
00191         ostringstream sstr;
00192         sstr <<"dest.length() is less than outbytesleft.  tgtSize = " << tgtSize
00193                         << ", dest.length() = " << dest.length()
00194                         << " outbytesleft = " << outbytesleft
00195                         << " source = " << cmn_HexDump(
00196                             a_source.c_str(), a_source.length(), 16, true)
00197                         << " dest str = " << dest;
00198         throw ivd_InternalError(ie_FATAL_ERROR, sstr.str());
00199     }
00200     return dest;
00201 
00202 #else
00203 
00204     // Windows: Dummy conversion (just copy data).
00205 
00206     return a_source;
00207 
00208 #endif
00209 }

Here is the call graph for this function:

Here is the caller graph for this function:

string cmn_UTF8Converter::ConvertFromUTF8 ( const string &  a_utf8  ) 

Definition at line 211 of file cmn_utf8.cpp.

References cmn_HexDump(), errno, ie_FATAL_ERROR, ie_NYI, log_FUNC_m, log_WRN_m, m_codeset, m_converterFromUTF8_x, m_fromUTFiconvh, NULL, and UTF8_c().

Referenced by cmn_UTF8ToLocale().

00211                                                               {
00212 
00213 #if IVD_POSIX_OS
00214 
00215     if (m_codeset == UTF8_c || a_utf8.empty()) {
00216         return a_utf8;
00217     }
00218 
00219     string dest((a_utf8.length()+1), '\0');
00220     char* src_p = const_cast<char*>(a_utf8.c_str());
00221     char* dest_p = const_cast<char*>(dest.data());
00222     size_t inbytesleft = a_utf8.length();
00223     size_t outbytesleft = dest.length();
00224 
00225     size_t conversions;
00226     {  // iconv must be under mutex
00227         cmn_MutexLock l(m_converterFromUTF8_x);
00228         conversions = iconv(m_fromUTFiconvh,
00229             &src_p, &inbytesleft,
00230             &dest_p, &outbytesleft);
00231 
00232         if (conversions == size_t(-1)) {
00233             log_FUNC_m(ConvertFromUTF8);
00234             int convError = errno;
00235             // Resets conversion handle to the initial state so
00236             // that next conversion won't have problems.
00237             iconv(m_fromUTFiconvh, NULL, NULL, NULL, NULL);
00238             throw ivd_SysError(
00239                 convError, string("iconv(...): \'") + a_utf8 + "\'");
00240         }
00241     }
00242 
00243     if (inbytesleft > 0) {
00244         log_FUNC_m(ConvertFromUTF8);
00245         if (outbytesleft == 0) {
00246             throw ivd_InternalError(ie_NYI, "dest is too small.");
00247         }
00248         else {
00249             log_WRN_m(
00250                 "Incorrect input buffer when converting from UTF-8: " <<
00251                 a_utf8
00252             );
00253         }
00254     }
00255 
00256     //
00257     // It is assumed that the data format contains zero-terminated strings.
00258     // Otherwise something is wrong.
00259     //
00260     UInt32_t convsize = dest.length() - outbytesleft;
00261 
00262     // Works properly for zero-terminated strings and prevents
00263     // accessing invalid memory for others.
00264     if (dest.length() >= outbytesleft) {
00265         dest.resize(convsize);
00266     }
00267     else {
00268         log_FUNC_m(ConvertFromUTF8);
00269         ostringstream sstr;
00270         sstr <<"dest.length() is less than outbytesleft.  convsize = " << convsize
00271                         << ", dest.length() = " << dest.length()
00272                         << " outbytesleft = " << outbytesleft
00273                         << " source = " << cmn_HexDump(
00274                             a_utf8.c_str(), a_utf8.length(), 16, true)
00275                         << " dest str = " << dest;
00276         throw ivd_InternalError(ie_FATAL_ERROR, sstr.str());
00277     }
00278 
00279     return dest;
00280 
00281 #else
00282 
00283     // Windows: Dummy conversion; just create a string from the buffer.
00284 
00285     return a_utf8;
00286 #endif
00287 
00288 }

Here is the call graph for this function:

Here is the caller graph for this function:


Member Data Documentation

cmn_UTF8Converter::log_CLASSID_m [private]

Definition at line 52 of file cmn_utf8.h.

string cmn_UTF8Converter::m_codeset [private]

Definition at line 55 of file cmn_utf8.h.

Referenced by cmn_UTF8Converter(), ConvertFromUTF8(), ConvertToUTF8(), and ~cmn_UTF8Converter().

cmn_Mutex cmn_UTF8Converter::m_converterToUTF8_x [private]

Definition at line 58 of file cmn_utf8.h.

Referenced by ConvertToUTF8().

cmn_Mutex cmn_UTF8Converter::m_converterFromUTF8_x [private]

Definition at line 59 of file cmn_utf8.h.

Referenced by ConvertFromUTF8().

iconv_t cmn_UTF8Converter::m_fromUTFiconvh [private]

Definition at line 60 of file cmn_utf8.h.

Referenced by cmn_UTF8Converter(), ConvertFromUTF8(), and ~cmn_UTF8Converter().

iconv_t cmn_UTF8Converter::m_toUTFiconvh [private]

Definition at line 61 of file cmn_utf8.h.

Referenced by cmn_UTF8Converter(), ConvertToUTF8(), and ~cmn_UTF8Converter().


The documentation for this class was generated from the following files:

- cmn_utf8.h
- cmn_utf8.cpp

Generated on Mon Feb 27 19:06:02 2012 for OPENARCHIVE by  doxygen 1.5.6