/* AbiSource Program Utilities * Copyright (C) 1998-2000 AbiSource, Inc. * Copyright (C) 2003 Tomas Frydrych * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ #include #include #include #include "ut_mbtowc.h" #include "ut_locale.h" UT_UCS2_mbtowc::Converter::Converter (const char * from_charset) : m_cd(UT_iconv_open (ucs2Internal (), from_charset)) { // } UT_UCS2_mbtowc::Converter::~Converter () { if (UT_iconv_isValid (m_cd)) UT_iconv_close (m_cd); } void UT_UCS2_mbtowc::Converter::initialize () { UT_iconv_reset (m_cd); } void UT_UCS2_mbtowc::initialize (bool clear) { m_converter->initialize (); if (clear) m_bufLen = 0; } UT_UCS2_mbtowc::UT_UCS2_mbtowc () : m_converter(new Converter(UT_LocaleInfo::system().getEncoding().utf8_str())), m_bufLen(0) { // } UT_UCS2_mbtowc::UT_UCS2_mbtowc (const char * from_charset) : m_converter(new Converter(from_charset)), m_bufLen(0) { // } UT_UCS2_mbtowc::~UT_UCS2_mbtowc () { delete m_converter; } void UT_UCS2_mbtowc::setInCharset (const char * from_charset) { Converter * converter = new Converter(from_charset); if (converter) { delete m_converter; m_converter = converter; } } int UT_UCS2_mbtowc::mbtowc (UT_UCS2Char & wc, char mb) { if(++m_bufLen > iMbLenMax) { initialize (); return 0; } m_buf[m_bufLen-1] = mb; const char * inptr = m_buf; UT_UCS2Char ucs2; char * outptr = reinterpret_cast(&ucs2); size_t inlen = m_bufLen; size_t outlen = sizeof (UT_UCS2Char); const UT_iconv_t cd = m_converter->cd (); size_t len = UT_iconv (const_cast(cd), &inptr, &inlen, &outptr, &outlen); if (len != (size_t)-1) { wc = ucs2; m_bufLen = 0; return 1; } if (errno == EINVAL) { /* reset iconv, pointer might be messed up; need more chars... */ initialize (false); } else { initialize (true); /* wrong seq */ } return 0; } UT_UCS4_mbtowc::Converter::Converter (const char * from_charset) : m_cd(UT_iconv_open (UCS_INTERNAL, from_charset)) { // } UT_UCS4_mbtowc::Converter::~Converter () { if (UT_iconv_isValid (m_cd)) UT_iconv_close (m_cd); } void UT_UCS4_mbtowc::Converter::initialize () { UT_iconv_reset (m_cd); } void UT_UCS4_mbtowc::initialize (bool clear) { m_converter->initialize (); if (clear) m_bufLen = 0; } UT_UCS4_mbtowc::UT_UCS4_mbtowc () : m_converter(new Converter(UT_LocaleInfo::system().getEncoding().utf8_str())), m_bufLen(0) { // } UT_UCS4_mbtowc::UT_UCS4_mbtowc (const char * from_charset) : m_converter(new Converter(from_charset)), m_bufLen(0) { // } UT_UCS4_mbtowc::~UT_UCS4_mbtowc () { delete m_converter; } void UT_UCS4_mbtowc::setInCharset (const char * from_charset) { Converter * converter = new Converter(from_charset); if (converter) { delete m_converter; m_converter = converter; } } int UT_UCS4_mbtowc::mbtowc (UT_UCS4Char & wc, char mb) { if(++m_bufLen > iMbLenMax) { initialize (); return 0; } m_buf[m_bufLen-1] = mb; const char * inptr = m_buf; // need eight bytes in order for the hack below to work (see below) UT_UCS4Char ucs4[2]; char * outptr = reinterpret_cast(&ucs4); size_t inlen = m_bufLen; size_t outlen = sizeof (ucs4[0]); const UT_iconv_t cd = m_converter->cd (); size_t len = UT_iconv (const_cast(cd), &inptr, &inlen, &outptr, &outlen); // This is a nasty hack I would prefer not to have to do // here. Sometimes iconv does not write the translated value out, // and it sits somewhere internatlly until the next call; on the // next call it issues the previous character and the new one sits // somewhere in. I discovered this in the rtf importer when trying // to hanlde cp1255, and it is caused by the fact that some // combinations of characters in 1255 can translate into a // precombined Unicode glyph, and iconv is waiting for the next // character. I flush it out by feeding it a NULL character. // (The output buffer in that case needs to be 8 bytes, so that the // iconv can output both the cached charcter and the NULL, otherwise // it will complain.) // Tomas, May 2, 2003 if(len == 0 && outlen == sizeof(ucs4[0])) { char c = 0; inptr = &c; inlen = 1; outlen = sizeof (ucs4); len = UT_iconv (const_cast(cd), &inptr, &inlen, &outptr, &outlen); } if (len != (size_t)-1) { wc = ucs4[0]; m_bufLen = 0; return 1; } if (errno == EINVAL) { /* reset iconv, pointer might be messed up; need more chars... */ initialize (false); } else { initialize (true); /* wrong seq */ } return 0; }