/* AbiSource Program Utilities * Copyright (C) 1998 AbiSource, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <math.h> #include <ctype.h> #include "ut_types.h" #include "ut_misc.h" #include "ut_assert.h" #include "ut_string.h" #include "ut_debugmsg.h" #include "ut_growbuf.h" #ifdef BIDI_ENABLED #include "fribidi.h" #endif /* If WITHOUT_MB is defined, UT_Mbtowc and UT_Wctomb won't be used. I don't there there could be reason for defining WITHOUT_MB, since UT_Mbtowc and UT_Wctomb use iconv internally, which works fine everywhere. */ #ifndef WITHOUT_MB #include "ut_mbtowc.h" #include "ut_wctomb.h" #endif #include "xap_EncodingManager.h" #define UT_STRING_CPP #include "ut_case.h" #undef UT_STRING_CPP // really simple way to determine if something is a hyperlink // probably not 100% correct, but better than !stricmp(http://), ... bool UT_isUrl ( const char * szName ) { if (!szName) return false; int len = strlen ( szName ); int mailto_len = strlen ( "mailto:" ); if ( NULL != strstr ( szName, "://") ) // dumb check, but probably true return true; else if ( ( len >= mailto_len ) && !UT_XML_strnicmp ( "mailto:", szName, mailto_len ) ) return true; else return false; } /* * This is cut & pasted from glib 1.3 (c) RedHat * We need this for to convert UTF16 to UTF8 */ int unichar_to_utf8 (int c, unsigned char *outbuf) { size_t len = 0; int first; int i; if (c < 0x80) { first = 0; len = 1; } else if (c < 0x800) { first = 0xc0; len = 2; } else if (c < 0x10000) { first = 0xe0; len = 3; } else if (c < 0x200000) { first = 0xf0; len = 4; } else if (c < 0x4000000) { first = 0xf8; len = 5; } else { first = 0xfc; len = 6; } if (outbuf) { for (i = len - 1; i > 0; --i) { outbuf[i] = (c & 0x3f) | 0x80; c >>= 6; } outbuf[0] = c | first; } return len; } ////////////////////////////////////////////////////////////////// // char * UT_catPathname(const char * szPath, const char * szFile); // is defined in platform-specific code. ////////////////////////////////////////////////////////////////// char * UT_strdup(const char * szSource) { UT_ASSERT(szSource); int len = strlen(szSource)+1; if(char * ret = (char *)UT_calloc(len, sizeof(char))) return((char *)memcpy(ret, szSource, len)); else return(NULL); } UT_sint32 UT_stricmp(const char * s1, const char * s2) { UT_ASSERT(s1); UT_ASSERT(s2); // Lifted from glibc. Looks better (in a constant-factor sort of way) // than what we had before. Ideally this should be per-platform. const unsigned char *p1 = (const unsigned char *) s1; const unsigned char *p2 = (const unsigned char *) s2; unsigned char c1, c2; if (s1 == s2) return 0; do { c1 = tolower (*p1++); c2 = tolower (*p2++); if (c1 == '\0') break; } while (c1 == c2); return c1 - c2; } // should really be a size_t, but that might break compilation on weird // platforms. I don't know. UT_sint32 UT_strnicmp(const char *s1, const char *s2, int n) { UT_ASSERT(s1); UT_ASSERT(s2); const unsigned char *p1 = (const unsigned char *) s1; const unsigned char *p2 = (const unsigned char *) s2; unsigned char c1, c2; if (p1 == p2 || n == 0) return 0; do { c1 = tolower (*p1++); c2 = tolower (*p2++); if (c1 == '\0' || c1 != c2) return c1 - c2; } while (--n > 0); return c1 - c2; } bool UT_cloneString(char *& rszDest, const char * szSource) { if (szSource && *szSource) { UT_uint32 length = strlen(szSource) + 1; rszDest = (char *)UT_calloc(length,sizeof(char)); if (!rszDest) return false; memmove(rszDest,szSource,length*sizeof(char)); } else rszDest = NULL; return true; } bool UT_replaceString(char *& rszDest, const char * szSource) { if (rszDest) free(rszDest); rszDest = NULL; return UT_cloneString(rszDest,szSource); } UT_uint32 UT_XML_strlen(const XML_Char * sz) { if (!sz || !*sz) return 0; UT_uint32 k = 0; while (sz[k]) k++; return k; } // Is this function implemented somewhere else? bool UT_XML_cloneList(XML_Char **& rszDest, const XML_Char ** szSource) { if (!szSource) return true; XML_Char ** newmemory = (XML_Char **) UT_calloc(UT_pointerArrayLength((void **) szSource) + 1, sizeof(XML_Char *)); if (newmemory == NULL) return false; memcpy((void *) newmemory, (const void *) szSource, UT_pointerArrayLength((void **) szSource ) * sizeof(XML_Char *)); rszDest = newmemory; return true; } bool UT_XML_replaceList(XML_Char **& rszDest, const XML_Char ** szSource) { FREEP(rszDest); return UT_XML_cloneList(rszDest, szSource); } bool UT_XML_cloneString(XML_Char *& rszDest, const XML_Char * szSource) { UT_uint32 length = UT_XML_strlen(szSource) + 1; rszDest = (XML_Char *)UT_calloc(length,sizeof(XML_Char)); if (!rszDest) return false; memmove(rszDest,szSource,length*sizeof(XML_Char)); return true; } UT_sint32 UT_XML_stricmp(const XML_Char * sz1, const XML_Char * sz2) { UT_ASSERT(sizeof(char) == sizeof(XML_Char)); return UT_stricmp((const char*)sz1,(const char*)sz2); } UT_sint32 UT_XML_strnicmp(const XML_Char * sz1, const XML_Char * sz2, const UT_uint32 n) { UT_ASSERT(sizeof(char) == sizeof(XML_Char)); return UT_strnicmp((const char*)sz1,(const char*)sz2,n); } UT_sint32 UT_XML_strcmp(const XML_Char * sz1, const XML_Char * sz2) { UT_ASSERT(sizeof(char) == sizeof(XML_Char)); return strcmp((const char*)sz1,(const char*)sz2); } bool UT_XML_cloneNoAmpersands(XML_Char *& rszDest, const XML_Char * szSource) { if (szSource == NULL) return false; UT_uint32 length = UT_XML_strlen(szSource) + 1; rszDest = (XML_Char *) UT_calloc(length, sizeof(XML_Char)); if (!rszDest) return false; const XML_Char * o = szSource; XML_Char * n = rszDest; while (*o != 0) { if (*o != '&') { *n = *o; n++; } o++; } return true; } /* This uses the clone no ampersands but dumps into a static buffer */ XML_Char *UT_XML_transNoAmpersands(const XML_Char * szSource) { static XML_Char *rszDestBuffer = NULL; static UT_uint32 iDestBufferLength = 0; if (szSource == NULL) return NULL; UT_uint32 length = UT_XML_strlen(szSource) + 1; if (length > iDestBufferLength) { if (rszDestBuffer && iDestBufferLength) { free(rszDestBuffer); } iDestBufferLength = 0; rszDestBuffer = (XML_Char *) UT_calloc(length, sizeof(XML_Char)); if (!rszDestBuffer) return NULL; iDestBufferLength = length; } memset(rszDestBuffer, 0, iDestBufferLength); const XML_Char * o = szSource; XML_Char * n = rszDestBuffer; while (*o != 0) { if (*o != '&') { *n = *o; n++; } o++; } return rszDestBuffer; } // TODO : put a better strncpy here; resolve to platform version if available UT_uint32 UT_XML_strncpy(XML_Char * szDest, UT_uint32 nLen, const XML_Char * szSource) { if (!szSource) return 0; UT_ASSERT(szDest); UT_uint32 i = 0; while (i < nLen) { szDest[i] = szSource[i]; // if we just wrote NULL, return if (szDest[i] == 0) return i; i++; } return i; } UT_uint32 UT_pointerArrayLength(void ** array) { if (! (array && *array)) return 0; UT_uint32 i = 0; while (array[i]) i++; return i; } /* The following code is from the GNU C library, version 2.0.6. It has been reformatted and tweaked to do Unicode strstrs. All this licensing stuff is kinda ugly, but I didn't want to risk merging the licensing for fear I might break some law. */ /* Copyright (C) 1994, 1996 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with the GNU C Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* * My personal strstr() implementation that beats most other algorithms. * Until someone tells me otherwise, I assume that this is the * fastest implementation of strstr() in C. * I deliberately chose not to comment it. You should have at least * as much fun trying to understand it, as I had to write it :-). * * Stephen R. van den Berg, berg@pool.informatik.rwth-aachen.de */ typedef UT_UCSChar chartype; UT_UCSChar * UT_UCS_strstr(const UT_UCSChar * phaystack, const UT_UCSChar * pneedle) { register const UT_UCSChar *haystack, *needle; register chartype b, c; haystack = (const UT_UCSChar *) phaystack; needle = (const UT_UCSChar *) pneedle; b = *needle; if (b != '\0') { haystack--; /* possible ANSI violation */ do { c = *++haystack; if (c == '\0') goto ret0; } while (c != b); c = *++needle; if (c == '\0') goto foundneedle; ++needle; goto jin; for (;;) { register chartype a; register const UT_UCSChar *rhaystack, *rneedle; do { a = *++haystack; if (a == '\0') goto ret0; if (a == b) break; a = *++haystack; if (a == '\0') goto ret0; shloop: ; // need a statement here for EGCS 1.1.1 to accept it } while (a != b); jin: a = *++haystack; if (a == '\0') goto ret0; if (a != c) goto shloop; rhaystack = haystack-- + 1; rneedle = needle; a = *rneedle; if (*rhaystack == a) do { if (a == '\0') goto foundneedle; ++rhaystack; a = *++needle; if (*rhaystack != a) break; if (a == '\0') goto foundneedle; ++rhaystack; a = *++needle; } while (*rhaystack == a); needle = rneedle; /* took the register-poor approach */ if (a == '\0') break; } } foundneedle: return (UT_UCSChar *) haystack; ret0: return 0; } UT_sint32 UT_UCS_strcmp(const UT_UCSChar* left, const UT_UCSChar* right) { UT_ASSERT(left); UT_ASSERT(right); while (*left && *right) { if (*left < *right) { return -1; } if (*left > *right) { return 1; } left++; right++; } if (*left) { return -1; } else if (*right) { return 1; } else { return 0; } } /* Latin-1 Unicode case-insensitive string comparison and casing done by Pierre Sarrazin <ps@cam.org>. */ /** * Convert a given character to uppercase */ UT_UCSChar UT_UCS_toupper(UT_UCSChar c) { if (c < 128) // in ASCII range return toupper(c); #if 0 if (c >= 256) return c; #else if (XAP_EncodingManager::get_instance()->single_case()) return c; /*let's trust libc! -- does not seem to work :(*/ #if 0 UT_UCSChar local = XAP_EncodingManager::get_instance()->try_UToNative(c); if (!local || local>0xff) return c; local = XAP_EncodingManager::get_instance()->try_nativeToU(toupper(local)); return local ? local : c; #else case_entry * letter = (case_entry *)bsearch(&c, &case_table, NrElements(case_table),sizeof(case_entry),s_cmp_case); if(!letter || letter->type == 1) return c; return letter->other; #endif #endif } /* Converts the given character to lowercase if it is an uppercase letter. Returns it unchanged if it is not. This function created by Pierre Sarrazin 1999-02-06 */ UT_UCSChar UT_UCS_tolower(UT_UCSChar c) { if (c < 128) return tolower(c); #if 0 if (c >= 256) return c; /* Unicode but not Latin-1 - don't know what to do */ if (c >= 0xC0 && c <= 0xDE && c != 0xD7) /* uppercase Latin-1 chars */ return c + 0x20; return c; #else if (XAP_EncodingManager::get_instance()->single_case()) return c; /*let's trust libc!*/ #if 0 UT_UCSChar local = XAP_EncodingManager::get_instance()->try_UToNative(c); if (!local || local>0xff) return c; local = XAP_EncodingManager::get_instance()->try_nativeToU(tolower(local)); return local ? local : c; #else case_entry * letter = (case_entry *)bsearch(&c, &case_table, NrElements(case_table),sizeof(case_entry),s_cmp_case); if(!letter || letter->type == 0) return c; return letter->other; #endif #endif } /* Characters are converted to lowercase (if applicable) when they are read from the needle or the haystack. See UT_UCS_tolower(). This function created by Pierre Sarrazin 1999-02-06 */ UT_UCSChar * UT_UCS_stristr(const UT_UCSChar * phaystack, const UT_UCSChar * pneedle) { register const UT_UCSChar *haystack, *needle; register chartype b, c; haystack = (const UT_UCSChar *) phaystack; needle = (const UT_UCSChar *) pneedle; b = UT_UCS_tolower(*needle); if (b != '\0') { haystack--; /* possible ANSI violation */ do { c = UT_UCS_tolower(*++haystack); if (c == '\0') goto ret0; } while (c != b); c = UT_UCS_tolower(*++needle); if (c == '\0') goto foundneedle; ++needle; goto jin; for (;;) { register chartype a; register const UT_UCSChar *rhaystack, *rneedle; do { a = UT_UCS_tolower(*++haystack); if (a == '\0') goto ret0; if (a == b) break; a = UT_UCS_tolower(*++haystack); if (a == '\0') goto ret0; shloop: ; // need a statement here for EGCS 1.1.1 to accept it } while (a != b); jin: a = UT_UCS_tolower(*++haystack); if (a == '\0') goto ret0; if (a != c) goto shloop; rhaystack = haystack-- + 1; rneedle = needle; a = UT_UCS_tolower(*rneedle); if (UT_UCS_tolower(*rhaystack) == a) do { if (a == '\0') goto foundneedle; ++rhaystack; a = UT_UCS_tolower(*++needle); if (UT_UCS_tolower(*rhaystack) != a) break; if (a == '\0') goto foundneedle; ++rhaystack; a = UT_UCS_tolower(*++needle); } while (UT_UCS_tolower(*rhaystack) == a); needle = rneedle; /* took the register-poor approach */ if (a == '\0') break; } } foundneedle: return (UT_UCSChar *) haystack; ret0: return 0; } /****************************************************************************/ UT_uint32 UT_UCS_strlen(const UT_UCSChar * string) { UT_uint32 i; for(i = 0; *string != 0; string++, i++) ; return i; } UT_UCSChar * UT_UCS_strcpy(UT_UCSChar * dest, const UT_UCSChar * src) { UT_ASSERT(dest); UT_ASSERT(src); UT_UCSChar * d = dest; UT_UCSChar * s = (UT_UCSChar *) src; while (*s != 0) *d++ = *s++; *d = 0; return dest; } // TODO shouldn't all of the 'char *' strings be 'unsigned char *' strings ?? UT_UCSChar * UT_UCS_strcpy_char(UT_UCSChar * dest, const char * src) { UT_ASSERT(dest); UT_ASSERT(src); UT_UCSChar * d = dest; unsigned char * s = (unsigned char *) src; #ifndef WITHOUT_MB static UT_Mbtowc m; wchar_t wc; #endif while (*s != 0) { #ifdef WITHOUT_MB *d++ = *s++; #else if(m.mbtowc(wc,*s))*d++=wc; s++; #endif } *d = 0; return dest; } char * UT_UCS_strcpy_to_char(char * dest, const UT_UCSChar * src) { UT_ASSERT(dest); UT_ASSERT(src); char * d = dest; UT_UCSChar * s = (UT_UCSChar *) src; #ifndef WITHOUT_MB UT_Wctomb w; #endif while (*s != 0) { #ifdef WITHOUT_MB *d++ = *s++; #else int length; w.wctomb_or_fallback(d,length,*s++); d+=length; #endif } *d = 0; return dest; } bool UT_UCS_cloneString(UT_UCSChar ** dest, const UT_UCSChar * src) { UT_uint32 length = UT_UCS_strlen(src) + 1; *dest = (UT_UCSChar *)UT_calloc(length,sizeof(UT_UCSChar)); if (!*dest) return false; memmove(*dest,src,length*sizeof(UT_UCSChar)); return true; } bool UT_UCS_cloneString_char(UT_UCSChar ** dest, const char * src) { #ifdef WITHOUT_MB UT_uint32 length = strlen(src) + 1; *dest = (UT_UCSChar *)UT_calloc(length,sizeof(UT_UCSChar)); if (!*dest) return false; UT_UCS_strcpy_char(*dest, src); return true; #else UT_uint32 length = MB_LEN_MAX*strlen(src) + 1; *dest = (UT_UCSChar *)UT_calloc(length,sizeof(UT_UCSChar)); if (!*dest) return false; UT_UCSChar * d= *dest; unsigned char * s = (unsigned char *) src; UT_Mbtowc m; wchar_t wc; while (*s != 0) { if(m.mbtowc(wc,*s))*d++=wc; s++; } *d = 0; return true; #endif } // convert each character in a string to ASCII uppercase char * UT_upperString(char * string) { if (!string) return 0; for (char * ch = string; *ch != 0; ch++) *ch = toupper(*ch); return string; } // convert each character in a string to ASCII lowercase char * UT_lowerString(char * string) { if (!string) return 0; for (char * ch = string; *ch != 0; ch++) *ch = tolower(*ch); return string; } /////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////// UT_UCSChar UT_decodeUTF8char(const XML_Char * p, UT_uint32 len) { UT_UCSChar ucs, ucs1, ucs2, ucs3; switch (len) { case 2: ucs1 = (UT_UCSChar)(p[0] & 0x1f); ucs2 = (UT_UCSChar)(p[1] & 0x3f); ucs = (ucs1 << 6) | ucs2; return ucs; case 3: ucs1 = (UT_UCSChar)(p[0] & 0x0f); ucs2 = (UT_UCSChar)(p[1] & 0x3f); ucs3 = (UT_UCSChar)(p[2] & 0x3f); ucs = (ucs1 << 12) | (ucs2 << 6) | ucs3; return ucs; default: UT_ASSERT(UT_SHOULD_NOT_HAPPEN); return 0; } } void UT_decodeUTF8string(const XML_Char * pString, UT_uint32 len, UT_GrowBuf * pResult) { // decode the given string [ p[0]...p[len] ] and append to the given growbuf. UT_ASSERT(sizeof(XML_Char) == sizeof(UT_Byte)); UT_Byte * p = (UT_Byte *)pString; // XML_Char is signed... int bytesInSequence = 0; int bytesExpectedInSequence = 0; XML_Char buf[5]; for (UT_uint32 k=0; k<len; k++) { if (p[k] < 0x80) // plain us-ascii part of latin-1 { UT_ASSERT(bytesInSequence == 0); UT_UCSChar c = p[k]; pResult->append(&c,1); } else if ((p[k] & 0xf0) == 0xf0) // lead byte in 4-byte surrogate pair { // surrogate pairs are defined in section 3.7 of the // unicode standard version 2.0 as an extension // mechanism for rare characters in future extensions // of the unicode standard. UT_ASSERT(bytesInSequence == 0); UT_ASSERT(UT_NOT_IMPLEMENTED); } else if ((p[k] & 0xe0) == 0xe0) // lead byte in 3-byte sequence { UT_ASSERT(bytesInSequence == 0); bytesExpectedInSequence = 3; buf[bytesInSequence++] = p[k]; } else if ((p[k] & 0xc0) == 0xc0) // lead byte in 2-byte sequence { UT_ASSERT(bytesInSequence == 0); bytesExpectedInSequence = 2; buf[bytesInSequence++] = p[k]; } else if ((p[k] & 0x80) == 0x80) // trailing byte in multi-byte sequence { UT_ASSERT(bytesInSequence > 0); buf[bytesInSequence++] = p[k]; if (bytesInSequence == bytesExpectedInSequence) // final byte in multi-byte sequence { UT_UCSChar c = UT_decodeUTF8char(buf,bytesInSequence); pResult->append(&c,1); bytesInSequence = 0; bytesExpectedInSequence = 0; } } } } #if 1 // i didn't get a chance to test this -- jeff XML_Char* UT_encodeUTF8char(UT_UCSChar cIn) { // convert the given unicode character into a UTF8 sequence // return pointer to static buffer. static XML_Char sBuf[10]; memset(sBuf,0,sizeof(sBuf)); if (cIn < 0x0080) { sBuf[0] = (XML_Char)cIn; } else if (cIn < 0x0800) { // 110xxxxx 10xxxxxx sBuf[0] = 0x00c0 | ((cIn >> 6) & 0x001f); sBuf[1] = 0x0080 | ( cIn & 0x003f); } else { // 1110xxxx 10xxxxxx 10xxxxxx sBuf[0] = 0x00e0 | ((cIn >> 12) & 0x000f); sBuf[1] = 0x0080 | ((cIn >> 6) & 0x003f); sBuf[2] = 0x0080 | ( cIn & 0x003f); } return sBuf; } #endif // --jeff bool UT_isSmartQuotableCharacter(UT_UCSChar c) { // TODO: this is anglo-centric; really need a locale argument or // TODO: something to get smart quote rules for the rest of the world bool result; switch (c) { case '"': case '`': case '\'': result = true; break; default: result = false; break; } return (result); } bool UT_isSmartQuotedCharacter(UT_UCSChar c) { // TODO: this is anglo-centric; really need a locale argument or // TODO: something to get smart quote rules for the rest of the world bool result; switch (c) { case UCS_LQUOTE: case UCS_RQUOTE: case UCS_LDBLQUOTE: case UCS_RDBLQUOTE: result = true; break; default: result = false; break; } return (result); } bool UT_UCS_isupper(UT_UCSChar c) { #if 0 if (XAP_EncodingManager::get_instance()->single_case()) return 1;/* FIXME: anyone has better idea? */ UT_UCSChar local = XAP_EncodingManager::get_instance()->try_UToNative(c); return local && local <0xff ? isupper(local)!=0 : 0; #else if(c < 127) return isupper(c)!=0; case_entry * letter = (case_entry *)bsearch(&c, &case_table, NrElements(case_table),sizeof(case_entry),s_cmp_case); if(letter && letter->type == 1) return true; return false; #endif }; bool UT_UCS_islower(UT_UCSChar c) { #if 0 if (XAP_EncodingManager::get_instance()->single_case()) return 1;/* FIXME: anyone has better idea? */ UT_UCSChar local = XAP_EncodingManager::get_instance()->try_UToNative(c); return local && local <0xff ? islower(local)!=0 : 0; #else if(c < 127) return islower(c)!=0; case_entry * letter = (case_entry *)bsearch(&c, &case_table, NrElements(case_table),sizeof(case_entry),s_cmp_case); if(!letter || letter->type == 0) return true; return false; #endif }; bool UT_UCS_isspace(UT_UCSChar c) { // the whitespace table is small, so use linear search for (UT_uint32 i = 0; i < NrElements(whitespace_table); i++) { if(whitespace_table[i].high < c) continue; if(whitespace_table[i].low <= c) return true; // if we got here, then low > c return false; } return false; }; /* TODO: proper Unicode implementation required This function is not working -- it assumes that c can be translated to the native encoding, which does not have to be always true. We will treat all non-translatable characters as alpha characters, because more often than not this will be true, but we need a proper Unicode implementation here */ bool UT_UCS_isalpha(UT_UCSChar c) { #ifdef BIDI_ENABLED FriBidiCharType type = fribidi_get_type(c); return FRIBIDI_IS_LETTER(type); #else UT_UCSChar local = XAP_EncodingManager::get_instance()->try_UToNative(c); if(!local) return true; xxx_UT_DEBUGMSG(("UT_UCS_isalpha: c 0x%x, local 0x%x\n",c, loacal)); return local && local < 0xff ? isalpha(local)!=0 : local > 0xff /* we consider it alpha if it's > 0xff */; #endif }; bool UT_UCS_isSentenceSeparator(UT_UCSChar c) { switch(c) { case '.': return true; default: return false; } } /* this one prints floating point value but using dot as fractional serparator independent of the current locale's settings. */ const char* std_size_string(float f) { static char string[10]; int i=(int)f; if(f-i<0.1) sprintf(string, "%d", i); else { int fr = int(10*(f-i)); sprintf(string,"%d.%d", i,fr); }; return string; }; #ifdef BIDI_ENABLED /* copies exactly n-chars from src to dest; NB! does not check for 00 i src */ UT_UCSChar * UT_UCS_strncpy(UT_UCSChar * dest, const UT_UCSChar * src, UT_uint32 n) { UT_ASSERT(dest); UT_ASSERT(src); UT_UCSChar * d = dest; UT_UCSChar * s = (UT_UCSChar *) src; for (; d < (UT_UCSChar *)dest + n;) *d++ = *s++; *d = NULL; return dest; } /* reverses str of len n; used by BiDi which always knows the len of string to process thus we can save ourselves searching for the 00 */ UT_UCSChar * UT_UCS_strnrev(UT_UCSChar * src, UT_uint32 n) { UT_UCSChar t; UT_uint32 i; for(i = 0; i < n/2; i++) { t = *(src + i); *(src + i) = *(src + n - i - 1); //-1 so that we do not move the 00 *(src + n - i - 1) = t; } return src; } #endif