/* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */ /* AbiWord * Copyright (C) 2003 Francis James Franklin * Copyright (C) 2003 AbiSource, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ #include #include #include #include "ut_exception.h" #include "ut_IntStrMap.h" static bool key_lt (const char * key, UT_uint32 key_length, const UT_UTF8String & key2); static bool key_gt (const char * key, UT_uint32 key_length, const UT_UTF8String & key2); static bool key_eq (const char * key, UT_uint32 key_length, const UT_UTF8String & key2); static void s_pass_whitespace (const char *& csstr); static const char * s_pass_name (const char *& csstr, char end); static const char * s_pass_value (const char *& csstr); static const char * s_pass_string (const char *& csstr); UT_IntStrMap::UT_IntStrMap () : m_pair(0), m_pair_count(0), m_pair_max(0), m_index(0), m_increment(8) { // } UT_IntStrMap::UT_IntStrMap (UT_uint32 increment) : m_pair(0), m_pair_count(0), m_pair_max(0), m_index(0), m_increment(increment) { if (m_increment == 0) m_increment = 8; } UT_IntStrMap::~UT_IntStrMap () { clear (); if (m_pair) free (m_pair); } void UT_IntStrMap::clear () { for (UT_uint32 i = 0; i < m_pair_count; i++) delete m_pair[i].value; m_pair_count = 0; } /* responsibility for value passes here */ bool UT_IntStrMap::ins (UT_sint32 key, UT_UTF8String * value) { if (value == 0) return false; UT_uint32 index; if (lookup (key, index)) { delete m_pair[index].value; m_pair[index].value = value; return true; } if (!grow ()) return false; if (index < m_pair_count) { memmove (m_pair + index + 1, m_pair + index, (m_pair_count - index) * sizeof (IntStr)); } ++m_pair_count; m_pair[index].key = key; m_pair[index].value = value; return true; } bool UT_IntStrMap::ins (UT_sint32 key, const char * value) { if (value == 0) return false; UT_uint32 index; if (lookup (key, index)) { *(m_pair[index].value) = value; return true; } if (!grow ()) return false; UT_UTF8String * utf8str = 0; UT_TRY { utf8str = new UT_UTF8String(value); } UT_CATCH(...) { utf8str = 0; } if (utf8str == 0) return false; return ins (key, utf8str); } /* value is deleted */ bool UT_IntStrMap::del (UT_sint32 key) { UT_uint32 index; if (!lookup (key, index)) return false; delete m_pair[index].value; if (index < --m_pair_count) { memmove (m_pair + index, m_pair + index + 1, (m_pair_count - index) * sizeof (IntStr)); } return true; } /* value is passed back */ bool UT_IntStrMap::del (UT_sint32 key, UT_UTF8String *& value) { UT_uint32 index; if (!lookup (key, index)) return false; value = m_pair[index].value; if (index < --m_pair_count) { memmove (m_pair + index, m_pair + index + 1, (m_pair_count - index) * sizeof (IntStr)); } return true; } bool UT_IntStrMap::lookup (UT_sint32 key, UT_uint32 & index) { if (m_index < m_pair_count) if (key == m_pair[m_index].key) { index = m_index; return true; } if (m_pair_count == 0) { index = 0; return false; } if (key <= m_pair[0].key) { index = 0; return (key == m_pair[0].key); } if (key > m_pair[m_pair_count-1].key) { index = m_pair_count; return false; } if (m_index < m_pair_count) { if ((key < m_pair[m_index].key) && m_index) if (key > m_pair[m_index-1].key) { index = m_index; return false; } } else { m_index = m_pair_count / 2; if (key == m_pair[m_index].key) { index = m_index; return true; } } bool bFound = false; UT_uint32 min_index = 0; UT_uint32 max_index = m_pair_count; if (key > m_pair[m_index].key) min_index = m_index; else max_index = m_index; while (true) { if (max_index - min_index == 1) { m_index = max_index; break; } m_index = min_index + (max_index - min_index) / 2; if (key == m_pair[m_index].key) { bFound = true; break; } if (key > m_pair[m_index].key) min_index = m_index; else max_index = m_index; } index = m_index; return bFound; } bool UT_IntStrMap::grow () { if (m_pair_count < m_pair_max) return true; if (m_pair == 0) { m_pair = (IntStr *) malloc (m_increment * sizeof (IntStr)); if (m_pair == 0) return false; m_pair_max = m_increment; } else { IntStr * more = (IntStr *) realloc (m_pair, (m_pair_max + m_increment) * sizeof (IntStr)); if (more == 0) return false; m_pair = more; m_pair_max += m_increment; } return true; } UT_NumberMap::UT_NumberMap (UT_sint32 default_value, UT_uint32 increment) : m_pair(0), m_pair_count(0), m_pair_max(0), m_index(0), m_increment(increment), m_default_value(default_value) { // } UT_NumberMap::~UT_NumberMap () { clear (); if (m_pair) free (m_pair); } void UT_NumberMap::clear () { for (UT_uint32 i = 0; i < m_pair_count; i++) delete m_pair[i].key; m_pair_count = 0; } bool UT_NumberMap::ins (const UT_UTF8String & key, UT_sint32 value) { UT_uint32 index; if (lookup (key, index)) { m_pair[index].value = value; return true; } if (!grow ()) return false; UT_UTF8String * utf8str = 0; UT_TRY { utf8str = new UT_UTF8String(key); } UT_CATCH(...) { utf8str = 0; } if (utf8str == 0) return false; if (index < m_pair_count) { memmove (m_pair + index + 1, m_pair + index, (m_pair_count - index) * sizeof (NumberStr)); } ++m_pair_count; m_pair[index].key = utf8str; m_pair[index].value = value; return true; } /* returns false if no such key-value */ bool UT_NumberMap::del (const char * key) { UT_uint32 index; if (!lookup (key, index)) return false; delete m_pair[index].key; if (index < --m_pair_count) { memmove (m_pair + index, m_pair + index + 1, (m_pair_count - index) * sizeof (NumberStr)); } return true; } bool UT_NumberMap::lookup (const char * key, UT_uint32 & index) { return key ? lookup (key, strlen (key), index) : false; } bool UT_NumberMap::lookup (const UT_UTF8String & key, UT_uint32 & index) { return lookup (key.utf8_str (), key.byteLength (), index); } bool UT_NumberMap::lookup (const char * key, UT_uint32 key_length, UT_uint32 & index) { if (m_index < m_pair_count) if (key_eq (key, key_length, *m_pair[m_index].key)) { index = m_index; return true; } if (m_pair_count == 0) { index = 0; return false; } if (!key_gt (key, key_length, *m_pair[0].key)) { index = 0; return key_eq (key, key_length, *m_pair[0].key); } if (key_gt (key, key_length, *m_pair[m_pair_count-1].key)) { index = m_pair_count; return false; } if (m_index < m_pair_count) { if (key_lt (key, key_length, *m_pair[m_index].key) && m_index) if (key_gt (key, key_length, *m_pair[m_index-1].key)) { index = m_index; return false; } } else { m_index = m_pair_count / 2; if (key_eq (key, key_length, *m_pair[m_index].key)) { index = m_index; return true; } } bool bFound = false; UT_uint32 min_index = 0; UT_uint32 max_index = m_pair_count; if (key_gt (key, key_length, *m_pair[m_index].key)) min_index = m_index; else max_index = m_index; while (true) { if (max_index - min_index == 1) { m_index = max_index; break; } m_index = min_index + (max_index - min_index) / 2; if (key_eq (key, key_length, *m_pair[m_index].key)) { bFound = true; break; } if (key_gt (key, key_length, *m_pair[m_index].key)) min_index = m_index; else max_index = m_index; } index = m_index; return bFound; } bool UT_NumberMap::grow () { if (m_pair_count < m_pair_max) return true; if (m_pair == 0) { m_pair = (NumberStr *) malloc (m_increment * sizeof (NumberStr)); if (m_pair == 0) return false; m_pair_max = m_increment; } else { NumberStr * more = (NumberStr *) realloc (m_pair, (m_pair_max + m_increment) * sizeof (NumberStr)); if (more == 0) return false; m_pair = more; m_pair_max += m_increment; } return true; } UT_GenericUTF8Hash::KeyValue::KeyValue (const UT_UTF8String & key) : m_key(key), m_value(0) { // } UT_GenericUTF8Hash::KeyValue::~KeyValue () { if (m_value) delete m_value; } /* responsibility for value passes here */ void UT_GenericUTF8Hash::KeyValue::setValue (UT_GenericBase * value) { if (m_value) delete m_value; m_value = value; } UT_GenericBase * UT_GenericUTF8Hash::KeyValue::getValue () { UT_GenericBase * value = m_value; m_value = 0; return value; } UT_GenericUTF8Hash::UT_GenericUTF8Hash (UT_uint32 increment) : m_pair(0), m_pair_count(0), m_pair_max(0), m_index(0), m_increment(increment) { // } UT_GenericUTF8Hash::~UT_GenericUTF8Hash () { clear (false); if (m_pair) free (m_pair); } /* deletes all key/value pairs, but doesn't free() array of pointers */ void UT_GenericUTF8Hash::clear (bool delete_values) { for (UT_uint32 i = 0; i < m_pair_count; i++) { if (!delete_values) m_pair[i]->getValue (); // detach value, if any, so not deleted delete m_pair[i]; } m_pair_count = 0; } /* for easy sequential access of map members: */ bool UT_GenericUTF8Hash::pair (UT_uint32 index, const UT_UTF8String *& key, const UT_GenericBase *& value) const { if (index >= m_pair_count) return false; key = &m_pair[index]->key (); value = m_pair[index]->value (); return true; } /* responsibility for value passes here */ bool UT_GenericUTF8Hash::ins (const UT_UTF8String & key, UT_GenericBase * value) { UT_uint32 index; if (lookup (key, index)) { m_pair[index]->setValue (value); return true; } if (!grow ()) { if (value) delete value; return false; } KeyValue * KV = 0; UT_TRY { KV = new KeyValue(key); } UT_CATCH(...) { KV = 0; } if (KV == 0) { if (value) delete value; return false; } if (index < m_pair_count) { memmove (m_pair + index + 1, m_pair + index, (m_pair_count - index) * sizeof (KeyValue *)); } ++m_pair_count; m_pair[index] = KV; KV->setValue (value); return true; } /* returns false if no such key-value */ bool UT_GenericUTF8Hash::del (const char * key) { UT_GenericBase * value = 0; bool removed = del (key, value); if (removed && value) delete value; return removed; } /* returns false if no such key-value */ bool UT_GenericUTF8Hash::del (const UT_UTF8String & key) { UT_GenericBase * value = 0; bool removed = del (key, value); if (removed && value) delete value; return removed; } bool UT_GenericUTF8Hash::del (const char * key, UT_GenericBase *& value) // return value rather than deleting { UT_uint32 index; if (!lookup (key, index)) return false; value = m_pair[index]->getValue (); delete m_pair[index]; if (index < --m_pair_count) { memmove (m_pair + index, m_pair + index + 1, (m_pair_count - index) * sizeof (KeyValue *)); } return true; } bool UT_GenericUTF8Hash::del (const UT_UTF8String & key, UT_GenericBase *& value) // return value rather than deleting { UT_uint32 index; if (!lookup (key, index)) return false; value = m_pair[index]->getValue (); delete m_pair[index]; if (index < --m_pair_count) { memmove (m_pair + index, m_pair + index + 1, (m_pair_count - index) * sizeof (KeyValue *)); } return true; } const UT_GenericBase * UT_GenericUTF8Hash::lookup (const char * key) { UT_uint32 index; return lookup (key, index) ? m_pair[index]->value () : 0; } const UT_GenericBase * UT_GenericUTF8Hash::lookup (const UT_UTF8String & key) { UT_uint32 index; return lookup (key, index) ? m_pair[index]->value () : 0; } bool UT_GenericUTF8Hash::lookup (const char * key, UT_uint32 & index) { return key ? lookup (key, strlen (key), index) : false; } bool UT_GenericUTF8Hash::lookup (const UT_UTF8String & key, UT_uint32 & index) { return lookup (key.utf8_str (), key.byteLength (), index); } bool UT_GenericUTF8Hash::lookup (const char * key, UT_uint32 key_length, UT_uint32 & index) { if (m_index < m_pair_count) if (key_eq (key, key_length, m_pair[m_index]->key ())) { index = m_index; return true; } if (m_pair_count == 0) { index = 0; return false; } if (!key_gt (key, key_length, m_pair[0]->key ())) { index = 0; return key_eq (key, key_length, m_pair[0]->key ()); } if (key_gt (key, key_length, m_pair[m_pair_count-1]->key ())) { index = m_pair_count; return false; } if (m_index < m_pair_count) { if (key_lt (key, key_length, m_pair[m_index]->key ()) && m_index) if (key_gt (key, key_length, m_pair[m_index-1]->key ())) { index = m_index; return false; } } else { m_index = m_pair_count / 2; if (key_eq (key, key_length, m_pair[m_index]->key ())) { index = m_index; return true; } } bool bFound = false; UT_uint32 min_index = 0; UT_uint32 max_index = m_pair_count; if (key_gt (key, key_length, m_pair[m_index]->key ())) min_index = m_index; else max_index = m_index; while (true) { if (max_index - min_index == 1) { m_index = max_index; break; } m_index = min_index + (max_index - min_index) / 2; if (key_eq (key, key_length, m_pair[m_index]->key ())) { bFound = true; break; } if (key_gt (key, key_length, m_pair[m_index]->key ())) min_index = m_index; else max_index = m_index; } index = m_index; return bFound; } bool UT_GenericUTF8Hash::grow () { if (m_pair_count < m_pair_max) return true; if (m_pair == 0) { m_pair = (KeyValue **) malloc (m_increment * sizeof (KeyValue *)); if (m_pair == 0) return false; m_pair_max = m_increment; } else { KeyValue ** more = (KeyValue **) realloc (m_pair, (m_pair_max + m_increment) * sizeof (KeyValue *)); if (more == 0) return false; m_pair = more; m_pair_max += m_increment; } return true; } UT_UTF8Hash::UT_UTF8Hash (bool bStripEmptyValues) : UT_GenericUTF8Hash(32), m_bStripEmptyValues(bStripEmptyValues) { } UT_UTF8Hash::~UT_UTF8Hash () { clear (); } bool UT_UTF8Hash::pair (UT_uint32 index, const UT_UTF8String *& key, const UT_UTF8String *& value) const { const UT_GenericBase * generic_value = 0; bool found = UT_GenericUTF8Hash::pair (index, key, generic_value); value = static_cast(generic_value); return found; } bool UT_UTF8Hash::ins (const UT_UTF8String & key, const UT_UTF8String & value) { if (m_bStripEmptyValues) if (value.byteLength () == 0) { del (key); return true; } UT_UTF8String * utf8_value = 0; UT_TRY { utf8_value = new UT_UTF8String(value); } UT_CATCH(...) { utf8_value = 0; } if (utf8_value == 0) return false; return ins (key, utf8_value); } bool UT_UTF8Hash::ins (const char * key, const char * value) { if (value == 0) return false; if (m_bStripEmptyValues) if (*value == 0) { del (key); return true; } UT_UTF8String utf8_key(key); UT_UTF8String * utf8_value = 0; UT_TRY { utf8_value = new UT_UTF8String(value); } UT_CATCH(...) { utf8_value = 0; } if (utf8_value == 0) return false; return ins (utf8_key, utf8_value); } bool UT_UTF8Hash::ins (const char ** attrs) { bool okay = true; UT_UTF8String utf8_key; while (*attrs) { utf8_key = *attrs++; const char * value = *attrs++; if (value == 0) continue; if (m_bStripEmptyValues) if (*value == 0) { del (utf8_key); continue; } UT_UTF8String * utf8_value = 0; UT_TRY { utf8_value = new UT_UTF8String(value); } UT_CATCH(...) { utf8_value = 0; } if (utf8_value == 0) { okay = false; break; } if (!ins (utf8_key, utf8_value)) { okay = false; break; } } return okay; } /* return value rather than deleting */ bool UT_UTF8Hash::del (const char * key, UT_UTF8String *& value) { UT_GenericBase * generic_value = 0; bool found = UT_GenericUTF8Hash::del (key, generic_value); value = static_cast(generic_value); return found; } /* return value rather than deleting */ bool UT_UTF8Hash::del (const UT_UTF8String & key, UT_UTF8String *& value) { UT_GenericBase * generic_value = 0; bool found = UT_GenericUTF8Hash::del (key, generic_value); value = static_cast(generic_value); return found; } static bool key_lt (const char * key, UT_uint32 key_length, const UT_UTF8String & key2) { UT_uint32 length = key2.byteLength (); length = 1 + (length > key_length) ? length : key_length; return (memcmp (key, key2.utf8_str (), length) < 0); } static bool key_gt (const char * key, UT_uint32 key_length, const UT_UTF8String & key2) { UT_uint32 length = key2.byteLength (); length = 1 + (length > key_length) ? length : key_length; return (memcmp (key, key2.utf8_str (), length) > 0); } static bool key_eq (const char * key, UT_uint32 key_length, const UT_UTF8String & key2) { if (key_length != key2.byteLength ()) return false; if (key_length == 0) return true; return (memcmp (key, key2.utf8_str (), key_length) == 0); } static void s_pass_whitespace (const char *& csstr) { while (*csstr) { unsigned char u = static_cast(*csstr); if (u & 0x80) { UT_UTF8Stringbuf::UCS4Char ucs4 = UT_UTF8Stringbuf::charCode (csstr); if (UT_UCS4_isspace (ucs4)) { while (static_cast(*++csstr) & 0x80) { } continue; } } else if (isspace (static_cast(u))) { csstr++; continue; } break; } } static const char * s_pass_name (const char *& csstr, char end) { const char * name_end = csstr; while (*csstr) { unsigned char u = static_cast(*csstr); if (u & 0x80) { UT_UTF8Stringbuf::UCS4Char ucs4 = UT_UTF8Stringbuf::charCode (csstr); if (UT_UCS4_isspace (ucs4)) { name_end = csstr; break; } while (static_cast(*++csstr) & 0x80) { } continue; } else if ((isspace (static_cast(u))) || (*csstr == end)) { name_end = csstr; break; } csstr++; } return name_end; } static const char * s_pass_value (const char *& csstr) { const char * value_end = csstr; bool bQuoted = false; while (*csstr) { bool bSpace = false; unsigned char u = static_cast(*csstr); if (u & 0x80) { UT_UTF8Stringbuf::UCS4Char ucs4 = UT_UTF8Stringbuf::charCode (csstr); if (!bQuoted) if (UT_UCS4_isspace (ucs4)) { bSpace = true; break; } while (static_cast(*++csstr) & 0x80) { } if (!bSpace) value_end = csstr; continue; } else if ((*csstr == '\'') || (*csstr == '"')) { bQuoted = (bQuoted ? false : true); } else if (*csstr == ';') { if (!bQuoted) { csstr++; break; } } else if (!bQuoted && isspace (static_cast(u))) bSpace = true; csstr++; if (!bSpace) value_end = csstr; } return value_end; } static const char * s_pass_string (const char *& csstr_ptr) { if (*csstr_ptr == 0) return 0; const char * csstr = csstr_ptr; char quote = 0; if ((*csstr == '\'') || (*csstr == '"')) quote = *csstr; bool valid = true; bool skip = false; while (true) { unsigned char u = static_cast(*++csstr); if ((u & 0xc0) == 0x80) continue; // trailing byte if (u == 0) { valid = false; break; } if (skip) { skip = false; continue; } if (*csstr == quote) { ++csstr; break; } if (*csstr == '\\') skip = true; } if (valid) { csstr_ptr = csstr; csstr--; } else { csstr = csstr_ptr; } return csstr; // points to end quote on success, and to start quote on failure } void UT_UTF8Hash::parse_properties (const char * properties) { if ( properties == 0) return; if (*properties == 0) return; const char * csstr = properties; UT_UTF8String name; UT_UTF8String value; bool bSkip = false; while (*csstr) { if (bSkip) { if (*csstr == ';') bSkip = false; ++csstr; continue; } s_pass_whitespace (csstr); const char * name_start = csstr; const char * name_end = s_pass_name (csstr, ':'); if (*csstr == 0) break; // whatever we have, it's not a "name:value;" pair if (name_start == name_end) // ?? stray colon? { bSkip = true; continue; } name.assign (name_start, name_end - name_start); s_pass_whitespace (csstr); if (*csstr != ':') // whatever we have, it's not a "name:value;" pair { bSkip = true; continue; } csstr++; s_pass_whitespace (csstr); if (*csstr == 0) break; // whatever we have, it's not a "name:value;" pair const char * value_start = csstr; const char * value_end = s_pass_value (csstr); if (value_start == value_end) // ?? no value... { bSkip = true; continue; } value.assign (value_start, value_end - value_start); ins (name, value); } } void UT_UTF8Hash::parse_attributes (const char * attributes) { if ( attributes == 0) return; if (*attributes == 0) return; const char * atstr = attributes; UT_UTF8String name; UT_UTF8String value; while (*atstr) { s_pass_whitespace (atstr); const char * name_start = atstr; const char * name_end = s_pass_name (atstr, '='); if (*atstr != '=') break; // whatever we have, it's not a name="value" pair if (name_start == name_end) break; // ?? stray equals? name.assign (name_start, name_end - name_start); atstr++; if ((*atstr != '\'') && (*atstr != '"')) break; // whatever we have, it's not a name="value" pair const char * value_start = atstr; const char * value_end = s_pass_string (atstr); if (value_start == value_end) break; // ?? no value... value_start++; value.assign (value_start, value_end - value_start); ins (name, value); } } bool operator== (const UT_UTF8Hash & lhs, const UT_UTF8Hash & rhs) { if (lhs.count () != rhs.count ()) return false; const UT_UTF8String * lhs_key = 0; const UT_UTF8String * lhs_value = 0; const UT_UTF8String * rhs_key = 0; const UT_UTF8String * rhs_value = 0; bool equal = true; UT_uint32 count = lhs.count (); for (UT_uint32 i = 0; i < count; i++) { lhs.pair (i, lhs_key, lhs_value); rhs.pair (i, rhs_key, rhs_value); if ((*lhs_key != *rhs_key) || (*lhs_value != *rhs_value)) { equal = false; break; } } return equal; }