/* Copyright (C) 2007 One Laptop Per Child * Author: Marc Maurer * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ #include #include "ut_assert.h" #include "ut_debugmsg.h" #include "LanguagePattern.h" static bool s_get_bool_value(const string& val, bool default_value = true) { if (val.size() == 0) return default_value; if (val == "TRUE" || val == "true" || val == "1") return true; else if (val == "FALSE" || val == "false" || val == "0") return false; UT_ASSERT_HARMLESS(UT_SHOULD_NOT_HAPPEN); return default_value; } static string s_get_translatable_property(xmlNode* node, string prop) { UT_return_val_if_fail(node, ""); char* name = (char *)xmlGetProp(node, (const xmlChar *)prop.c_str()); if (!name) // no worries, _name is allowed too, meaning it is eligable for translation { prop.insert(0, "_"); name = (char *)xmlGetProp(node, (const xmlChar *)prop.c_str()); } return (name ? name : ""); } void LanguagePattern::construct(xmlNode* pattern, vector& patterns) { UT_return_if_fail(pattern); UT_return_if_fail(pattern->type == XML_ELEMENT_NODE); if (strcmp(reinterpret_cast(pattern->name), "escape-char") == 0) { UT_DEBUGMSG(("Pattern match: %s\n", pattern->name)); LanguagePattern* pPattern = new LanguagePattern(); pPattern->type = LanguagePattern::ESCAPE_CHAR; //pPattern->_parseAttribs(*pPattern, pattern); //pPattern->_parseElements(*pPattern, pattern); patterns.push_back(pPattern); } else if (strcmp(reinterpret_cast(pattern->name), "string") == 0) { xxx_UT_DEBUGMSG(("Pattern match: %s\n", pattern->name)); LanguagePattern* pPattern = new StringLanguagePattern(); pPattern->_parseAttribs(pattern); pPattern->_parseElements(pattern); patterns.push_back(pPattern); } else if (strcmp(reinterpret_cast(pattern->name), "keyword-list") == 0) { xxx_UT_DEBUGMSG(("Pattern match: %s\n", pattern->name)); KeywordListLanguagePattern* pPattern = new KeywordListLanguagePattern(); // this is only a temporary placeholder for our keywords pPattern->_parseAttribs(pattern); pPattern->_parseElements(pattern); for (vector::iterator it = pPattern->m_vKeywords.begin(); it != pPattern->m_vKeywords.end(); it++) { // TODO: make a nice copy function KeywordLanguagePattern* pKeywordPattern = new KeywordLanguagePattern(); pKeywordPattern->name = pPattern->name; pKeywordPattern->style = pPattern->style; if (pPattern->attr_beginning_regex.size() > 0 && pPattern->attr_end_regex.size() == 0) { pKeywordPattern->start_regex = pPattern->attr_beginning_regex; pKeywordPattern->endAtLineEnd = true; } else if (pPattern->attr_beginning_regex.size() > 0 && pPattern->attr_end_regex.size() > 0) { pKeywordPattern->start_regex = pPattern->attr_beginning_regex; pKeywordPattern->end_regex = pPattern->attr_end_regex; } else if (pPattern->attr_beginning_regex.size() == 0 && pPattern->attr_end_regex.size() > 0) { UT_DEBUGMSG(("pPattern->attr_beginning_regex.size() == 0 && pPattern->attr_end_regex.size() > 0 for pattern %s\n", pPattern->name.c_str())); // TODO: is this a correct interpretation? pKeywordPattern->start_regex = *it; // shouldn't we add \\b before this? pKeywordPattern->end_regex = pPattern->attr_end_regex; } else { string kwpat = "\\b"; kwpat += *it; kwpat += "\\b"; pKeywordPattern->regex = kwpat; } pKeywordPattern->caseSensitive = pPattern->caseSensitive; pKeywordPattern->matchEmptyStringAtBeginning = pPattern->matchEmptyStringAtBeginning; pKeywordPattern->matchEmptyStringAtEnd = pPattern->matchEmptyStringAtEnd; patterns.push_back(pKeywordPattern); } DELETEP(pPattern); } else if (strcmp(reinterpret_cast(pattern->name), "line-comment") == 0) { xxx_UT_DEBUGMSG(("Pattern match: %s\n", pattern->name)); LanguagePattern* pPattern = new LineCommentLanguagePattern(); pPattern->_parseAttribs(pattern); pPattern->_parseElements(pattern); patterns.push_back(pPattern); } else if (strcmp(reinterpret_cast(pattern->name), "block-comment") == 0) { xxx_UT_DEBUGMSG(("Pattern match: %s\n", pattern->name)); LanguagePattern* pPattern = new BlockCommentLanguagePattern(); pPattern->_parseAttribs(pattern); pPattern->_parseElements(pattern); patterns.push_back(pPattern); } else if (strcmp(reinterpret_cast(pattern->name), "syntax-item") == 0) { xxx_UT_DEBUGMSG(("Pattern match: %s\n", pattern->name)); LanguagePattern* pPattern = new SyntaxItemLanguagePattern(); pPattern->_parseAttribs(pattern); pPattern->_parseElements(pattern); patterns.push_back(pPattern); } else if (strcmp(reinterpret_cast(pattern->name), "pattern-item") == 0) { xxx_UT_DEBUGMSG(("Pattern match: %s\n", pattern->name)); LanguagePattern* pPattern = new PatternItemLanguagePattern(); pPattern->_parseAttribs(pattern); pPattern->_parseElements(pattern); patterns.push_back(pPattern); } else { UT_DEBUGMSG(("Unknown pattern match: %s\n", pattern->name)); UT_ASSERT_HARMLESS(UT_NOT_IMPLEMENTED); } } void LanguagePattern::_parseAttribs(xmlNode* patternNode) { UT_return_if_fail(patternNode && patternNode->type == XML_ELEMENT_NODE); // get all generic attributes name = s_get_translatable_property(patternNode, "name"); style = s_get_translatable_property(patternNode, "style"); } void LanguagePattern::_parseElements(xmlNode* patternNode) { UT_return_if_fail(patternNode && patternNode->type == XML_ELEMENT_NODE); for (xmlNode* child = patternNode->children; child; child = child->next) { if (child->type == XML_ELEMENT_NODE) { /* get the generic regular expressions only */ if (strcmp(reinterpret_cast(child->name), "regex") == 0) { regex = reinterpret_cast(xmlNodeGetContent(child)); xxx_UT_DEBUGMSG(("Found regex: %s\n", start_regex.c_str())); } else if (strcmp(reinterpret_cast(child->name), "start-regex") == 0) { start_regex = reinterpret_cast(xmlNodeGetContent(child)); xxx_UT_DEBUGMSG(("Found start-regex: %s\n", start_regex.c_str())); } else if (strcmp(reinterpret_cast(child->name), "end-regex") == 0) { string tmp_end_regex = reinterpret_cast(xmlNodeGetContent(child)); if (strcmp(tmp_end_regex.c_str(), "\\n") == 0) { // this is both faster, and \n never really matches a line end in a regex // not sure if i consider this a hack, or a nice optimization endAtLineEnd = true; } else end_regex = tmp_end_regex; xxx_UT_DEBUGMSG(("Found end-regex: %s\n", end_regex.c_str())); } else { UT_DEBUGMSG(("Unimplemented pattern element: %s\n", child->name)); UT_ASSERT_HARMLESS(UT_NOT_IMPLEMENTED); } } } } void KeywordListLanguagePattern::_parseAttribs(xmlNode* patternNode) { UT_return_if_fail(patternNode && patternNode->type == XML_ELEMENT_NODE); // get all generic attributes LanguagePattern::_parseAttribs(patternNode); // get all keyword-list specific attributes caseSensitive = s_get_bool_value( s_get_translatable_property(patternNode, "case-sensitive"), true ); matchEmptyStringAtBeginning = s_get_bool_value( s_get_translatable_property(patternNode, "match-empty-string-at-beginning"), false ); matchEmptyStringAtEnd = s_get_bool_value( s_get_translatable_property(patternNode, "match-empty-string-at-end"), false ); // FIXME: is beginning-regex/end_regex really translatable? attr_beginning_regex = s_get_translatable_property(patternNode, "beginning-regex"); attr_end_regex = s_get_translatable_property(patternNode, "end-regex"); } void KeywordListLanguagePattern::_parseElements(xmlNode* patternNode) { UT_return_if_fail(patternNode && patternNode->type == XML_ELEMENT_NODE); for (xmlNode* child = patternNode->children; child; child = child->next) { if (child->type == XML_ELEMENT_NODE) { if (strcmp(reinterpret_cast(child->name), "keyword") == 0) { xxx_UT_DEBUGMSG(("Found keyword: %s\n", reinterpret_cast(xmlNodeGetContent(child)))); m_vKeywords.push_back(reinterpret_cast(xmlNodeGetContent(child))); } else { UT_DEBUGMSG(("Unimplemented pattern element: %s\n", child->name)); UT_ASSERT(UT_SHOULD_NOT_HAPPEN); } } } }