/* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */ /* AbiWord * Copyright (C) 2001-2002 Dom Lachowicz * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301 USA. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include "ut_types.h" #include "ut_assert.h" #include "ut_debugmsg.h" #include "ut_string.h" #include "ie_imp_WML.h" #include "ie_impGraphic.h" #include "ie_types.h" #include "pd_Document.h" #include "ut_growbuf.h" #include "ut_path.h" #include "ut_string_class.h" #include "fg_GraphicRaster.h" /* * This file is meant to import WML documents. * WML is an XML derivate and is the standard markup * language for wireless communication devices such * as web-phones and other PDAs. * * The correspondence between WML and Word processing * documents isn't as good as say, HTML is, but everyone * gets a stiffie now when you mention the word "wireless" * in front of anything, so... */ /*****************************************************************/ /*****************************************************************/ IE_Imp_WML_Sniffer::IE_Imp_WML_Sniffer (const char * _name) : IE_ImpSniffer(_name) { // } // supported suffixes static IE_SuffixConfidence IE_Imp_WML_Sniffer__SuffixConfidence[] = { { "wml", UT_CONFIDENCE_PERFECT }, { "", UT_CONFIDENCE_ZILCH } }; const IE_SuffixConfidence * IE_Imp_WML_Sniffer::getSuffixConfidence () { return IE_Imp_WML_Sniffer__SuffixConfidence; } const IE_MimeConfidence * IE_Imp_WML_Sniffer::getMimeConfidence () { // mimetypes once getMimeConfidence is implemented (need to check correctness) // "text/vnd.wap.wml" return NULL; } UT_Confidence_t IE_Imp_WML_Sniffer::recognizeContents(const char * szBuf, UT_uint32 /*iNumbytes*/) { // TODO: scan the first few lines if(strstr(szBuf, "!DOCTYPE wml PUBLIC") == NULL) return UT_CONFIDENCE_ZILCH; return UT_CONFIDENCE_PERFECT; } UT_Error IE_Imp_WML_Sniffer::constructImporter(PD_Document * pDocument, IE_Imp ** ppie) { IE_Imp_WML * p = new IE_Imp_WML(pDocument); *ppie = p; return UT_OK; } bool IE_Imp_WML_Sniffer::getDlgLabels(const char ** pszDesc, const char ** pszSuffixList, IEFileType * ft) { *pszDesc = "WML (.wml)"; *pszSuffixList = "*.wml"; *ft = getFileType(); return true; } /*****************************************************************/ /*****************************************************************/ IE_Imp_WML::~IE_Imp_WML() { DELETEP(m_TableHelperStack); } IE_Imp_WML::IE_Imp_WML (PD_Document * pDocument) : IE_Imp_XML(pDocument,false), m_bOpenedBlock(false), m_bOpenedSection(false), m_iColumns(0), m_iImages(0), m_iOpenedColumns(0), m_TableHelperStack(new IE_Imp_TableHelperStack()) { } /*****************************************************************/ /*****************************************************************/ #define TT_OTHER 0 // anything else #define TT_DOCUMENT 1 // a document <wml> #define TT_SECTION 2 // card or section <card> #define TT_BLOCK 3 // a paragraph <p> #define TT_IMAGE 4 // an image object <img> #define TT_BREAK 5 // a forced line-break <br/> #define TT_BOLD 6 // bold text <b> #define TT_ITALIC 7 // italic text <i> #define TT_UNDERLINE 8 // underlined text <u> #define TT_STRONG 9 // strong(bold) text <strong> #define TT_EMPHASIS 10 // emphasis(bold) text <em> #define TT_BIG 11 // big(superscript) text <big> #define TT_SMALL 12 // small(subscript) text <small> #define TT_TABLE 13 // <table> #define TT_TABLE_ROW 14 // <tr> #define TT_TABLE_CELL 15 // <td> #define TT_ACCESS 16 // <access> #define TT_HEAD 17 // <head> #define TT_META 18 // <meta> #define TT_TEMPLATE 19 // <template> #define TT_DO 20 // <do> #define TT_ONEVENT 21 // <onevent> #define TT_POSTFIELD 22 // <postfield> #define TT_GO 23 // <go> #define TT_NOOP 24 // <noop> #define TT_PREV 25 // <prev> #define TT_REFRESH 26 // <refresh> #define TT_FIELDSET 27 // <fieldset> #define TT_INPUT 28 // <input> #define TT_OPTGROUP 29 // <optgroup> #define TT_OPTION 30 // <option> #define TT_SELECT 31 // <select> #define TT_SETVAR 32 // <setvar> #define TT_TIMER 33 // <timer> #define TT_ANCHOR 34 // <anchor> #define TT_LINK 35 // <a> // KEEP IN ALPHABETICAL ORDER!! static struct xmlToIdMapping s_Tokens[] = { { "a", TT_LINK }, { "access", TT_ACCESS }, { "anchor", TT_ANCHOR }, { "b", TT_BOLD }, { "big", TT_BIG }, { "br", TT_BREAK }, { "card", TT_SECTION }, { "do", TT_DO }, { "em", TT_EMPHASIS }, { "fieldset", TT_FIELDSET }, { "go", TT_GO }, { "head", TT_HEAD }, { "i", TT_ITALIC }, { "img", TT_IMAGE }, { "input", TT_INPUT }, { "meta", TT_META }, { "noop", TT_NOOP }, { "onevent", TT_ONEVENT }, { "optgroup", TT_OPTGROUP }, { "option", TT_OPTION }, { "p", TT_BLOCK }, { "postfield", TT_POSTFIELD }, { "prev", TT_PREV }, { "refresh", TT_REFRESH }, { "select", TT_SELECT }, { "setvar", TT_SETVAR }, { "small", TT_SMALL }, { "strong", TT_STRONG }, { "table", TT_TABLE }, { "td", TT_TABLE_CELL }, { "template", TT_TEMPLATE }, { "timer", TT_TIMER }, { "tr", TT_TABLE_ROW }, { "u", TT_UNDERLINE }, { "wml", TT_DOCUMENT } }; #define TokenTableSize ((sizeof(s_Tokens)/sizeof(s_Tokens[0]))) /*****************************************************************/ /*****************************************************************/ #define X_TestParseState(ps) ((m_parseState==(ps))) #define X_VerifyParseState(ps) do { if (!(X_TestParseState(ps))) \ { m_error = UT_IE_BOGUSDOCUMENT; \ UT_DEBUGMSG(("WML import: X_TestParseState() failed %s\n", #ps)); \ return; } } while (0) #define X_CheckDocument(b) do { if (!(b)) \ { m_error = UT_IE_BOGUSDOCUMENT; \ UT_DEBUGMSG(("WML import: X_CheckDocument() failed %s\n", #b)); \ return; } } while (0) #define X_CheckError(v) do { if (!(v)) \ { m_error = UT_ERROR; \ UT_DEBUGMSG(("WML import: X_CheckError() failed %s\n", #v)); \ return; } } while (0) #define X_EatIfAlreadyError() do { if (m_error) return; } while (0) /*****************************************************************/ /*****************************************************************/ void IE_Imp_WML::openTable(const gchar **atts) { const gchar * wml_columns = 0; wml_columns = _getXMLPropValue ("columns", atts); if (wml_columns) { m_iColumns = atoi(wml_columns); if (m_iColumns < 1) m_iColumns = 1; } else X_CheckDocument(false); // columns is a required attribute, bail out X_CheckError(m_TableHelperStack->tableStart(getDoc(),NULL)); } void IE_Imp_WML::closeTable(void) { X_CheckError(m_TableHelperStack->tableEnd()); } void IE_Imp_WML::openRow(const gchar ** /*atts*/) { X_CheckError(m_TableHelperStack->trStart(NULL)); } void IE_Imp_WML::closeRow(void) { //corrective code for columns that use colspan while(m_iColumns > m_iOpenedColumns) { const gchar ** empty = NULL; openCell(empty); closeCell(); } } void IE_Imp_WML::openCell(const gchar ** /*atts*/) { // Note: there's no rowspan or colspan in WML 1.1 m_iOpenedColumns++; X_CheckError(m_TableHelperStack->tdStart(1,1,NULL)); } void IE_Imp_WML::closeCell(void) { m_TableHelperStack->tdEnd(); } /*****************************************************************/ /*****************************************************************/ void IE_Imp_WML::startElement(const gchar *name, const gchar **atts) { UT_DEBUGMSG(("WML import: startElement: %s\n", name)); // xml parser keeps running until buffer consumed X_EatIfAlreadyError(); UT_uint32 tokenIndex = _mapNameToToken (name, s_Tokens, TokenTableSize); switch (tokenIndex) { case TT_DOCUMENT: { X_VerifyParseState(_PS_Init); m_parseState = _PS_Doc; return; } case TT_SECTION: { X_VerifyParseState(_PS_Doc); m_parseState = _PS_Sec; // Keep this appendStrux() call here to support files with more // than one <card> X_CheckError(appendStrux(PTX_Section,static_cast<const gchar **>(NULL))); m_bOpenedSection = true; return; } case TT_HEAD: { X_VerifyParseState(_PS_Doc); m_parseState = _PS_MetaData; return; } case TT_META: { X_VerifyParseState(_PS_MetaData); m_parseState = _PS_Meta; const gchar *metaname = NULL, *content = NULL; metaname = static_cast<const gchar*>(_getXMLPropValue("name", atts)); content = static_cast<const gchar*>(_getXMLPropValue("content", atts)); if(!metaname || !content) return; if(!strcmp("title", metaname)) { getDoc()->setMetaDataProp("dc.title",content); } else if(!strcmp("author", metaname)) { getDoc()->setMetaDataProp("dc.creator",content); } else if(!strcmp("subject", metaname)) { getDoc()->setMetaDataProp("dc.subject",content); } else if(!strcmp("description", metaname)) { getDoc()->setMetaDataProp("dc.description",content); } else if(!strcmp("publisher", metaname)) { getDoc()->setMetaDataProp("dc.publisher",content); } else if(!strcmp("contributor", metaname)) { getDoc()->setMetaDataProp("dc.contributor",content); } else if(!strcmp("source", metaname)) { getDoc()->setMetaDataProp("dc.source",content); } else if(!strcmp("relation", metaname)) { getDoc()->setMetaDataProp("dc.relation",content); } else if(!strcmp("coverage", metaname)) { getDoc()->setMetaDataProp("dc.coverage",content); } else if(!strcmp("rights", metaname)) { getDoc()->setMetaDataProp("dc.rights",content); } else if(!strcmp("keywords", metaname)) { getDoc()->setMetaDataProp("abiword.keywords",content); } return; } case TT_BLOCK: { X_VerifyParseState(_PS_Sec); m_parseState = _PS_Block; const gchar *p_val = NULL; const gchar *buf[3]; bool left = false; buf[0] = "props"; buf[1] = NULL; buf[2] = NULL; p_val = static_cast<const gchar*>(_getXMLPropValue("align", atts)); if(!p_val || !atts) { UT_DEBUGMSG(("WML: got <p> with no props\n")); left = true; } else { if(!strcmp(p_val, "center")) { buf[1] = "text-align:center"; } else if(!strcmp(p_val, "right")) { buf[1] = "text-align:right"; } else { left = true; } } X_CheckError(appendStrux(PTX_Block, (left ? NULL : const_cast<const gchar **>(buf)))); m_bOpenedBlock = true; return; } case TT_IMAGE: { X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Cell) || (m_parseState == _PS_Sec)); if(m_parseState == _PS_Sec) { X_CheckError(appendStrux(PTX_Block,NULL)); m_bOpenedBlock = true; } const gchar *p_val = NULL; p_val = _getXMLPropValue(static_cast<const gchar *>("src"), atts); if(p_val) createImage (p_val, atts); return; } case TT_BREAK: { X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Cell)); if(m_parseState == _PS_Block) //AbiWord doesn't allow breaks in tables { UT_UCSChar ucs = UCS_LF; X_CheckError(appendSpan(&ucs,1)); } return; } case TT_ITALIC: case TT_UNDERLINE: case TT_BOLD: case TT_STRONG: case TT_EMPHASIS: case TT_BIG: case TT_SMALL: { X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Cell)); const gchar **p_atts; const gchar *buf[3]; buf[0] = "props"; buf[1] = NULL; buf[2] = NULL; switch(tokenIndex) { case TT_ITALIC: { buf[1] = "font-style:italic"; break; } case TT_UNDERLINE: { buf[1] = "text-decoration:underline"; break; } case TT_BOLD: case TT_STRONG: case TT_EMPHASIS: { buf[1] = "font-weight:bold"; break; } case TT_BIG: { buf[1] = "text-position:superscript"; break; } case TT_SMALL: { buf[1] = "text-position:subscript"; break; } default: { UT_ASSERT_HARMLESS(UT_SHOULD_NOT_HAPPEN); UT_DEBUGMSG(("DOM: WML: %s\n", name)); break; } } p_atts = buf; X_CheckError(_pushInlineFmt(p_atts)); X_CheckError(appendFmt(&m_vecInlineFmt)); return; } case TT_TABLE: { m_iColumns = 0; X_VerifyParseState(_PS_Block); m_parseState = _PS_Table; openTable(atts); return; } case TT_TABLE_ROW: { m_iOpenedColumns = 0; X_VerifyParseState(_PS_Table); openRow(atts); return; } case TT_TABLE_CELL: { X_VerifyParseState(_PS_Table); m_parseState = _PS_Cell; openCell(atts); return; } case TT_ANCHOR: { X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Cell)); const gchar *buf[5]; buf[4] = NULL; const gchar *p_val = NULL; p_val = _getXMLPropValue("id", atts); if(p_val) { buf[0] = PT_TYPE_ATTRIBUTE_NAME; buf[1] = "start"; buf[2] = PT_NAME_ATTRIBUTE_NAME; buf[3] = p_val; X_CheckError(appendObject(PTO_Bookmark, buf)); buf[1] = "end"; X_CheckError(appendObject(PTO_Bookmark, buf)); } return; } case TT_LINK: { X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Cell)); const gchar *buf[3]; buf[2] = NULL; const gchar *p_val = NULL; p_val = _getXMLPropValue("href", atts); if(p_val) { buf[0] = "xlink:href"; buf[1] = p_val; X_CheckError(appendObject(PTO_Hyperlink, buf)); } else //href is required, bail out { X_CheckDocument(false); } return; } case TT_ACCESS: case TT_DO: case TT_FIELDSET: case TT_GO: case TT_INPUT: case TT_NOOP: case TT_ONEVENT: case TT_OPTGROUP: case TT_OPTION: case TT_POSTFIELD: case TT_PREV: case TT_REFRESH: case TT_SELECT: case TT_SETVAR: case TT_TEMPLATE: case TT_TIMER: return; case TT_OTHER: default: UT_DEBUGMSG(("WML: Unknown or knowingly unhandled tag [%s]\n",name)); } } void IE_Imp_WML::endElement(const gchar *name) { UT_DEBUGMSG(("WML import: endElement: %s\n", name)); // xml parser keeps running until buffer consumed X_EatIfAlreadyError(); UT_uint32 tokenIndex = _mapNameToToken (name, s_Tokens, TokenTableSize); switch (tokenIndex) { case TT_DOCUMENT: { X_VerifyParseState(_PS_Doc); if(!m_bOpenedSection) { X_CheckError(appendStrux(PTX_Section, NULL)); X_CheckError(appendStrux(PTX_Block, NULL)); } m_parseState = _PS_Init; return; } case TT_SECTION: { X_VerifyParseState(_PS_Sec); m_parseState = _PS_Doc; if(!m_bOpenedBlock) X_CheckError(appendStrux(PTX_Block, NULL)); m_bOpenedBlock = false; return; } case TT_HEAD: { X_VerifyParseState(_PS_MetaData); m_parseState = _PS_Doc; return; } case TT_META: { X_VerifyParseState(_PS_Meta); m_parseState = _PS_MetaData; return; } case TT_BLOCK: { UT_ASSERT_HARMLESS(m_lenCharDataSeen==0); X_VerifyParseState(_PS_Block); m_parseState = _PS_Sec; X_CheckDocument(_getInlineDepth()==0); return; } case TT_IMAGE: { X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Cell) || (m_parseState == _PS_Sec)); return; } case TT_BREAK: { X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Cell)); return; } case TT_ITALIC: case TT_UNDERLINE: case TT_BOLD: case TT_STRONG: case TT_EMPHASIS: case TT_BIG: case TT_SMALL: { UT_ASSERT_HARMLESS(m_lenCharDataSeen==0); X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Cell)); X_CheckDocument(_getInlineDepth()>0); _popInlineFmt(); X_CheckError(appendFmt(&m_vecInlineFmt)); return; } case TT_TABLE: { X_VerifyParseState(_PS_Table); m_parseState = _PS_Block; m_iColumns = 0; closeTable(); return; } case TT_TABLE_ROW: { X_VerifyParseState(_PS_Table); closeRow(); m_iOpenedColumns = 0; return; } case TT_TABLE_CELL: { X_VerifyParseState(_PS_Cell); m_parseState = _PS_Table; closeCell(); break; } case TT_ANCHOR: { X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Cell)); return; } case TT_LINK: { X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Cell)); X_CheckError(appendObject(PTO_Hyperlink, NULL)); return; } case TT_ACCESS: case TT_DO: case TT_FIELDSET: case TT_GO: case TT_INPUT: case TT_NOOP: case TT_ONEVENT: case TT_OPTGROUP: case TT_OPTION: case TT_POSTFIELD: case TT_PREV: case TT_REFRESH: case TT_SELECT: case TT_SETVAR: case TT_TEMPLATE: case TT_TIMER: return; case TT_OTHER: default: UT_DEBUGMSG(("WML: Unknown or intentionally unhandled end tag [%s]\n",name)); } } void IE_Imp_WML::createImage(const char *name, const gchar **atts) { char * relative_file = UT_go_url_resolve_relative(m_szFileName, name); if(!relative_file) return; UT_UTF8String filename(relative_file); g_free(relative_file); FG_Graphic * pfg = 0; if (IE_ImpGraphic::loadGraphic (filename.utf8_str(), IEGFT_Unknown, &pfg) != UT_OK) return; const UT_ByteBuf * pBB = pfg->getBuffer(); X_CheckError(pBB); UT_UTF8String dataid; UT_UTF8String_sprintf (dataid, "image%u", static_cast<unsigned int>(m_iImages++)); UT_UTF8String alt; const gchar *p_val = NULL; p_val = _getXMLPropValue(static_cast<const gchar *>("alt"), atts); if (p_val) alt += p_val; X_CheckError (getDoc()->createDataItem (dataid.utf8_str(), false, pBB, pfg->getMimeType(), NULL)); const gchar *buf[7]; buf[0] = "dataid"; buf[1] = dataid.utf8_str(); buf[2] = "alt"; buf[3] = alt.utf8_str(); buf[4] = NULL; buf[5] = NULL; buf[6] = NULL; UT_UTF8String props; p_val = _getXMLPropValue("height", atts); if(p_val) { props = "height:"; props += UT_UTF8String_sprintf("%fin", UT_convertDimToInches(UT_convertDimensionless(p_val), DIM_PX)); } p_val = _getXMLPropValue("width", atts); if(p_val) { if(props.length()) props+= "; "; props+= "width:"; props += UT_UTF8String_sprintf("%fin", UT_convertDimToInches(UT_convertDimensionless(p_val), DIM_PX)); } p_val = _getXMLPropValue("xml:lang", atts); if(p_val && *p_val) { if(props.length()) props+= "; "; props+= "lang:"; props+= p_val; } if(props.length()) { buf[4] = PT_PROPS_ATTRIBUTE_NAME; buf[5] = props.utf8_str(); } X_CheckError(appendObject(PTO_Image, const_cast<const gchar **>(buf))); DELETEP(pfg); } void IE_Imp_WML::charData(const gchar *s, int len) { if(m_parseState == _PS_Cell) { UT_UCS4String span = s; m_TableHelperStack->Inline(span.ucs4_str(), span.length()); return; } IE_Imp_XML :: charData (s, len); }