/* AbiWord * Copyright (C) 2001 AbiSource, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ #include #include #include #include "ut_types.h" #include "ut_assert.h" #include "ut_debugmsg.h" #include "ut_string.h" #include "ie_impexp_XSL-FO.h" #include "ie_imp_XSL-FO.h" #include "ie_types.h" #include "pd_Document.h" #include "ut_growbuf.h" #include "ut_string_class.h" /* * This is meant to import XSL-FO documents. XSL-FO are XML/XSL * Formatting objects, meant to be similar in scope to LaTeX. * The reference I've been using is located at: * http://zvon.org/xxl/xslfoReference/Output/index.html * * Dom */ // this importer is of Beta quality // it handles a lot of XSL-FO but also doesn't handle a // lot of key things /*****************************************************************/ /*****************************************************************/ IE_Imp_XSL_FO_Sniffer::IE_Imp_XSL_FO_Sniffer (const char * name) : IE_ImpSniffer(name) { // } UT_Confidence_t IE_Imp_XSL_FO_Sniffer::recognizeContents(const char * szBuf, UT_uint32 iNumbytes) { UT_uint32 iLinesToRead = 6; UT_uint32 iBytesScanned = 0; const char *p; char * magic; p = szBuf; while ( iLinesToRead-- ) { magic = "= iNumbytes ) return(UT_CONFIDENCE_ZILCH); } /* Seek past the next newline: */ if ( *p == '\n' || *p == '\r' ) { iBytesScanned++ ; p++ ; if ( *p == '\n' || *p == '\r' ) { iBytesScanned++; p++; } } } return UT_CONFIDENCE_ZILCH; } UT_Confidence_t IE_Imp_XSL_FO_Sniffer::recognizeSuffix(const char * szSuffix) { if (UT_stricmp(szSuffix,".fo") == 0) return UT_CONFIDENCE_PERFECT; return UT_CONFIDENCE_ZILCH; } UT_Error IE_Imp_XSL_FO_Sniffer::constructImporter(PD_Document * pDocument, IE_Imp ** ppie) { IE_Imp_XSL_FO * p = new IE_Imp_XSL_FO(pDocument); *ppie = p; return UT_OK; } bool IE_Imp_XSL_FO_Sniffer::getDlgLabels(const char ** pszDesc, const char ** pszSuffixList, IEFileType * ft) { *pszDesc = "XSL-FO (.fo)"; *pszSuffixList = "*.fo"; *ft = getFileType(); return true; } /*****************************************************************/ /*****************************************************************/ IE_Imp_XSL_FO::~IE_Imp_XSL_FO() { DELETEP(m_TableHelperStack); } IE_Imp_XSL_FO::IE_Imp_XSL_FO(PD_Document * pDocument) : IE_Imp_XML(pDocument, false), m_iBlockDepth(0), m_iListDepth(0), m_iListBlockDepth(0), m_iTableDepth(0), m_TableHelperStack(new IE_Imp_TableHelperStack()) { } /*****************************************************************/ /*****************************************************************/ static struct xmlToIdMapping s_Tokens[] = { { "fo:basic-link", TT_BASICLINK }, { "fo:block", TT_BLOCK }, { "fo:character", TT_CHAR }, { "fo:external-graphic", TT_IMAGE }, { "fo:flow", TT_SECTION }, { "fo:footnote", TT_FOOTNOTE }, { "fo:footnote-body", TT_FOOTNOTEBODY }, { "fo:inline", TT_INLINE }, { "fo:layout-master-set", TT_LAYOUT_MASTER_SET }, { "fo:list", TT_LIST }, { "fo:list-block", TT_LISTBLOCK }, { "fo:list-item", TT_LISTITEM }, { "fo:list-item-body", TT_LISTITEMBODY }, { "fo:list-item-label", TT_LISTITEMLABEL }, { "fo:page-sequence", TT_PAGE_SEQUENCE }, { "fo:region-body", TT_REGION_BODY }, { "fo:root", TT_DOCUMENT }, { "fo:simple-page-master", TT_SIMPLE_PAGE_MASTER }, { "fo:table", TT_TABLE }, { "fo:table-body", TT_TABLEBODY }, { "fo:table-cell", TT_TABLECELL }, { "fo:table-column", TT_TABLECOLUMN }, { "fo:table-row", TT_TABLEROW }, }; #define TokenTableSize ((sizeof(s_Tokens)/sizeof(s_Tokens[0]))) /*****************************************************************/ /*****************************************************************/ #define X_TestParseState(ps) ((m_parseState==(ps))) #define X_VerifyParseState(ps) do { if (!(X_TestParseState(ps))) \ { m_error = UT_IE_BOGUSDOCUMENT; \ UT_DEBUGMSG(("XSL-FO: X_VerifyParseState failed: %s\n", #ps)); \ return; } } while (0) #define X_CheckDocument(b) do { if (!(b)) \ { m_error = UT_IE_BOGUSDOCUMENT; \ UT_DEBUGMSG(("XSL-FO: X_CheckDocument failed: %s\n", #b)); \ return; } } while (0) #define X_CheckError(v) do { if (!(v)) \ { m_error = UT_ERROR; \ UT_DEBUGMSG(("XSL-FO: X_CheckError failed: %s\n", #v)); \ return; } } while (0) #define X_EatIfAlreadyError() do { if (m_error) return; } while (0) /*****************************************************************/ /*****************************************************************/ #define USED() do {if(used) sBuf+="; "; else used = true;} while (0) void IE_Imp_XSL_FO::startElement(const XML_Char *name, const XML_Char **atts) { UT_DEBUGMSG(("XSL-FO import: startElement: %s\n", name)); // xml parser keeps running until buffer consumed X_EatIfAlreadyError(); UT_uint32 tokenIndex = _mapNameToToken (name, s_Tokens, TokenTableSize); m_utnsTagStack.push(tokenIndex); const XML_Char * buf[3]; const XML_Char ** p_atts; buf[0] = static_cast("props"); buf[2] = NULL; UT_UTF8String sBuf; const XML_Char * pVal = NULL; bool used = false; switch (tokenIndex) { case TT_DOCUMENT: { X_VerifyParseState(_PS_Init); m_parseState = _PS_Doc; break; } case TT_SECTION: { X_VerifyParseState(_PS_Doc); m_parseState = _PS_Sec; X_CheckError(appendStrux(PTX_Section,static_cast(NULL))); break; } case TT_BLOCK: { X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_Block) || (m_parseState == _PS_List)); //blocks can be nested m_parseState = _PS_Block; m_iBlockDepth++; pVal = static_cast(_getXMLPropValue("background-color", atts)); if (pVal) { USED(); sBuf += "bgcolor:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("color", atts)); if (pVal) { USED(); sBuf += "color:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("language", atts)); if (pVal) { USED(); sBuf += "lang:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("font-size", atts)); if (pVal) { USED(); sBuf += "font-size:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("font-family", atts)); if (pVal) { USED(); sBuf += "font-family:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("font-weight", atts)); if (pVal) { USED(); sBuf += "font-weight:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("font-style", atts)); if (pVal) { USED(); sBuf += "font-style:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("font-stretch", atts)); if (pVal) { USED(); sBuf += "font-stretch:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("keep-together", atts)); if (pVal) { USED(); sBuf += "keep-together:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("keep-with-next", atts)); if (pVal) { USED(); sBuf += "keep-with-next:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("line-height", atts)); if (pVal) { USED(); sBuf += "line-height:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("margin-bottom", atts)); if (pVal) { USED(); sBuf += "margin-bottom:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("margin-top", atts)); if (pVal) { USED(); sBuf += "margin-top:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("margin-left", atts)); if (pVal) { USED(); sBuf += "margin-left:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("margin-right", atts)); if (pVal) { USED(); sBuf += "margin-right:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("text-align", atts)); if (pVal) { USED(); sBuf += "text-align:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("widows", atts)); if (pVal) { USED(); sBuf += "widows:"; sBuf += static_cast(pVal); } if(sBuf.length()) buf[1] = sBuf.utf8_str(); else buf[0] = NULL; xxx_UT_DEBUGMSG(("FO import: block props='%s'\n", sBuf.utf8_str())); // append the atts/block to the document X_CheckError(appendStrux(PTX_Block, buf)); break; } case TT_FOOTNOTE: { X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_Block) || (m_parseState == _PS_List)); break; } case TT_FOOTNOTEBODY: { X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_Block) || (m_parseState == _PS_List)); break; } // we treat both of these as if they were the same // they represent character-level formatting case TT_CHAR: case TT_INLINE: { X_VerifyParseState(_PS_Block); { pVal = static_cast(_getXMLPropValue("background-color", atts)); if (pVal) { USED(); sBuf += "bgcolor:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("color", atts)); if (pVal) { USED(); sBuf += "color:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("language", atts)); if (pVal) { USED(); sBuf += "lang:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("font-size", atts)); if (pVal) { USED(); sBuf += "font-size:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("font-family", atts)); if (pVal) { USED(); sBuf += "font-family:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("font-weight", atts)); if (pVal) { USED(); sBuf += "font-weight:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("font-style", atts)); if (pVal) { USED(); sBuf += "font-style:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("font-stretch", atts)); if (pVal) { USED(); sBuf += "font-stretch:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("keep-together", atts)); if (pVal) { USED(); sBuf += "keep-together:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("keep-with-next", atts)); if (pVal) { USED(); sBuf += "keep-with-next:"; sBuf += static_cast(pVal); } pVal = static_cast(_getXMLPropValue("text-decoration", atts)); if (pVal) { USED(); sBuf += "text-decoration:"; sBuf += static_cast(pVal); } buf[1] = sBuf.utf8_str(); xxx_UT_DEBUGMSG(("FO import: inline props='%s'\n", sBuf.utf8_str())); p_atts = static_cast(&buf[0]); X_CheckError(_pushInlineFmt(p_atts)); X_CheckError(appendFmt(&m_vecInlineFmt)); pVal = static_cast(_getXMLPropValue("id", atts)); if(pVal) { XML_Char *buf2[5]; buf2[0] = PT_TYPE_ATTRIBUTE_NAME; buf2[1] = "start"; buf2[2] = PT_NAME_ATTRIBUTE_NAME; buf2[3] = (XML_Char*)pVal; buf2[4] = NULL; X_CheckError(appendObject(PTO_Bookmark, const_cast(buf2))); buf2[1] = "end"; X_CheckError(appendObject(PTO_Bookmark, const_cast(buf2))); } } break; } case TT_BASICLINK: { XML_Char *buf[3]; buf[2] = NULL; XML_Char *p_val = NULL; p_val = (XML_Char *)_getXMLPropValue(static_cast("internal-destination"), atts); if(p_val) //internal { UT_UTF8String link = "#"; link += p_val; buf[0] = "xlink:href"; buf[1] = (XML_Char*)link.utf8_str(); X_CheckError(appendObject(PTO_Hyperlink, const_cast(buf))); break; } p_val = (XML_Char *)_getXMLPropValue(static_cast("external-destination"), atts); if(p_val) //external { if(strstr(p_val, "url(") && (UT_XML_strlen(p_val) > 5)) { p_val = p_val + 5; if(UT_XML_strlen(p_val) > 2) p_val[UT_XML_strlen(p_val) - 2] = '\0'; } buf[0] = "xlink:href"; buf[1] = (XML_Char*)p_val; X_CheckError(appendObject(PTO_Hyperlink, const_cast(buf))); } break; } //Lists: case TT_LIST: { X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_List) || (m_parseState == _PS_Sec)); m_parseState = _PS_ListSec; m_iListDepth++; break; } case TT_LISTBLOCK: { X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_List) || (m_parseState == _PS_ListSec) || (m_parseState == _PS_Sec)); m_parseState = _PS_List; m_iListBlockDepth++; break; } case TT_LISTITEM: { X_VerifyParseState(_PS_List); break; } case TT_LISTITEMLABEL: { X_VerifyParseState(_PS_List); break; } case TT_LISTITEMBODY: { X_VerifyParseState(_PS_List); break; } //Tables: case TT_TABLE: { X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_Block) || (m_parseState == _PS_List)); X_CheckError(m_TableHelperStack->tableStart(getDoc(),NULL)); m_iTableDepth++; m_parseState = _PS_Table; break; } case TT_TABLEROW: { X_VerifyParseState(_PS_Table); X_CheckError(m_TableHelperStack->trStart(NULL)); break; } case TT_TABLEBODY: case TT_TABLECOLUMN: { X_VerifyParseState(_PS_Table); break; } case TT_TABLECELL: { X_VerifyParseState(_PS_Table); m_parseState = _PS_Block; UT_uint32 rowspan = 1, colspan = 1; pVal = static_cast(_getXMLPropValue("number-columns-spanned", atts)); if(pVal) colspan = atoi(pVal); pVal = static_cast(_getXMLPropValue("number-rows-spanned", atts)); if(pVal) rowspan = atoi(pVal); X_CheckError(m_TableHelperStack->tdStart(rowspan, colspan, NULL)); break; } // here we set the page size case TT_SIMPLE_PAGE_MASTER: { X_VerifyParseState(_PS_Doc); { // TODO: we should do some cool stuff based on these prop=val keys: // margin-top, margin-bottom, margin-left, margin-right, // page-width, page-height } break; } // we should really try to get this working // at least with local graphics case TT_IMAGE: { X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_List) || (m_parseState == _PS_Sec)); UT_ASSERT(UT_TODO); break; } // these we just plain ignore case TT_LAYOUT_MASTER_SET: case TT_REGION_BODY: case TT_PAGE_SEQUENCE: { break; } default: { UT_DEBUGMSG(("Unknown or knowingly unhandled tag [%s]\n",name)); break; } } } #undef USED void IE_Imp_XSL_FO::endElement(const XML_Char *name) { UT_DEBUGMSG(("XSL-FO import: endElement: %s\n", name)); // xml parser keeps running until buffer consumed X_EatIfAlreadyError(); UT_uint32 tokenIndex = _mapNameToToken (name, s_Tokens, TokenTableSize), i = 0; m_utnsTagStack.pop((UT_sint32*)&i); if(i != tokenIndex) UT_DEBUGMSG(("DocBook: Parse error!\n")); switch (tokenIndex) { case TT_DOCUMENT: { X_VerifyParseState(_PS_Doc); m_parseState = _PS_Init; break; } case TT_SECTION: { X_VerifyParseState(_PS_Sec); m_parseState = _PS_Doc; break; } case TT_BLOCK: { UT_ASSERT_HARMLESS(m_lenCharDataSeen == 0); X_VerifyParseState( _PS_Block); m_iBlockDepth--; if(_isInListTag()) m_parseState = _PS_List; else if(m_iTableDepth) m_parseState = _PS_Block; else if(m_iBlockDepth == 0) m_parseState = _PS_Sec; X_CheckDocument(_getInlineDepth() == 0); break; } case TT_FOOTNOTE: { X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_Block) || (m_parseState == _PS_List)); break; } case TT_FOOTNOTEBODY: { X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_Block) || (m_parseState == _PS_List)); if(_isInListTag()) m_parseState = _PS_List; else if(m_iBlockDepth) m_parseState = _PS_Block; else if(m_iBlockDepth == 0) m_parseState = _PS_Sec; break; } case TT_INLINE: case TT_CHAR: { UT_ASSERT_HARMLESS(m_lenCharDataSeen==0); X_VerifyParseState(_PS_Block); X_CheckDocument(_getInlineDepth() > 0); _popInlineFmt(); X_CheckError(appendFmt(&m_vecInlineFmt)); break; } case TT_BASICLINK: { X_VerifyParseState(_PS_Block); X_CheckError(appendObject(PTO_Hyperlink, NULL)); break; } //Lists: case TT_LIST: { X_VerifyParseState(_PS_ListSec); m_iListDepth--; if(m_iBlockDepth) m_parseState = _PS_Block; else if(_isInListTag()) m_parseState = _PS_List; break; } case TT_LISTBLOCK: { X_VerifyParseState(_PS_List); m_iListBlockDepth--; if(_isInListTag()) m_parseState = _PS_List; else if((m_iListBlockDepth == 0) && (m_iListDepth > 0)) m_parseState = _PS_ListSec; else if((m_iBlockDepth > 0) || m_iTableDepth) m_parseState = _PS_Block; else if((m_iBlockDepth == 0) && (m_iListDepth == 0)) m_parseState = _PS_Sec; break; } case TT_LISTITEM: { X_VerifyParseState(_PS_List); break; } case TT_LISTITEMLABEL: { X_VerifyParseState(_PS_List); break; } case TT_LISTITEMBODY: { X_VerifyParseState(_PS_List); break; } //Tables: case TT_TABLE: { X_VerifyParseState(_PS_Table); m_iTableDepth--; if(_isInListTag()) m_parseState = _PS_List; else if(m_iBlockDepth > 0) m_parseState = _PS_Block; else m_parseState = _PS_Sec; X_CheckError(m_TableHelperStack->tableEnd()); break; } case TT_TABLEBODY: case TT_TABLEROW: { X_VerifyParseState(_PS_Table); break; } case TT_TABLECOLUMN: { X_VerifyParseState(_PS_Table); break; } case TT_TABLECELL: { X_VerifyParseState(_PS_Block); m_parseState = _PS_Table; X_CheckError(m_TableHelperStack->tdEnd()); break; } case TT_IMAGE: { X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_List) || (m_parseState == _PS_Sec)); break; } case TT_OTHER: case TT_LAYOUT_MASTER_SET: case TT_SIMPLE_PAGE_MASTER: case TT_REGION_BODY: case TT_PAGE_SEQUENCE: { break; } default: { UT_DEBUGMSG(("XSL-FO: Unknown or intentionally unhandled end tag [%s]\n",name)); break; } } } bool IE_Imp_XSL_FO::_isInListTag(void) { return ((_tagTop() == TT_LISTBLOCK) || (_tagTop() == TT_LISTITEM) || (_tagTop() == TT_LISTITEMLABEL) || (_tagTop() == TT_LISTITEMBODY)); } UT_uint32 IE_Imp_XSL_FO::_tagTop(void) { UT_sint32 i = 0; if (m_utnsTagStack.viewTop (i)) return (UT_uint32)i; return 0; } void IE_Imp_XSL_FO::charData(const XML_Char *s, int len) { if(m_iTableDepth && (m_parseState != _PS_Table)) { UT_UCS4String span = s; if(UT_strcmp(span.utf8_str(), "\n") != 0) m_TableHelperStack->Inline(span.ucs4_str(), span.length()); return; } IE_Imp_XML :: charData (s, len); }