/* Crossmark processing library * Copyright (C) 2006, Robert Staudinger <robert.staudinger@gmail.com> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ /*! * \file cm-scanner-private.hh * \brief Crossmark scanner. * \internal */ #ifndef CM_SCANNER_PRIVATE_HH #define CM_SCANNER_PRIVATE_HH #include <glib.h> #include <list> #include <crossmark/cm-features.hh> #include <crossmark/cm-stream.hh> #include <crossmark/cm-string-private.hh> namespace crossmark { /*! * \internal * \brief Tokens recognised by the scanner. * * This namespace contains the various token types recognised by the * scanner. */ namespace token { /*! * \brief Abstract token base class. */ class Token { public: enum Class { START, END, TEXT, INDENT, STYLE, NEWLINE, PARAGRAPH, HEADING }; virtual ~Token () {} virtual Token::Class getClass () const = 0; virtual gchar const * toHtml () const = 0; virtual gchar const * toString () const = 0; protected: Token () {} }; /*! * \brief Start-of-file. */ class Start : public Token { public: Start () {} virtual ~Start () {} virtual Token::Class getClass () const { return Token::START; } virtual gchar const * toHtml () const { return "<html><body>\n"; } virtual gchar const * toString () const { return NULL; } }; /*! * \brief End-of-file. */ class End : public Token { public: End () {} virtual ~End () {} virtual Token::Class getClass () const { return Token::END; } virtual gchar const * toHtml () const { return "\n</body></html>\n"; } virtual gchar const * toString () const { return NULL; } }; /*! * \brief Text token. */ class Text : public Token { public: Text (gchar const *text) : _text (text) { g_assert (text); } virtual ~Text () {} virtual void append (gunichar c) { _text.append (c); } virtual gchar const * getBuffer () { return _text.c_str (); } virtual const String & getString () { return _text; } virtual Token::Class getClass () const { return Token::TEXT; } virtual gchar const * toHtml () const { return _text.c_str (); } virtual gchar const * toString () const { return _text.c_str (); } private: String _text; }; /*! * \brief Indentation token. */ class Indent : public Token { public: Indent () {} virtual ~Indent () {} virtual Token::Class getClass () const { return Token::INDENT; } virtual gchar const * toHtml () const { return "\t "; } /* * \todo Might become an issue with whitespace-indentation. */ virtual gchar const * toString () const { return "\t"; } }; /*! * \brief Style token. */ class Style : public Token { public: // don't change order // see document::Style::Type enum Type { ASTERISK, SLASH, BACKTICK, UNDERSCORE }; enum Pos { LEFT, CENTER, RIGHT }; Style (Type type, Pos pos) : _type (type), _pos (pos) { if (_type == ASTERISK && _pos == LEFT) _text = " *"; else if (_type == ASTERISK && _pos == CENTER) _text = "*"; else if (_type == ASTERISK && _pos == RIGHT) _text = "* "; else if (_type == SLASH && _pos == LEFT) _text = " /"; else if (_type == SLASH && _pos == CENTER) _text = "/"; else if (_type == SLASH && _pos == RIGHT) _text = "/ "; else if (_type == BACKTICK && _pos == LEFT) _text = " `"; else if (_type == BACKTICK && _pos == CENTER) _text = "`"; else if (_type == BACKTICK && _pos == RIGHT) _text = "` "; else if (_type == UNDERSCORE && _pos == LEFT) _text = " _"; else if (_type == UNDERSCORE && _pos == CENTER) _text = "_"; else if (_type == UNDERSCORE && _pos == RIGHT) _text = " _"; else g_assert (FALSE); } virtual ~Style () {} virtual Type getType () const { return _type; } virtual Pos getPos () const { return _pos; } virtual Token::Class getClass () const { return Token::STYLE; } virtual gchar const * toHtml () const { if (_type == ASTERISK && _pos == LEFT) return "<b>"; else if (_type == ASTERISK && _pos == CENTER) return "*"; else if (_type == ASTERISK && _pos == RIGHT) return "</b>"; else if (_type == SLASH && _pos == LEFT) return "<i>"; else if (_type == SLASH && _pos == CENTER) return "*"; else if (_type == SLASH && _pos == RIGHT) return "</i>"; else if (_type == BACKTICK && _pos == LEFT) return "<code>"; else if (_type == BACKTICK && _pos == CENTER) return "`"; else if (_type == BACKTICK && _pos == RIGHT) return "</code>"; else if (_type == UNDERSCORE && _pos == LEFT) return "<u>"; else if (_type == UNDERSCORE && _pos == CENTER) return "_"; else if (_type == UNDERSCORE && _pos == RIGHT) return "</u>"; else g_assert (FALSE); } virtual gchar const * toString () const { return _text.c_str (); } private: Type _type; Pos _pos; String _text; }; /*! * \brief Newline token. */ class Newline : public Token { public: Newline () {} virtual ~Newline () {} virtual Token::Class getClass () const { return Token::NEWLINE; } virtual gchar const * toHtml () const { return "<br />\n"; } virtual gchar const * toString () const { return "\n"; } }; /*! * \brief Paragraph break. */ class Paragraph : public Token { public: Paragraph () {} virtual ~Paragraph () {} virtual Token::Class getClass () const { return Token::PARAGRAPH; } virtual gchar const * toHtml () const { return "<br /><br />\n\n"; } virtual gchar const * toString () const { return "\n\n"; } }; /*! * \brief Heading. */ class Heading : public Token { public: enum Type { HEADING_1 = 1, HEADING_2, HEADING_3, HEADING_4 }; Heading (Type type) : _type (type) { switch (_type) { case HEADING_1: _html = "<h1>"; _text = "<h>"; break; case HEADING_2: _html = "<h2>"; _text = "<hh>"; break; case HEADING_3: _html = "<h3>"; _text = "<hhh>"; break; case HEADING_4: _html = "<h4>"; _text = "<hhhh>"; break; default: g_assert_not_reached (); } } virtual ~Heading () {} virtual Token::Class getClass () const { return Token::HEADING; } virtual Heading::Type getType () const { return _type; } virtual gchar const * toHtml () const { return _html; } virtual gchar const * toString () const { return _text; } private: Type _type; gchar const *_html; gchar const *_text; }; /*! * \brief Token factory interface. */ class FactoryIface { public: virtual ~FactoryIface () {} virtual Token * createTokenImpl (Token::Class klass) const = 0; virtual Style * createStyleTokenImpl (Style::Type type, Style::Pos pos) const = 0; virtual Text * createTextTokenImpl (gchar const *text) const = 0; }; /*! * \brief Token factory. * * \todo Use Factory to create tokens, then get rid of Token::toHtml() and hook factory from cm-scan. */ class Factory : public FactoryIface { public: virtual ~Factory () {} static Factory & instance (); virtual void hook (const FactoryIface *factory); virtual void unhook (const FactoryIface *factory); virtual Token * createToken (Token::Class klass) const; virtual Style * createStyleToken (Style::Type type, Style::Pos pos) const; virtual Text * createTextToken (gchar const *text) const; // token factory virtual Token * createTokenImpl (Token::Class klass) const; virtual Style * createStyleTokenImpl (Style::Type type, Style::Pos pos) const; virtual Text * createTextTokenImpl (gchar const *text) const; protected: Factory (); private: std::list<const FactoryIface *> _factories; }; }; // namespace tokens /*! * \internal * \brief The scanner provides token-based input. * * Initial scanner grammar * Yeah, this is flawed, but I'm trying to treat markup stuff * as a single character (by using lookahead). * \verbatim {charset} := {UTF-8} \ {" *", "* ", " /", "/ ", " `", "` ", " _", "_ "} token := paragraph | style | text | sof | eof paragraph := '\n' '\n' '\n'* h1 := '\n' "=" "="* h2 := '\n' "-" "-"* h3 := paragraph "===" " " h4 := paragraph "====" " " style := " *" | "* " | " /" | "/ " | " `" | "` " | " _" | "_ " text := {charset}* \endverbatim * * \todo The scanner supports only '\n' newlines for now. */ class Scanner { public: Scanner (gchar const *file); Scanner (stream::Input &istream); virtual ~Scanner (); virtual token::Token * fetchToken (); protected: virtual token::Token * scanEnd (); virtual token::Token * scanEnd (gunichar c); virtual token::Token * scanHeading (); virtual token::Token * scanNewline (); virtual token::Token * scanIndent (); virtual token::Token * scanStyle (gunichar c2, gunichar &tail); private: token::Token * _return (token::Token *token) { _prev = token->getClass (); return token; } stream::Input &_istream; gboolean _ownStream; token::Token::Class _prev; token::Token *_next; gunichar _c1; }; }; // namespace crossmark #endif // CM_SCANNER_PRIVATE_HH