# Created by Jon Dehdari 2002
# Perl 5.6
# Converts Isiri 3342 Farsi text to Unicode HTML Page
# To Do: auto-insert alef before short vowels, change final yeh to farsi style yeh

# Lookup table from a single ISIRI 3342 byte (the key) to the literal text
# written to the output page for that byte (the value).  The values are the
# characters exactly as they are stored in this source file.
%isiri2unicode = (
    # Digits (Farsi specific).
    "\xb0" => '۰',    # EXTENDED ARABIC-INDIC DIGIT ZERO
    "\xb1" => '۱',    # EXTENDED ARABIC-INDIC DIGIT ONE
    "\xb2" => '۲',    # EXTENDED ARABIC-INDIC DIGIT TWO
    "\xb3" => '۳',    # EXTENDED ARABIC-INDIC DIGIT THREE
    "\xb4" => '۴',    # EXTENDED ARABIC-INDIC DIGIT FOUR
    "\xb5" => '۵',    # EXTENDED ARABIC-INDIC DIGIT FIVE
    "\xb6" => '۶',    # EXTENDED ARABIC-INDIC DIGIT SIX
    "\xb7" => '۷',    # EXTENDED ARABIC-INDIC DIGIT SEVEN
    "\xb8" => '۸',    # EXTENDED ARABIC-INDIC DIGIT EIGHT
    "\xb9" => '۹',    # EXTENDED ARABIC-INDIC DIGIT NINE

    # Letters.
    "\xc1" => 'ا',    # ARABIC LETTER ALEF
    "\xc3" => 'ب',    # ARABIC LETTER BEH
    "\xc4" => 'پ',    # ARABIC LETTER PEH       # Farsi specific
    "\xc5" => 'ت',    # ARABIC LETTER TEH
    "\xc6" => 'ث',    # ARABIC LETTER THEH
    "\xc7" => 'ج',    # ARABIC LETTER JEEM
    "\xc8" => 'چ',    # ARABIC LETTER TCHEH     # Farsi specific
    "\xc9" => 'ح',    # ARABIC LETTER HAH
    "\xca" => 'خ',    # ARABIC LETTER KHAH
    "\xcb" => 'د',    # ARABIC LETTER DAL
    "\xcc" => 'ذ',    # ARABIC LETTER THAL
    "\xcd" => 'ر',    # ARABIC LETTER REH
    "\xce" => 'ز',    # ARABIC LETTER ZAIN
    "\xcf" => 'ژ',    # ARABIC LETTER JEH       # Farsi specific
    "\xd0" => 'س',    # ARABIC LETTER SEEN
    "\xd1" => 'ش',    # ARABIC LETTER SHEEN
    "\xd2" => 'ص',    # ARABIC LETTER SAD
    "\xd3" => 'ض',    # ARABIC LETTER DAD
    "\xd4" => 'ط',    # ARABIC LETTER TAH
    "\xd5" => 'ظ',    # ARABIC LETTER ZAH
    "\xd6" => 'ع',    # ARABIC LETTER AIN
    "\xd7" => 'غ',    # ARABIC LETTER GHAIN
    "\xd8" => 'ف',    # ARABIC LETTER FEH
    "\xd9" => 'ق',    # ARABIC LETTER QAF
    "\xda" => 'ک',    # ARABIC LETTER KEHEH     # Farsi specific
    "\xdb" => 'گ',    # ARABIC LETTER GAF       # Farsi specific
    "\xdc" => 'ل',    # ARABIC LETTER LAM
    "\xdd" => 'م',    # ARABIC LETTER MEEM
    "\xde" => 'ن',    # ARABIC LETTER NOON
    # Note: "u", "v", and "w" are all "vav"s in Farsi orthography.
    "\xdf" => 'و',    # ARABIC LETTER WAW
    # ARABIC LETTER HEH WITH YEH ABOVE ('ۀ') has no mapping in this table yet.
    "\xe0" => 'ه',    # ARABIC LETTER HEH
    # Note: "i" and "y" are both "ya"s in Farsi orthography.
    "\xe1" => 'ي',    # ARABIC LETTER YEH
    # Note: "`" and "I" are the same in Farsi orthography.
    "\xfb" => 'ئ',    # ARABIC LETTER YEH WITH HAMZA ABOVE

    # Short vowels.
    # Note: the short vowels "a", "o" and "e" do not normally appear in
    # Farsi orthography.
    "\xf0" => 'َ',    # ARABIC FATHA
    "\xf2" => 'ُ',    # ARABIC DAMMA
    "\xf1" => 'ِ',    # ARABIC KASRA

    # Alef/hamza forms.
    "\xc0" => 'آ',    # ARABIC LETTER ALEF WITH MADDA ABOVE
    "\xc2" => 'ء',    # ARABIC LETTER HAMZA
    "\xf8" => 'أ',    # ARABIC LETTER ALEF WITH HAMZA ABOVE
    "\xfa" => 'ؤ',    # ARABIC LETTER WAW WITH HAMZA ABOVE
    "\xf9" => 'إ',    # ARABIC LETTER ALEF WITH HAMZA BELOW

    # Punctuation.
    "\xa5" => '٪',    # ARABIC PERCENT SIGN
    # This key used to be written "\xag".  "g" is not a hex digit, so Perl
    # read that as "\x0a" followed by "g" and the entry could never match a
    # single input byte.  0xAC is the ARABIC COMMA position in ISIRI 3342.
    "\xac" => '،',    # ARABIC COMMA
    "\xbb" => '؛',    # ARABIC SEMICOLON
    "\xbf" => '؟',    # ARABIC QUESTION MARK
    "\xa0" => ' ',    # SPACE
    "\xa8" => '(',    # LEFT PARENTHESIS
    "\xa9" => ')',    # RIGHT PARENTHESIS
    "\xad" => '-',    # HYPHEN-MINUS
    "\xa6" => '.',    # FULL STOP
    "\xba" => ':',    # COLON
);

# Command line: <input file> <output file>.  Test for defined/non-empty rather
# than plain truth so that a file literally named "0" is still accepted.
$in = shift;
die "Provide a valid input file argument\n"
    unless defined $in and length $in;
$out = shift;
die "Provide an output file argument\n"
    unless defined $out and length $out;
die "Input and output files cannot be the same\n" if $in eq $out;

# Three-argument open keeps characters in the file names from being read as
# open modes; fail loudly instead of silently reading/writing nothing.
# The bareword handles IN and OUT are kept because the rest of the script
# uses them.
open(IN,  '<', $in)  or die "Cannot open input file '$in': $!\n";
open(OUT, '>', $out) or die "Cannot open output file '$out': $!\n";

#prints the top part of an html page
print OUT ( '', "\n", '', "\n", "\n", "