#!/usr/bin/perl
# Written by Jon Dehdari 2002-2004
# Perl 5.6 and newer
#
# Converts Romanized Persian text to a web page with HTML Unicode characters.
#
# Syntax: ./roman2unicode_1-4.pl < input.txt > output.html

# Lookup table: each key is a single character of the romanization scheme,
# each value is the Perso-Arabic text that replaces it.  Several roman
# characters may share one target (e.g. 'A' and '|' both give ALEF; 'u', 'v'
# and 'w' all give WAW).
%roman2unicode = (

    # --- Digits (Persian-specific EXTENDED ARABIC-INDIC DIGITs) ---
    '0' => '۰',    # EXTENDED ARABIC-INDIC DIGIT ZERO
    '1' => '۱',    # EXTENDED ARABIC-INDIC DIGIT ONE
    '2' => '۲',    # EXTENDED ARABIC-INDIC DIGIT TWO
    '3' => '۳',    # EXTENDED ARABIC-INDIC DIGIT THREE
    '4' => '۴',    # EXTENDED ARABIC-INDIC DIGIT FOUR
    '5' => '۵',    # EXTENDED ARABIC-INDIC DIGIT FIVE
    '6' => '۶',    # EXTENDED ARABIC-INDIC DIGIT SIX
    '7' => '۷',    # EXTENDED ARABIC-INDIC DIGIT SEVEN
    '8' => '۸',    # EXTENDED ARABIC-INDIC DIGIT EIGHT
    '9' => '۹',    # EXTENDED ARABIC-INDIC DIGIT NINE

    # --- Letters ---
    'A' => 'ا',    # ARABIC LETTER ALEF
    '|' => 'ا',    # ARABIC LETTER ALEF
    'b' => 'ب',    # ARABIC LETTER BEH
    'p' => 'پ',    # ARABIC LETTER PEH
    't' => 'ت',    # ARABIC LETTER TEH
    'V' => 'ث',    # ARABIC LETTER THEH
    'j' => 'ج',    # ARABIC LETTER JEEM
    'c' => 'چ',    # ARABIC LETTER TCHEH
    'H' => 'ح',    # ARABIC LETTER HAH
    'x' => 'خ',    # ARABIC LETTER KHAH
    'd' => 'د',    # ARABIC LETTER DAL
    'L' => 'ذ',    # ARABIC LETTER THAL
    'r' => 'ر',    # ARABIC LETTER REH
    'z' => 'ز',    # ARABIC LETTER ZAIN
    'J' => 'ژ',    # ARABIC LETTER JEH
    's' => 'س',    # ARABIC LETTER SEEN
    'C' => 'ش',    # ARABIC LETTER SHEEN
    'S' => 'ص',    # ARABIC LETTER SAD
    'D' => 'ض',    # ARABIC LETTER DAD
    'T' => 'ط',    # ARABIC LETTER TAH
    'Z' => 'ظ',    # ARABIC LETTER ZAH
    'E' => 'ع',    # ARABIC LETTER AIN
    'G' => 'غ',    # ARABIC LETTER GHAIN
    'f' => 'ف',    # ARABIC LETTER FEH
    'q' => 'ق',    # ARABIC LETTER QAF
    'K' => 'ک',    # ARABIC LETTER KEHEH
    'k' => 'ك',    # ARABIC LETTER KAF
    'g' => 'گ',    # ARABIC LETTER GAF
    'l' => 'ل',    # ARABIC LETTER LAM
    'm' => 'م',    # ARABIC LETTER MEEM
    'n' => 'ن',    # ARABIC LETTER NOON
    'u' => 'و',    # ARABIC LETTER WAW
    'v' => 'و',    # ARABIC LETTER WAW
    'w' => 'و',    # ARABIC LETTER WAW
    'h' => 'ه',    # ARABIC LETTER HEH
    'X' => 'ۀ',    # ARABIC LETTER HEH WITH YEH ABOVE

    # For Arabic, comment out the active 'i' line and uncomment the one
    # directly below.
    #'i' => 'ي',   # ARABIC LETTER YEH
    'i' => 'ی',    # ARABIC LETTER FARSI YEH
    # This should not be used, except as a temporary crutch:
    #'y' => 'ی',   # ARABIC LETTER FARSI YEH
    'I' => 'ئ',    # ARABIC LETTER YEH WITH HAMZA ABOVE

    # --- Vowel marks and other diacritics ---
    'a' => 'َ',    # ARABIC FATHA
    'o' => 'ُ',    # ARABIC DAMMA
    'e' => 'ِ',    # ARABIC KASRA
    '~' => 'ّ',    # ARABIC SHADDA

    # --- Punctuation and spacing ---
    ',' => '،',    # ARABIC COMMA
    ';' => '؛',    # ARABIC SEMICOLON
    '?' => '؟',    # ARABIC QUESTION MARK
    ' ' => ' ',    # space
    '.' => '.',    # period

    # --- Additional letters and marks ---
    ']' => 'آ',    # ARABIC LETTER ALEF WITH MADDA ABOVE
    'M' => 'ء',    # ARABIC LETTER HAMZA
    'N' => 'ً',    # ARABIC FATHATAN
    'U' => 'ؤ',    # ARABIC LETTER WAW WITH HAMZA ABOVE
    '-' => '‌',    # ZERO WIDTH NON-JOINER

    # add more characters later!
);

# Disabled file-argument handling (the script reads STDIN and writes STDOUT,
# per the usage line at the top of the file):
#$in = shift || die "Provide a valid input file argument\n";
#$out= shift || die "Provide an output file argument\n";
#$in ne $out || die "Input and output files cannot be the same\n";
#open(IN,$in);
#open(OUT,">$out");

# Print the top part of an HTML page.
print STDOUT ( '', "\n", '', "\n", "\n", "