বিষয়বস্তুতে চলুন

ব্যবহারকারী:Rahul amin roktim/b2u.php

উইকিপিডিয়া, মুক্ত বিশ্বকোষ থেকে

বিজয় বাংলা কীবোর্ড থেকে বাংলা ইউনিকোড কনভার্টার

[সম্পাদনা]

ব্যাবহার

[সম্পাদনা]
  • প্রথমে এই ফাইলের নাম / ফাংশন ইনক্লুড করতে হবে ।
  • তারপর এন্ট্রিপয়েন্ট convertBijoyToUnicode("যে স্ট্রিং রূপান্তর করতে চান") তা ইনপুট করলে আপনি আপনার ইউনিকোড ফেরত পাবেন ।

সোর্স কোড

[সম্পাদনা]
<?php

/*
    Usage:    1. include 'bijoy2unicode.php' 
                2. Call convertBijoyToUnicode($stringToFormat) function, Unicode formatted string will be returned.
  
*/




$preConversionMap = array(
    ' +' => ' ',
    'yy' => 'y', //Double Hrosh-u-Kar
    'vv' => 'v', //Double Aa-Kar
    '­­' => '­', //Double Jukto-L - L+Double-L = Triple L
    'y&' => 'y', //Hoshonto+Hrosh-u
    '„&' => '„', //Hoshonto+Ri-Kar
    '‡u' => 'u‡', //ChondroBindu Error /Typing Mistake
    'wu' => 'uw', //ChondroBindu Error /Typing Mistake
    ' ,' => ',',
    ' \\|' => '\\|',
    '\\\\ ' => '',
    ' \\\\' => '',
    '\\\\' => '',
    '\n +' => '\n',
    ' +\n' => '\n',
    '\n\n\n\n\n' => '\n\n',
    '\n\n\n\n' => '\n\n',
    '\n\n\n' => '\n\n'
);

$conversionMap = array(
    // Vowels Start
    'Av' => 'আ',
    'A' => 'অ',
    'B' => 'ই',
    'C' => 'ঈ',
    'D' => 'উ',
    'E' => 'ঊ',
    'F' => 'ঋ',
    'G' => 'এ',
    'H' => 'ঐ',
    'I' => 'ও',
    'J' => 'ঔ',
    // Constants
    'K' => 'ক',
    'L' => 'খ',
    'M' => 'গ',
    'N' => 'ঘ',
    'O' => 'ঙ',
    'P' => 'চ',
    'Q' => 'ছ',
    'R' => 'জ',
    'S' => 'ঝ',
    'T' => 'ঞ',
    'U' => 'ট',
    'V' => 'ঠ',
    'W' => 'ড',
    'X' => 'ঢ',
    'Y' => 'ণ',
    'Z' => 'ত',
    '_' => 'থ',
    '`' => 'দ',
    'a' => 'ধ',
    'b' => 'ন',
    'c' => 'প',
    'd' => 'ফ',
    'e' => 'ব',
    'f' => 'ভ',
    'g' => 'ম',
    'h' => 'য',
    'i' => 'র',
    'j' => 'ল',
    'k' => 'শ',
    'l' => 'ষ',
    'm' => 'স',
    'n' => 'হ',
    'o' => 'ড়',
    'p' => 'ঢ়',
    'q' => 'য়',
    'r' => 'ৎ',
    's' => 'ং',
    't' => 'ঃ',
    'u' => 'ঁ',
    // Numbers
    '0' => '০',
    '1' => '১',
    '2' => '২',
    '3' => '৩',
    '4' => '৪',
    '5' => '৫',
    '6' => '৬',
    '7' => '৭',
    '8' => '৮',
    '9' => '৯',
    // Kars
    '•' => 'ঙ্',
    'v' => 'া', // Aa-Kar
    'w' => 'ি', // i-Kar
    'x' => 'ী', // I-Kar
    'y' => 'ু', // u-Kar
    'z' => 'ু', // u-Kar
    '“' => 'ু', // u-kar
    '–' => 'ু', // u-kar
    '~' => 'ূ', // U-kar
    'ƒ' => 'ূ', // U-kaar
    '‚' => 'ূ', // U-kaar
    '„„' => 'ৃ', //Double Rri-kar Bug
    '„' => 'ৃ', // Ri-Kar
    '…' => 'ৃ', // Ri-Kar
    '†' => 'ে', // E-Kar
    '‡' => 'ে', // E-Kar
    'ˆ' => 'ৈ', // Oi-Kar
    '‰' => 'ৈ', // Oi-Kar
    'Š' => 'ৗ', // Ou-Kar
    '\\|' => '।', // Full-Stop
    '\\&' => '্‌', // Ho-shonto
    //	Jukto Okkhor
    '\\^' => '্ব',
    '‘' => '্তু',
    '’' => '্থ',
    '‹' => '্ক',
    'Œ' => '্ক্র',
    '”' => 'চ্',
    '—' => '্ত',
    '˜' => 'দ্',
    '™' => 'দ্',
    'š' => 'ন্',
    '›' => 'ন্',
    'œ' => '্ন',
    'Ÿ' => '্ব',
    '¡' => '্ব',
    '¢' => '্ভ',
    '£' => '্ভ্র',
    '¤' => 'ম্',
    '¥' => '্ম',
    '¦' => '্ব',
    '§' => '্ম',
    '¨' => '্য',
    '©' => 'র্',
    'ª' => '্র',
    '«' => '্র',
    '¬' => '্ল',
    '­' => '্ল',
    '®' => 'ষ্',
    '¯' => 'স্',
    '°' => 'ক্ক',
    '±' => 'ক্ট',
    '²' => 'ক্ষ্ণ', //shu(kkhno)
    '³' => 'ক্ত',
    '´' => 'ক্ম',
    'µ' => 'ক্র',
    '¶' => 'ক্ষ',
    '·' => 'ক্স',
    '¸' => 'গু',
    '¹' => 'জ্ঞ',
    'º' => 'গ্দ',
    '»' => 'গ্ধ',
    '¼' => 'ঙ্ক',
    '½' => 'ঙ্গ',
    '¾' => 'জ্জ',
    '¿' => '্ত্র',
    'À' => 'জ্ঝ',
    'Á' => 'জ্ঞ',
    'Â' => 'ঞ্চ',
    'Ã' => 'ঞ্ছ',
    'Ä' => 'ঞ্জ',
    'Å' => 'ঞ্ঝ',
    'Æ' => 'ট্ট',
    'Ç' => 'ড্ড',
    'È' => 'ণ্ট',
    'É' => 'ণ্ঠ',
    'Ê' => 'ণ্ড',
    'Ë' => 'ত্ত',
    'Ì' => 'ত্থ',
    'Í' => 'ত্ম',
    'Î' => 'ত্র',
    'Ï' => 'দ্দ',
    'Ð' => '-',
    'Ñ' => '-',
    'Ò' => '"',
    'Ó' => '"',
    'Ô' => "'",
    'Õ' => "'",
    'Ö' => '্র',
    '×' => 'দ্ধ',
    'Ø' => 'দ্ব',
    'Ù' => 'দ্ম',
    'Ú' => 'ন্ঠ',
    'Û' => 'ন্ড',
    'Ü' => 'ন্ধ',
    'Ý' => 'ন্স',
    'Þ' => 'প্ট',
    'ß' => 'প্ত',
    'à' => 'প্প',
    'á' => 'প্স',
    'â' => 'ব্জ',
    'ã' => 'ব্দ',
    'ä' => 'ব্ধ',
    'å' => 'ভ্র',
    'æ' => 'ম্ন',
    'ç' => 'ম্ফ',
    'è' => '্ন',
    'é' => 'ল্ক',
    'ê' => 'ল্গ',
    'ë' => 'ল্ট',
    'ì' => 'ল্ড',
    'í' => 'ল্প',
    'î' => 'ল্ফ',
    'ï' => 'শু',
    'ð' => 'শ্চ',
    'ñ' => 'শ্ছ',
    'ò' => 'ষ্ণ',
    'ó' => 'ষ্ট',
    'ô' => 'ষ্ঠ',
    'õ' => 'ষ্ফ',
    'ö' => 'স্খ',
    '÷' => 'স্ট',
    'ø' => 'স্ন', //(sn)eho //†ønØ
    'ù' => 'স্ফ',
    'ú' => '্প',
    'û' => 'হু',
    'ü' => 'হৃ',
    'ý' => 'হ্ন',
    'þ' => 'হ্ম'
);

$proConversionMap = array('্্' => '্');



$postConversionMap = array(
    //Colon with Number/Space
    '০ঃ' => '০:',
    '১ঃ' => '১:',
    '২ঃ' => '২:',
    '৩ঃ' => '৩:',
    '৪ঃ' => '৪:',
    '৫ঃ' => '৫:',
    '৬ঃ' => '৬:',
    '৭ঃ' => '৭:',
    '৮ঃ' => '৮:',
    '৯ঃ' => '৯:',
    ' ঃ' => ' :',
    '\nঃ' => '\n:',
    ']ঃ' => ']:',
    '\\[ঃ' => '\\[:',
    '  ' => ' ',
    'অা' => 'আ',
    '্‌্‌' => '্‌'
);

function IsBanglaDigit($c) {
    if ($c >= '০' && $c <= '৯')
        return true;
    return false;
}

function IsBanglaPreKar($c) {
    if ($c == 'ি' || $c == 'ৈ' || $c == 'ে')
        return true;
    return false;
}

function IsBanglaPostKar($c) {
    if ($c == 'া' || $c == 'ো' || $c == 'ৌ' || $c == 'ৗ' || $c == 'ু' || $c == 'ূ' || $c == 'ী' || $c == 'ৃ')
        return true;
    return false;
}

function IsBanglaKar($c) {
    if (IsBanglaPreKar($c) || IsBanglaPostKar($c))
        return true;
    return false;
}

function IsBanglaBanjonborno($c) {
    if ($c == 'ক' || $c == 'খ' || $c == 'গ' || $c == 'ঘ' || $c == 'ঙ' || $c == 'চ' || $c == 'ছ' || $c == 'জ' || $c == 'ঝ' || $c == 'ঞ' || $c == 'ট' || $c == 'ঠ' || $c == 'ড' || $c == 'ঢ' || $c == 'ণ' || $c == 'ত' || $c == 'থ' || $c == 'দ' || $c == 'ধ' || $c == 'ন' || $c == 'প' || $c == 'ফ' || $c == 'ব' || $c == 'ভ' || $c == 'ম' || $c == 'য' || $c == 'র' || $c == 'ল' || $c == 'শ' || $c == 'ষ' || $c == 'স' || $c == 'হ' || $c == 'ড়' || $c == 'ঢ়' || $c == 'য়' || $c == 'ৎ' || $c == 'ং' || $c == 'ঃ' || $c == 'ঁ')
        return true;
    return false;
}

function IsBanglaSoroborno($c) {
    if ($c == 'অ' || $c == 'আ' || $c == 'ই' || $c == 'ঈ' || $c == 'উ' || $c == 'ঊ' || $c == 'ঋ' || $c == 'ঌ' || $c == 'এ' || $c == 'ঐ' || $c == 'ও' || $c == 'ঔ')
        return true;
    return false;
}

function IsBanglaNukta($c) {
    if ($c == 'ঁ')
        return true;
    return false;
}

function IsBanglaHalant($c) {
    if ($c == '্')
        return true;
    return false;
}

function IsSpace($c) {
    if ($c == ' ' || $c == '\t' || $c == '\n' || $c == '\r')
        return true;
    return false;
}

function reArrangeUnicodeConvertedText($str) {

    mb_internal_encoding("UTF-8"); //force multi-byte UTF-8 encoding

    global $proConversionMap;

    for ($i = 0; $i < mb_strlen($str); ++$i) {

        // Change refs
        if ($i < (mb_strlen($str) - 1) && mbCharAt($str, $i) == 'র' && IsBanglaHalant(mbCharAt($str, $i + 1)) && !IsBanglaHalant(mbCharAt($str, $i - 1))) {
            $j = 1;
            while (true) {
                if ($i - $j < 0) {
                    break;
                }
                if (IsBanglaBanjonborno(mbCharAt($str, $i - $j)) && IsBanglaHalant(mbCharAt($str, $i - $j - 1))) {
                    $j += 2;
                } else if ($j == 1 && IsBanglaKar(mbCharAt($str, $i - $j))) {
                    $j++;
                } else {
                    break;
                }
            }
            $temp = subString($str, 0, $i - $j);
            $temp .= mbCharAt($str, $i);
            $temp .= mbCharAt($str, $i + 1);
            $temp .= subString($str, $i - $j, $i);
            $temp .= subString($str, $i + 2, mb_strlen($str));
            $str = $temp;
            $i += 1;
            continue;
        }
    }

    $str = doCharMap($str, $proConversionMap);



    for ($i = 0; $i < mb_strlen($str); ++$i) {


        if ($i < mb_strlen($str) - 1 && mbCharAt($str, $i) == 'র' && IsBanglaHalant(mbCharAt($str, $i + 1)) && !IsBanglaHalant(mbCharAt($str, $i - 1)) && IsBanglaHalant(mbCharAt($str, $i + 2))) {
            $j = 1;
            while (true) {
                if ($i - $j < 0) {
                    break;
                }
                if (IsBanglaBanjonborno(mbCharAt($str, $i - $j)) && IsBanglaHalant(mbCharAt($str, $i - $j - 1))) {
                    $j += 2;
                } else if ($j == 1 && IsBanglaKar(mbCharAt($str, $i - $j))) {
                    $j++;
                } else {
                    break;
                }
            }
            $temp = subString($str, 0, $i - $j);
            $temp .= mbCharAt($str, $i);
            $temp .= mbCharAt($str, $i + 1);
            $temp .= subString($str, $i - $j, $i);
            $temp .= subString($str, $i + 2, mb_strlen($str));
            $str = $temp;
            $i += 1;
            continue;
        }

        // for 'Vowel + HALANT + Consonant' it should be 'HALANT + Consonant + Vowel'
        if ($i > 0 && mbCharAt($str, $i) == '\u09CD' && (IsBanglaKar(mbCharAt($str, $i - 1)) || IsBanglaNukta(mbCharAt($str, $i - 1))) && $i < mb_strlen($str) - 1) {
            $temp = subString($str, 0, $i - 1);
            $temp .= mbCharAt($str, $i);
            $temp .= mbCharAt($str, $i + 1);
            $temp .= mbCharAt($str, $i - 1);
            $temp .= subString($str, $i + 2, mb_strlen($str));
            $str = $temp;
        }

        // for 'RA (\u09B0) + HALANT + Vowel' it should be 'Vowel + RA (\u09B0) + HALANT'
        if ($i > 0 && $i < mb_strlen($str) - 1 && mbCharAt($str, $i) == '\u09CD' && mbCharAt($str, $i - 1) == '\u09B0' && mbCharAt($str, $i - 2) != '\u09CD' && IsBanglaKar(mbCharAt($str, $i + 1))) {
            $temp = subString($str, 0, $i - 1);
            $temp .= mbCharAt($str, $i + 1);
            $temp .= mbCharAt($str, $i - 1);
            $temp .= mbCharAt($str, $i);
            $temp .= subString($str, $i + 2, mb_strlen($str));
            $str = $temp;
        }
        

        // Change pre-kar to post format suitable for unicode
        if ($i < mb_strlen($str) - 1 && IsBanglaPreKar(mbCharAt($str, $i)) && IsSpace(mbCharAt($str, $i + 1)) == false) {
            $temp = subString($str, 0, $i);

            $j = 1;

            while (($i + $j) < mb_strlen($str) - 1 && IsBanglaBanjonborno(mbCharAt($str, $i + $j))) {
                if (($i + $j) < mb_strlen($str) && IsBanglaHalant(mbCharAt($str, $i + $j + 1))) {
                    $j += 2;
                } else {
                    break;
                }
            }
            $temp .= subString($str, $i + 1, $i + $j + 1);

            $l = 0;
            if (mbCharAt($str, $i) == 'ে' && mbCharAt($str, $i + $j + 1) == 'া') {
                $temp .= "ো";
                $l = 1;
            } else if (mbCharAt($str, $i) == 'ে' && mbCharAt($str, $i + $j + 1) == "ৗ") {
                $temp .= "ৌ";
                $l = 1;
            } else {
                $temp .= mbCharAt($str, $i);
            }
            $temp .= subString($str, $i + $j + $l + 1, mb_strlen($str));
            $str = $temp;
            $i += $j;
        }

        // nukta should be placed after kars
        if ($i < mb_strlen($str) - 1 && IsBanglaNukta(mbCharAt($str, $i)) && IsBanglaPostKar(mbCharAt($str, $i + 1))) {
            $temp = subString($str, 0, $i);
            $temp .= mbCharAt($str, $i + 1);
            $temp .= mbCharAt($str, $i);
            $temp .= subString($str, $i + 2, mb_strlen($str));
            $str = $temp;
        }
    }

    return $str;
}


//main conversion function
function convertBijoyToUnicode($srcString) {

    global $preConversionMap, $conversionMap, $postConversionMap;


    $srcString = doCharMap($srcString, $preConversionMap);
    $srcString = doCharMap($srcString, $conversionMap);
    $srcString = reArrangeUnicodeConvertedText($srcString);
    $srcString = doCharMap($srcString, $postConversionMap);
    return $srcString;
}

function doCharMap($text, $charMap) {
    foreach ($charMap as $srcKey => $keyVal) {
        $format = "@$srcKey@";
        $text = preg_replace($format, $keyVal, $text);
    }

    return $text;
}

//returns the $i-th byte of the multi-byte string $str
function mbCharAt($str, $i) {
    return mb_substr($str, $i, 1);
}

//returns the javascript 'substring' method equivalent
function subString($string, $from, $to) {
    return mb_substr($string, $from, $to - $from);
}

?>