首页 > 电脑网络 > 编程知识 > gb2312转utf8的PHP代码

gb2312转utf8的PHP代码
2008-08-13 02:29:15   来源:   点击:

    <?php
    // Build the GB2312 <-> Unicode lookup tables once, as globals, so the
    // conversion functions do not have to re-read the mapping file on every call.
    //   $____global_codetable  : 7-bit GB2312 code (int) => Unicode code point (4 hex digits, string)
    //   $____global_codetable2 : Unicode code point (int) => 7-bit GB2312 code (int)
    // The mapping file gb2312.txt is looked up in the directory of the entry
    // script. Each data line is read positionally: the GB2312 hex digits are at
    // offsets 2..5 and the Unicode hex digits at offsets 9..12 (i.e. the line
    // looks like "0xGGGG<sep>0xUUUU ..."); lines starting with '#' are comments.
    $____global_codetable=array();
    $____global_codetable2=array();
    $____global_filename=pathinfo($_SERVER["SCRIPT_FILENAME"]);
    $____global_filename=$____global_filename["dirname"]."/gb2312.txt";
    $____global_tmp=is_readable($____global_filename) ? file($____global_filename) : false;
    if ($____global_tmp===false)
    {
     // Without the table every lookup fails, so report it instead of continuing silently.
     trigger_error("gb2312.txt not found or unreadable: ".$____global_filename, E_USER_WARNING);
     $____global_tmp=array();
    }
    foreach ($____global_tmp as $____global_line)
    {
     // Skip comment lines and lines too short to hold both hex fields.
     if (strlen($____global_line)<13 || $____global_line[0]==='#')
      continue;
     $____global_gbhex=substr($____global_line,2,4);
     $____global_unihex=substr($____global_line,9,4);
     // Fill both directions in a single pass over the file.
     $____global_codetable[hexdec($____global_gbhex)]=$____global_unihex;
     $____global_codetable2[hexdec($____global_unihex)]=hexdec($____global_gbhex);
    }
    // Drop the temporaries so only the two tables remain in the global scope.
    unset($____global_filename);
    unset($____global_tmp);
    unset($____global_line);
    unset($____global_gbhex);
    unset($____global_unihex);


    /**
     * Convert text containing numeric character references (e.g. "&#x4E2D;"
     * or "&#20013;", mixed with plain ASCII) into GB2312-encoded text.
     *
     * Useful when re-encoding XML; note that the function does not touch any
     * encoding declaration inside the XML itself.
     *
     * Only well-formed references whose code point has an entry in the global
     * $____global_codetable2 table are replaced by their two GB2312 bytes.
     * Everything else -- malformed "&#...;" spans, ASCII-range references such
     * as "&#60;", and code points with no GB2312 mapping -- is left exactly as
     * written, so no information is lost and escaped markup stays escaped.
     *
     * @param string $un Text with numeric character references.
     * @return string GB2312-encoded text (the input itself when it is empty/blank).
     */
    function unicode2gb($un)
    {
     if(!trim($un))
      return $un;
     // Digit counts are capped so the parsed value always fits in an int key.
     return preg_replace_callback(
      '/&#(?:[xX]([0-9A-Fa-f]{1,6})|([0-9]{1,7}));/',
      function ($m)
      {
       global $____global_codetable2;
       // Group 2 is only present (and non-empty) for the decimal form.
       $code=(isset($m[2]) && $m[2]!=='') ? intval($m[2]) : hexdec($m[1]);
       if (!isset($____global_codetable2[$code]))
        return $m[0];
       // The table stores the 7-bit form; setting both high bits gives the
       // GB2312 (EUC-CN) byte pair.
       $gbcode=0x8080|$____global_codetable2[$code];
       return chr(($gbcode>>8)&0xFF).chr($gbcode&0xFF);
      },
      $un
     );
    }

    /**
     * Convert GB2312-encoded text (which may contain plain ASCII) into ASCII
     * text where every double-byte character is written as a hexadecimal
     * numeric character reference such as "&#x4E2D;".
     *
     * Useful when re-encoding XML; note that the function does not touch any
     * encoding declaration inside the XML itself.
     *
     * Byte pairs are looked up in the global $____global_codetable table.
     * A pair with no mapping, or a lead byte that is not followed by another
     * high byte (including a lone lead byte at the end of the input), yields
     * "&#xFFFD;" (the Unicode replacement character). In the latter case only
     * the lead byte is consumed so the following ASCII byte is kept.
     *
     * @param string $gb GB2312-encoded text.
     * @return string ASCII text with numeric character references (the input
     *                itself when it is empty/blank).
     */
    function gb2unicode($gb)
    {
       if(!trim($gb))
          return $gb;
       global $____global_codetable;
       $gb=(string)$gb;
       $utf="";
       $len=strlen($gb);
       $i=0;
       // Walk the string by index instead of repeatedly re-slicing it.
       while($i<$len)
        {
         $lead=ord($gb[$i]);
         if ($lead<=127)
            {
             // Plain ASCII is copied through unchanged.
             $utf.=$gb[$i];
             $i++;
             continue;
            }
         $trail=($i+1<$len) ? ord($gb[$i+1]) : 0;
         if ($trail<=127)
            {
             // Broken pair: flag the lead byte, re-scan the next byte as ASCII.
             $utf.="&#xFFFD;";
             $i++;
             continue;
            }
         // The table is keyed by the 7-bit form (both high bits cleared).
         $key=(($lead<<8)|$trail)&0x7F7F;
         $utf.="&#x".(isset($____global_codetable[$key]) ? $____global_codetable[$key] : "FFFD").";";
         $i+=2;
        }
       return $utf;
    }

    /**
     * Convert UTF-8 encoded text into GB2312 encoded text. Unlike unicode2gb()
     * this is a binary re-encoding, not a conversion of "&#...;" references.
     *
     * Code points are looked up in the global $____global_codetable2 table.
     *  - ASCII bytes are copied through unchanged.
     *  - Well-formed 2-, 3- and 4-byte sequences are decoded; a code point
     *    without a GB2312 mapping is written as "?".
     *  - Stray continuation bytes, invalid lead bytes and the lead byte of a
     *    truncated or malformed sequence are dropped.
     *
     * @param string $utf8 UTF-8 encoded text.
     * @return string GB2312 encoded text (the input itself when it is empty/blank).
     */
    function utf82gb($utf8)
    {
       if(!trim($utf8))
          return $utf8;
       global $____global_codetable2;
       $utf8=(string)$utf8;
       $gb="";
       $len=strlen($utf8);
       $i=0;
       while($i<$len)
       {
        $d=ord($utf8[$i]);
        if (($d&0x80)==0)
        {
         // Single-byte (ASCII) character.
         $gb.=$utf8[$i];
         $i++;
         continue;
        }
        // Work out how many continuation bytes follow and the payload bits
        // carried by the lead byte.
        if (($d&0xE0)==0xC0)
        {
         $extra=1;
         $cp=$d&0x1F;
        }
        else if (($d&0xF0)==0xE0)
        {
         $extra=2;
         $cp=$d&0x0F;
        }
        else if (($d&0xF8)==0xF0)
        {
         $extra=3;
         $cp=$d&0x07;
        }
        else
        {
         // Stray continuation byte or invalid lead byte: skip it.
         $i++;
         continue;
        }
        // Every continuation byte must exist and look like 10xxxxxx.
        $ok=($i+$extra<$len);
        for ($k=1;$ok && $k<=$extra;$k++)
        {
         $c=ord($utf8[$i+$k]);
         if (($c&0xC0)!=0x80)
          $ok=false;
         else
          $cp=($cp<<6)|($c&0x3F);
        }
        if (!$ok)
        {
         // Truncated/malformed sequence: drop the lead byte and re-scan.
         $i++;
         continue;
        }
        if (isset($____global_codetable2[$cp]))
        {
         // The table stores the 7-bit form; set both high bits for GB2312.
         $code=0x8080|$____global_codetable2[$cp];
         $gb.=chr(($code>>8)&0xFF);
         $gb.=chr($code&0xFF);
        }
        else
        {
         $gb.="?";
        }
        $i+=$extra+1;
       }
       return $gb;
    }

    /**
     * Convert GB2312 encoded text into UTF-8 encoded text. Unlike gb2unicode()
     * this is a binary re-encoding, not a conversion to "&#...;" references.
     *
     * Byte pairs are looked up in the global $____global_codetable table,
     * whose values are Unicode code points written as hex strings.
     *  - ASCII bytes are copied through unchanged.
     *  - A pair with no mapping is written as U+FFFD (bytes EF BF BD).
     *  - A lead byte that is not followed by another high byte (including a
     *    lone lead byte at the end of the input) is also written as U+FFFD,
     *    and only the lead byte is consumed.
     *
     * @param string $gb GB2312 encoded text.
     * @return string UTF-8 encoded text (the input itself when it is empty/blank).
     */
    function gb2utf8($gb)
    {
       if(!trim($gb))
          return $gb;
       global $____global_codetable;
       $gb=(string)$gb;
       $utf8="";
       $len=strlen($gb);
       $i=0;
       while($i<$len)
       {
        $lead=ord($gb[$i]);
        if ($lead<=127)
        {
         // Plain ASCII is copied through unchanged.
         $utf8.=$gb[$i];
         $i++;
         continue;
        }
        $trail=($i+1<$len) ? ord($gb[$i+1]) : 0;
        // The table is keyed by the 7-bit form (both high bits cleared).
        $key=(($lead<<8)|$trail)&0x7F7F;
        if ($trail<=127)
        {
         // Broken pair: emit a replacement, re-scan the next byte as ASCII.
         $code=0xFFFD;
         $i++;
        }
        else
        {
         $code=isset($____global_codetable[$key]) ? hexdec($____global_codetable[$key]) : 0xFFFD;
         $i+=2;
        }
        // Encode the code point with the shortest UTF-8 form.
        if ($code<0x80)
        {
         $utf8.=chr($code);
        }
        else if ($code<0x800)
        {
         $utf8.=chr(0xC0|($code>>6));
         $utf8.=chr(0x80|($code&0x3F));
        }
        else if ($code<0x10000)
        {
         $utf8.=chr(0xE0|($code>>12));
         $utf8.=chr(0x80|(($code>>6)&0x3F));
         $utf8.=chr(0x80|($code&0x3F));
        }
        else if ($code<0x200000)
        {
         $utf8.=chr(0xF0|($code>>18));
         $utf8.=chr(0x80|(($code>>12)&0x3F));
         $utf8.=chr(0x80|(($code>>6)&0x3F));
         $utf8.=chr(0x80|($code&0x3F));
        }
       }
       return $utf8;
    }
    ?>

相关热词搜索:gb2312转utf8 gb2312转utf8 php

上一篇:UTF-8转GB2312的php代码
下一篇:网页设计使用alt标签属性