<?
// Build the GB2312 <-> Unicode lookup tables once, as globals, so the
// conversion functions do not have to re-read the mapping file on every call.
//
// The mapping file "gb2312.txt" is looked up in the directory of the entry
// script ($_SERVER["SCRIPT_FILENAME"]), not necessarily the directory of this
// file. Each data line is parsed by fixed columns: 4 hex digits at offset 2
// (GB2312 code) and 4 hex digits at offset 9 (Unicode code point); presumably
// the "0xGGGG<TAB>0xUUUU" layout of the Unicode Consortium's GB2312.TXT --
// confirm against the shipped file. Lines starting with '#' are comments.
//
//   $____global_codetable  : GB2312 code (int)        => Unicode code point (4-char hex string)
//   $____global_codetable2 : Unicode code point (int) => GB2312 code (int)
$____global_codetable=array();
$____global_codetable2=array();
$____global_filename=pathinfo($_SERVER["SCRIPT_FILENAME"]);
$____global_filename=$____global_filename["dirname"]."/gb2312.txt";
// A missing or unreadable mapping file leaves both tables empty instead of
// raising warnings and iterating over a non-array.
$____global_tmp=is_readable($____global_filename)?file($____global_filename):FALSE;
if (is_array($____global_tmp))
{
    // Single pass: both directions are filled from the same parsed columns.
    foreach($____global_tmp as $____global_line)
    {
        // Skip comments and lines too short to hold both fixed-width columns
        // (blank lines would otherwise create a bogus entry at key 0).
        if (strlen($____global_line)<13 || $____global_line[0]==='#')
            continue;
        $____global_gbhex=substr($____global_line,2,4);
        $____global_unihex=substr($____global_line,9,4);
        $____global_codetable[hexdec($____global_gbhex)]=$____global_unihex;
        $____global_codetable2[hexdec($____global_unihex)]=hexdec($____global_gbhex);
    }
}
// Drop the temporaries so only the two tables remain in the global scope.
unset($____global_filename);
unset($____global_tmp);
unset($____global_line,$____global_gbhex,$____global_unihex);
/*
Converts text containing numeric character references ("&#NNNN;" decimal or
"&#xHHHH;" hexadecimal), possibly mixed with other ASCII characters, into
GB2312 text. Can be used to convert the encoding of XML documents.
Note: the function does not change the encoding declaration inside the XML.

$un     : input text.
returns : the converted text. Every reference whose code point is present in
          $____global_codetable2 is replaced by two bytes (table value OR-ed
          with 0x8080, high byte first). Everything else -- plain text,
          malformed references, and references to code points that are not in
          the table -- is copied through unchanged, so no stray bytes are
          produced and XML escapes such as "&#38;" stay intact.
          Empty or whitespace-only input is returned as is.
*/
function unicode2gb($un)
{
    // Nothing to convert in empty / whitespace-only input; hand it back untouched
    if(!trim($un))
        return $un;
    global $____global_codetable2;
    $gb="";
    $len=strlen($un);
    $pos=0;
    // Scan with an offset instead of repeatedly re-slicing the remaining input
    while($pos<$len)
    {
        $start=strpos($un,"&#",$pos);
        if ($start===FALSE)
        {
            // No further reference prefix: the rest is plain text
            $gb.=substr($un,$pos);
            break;
        }
        // Copy the plain text that precedes the prefix
        $gb.=substr($un,$pos,$start-$pos);
        $end=strpos($un,";",$start+2);
        if ($end===FALSE)
        {
            // Prefix without terminator: no reference can follow any more
            $gb.=substr($un,$start);
            break;
        }
        $digits=(string)substr($un,$start+2,$end-$start-2);
        $n=strlen($digits);
        // -1 marks "not a well-formed numeric reference". The length cap keeps
        // hexdec()/intval() within integer range.
        $code=-1;
        if ($n>1 && $n<=8 && ($digits[0]==="x" || $digits[0]==="X"))
        {
            if (strspn($digits,"0123456789abcdefABCDEF",1)==$n-1)
                $code=hexdec(substr($digits,1));
        }
        elseif ($n>0 && $n<=8 && strspn($digits,"0123456789")==$n)
        {
            $code=intval($digits);
        }
        if ($code<0)
        {
            // Not a reference: keep the "&#" literally and resume right after
            // it, so a genuine reference later in the text is still converted
            $gb.="&#";
            $pos=$start+2;
            continue;
        }
        if (isset($____global_codetable2[$code]))
        {
            // Setting the high bit of both bytes turns the table value into
            // the two-byte output form
            $gbcode=0x8080|$____global_codetable2[$code];
            $gb.=chr(($gbcode>>8)&0xFF);
            $gb.=chr($gbcode&0xFF);
        }
        else
        {
            // Code point has no table entry: leave the reference as written
            $gb.=substr($un,$start,$end-$start+1);
        }
        $pos=$end+1;
    }
    return $gb;
}
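/*
Converts GB2312 text (which may also contain other ASCII characters) into
Unicode text written as numeric character references (the "&#...;" form).
Can be used to convert the encoding of XML documents.
Note: the function does not change the encoding declaration inside the XML.
*/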
function gb2unicode($gb)
{
if(!trim($gb))
return $gb;
$utf="";
global $____global_codetable;
while(strlen($gb)>0)
{
if (ord(substr($gb,0,1))>127)
&nb
标签(Tags):gb2312转utf8 gb2312转utf8 php
引用地址:
