求助
文章的 keywords 是分词后得到的词,但是 PHPAnalysis 又把这些分词用数字编码的形式保存,我想把它们转换成中文,需要怎么做?
/**
 * Get the numeric code of the target (save) charset.
 *
 * $this->targetCharSet is matched by prefix, in this order:
 * "utf" gives 1, "gb" gives 2, "big" gives 3; anything else gives 4.
 *
 * @return int
 */
private function _source_result_charset()
{
    // Prefix pattern => charset code, tested in declaration order.
    $prefix_codes = array(
        "/^utf/" => 1,
        "/^gb/"  => 2,
        "/^big/" => 3,
    );
    foreach( $prefix_codes as $pattern => $code )
    {
        if( preg_match($pattern, $this->targetCharSet) )
        {
            return $code;
        }
    }
    // No known prefix matched.
    return 4;
}
/**
 * Compile a text dictionary into the binary dictionary file.
 *
 * Every source line is expected to look like "word,field,field"; lines whose
 * first character is '@' are skipped, as are lines with fewer than three
 * comma-separated fields. Words are converted from utf-8 to the UCS2 encoding,
 * grouped by $this->_get_index(), and written out as an index table of
 * $this->mask_value 8-byte records (offset, byte length, entry count) followed
 * by the serialized groups.
 *
 * Note: the whole dictionary is held in memory, so PHP must be allowed enough
 * memory to complete the operation.
 *
 * @param string $source_file utf-8 encoded text dictionary (see the sample dict/not-build/base_dic_full.txt)
 * @param string $target_file output path; $this->mainDicFile is used when empty
 * @return void
 */
public function MakeDict( $source_file, $target_file='' )
{
    $target_file = ($target_file=='' ? $this->mainDicFile : $target_file);
    $fp = fopen($source_file, 'r');
    if( $fp === false )
    {
        // Nothing to compile: stop before the target file gets truncated.
        return;
    }
    $allk = array();
    // fgets() returns false at end of file; compare explicitly instead of relying on truthiness.
    while( ($line = fgets($fp, 512)) !== false )
    {
        if( $line[0]=='@' ) continue;
        $parts = explode(',', $line);
        // Blank or malformed lines do not carry the three fields; skip them
        // instead of storing a junk entry.
        if( count($parts) < 3 ) continue;
        list($w, $r, $a) = $parts;
        $a = trim( $a );
        $w = iconv('utf-8', UCS2, $w);
        // iconv() returns false when the word cannot be converted.
        if( $w === false ) continue;
        $k = $this->_get_index( $w );
        $allk[ $k ][ $w ] = array($r, $a);
    }
    fclose( $fp );
    $fp = fopen($target_file, 'w');
    if( $fp === false )
    {
        return;
    }
    $header_arr = array();
    $alldat = '';
    // The serialized data starts right after the index table (8 bytes per record).
    $start_pos = $this->mask_value * 8;
    foreach( $allk as $k => $v )
    {
        $dat = serialize( $v );
        $dlen = strlen($dat);
        if( $dlen > 0xFFFF )
        {
            // The length is stored with pack format 'n' (unsigned 16-bit), which
            // would silently truncate a larger value and corrupt this bucket.
            trigger_error("MakeDict: bucket {$k} is {$dlen} bytes and does not fit the 16-bit length field", E_USER_WARNING);
        }
        $alldat .= $dat;
        $header_arr[ $k ] = array($start_pos, $dlen, count( $v ));
        $start_pos += $dlen;
    }
    unset( $allk );
    for($i=0; $i < $this->mask_value; $i++)
    {
        // Indexes without any word get an all-zero record.
        $rec = isset($header_arr[$i]) ? $header_arr[$i] : array(0, 0, 0);
        fwrite($fp, pack("Inn", $rec[0], $rec[1], $rec[2]));
    }
    fwrite( $fp, $alldat);
    fclose( $fp );
}
/**
 * Export the entries of the compiled main dictionary to a text file.
 *
 * Walks the index table at the start of $this->mainDicFile, unserializes every
 * non-empty bucket and writes each entry as one "word,field,field" line, with
 * the word converted from the UCS2 encoding back to utf-8.
 *
 * @param string $targetfile path of the text file to write
 * @return bool true on success, false when the dictionary or the target file cannot be opened
 */
public function ExportDict( $targetfile )
{
    if( !$this->mainDicHand )
    {
        $this->mainDicHand = fopen($this->mainDicFile, 'r');
        if( !$this->mainDicHand )
        {
            return false;
        }
    }
    $fp = fopen($targetfile, 'w');
    if( $fp === false )
    {
        return false;
    }
    // The index table written by MakeDict holds exactly mask_value 8-byte records
    // (indexes 0 .. mask_value-1); the bytes at mask_value * 8 already belong to
    // the serialized data, so the loop must stop before that index.
    for($i=0; $i < $this->mask_value; $i++)
    {
        fseek($this->mainDicHand, $i * 8, SEEK_SET);
        $dat = fread($this->mainDicHand, 8);
        if( $dat === false || strlen($dat) < 8 )
        {
            // Truncated index table: no further records can be read.
            break;
        }
        // s = data offset, l = data length in bytes, c = entry count.
        $arr = unpack('I1s/n1l/n1c', $dat);
        if( $arr['l'] == 0 )
        {
            continue;
        }
        fseek($this->mainDicHand, $arr['s'], SEEK_SET);
        // Damaged buckets are skipped on purpose, hence the suppressed notice.
        // NOTE(review): unserialize() must only ever see trusted dictionary files.
        $data = @unserialize(fread($this->mainDicHand, $arr['l']));
        if( !is_array($data) ) continue;
        foreach($data as $k => $v)
        {
            $w = iconv(UCS2, 'utf-8', $k);
            // iconv() returns false when the key cannot be converted.
            if( $w === false ) continue;
            fwrite($fp, "{$w},{$v[0]},{$v[1]}\n");
        }
    }
    fclose( $fp );
    return true;
}
}