扒取方法

    /**
     * Scrape the dytt8.net home page and store every link found inside its
     * <div class="co_content2"> blocks through the Movies model.
     *
     * Each stored row has three keys:
     *   - 'href'  absolute URL of the link
     *   - 'name'  inner HTML of the anchor, converted to UTF-8
     *   - 'ctime' Unix timestamp taken once per scrape
     *
     * The outcome is echoed; nothing is returned. When the page contains no
     * matching <div> at all, the method ends without output.
     */
    public function index()
    {
        $baseUrl = 'http://www.dytt8.net/';

        $content = file_get_contents($baseUrl);
        if ($content === false) {
            // Download failed: report it instead of silently doing nothing.
            echo '抓取失败!';
            return;
        }

        // The page is converted from GB2312 so the stored names are UTF-8.
        $content = iconv("gb2312", "utf-8//IGNORE", $content);
        if ($content === false) {
            echo '抓取失败!';
            return;
        }

        // Inner HTML of every <div class="co_content2"> block.
        $reg = "|<div class=\"co_content2\">(.*?)<\/div>|is";
        if (!preg_match_all($reg, $content, $match)) {
            // No data (or a regex failure): nothing to store.
            return;
        }

        // A single pattern captures the href (group 1) and the link text
        // (group 2) of the same anchor. Matching them with two independent
        // patterns and pairing by index goes out of step as soon as one
        // anchor has no href or lists another attribute first.
        $linkReg = '~<a\b[^>]*?\shref\s*=\s*[\'"]?([^\'" >]+)[^>]*>(.*?)</a>~is';

        $now = time();
        $arr = array();
        foreach ($match[1] as $block) {
            if (!preg_match_all($linkReg, $block, $links, PREG_SET_ORDER)) {
                continue;
            }
            foreach ($links as $link) {
                $href = $link[1];
                // Only site-relative links get the site prefix; absolute and
                // protocol-relative URLs are stored as they are.
                if (!preg_match('~^(?:https?:)?//~i', $href)) {
                    $href = $baseUrl . ltrim($href, '/');
                }
                $arr[] = array(
                    'href'  => $href,
                    'name'  => $link[2],
                    'ctime' => $now,
                );
            }
        }

        // Do not hand an empty batch to the model; treat it as a failed scrape.
        $res = $arr ? $this->Movies->addAll($arr) : false;
        if ($res) {
            echo '抓取成功!';
        } else {
            echo '抓取失败!';
        }
    }

mmy123456
376 声望17 粉丝

有项目请联系:15201970281(毛毛)