Goutte 怎么获取 a 标签里面的 URL?或者有什么好用的 PHP 爬虫库可以推荐?谢谢

TIGERB
  • 9.4k

使用 Goutte 时,怎么获取 a 标签里面的 URL(href)?或者有什么好用的 PHP 爬虫库可以推荐?谢谢

<?php
require('./Vendor/autoload.php');
use Goutte\Client;

/**
 * Scrapes the news list at http://www.ningshan.gov.cn/Category_90/Index.aspx using Goutte.
 *
 * The page is requested once in the constructor. getDate() and getTitle() then
 * read from that fetched document and append what they find to $_news.
 */
class Spider
{
    /** @var Client Goutte client used to issue the HTTP request. */
    private $_client;

    /** @var \Symfony\Component\DomCrawler\Crawler Crawler returned by the client for the list page. */
    private $_crawler;

    /**
     * Collected results, one list per field.
     *
     * Only 'date', 'title', 'link' and 'source' are filled by this class;
     * 'content' is declared but never written here.
     *
     * @var array
     */
    public  $_news = [
        'title'   => [],
        'link'    => [],
        'content' => [],
        'source'  => [],
        'date'    => [],
    ];

    /**
     * Creates the client and fetches the news list page.
     *
     * Note: no request timeout is configured here.
     *
     * @throws \Exception when the client cannot be created or the request fails;
     *                    the original exception is attached as the previous one.
     */
    public function __construct()
    {
        try {
            $this->_client  = new Client();
            $this->_crawler = $this->_client->request('GET', 'http://www.ningshan.gov.cn/Category_90/Index.aspx');
        } catch (\Exception $e) {
            // Keep the message and code callers already see, but chain the
            // original exception so its type and stack trace are not lost.
            throw new \Exception($e->getMessage(), 1, $e);
        }
    }

    /**
     * Appends the text of every `div#list>ul>li>span` node to $_news['date'].
     *
     * @return void
     */
    public function getDate()
    {
        $this->_crawler->filter('div#list>ul>li>span')->each(function ($node) {
            $this->_news['date'][] = $node->text();
        });
    }

    /**
     * Collects title, URL and source for every `div#list>ul>li>a` node.
     *
     * Anchors whose text is exactly '宁陕要闻' are skipped. For every other anchor
     * the text goes to $_news['title'], the URL to $_news['link'], and the
     * constant '宁陕要闻' to $_news['source'].
     *
     * @return void
     */
    public function getTitle()
    {
        $this->_crawler->filter('div#list>ul>li>a')->each(function ($node) {
            $title = $node->text();

            if ($title !== '宁陕要闻') {
                $this->_news['title'][]  = $title;
                // link() wraps the <a> element in a Link object; getUri() is a
                // method (not a property) and returns the URL as a string, so
                // the value can be JSON-encoded. Use $node->attr('href') instead
                // if the raw, unresolved attribute value is wanted.
                $this->_news['link'][]   = $node->link()->getUri();
                $this->_news['source'][] = '宁陕要闻';
            }
        });
    }
}

//-----------------------------------
// Entry point: fetch the page, gather dates and titles, then print the
// collected news as JSON. Any exception is reported as its plain message.
try {
    $scraper = new Spider();

    $scraper->getDate();
    $scraper->getTitle();

    $payload = json_encode($scraper->_news, JSON_UNESCAPED_UNICODE);
    echo $payload;
} catch (Exception $failure) {
    $reason = $failure->getMessage();
    echo $reason;
}

回复
阅读 3.1k
3 个回答
✓ 已被采纳
撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
宣传栏