node爬虫代码优化?

var http = require('http'),
    url = require('url'),
    superagent = require('superagent'),
    cheerio = require('cheerio'),
    async = require('async'),
    eventproxy = require('eventproxy');
// Raw HTML of each city's listing page, keyed by city name. Every entry starts
// as an empty string so that appending response chunks with `+=` never
// produces a string that begins with "undefined".
var data = {
        'hangzhou': '',
        'shanghai': ''
    },
    // Absolute links extracted from each city's listing page, keyed by city name.
    urlLists = {
        'hangzhou':[],
        'shanghai':[]
    };
// `ep` is created here but not used by the code visible in this file.
// `baseUrl[i]` is the site root that relative links scraped from `urls[i]`
// are appended to; both arrays share the same city order (hangzhou, shanghai).
var ep = new eventproxy(),
    baseUrl = ['http://hz.lianjia.com', 'http://sh.lianjia.com'],
    urls = ['http://hz.lianjia.com/ershoufang/', 'http://sh.lianjia.com/ershoufang/'];
// Number of city requests that have finished so far.
var count = 0;
/**
 * Requests the listing page of every configured city, extracts the links of
 * the first filter option list on each page, and logs all collected links
 * once every city has been handled.
 *
 * Reads the module-level `urls` and `baseUrl` arrays. Writes the raw HTML of
 * each page to `data[city]`, pushes the extracted absolute links onto
 * `urlLists[city]`, and increments `count` once per finished request.
 */
function start() {
    // City keys, in the same order as the entries of `urls` and `baseUrl`.
    var cities = ['hangzhou', 'shanghai'];

    cities.forEach(function(city, cityIndex) {
        fetchCity(city, cityIndex);
    });

    /**
     * Downloads one city's listing page and records the links found in it.
     *
     * @param {string} city - Key into `data` and `urlLists`.
     * @param {number} cityIndex - Index into `urls` and `baseUrl`.
     */
    function fetchCity(city, cityIndex) {
        var body = '';
        var finished = false;

        // Counts this city as done exactly once, even if both an error and an
        // end event are emitted for the same request.
        function finish() {
            if (finished) {
                return;
            }
            finished = true;
            count++;
            getUrl(count);
        }

        var req = http.request(urls[cityIndex], function(res) {
            res.setEncoding('utf-8');
            res.on('data', function(chunk) {
                // Accumulate in a local string; appending directly onto a
                // missing `data[city]` would prefix the HTML with "undefined".
                body += chunk;
            });
            res.on('end', function() {
                data[city] = body;
                var $ = cheerio.load(body);
                $('#filter-options .option-list').first().children('a').each(function(linkIndex, item) {
                    urlLists[city].push(baseUrl[cityIndex] + $(item).attr('href'));
                });
                finish();
            });
        });
        // Without an 'error' listener a failed request throws and kills the
        // process. Log it and still count the city as finished so the final
        // report is printed with whatever was collected.
        req.on('error', function(err) {
            console.error('Request for ' + urls[cityIndex] + ' failed: ' + err.message);
            finish();
        });
        req.end();
    }

    /**
     * Logs the collected links once every city request has finished.
     *
     * @param {number} count - Number of requests finished so far.
     */
    function getUrl(count) {
        if( count === cities.length ) {
            console.log(urlLists);
        }
    }
}
// Run the crawl immediately when the script is loaded.
start();




请问上面这段代码应该如何优化?








阅读 2.8k
1 个回答

代码里写了 `require('superagent')`,说明你知道可以用类库来发请求,那么下面手写的 `http.request` 就可以省掉了。
代码里还写了 `require('eventproxy')` 和 `require('async')`,说明你是想用它们来做流程控制。

你问怎么优化,其实你已经有那个意识了,剩下的只是照着api文档调整一下就好。

撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题