import requests
from bs4 import BeautifulSoup
import sys
if sys.version_info[0] == 2:
    # Python 2 only: `reload` is not a builtin on Python 3 and
    # `sys.setdefaultencoding` is not available there, so running these two
    # lines unguarded stops the script with a NameError on Python 3.
    # NOTE(review): presumably this is here so that mixing byte-string
    # literals with the unicode text returned by BeautifulSoup does not raise
    # UnicodeDecodeError -- confirm before removing.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')
def _joined_child_text(tag):
    """Return the stripped text of every direct child of *tag*, concatenated.

    A child tag whose ``.string`` is ``None`` (BeautifulSoup returns ``None``
    when a tag has no single string child, e.g. ``<br/>``) falls back to
    ``get_text()`` instead of failing on ``None.strip()``.
    """
    parts = []
    for child in tag.children:
        text = child.string
        if text is None:
            text = child.get_text()
        parts.append(text.strip())
    return u''.join(parts)


def yunpan_search(key, output_path='baidu_source.txt', timeout=10):
    """Search wangpansou.cn for *key*, print every hit and save it to a file.

    The result page is parsed with BeautifulSoup using the ``lxml`` parser.
    Each ``<a class="cse-search-result_content_item_top_a">`` supplies the
    download URL and title of one hit; the
    ``<div class="cse-search-result_content_item_mid">`` at the same position
    supplies its details.

    Args:
        key: Search keyword. It is passed as the ``q`` query parameter so that
            characters such as ``&``, ``#`` or spaces are URL-encoded.
        output_path: File that receives the numbered results (overwritten on
            every call, written as UTF-8).
        timeout: Seconds to wait for the HTTP response before
            ``requests`` raises a timeout error.

    Returns:
        None. Results are printed to stdout and written to *output_path*.
    """
    # Local import keeps this block self-contained; io.open accepts an
    # ``encoding`` argument on both Python 2 and Python 3.
    import io

    response = requests.get(
        'http://www.wangpansou.cn/s.php',
        params={'q': key},
        timeout=timeout,
    )
    soup = BeautifulSoup(response.text, "lxml")
    links = soup.find_all('a', {'class': 'cse-search-result_content_item_top_a'})
    details = soup.find_all('div', {'class': 'cse-search-result_content_item_mid'})

    # ``with`` guarantees the file is closed even if a result fails to format.
    with io.open(output_path, 'w', encoding='utf-8') as out:
        for number, link in enumerate(links, 1):
            href = link.get('href', u'')
            title = _joined_child_text(link)
            # The two result lists are matched by position; a link without a
            # matching details block gets empty details instead of IndexError.
            if number <= len(details):
                information = _joined_child_text(details[number - 1]).replace(u'\n', u'')
            else:
                information = u''

            record = (u'下载地址--' + href + u'\n'
                      + u'链接标题--' + title + u'\n'
                      + u'链接详情--' + information + u'\n\n')

            print(str(number) + '_' * 60)
            print(record)
            out.write(u'{0}. _____________________________________________________________________\n'.format(number))
            out.write(record)
if __name__ == '__main__':
    # Interactive entry point: ask for a keyword, run the search, report
    # completion.
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3 has no raw_input; input() reads a line of text
    key = read_line('please input what you want to look for:')
    yunpan_search(key)
    print('finish')
简单看了一下这个脚本:它应该是一个网盘搜索(盘搜)工具。
脚本请求 http://www.wangpansou.cn/s.php?q=关键词 这个 URL,获取查询结果页面,再用 BeautifulSoup(一个解析 HTML、XML 的库)解析页面。
soup.find_all 是按标签名和 class 属性来查找元素的(注意它不是 CSS 选择器,CSS 选择器对应的方法是 soup.select)。
脚本查找的那两类元素,分别对应每条搜索结果的链接(含标题)和结果详情。要实现的话不难,这个你应该自己可以做:
如果用 JavaScript 实现,可以用 Node.js,再找一个发送 HTTP 请求的库(类似 Python 的 requests)和一个解析 HTML(XML)的库就可以了。