# -*- coding: utf-8 -*-
import requests
import re
import xlwt
# Python 2-only idiom: reload the `sys` module and set the process-wide
# default string encoding to UTF-8. `reload` is a builtin only on Python 2,
# so this line raises NameError on Python 3.
reload(__import__('sys')).setdefaultencoding('utf-8')
def getHtml(url):
    """Download ``url`` and return the response body as text.

    Each proxy in the built-in list is tried in order. If every proxy
    fails, the request is sent once more over a direct connection.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        The decoded response body (``requests.Response.text``).

    Raises:
        requests.RequestException: If the direct-connection fallback fails.
    """
    proxylist = (
        '211.167.112.14:80',
        '210.32.34.115:8080',
        '115.47.8.39:80',
        '211.151.181.41:80',
        '219.239.26.23:80',
    )
    # Build the request headers with a browser-like User-Agent.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'}
    # Cookie value redacted by the author.
    cook = {"Cookie": ''}
    # Seconds to wait per attempt; without a timeout a dead proxy can block
    # the whole script indefinitely.
    timeout = 10

    for proxy in proxylist:
        # requests picks a proxy by URL scheme, so the mapping has to be keyed
        # by 'http' / 'https'. An empty-string key never matches any request
        # and the proxy would be silently ignored.
        proxies = {
            'http': 'http://' + proxy,
            'https': 'http://' + proxy,
        }
        try:
            response = requests.get(url, cookies=cook, headers=headers,
                                    proxies=proxies, timeout=timeout)
        except requests.RequestException:
            # Proxy unreachable or refused the request; try the next one.
            continue
        return response.text

    # Every proxy failed: fall back to a direct connection.
    return requests.get(url, cookies=cook, headers=headers,
                        timeout=timeout).text
def changeurl(start_url, page):
    """Build the list of paginated URLs to crawl.

    Args:
        start_url: URL prefix that ends exactly where the page number goes.
        page: Number of pages; page numbers run from 1 to ``page`` inclusive.

    Returns:
        A list of ``page`` URLs, each one ``start_url`` followed by its page
        number. The list is empty when ``page`` is less than 1.
    """
    return [start_url + str(i) for i in range(1, page + 1)]
# Tmall listing URL; the page number is appended right after ``jumpto=``.
start_url = "https://list.tmall.com/search_product.htm?type=pc&totalPage=100&cat=50025135&sort=d&style=g&from=sn_1_cat-qp&active=1&jumpto="
urls = changeurl(start_url, 1)

# Compiled once, outside the page loop. Capture groups per product tile:
# data-id, price (<em title>), link (<a href>) and title (<a title>).
# Raw strings keep the regex escapes (\d, \s, \/) intact.
reForProduct = re.compile(
    r'<div class="product " data-id="(\d+)"[\s\S]+?'
    r'<p class="productPrice">\s+<em title="(.+?)">[\s\S]+?'
    r'<p class="productTitle">\s+<a href="(.+?)" target="_blank" title="(.+?)"'
    r'[\s\S]+?<\/div>\s+<\/div>'
)

for url in urls:
    html = getHtml(url)
    products = reForProduct.findall(html)
    # Walk the matches exactly once: a second nested loop over ``products``
    # would print every row len(products) times. The product link is bound
    # to ``link`` so it does not overwrite the page ``url`` being iterated.
    for (pid, price, link, title) in products:
        text = ("%s\t%s\t%s\t%s") % (pid, price, title, link)
        # A single parenthesised argument prints the same on Python 2 and 3.
        print(text)
能在控制台显示以制表符分隔的结果。
我想把这些数据写入到 Excel 中做数据分析,请问怎么用 xlwt 模块写入这些数据内容?请帮忙写一个详细的 Excel 写入代码块。
代码写得比较丑。