Python 爬虫之 Ajax 请求

  • 爬取豆瓣网分页数据的 GET 请求:
import urllib.request
import urllib.parse

# Fetch one page of a Douban movie chart through its Ajax GET endpoint and
# print the raw response body.
url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&'

page = int(input('请输入想要第几页的数据:'))
# Items per page: page 1 maps to start=0, page 2 to start=20, and so on.
number = 20

# Build the GET pagination parameters.
data = {
    'start': (page - 1)*number,
    'limit': number,
}
# Encode the dict as a query string and append it to the base URL
# (the base URL already ends with '&', so no separator is needed).
query_string = urllib.parse.urlencode(data)
url += query_string

# Send a browser-style User-Agent header with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
}
request = urllib.request.Request(url=url, headers=headers)

# Use the response as a context manager so the underlying connection is
# closed as soon as the body has been read, even if decoding fails.
with urllib.request.urlopen(request) as response:
    print(response.read().decode())
  • 肯德基配送信息的 POST 请求:
import urllib.request
import urllib.parse

# Query the KFC store-list endpoint with a POST form and print the raw
# response body.
post_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'

city = input('请输入要查询的城市:')
page = input('请输入要查询第几页:')
size = input('请输入要多少个:')
# Form fields sent in the POST body.
formdata = {
    'cname': city,
    'pid': '', 
    'pageIndex': page,
    'pageSize': size,
}

# Send a browser-style User-Agent header with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
}

request = urllib.request.Request(url=post_url, headers=headers)
# urlopen() requires the body as bytes, so url-encode the form and encode it.
formdata = urllib.parse.urlencode(formdata).encode()

# Supplying `data` makes urllib issue a POST. Use the response as a context
# manager so the connection is closed once the body has been read.
with urllib.request.urlopen(request, data=formdata) as response:
    print(response.read().decode())

zzZ摇篮曲
28 声望2 粉丝

程序猿?菜鸟?我们不只是spider!!!