urllib

  • 导入urllib,有以下两种方式,喜欢哪种方式看个人爱好:
import urllib.request
import urllib.parse
#from urllib import request
  • urllib发送get请求:
# Example: send a GET request with urllib while presenting a browser-like
# User-Agent header (one basic tactic against anti-scraping checks).
import urllib.request
import urllib.parse

# SSL certificate handling
import ssl

# WARNING(review): this swaps urllib's default HTTPS context factory for an
# unverified one through private ssl attributes, which turns off certificate
# verification for every later urllib HTTPS request in this process. The URL
# below is plain http, so this line has no effect on this particular request;
# do not carry it into code that talks to real HTTPS endpoints.
ssl._create_default_https_context = ssl._create_unverified_context


url = 'http://www.baidu.com/'

# Simplest form, without any custom headers:
# response = urllib.request.urlopen(url)
# print(response.read().decode())

# Header set that makes the request identify itself as a desktop browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
}
# Build the request object so the custom headers travel with it.
request = urllib.request.Request(url=url, headers=headers)
# Send the request; the with-block closes the connection once the body is read.
with urllib.request.urlopen(request) as response:
    # Decode with the charset the server declares; fall back to UTF-8 when
    # the Content-Type header does not name one.
    charset = response.headers.get_content_charset() or 'utf-8'
    body = response.read().decode(charset)

print(body)
  • urllib发送post请求:
# Example: send a form-encoded POST request with urllib and print the reply.
import urllib.request
import urllib.parse

post_url = 'http://fanyi.baidu.com/v2transapi'
word = 'wolf'
# Form fields sent in the request body.
# NOTE(review): 'sign' and 'token' are hard-coded values, presumably captured
# from a browser session for this exact query; they likely need refreshing
# before the request is accepted again -- confirm against the live site.
formdata = {
    'from':    'en',
    'to': 'zh',
    'query': word,
    'transtype': 'realtime',
    'simple_means_flag': '3',
    'sign': '275695.55262',
    'token': '7d9697542b6337bfd8f1b54c7887dcf5',
}

# Browser headers replayed with the request. The commented-out entries are
# deliberately not sent; presumably they were captured from the browser and
# disabled because urllib fills in or does not need them -- e.g. sending
# 'Accept-Encoding: gzip' would require decompressing the body by hand.
headers = {
    'Host': 'fanyi.baidu.com',
    # 'Connection': 'keep-alive',
    # 'Content-Length': '120',
    # 'Accept': '*/*',
    'Origin': 'http://fanyi.baidu.com',
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    # 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Referer': 'http://fanyi.baidu.com/?aldtype=16047',
    # 'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cookie': 'BAIDUID=D1620A70988D2694BE528E5CEFE5B5F3:FG=1; BIDUPSID=D1620A70988D2694BE528E5CEFE5B5F3; PSTM=1526524899; to_lang_often=%5B%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%2C%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%5D; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; from_lang_often=%5B%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%2C%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%5D; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; H_PS_PSSID=; locale=zh; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1527210729,1527556520; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1527556520',
}

request = urllib.request.Request(url=post_url, headers=headers)

# urlopen needs the body as bytes: url-encode the dict, then encode to bytes.
# A separate name keeps `formdata` as the readable dict.
payload = urllib.parse.urlencode(formdata).encode()

# Supplying a data argument makes urllib issue a POST instead of a GET.
# The with-block closes the connection once the body has been read.
with urllib.request.urlopen(request, payload) as response:
    # Decode with the charset the server declares; fall back to UTF-8 when
    # the Content-Type header does not name one.
    charset = response.headers.get_content_charset() or 'utf-8'
    body = response.read().decode(charset)

print(body)
  • urllib.parse
parse.quote#将字符串进行URL百分号编码(如中文、空格等特殊字符会被转成%XX形式)
parse.urlencode#传入的数据是字典格式的,返回URL编码后的查询字符串(如 a=1&b=2)
  • urllib.response
import urllib.response
#注意:status和headers是urlopen()返回的响应对象的属性,下面的response指该对象
#状态码
    response.status
#响应头信息:

    response.headers

zzZ摇篮曲
28 声望2 粉丝

程序猿?菜鸟?我们不只是spider!!!