先上效果图:
实现思路:
需要花时间往下看fiddler抓包路径 ! ! !
- 下载一个 Fiddler,百度网盘链接:https://pan.baidu.com/s/1hdfWI3CM1tLKShvdWF5t0w 提取码:iaoe。当然你也可以去官网下载
博主认为比较全面的 fiddler 使用教程: https://www.cnblogs.com/yyhh/p/5140852.html
- 去爱奇艺官网获取视频路径
例如:https://www.iqiyi.com/v_19rrz...
- 找到一个解析网站"https://www.8090g.cn/jiexi",解析网站不同,代码实现就不一样。不过思路还是可以参考的
感谢:@一杯敬朝阳 一杯敬月光 提供
- 打开fiddler,浏览器输入路径"https://www.8090g.cn/jiexi/?url=https://www.iqiyi.com/v_19rrz...l"
这里可以看到,网页嵌套了一个 iframe,而 iframe 的路径是"http://8090.winzoo.cn/jiexi/?url=https://www.iqiyi.com/v_19rrz...l"
- 通过fiddler发现有调用"http://8090.winzoo.cn/jiexi/?url=https://www.iqiyi.com/v_19rrz...l"
查看响应内容,发现如下参数(这是获取 .m3u8 文件所需的参数):
{'url':'https://www.iqiyi.com/v_19rrz2aweg.html','referer':'aHR0cHM6Ly93d3cuODA5MGcuY24vamlleGkvP3VybD1odHRwczovL3d3dy5pcWl5aS5jb20vdl8xOXJyejJhd2VnLmh0bWw=','ref':form,'time':'1594721051','type':'','other':y.encode(other_l),'ref':form,'ios':''}
- 发现.m3u8文件
- 根据.m3u8文件的路径,输入对应参数,获取.ts文件
- 将.ts文件写入一个文件中
- 将.ts文件转换为.mp4文件类型
以下是实现代码:iqiyiVodeoDownload.py
import requests
import json
import os
from urllib import request
import re
import base64
import shutil
def downloadsVideoMethod():
    """
    Resolve an iQiyi page URL to its .m3u8 playlist and download the video.

    The function fetches the parsing page on 8090.winzoo.cn, extracts the
    ``referer`` token embedded in that page, posts it to ``api.php`` to obtain
    the playlist URL, and finally hands the playlist to ``fileLoad``.

    It takes no arguments; it reads two module-level names that must be set
    before the call: ``aqy_url`` (the iQiyi page URL) and ``file_name`` (the
    output file name without extension).

    :raises ValueError: if the parsing page does not contain the expected
        ``api.php`` parameter block, or that block has no ``referer`` field
    :return: None
    """
    # Mimic the browser's iframe navigation as closely as possible.
    # NOTE(review): advertising "br" only works if a brotli decoder is
    # installed for requests/urllib3 -- confirm in the target environment.
    headers = {
        "Referer": "https://www.8090g.cn/jiexi/?url=" + aqy_url,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36 Edg/83.0.478.61",
        "Sec-Fetch-Dest": "iframe",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "cross-site",
        "Upgrade-Insecure-Requests": "1",
        "Connection": "keep-alive",
        "Host": "8090.winzoo.cn"
    }
    target = "http://8090.winzoo.cn/jiexi/?url=" + aqy_url

    # NOTE(review): verify=False disables TLS certificate checking (kept from
    # the original script); remove it if the site's certificate is valid.
    # The session is closed as soon as the page body has been read.
    with requests.Session() as session:
        page = session.get(url=target, headers=headers, verify=False, timeout=30)
    page.encoding = "utf-8"

    # The page embeds a JS call whose second argument looks like:
    #   {'url':'https://www.iqiyi.com/v_xxx.html','referer':'<base64>',
    #    'ref':form,'time':'1594721051','type':'','other':y.encode(other_l),
    #    'ref':form,'ios':''}
    # Raw string: "\(" is not a valid escape in a normal string literal.
    found = re.search(
        r'"api.php",(.+),function\(data\){if\(data.code=="200"\)', page.text
    )
    if found is None:
        raise ValueError(
            "api.php parameter block not found in parsing page: %s" % target
        )
    param = found.group(1)

    # Cut the value out from between "'referer':'" and the following "','ref'".
    start_marker = "'referer':'"
    start = param.index(start_marker) + len(start_marker)
    referer = param[start:param.index("','ref'", start)]

    # "other" is the base64 of the iframe URL up to and including the first "=".
    other_href = "https://8090.winzoo.cn/jiexi/?url=" + aqy_url
    other = base64.b64encode(other_href[0: other_href.index('=') + 1].encode())

    data = {
        "url": aqy_url,
        "referer": referer,
        "ref": 0,
        # Fixed timestamp kept from the original script.
        "time": "1594376655",
        "type": "",
        "other": other,
        "ios": ""
    }

    # Ask the API for the .m3u8 path.
    video_url = "https://8090.winzoo.cn/jiexi/api.php"
    resp = requests.post(url=video_url, data=data, verify=False, timeout=30)
    print("resp", resp.text)

    # The body is a JSON string; turn it into a dict.
    resp_param = json.loads(resp.text)
    file_path = resp_param["url"]
    # Only add a scheme when the API did not return an absolute URL already
    # (blindly prepending would yield "http:http://...").
    if not file_path.startswith(("http://", "https://")):
        file_path = "http:" + file_path
    print("file_path: ", file_path)

    save_dir = "G:/ProjectPy/file/"
    fileLoad(file_path=file_path, save_dir=save_dir, file_name=file_name)
def fileLoad(file_path, save_dir, file_name=None):
    """
    Download every .ts segment listed in an .m3u8 playlist into one file.

    Segments are appended in playlist order to ``<save_dir>/<file_name>.ts``,
    which is then renamed to ``<file_name>.mp4``. The final step is a plain
    rename via ``shutil.move``; the data is not re-encoded or remuxed.

    :param file_path: URL of the .m3u8 playlist
    :param save_dir: directory the result is stored in (created if missing)
    :param file_name: output name without extension; when empty or None the
        last path component of ``file_path`` is used
    :return: None
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    if not file_name:
        file_name = file_path.split("/")[-1]

    # "<scheme>//<host>" of the playlist is sent as Referer for each segment.
    url_list = file_path.split("/")
    ts_referer = url_list[0] + "//" + url_list[2]
    print(ts_referer)

    os.makedirs(save_dir, exist_ok=True)
    ts_save_path = os.path.join(save_dir, "%s.ts" % file_name)
    print("save_path: ", ts_save_path)
    print("视频下载中.....")

    # writeTs appends, so a leftover file from an earlier run would end up
    # at the front of the new video.
    if os.path.exists(ts_save_path):
        os.remove(ts_save_path)

    # Fetch the playlist; getTsList may yield nothing if the request failed.
    m3u8_list_path = getTsList(file_path)
    if m3u8_list_path is None:
        print("playlist could not be loaded, nothing downloaded: %s" % file_path)
        return

    try:
        for line in m3u8_list_path:
            ts_url = str(line.decode("utf-8")).strip()
            # Lines starting with "#" are playlist tags/comments, not segments.
            if ts_url.startswith("#") or ".ts" not in ts_url:
                continue
            # Segment URIs may be relative to the playlist; absolute URLs
            # pass through urljoin unchanged.
            writeTs(urljoin(file_path, ts_url), ts_save_path, ts_referer)
    finally:
        # Close the HTTP response if getTsList handed back an open one.
        close = getattr(m3u8_list_path, "close", None)
        if close is not None:
            close()

    if not os.path.exists(ts_save_path):
        print("no .ts segment was downloaded from: %s" % file_path)
        return

    # Rename .ts to .mp4.
    mp4_save_path = os.path.join(save_dir, "%s.mp4" % file_name)
    shutil.move(ts_save_path, mp4_save_path)
    print("\n Successfully downloaded")
def getTsList(m3u8_path):
    """
    Fetch an .m3u8 playlist and return its lines.

    The response is read completely and closed before returning, so no
    connection is left open for the caller to clean up.

    :param m3u8_path: URL of the .m3u8 playlist
    :return: list of raw ``bytes`` lines (line endings included); an empty
        list if the playlist could not be fetched
    """
    try:
        with request.urlopen(m3u8_path) as resp:
            return resp.readlines()
    except Exception as e:
        # Best effort: report the failure and give the caller an empty,
        # still-iterable result instead of None.
        print("\033[1;31m getTsList error. url is (%s) reason is :\033[0m" % m3u8_path, e)
        return []
def writeTs(url, save_path, referer, retries=3, timeout=30):
    """
    Download one .ts segment and append it to ``save_path``.

    Failures are reported on stdout and never raised, so one bad segment
    does not abort the whole download. Nothing is written unless the server
    answered with a success status, which keeps HTTP error pages out of the
    video file.

    :param url: URL of the .ts segment
    :param save_path: file the segment bytes are appended to
    :param referer: value sent in the Referer header
    :param retries: how many times the request is attempted (at least once)
    :param timeout: per-request timeout in seconds
    :return: None
    """
    headers = {
        "Referer": referer,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36 Edg/83.0.478.61",
    }

    content = None
    last_error = None
    # Connection resets from the remote host are intermittent, so retry the
    # request a few times before giving up on the segment.
    for _ in range(max(1, retries)):
        try:
            resp = requests.get(url, headers=headers, timeout=timeout)
            resp.raise_for_status()
            content = resp.content
            break
        except requests.RequestException as e:
            last_error = e

    if content is None:
        print("\033[1;31m writeTs error. url is (%s) reason is :\033[0m" % url, last_error)
        return

    try:
        # The with-block flushes and closes the file on exit.
        with open(save_path, mode="ab") as f:
            f.write(content)
    except OSError as e:
        print("\033[1;31m writeTs error. url is (%s) reason is :\033[0m" % url, e)
if __name__ == "__main__":
    # Script entry point. downloadsVideoMethod() takes no arguments and reads
    # these two module-level names: the iQiyi page URL and the output file
    # name (without extension).
    aqy_url, file_name = "https://www.iqiyi.com/v_19rrk1p5ws.html", "宇宙高深处解析"
    downloadsVideoMethod()
还有待处理问题:
- [ ] 有一部分爱奇艺视频,用该脚本下载不了。可能是该视频不是分片传输(.m3u8 文件),而是直接返回 .mp4,也可能是其他原因
- [ ] 还有一部分爱奇艺视频,只能爬取一部分,例如:冰川时代4(HDR).mp4。我就只爬取了一部分
- [ ] 还有一个异常,但异常是偶发的。我就忽略了。如果大家有解决方案,欢迎分享哈~
pydev debugger: process 12912 is connecting
Connected to pydev debugger (build 201.6668.115)
Traceback (most recent call last):
File "G:\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 670, in urlopen
httplib_response = self._make_request(
File "G:\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 381, in _make_request
self._validate_conn(conn)
File "G:\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 976, in _validate_conn
conn.connect()
File "G:\Python\Python38\lib\site-packages\urllib3\connection.py", line 361, in connect
self.sock = ssl_wrap_socket(
File "G:\Python\Python38\lib\site-packages\urllib3\util\ssl_.py", line 377, in ssl_wrap_socket
return context.wrap_socket(sock, server_hostname=server_hostname)
File "G:\Python\Python38\lib\ssl.py", line 500, in wrap_socket
return self.sslsocket_class._create(
File "G:\Python\Python38\lib\ssl.py", line 1040, in _create
self.do_handshake()
File "G:\Python\Python38\lib\ssl.py", line 1309, in do_handshake
self._sslobj.do_handshake()
ConnectionResetError: [WinError 10054] 远程主机强迫关闭了一个现有的连接。
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "G:\Python\Python38\lib\site-packages\requests\adapters.py", line 439, in send
resp = conn.urlopen(
File "G:\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 724, in urlopen
retries = retries.increment(
File "G:\Python\Python38\lib\site-packages\urllib3\util\retry.py", line 403, in increment
raise six.reraise(type(error), error, _stacktrace)
File "G:\Python\Python38\lib\site-packages\urllib3\packages\six.py", line 734, in reraise
raise value.with_traceback(tb)
File "G:\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 670, in urlopen
httplib_response = self._make_request(
File "G:\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 381, in _make_request
self._validate_conn(conn)
File "G:\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 976, in _validate_conn
conn.connect()
File "G:\Python\Python38\lib\site-packages\urllib3\connection.py", line 361, in connect
self.sock = ssl_wrap_socket(
File "G:\Python\Python38\lib\site-packages\urllib3\util\ssl_.py", line 377, in ssl_wrap_socket
return context.wrap_socket(sock, server_hostname=server_hostname)
File "G:\Python\Python38\lib\ssl.py", line 500, in wrap_socket
return self.sslsocket_class._create(
File "G:\Python\Python38\lib\ssl.py", line 1040, in _create
self.do_handshake()
File "G:\Python\Python38\lib\ssl.py", line 1309, in do_handshake
self._sslobj.do_handshake()
urllib3.exceptions.ProtocolError: ('Connection aborted.', ConnectionResetError(10054, '远程主机强迫关闭了一个现有的连接。', None, 10054, None))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "G:\Python\Python38\lib\site-packages\requests\api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "G:\Python\Python38\lib\site-packages\requests\sessions.py", line 530, in request
resp = self.send(prep, **send_kwargs)
File "G:\Python\Python38\lib\site-packages\requests\sessions.py", line 643, in send
r = adapter.send(request, **kwargs)
File "G:\Python\Python38\lib\site-packages\requests\adapters.py", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', ConnectionResetError(10054, '远程主机强迫关闭了一个现有的连接。', None, 10054, None))
Process finished with exit code 1
参考博客: https://www.cnblogs.com/chen0307/articles/9679139.html
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。