代码如下:
from urllib.request import urlopen
from urllib.request import Request
from urllib import parse
from bs4 import BeautifulSoup as bs
import re
# Fetch the CSDN home page. A browser-like User-Agent header is sent with the request.
req = Request('https://www.csdn.net/')
req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36')
# The context manager closes the HTTP response as soon as the body has been read.
with urlopen(req) as resp:
    html_doc = resp.read().decode('utf-8')
# The page is HTML, so use the built-in HTML parser instead of the 'xml' parser.
soup = bs(html_doc, 'html.parser')
# Fix: the original findAll(...) call was missing its closing parenthesis
# (re.compile(...) was closed, findAll(...) was not), which is a SyntaxError.
links = soup.findAll('a', href=re.compile("^(https://www.csdn.net/)"))
for link in links:
    # .get() returns None when an anchor has no target attribute, whereas
    # link['target'] would raise KeyError for such anchors.
    target = link.get('target')
    # Fix: the original `if` line lacked its trailing colon and the body was not indented.
    if target and re.search("^(_blank)$", target):
        print(link.get_text())
报错信息:
加※行语法错误,请大神看看哪里写的不对,在线等
### 问题描述
问题出现的环境背景及自己尝试过哪些方法
相关代码
// 请把代码文本粘贴到下方(请勿用图片代替代码)
links = soup.findAll('a',href=re.compile("^(https://www.csdn.net/)")
这一行结尾少了一个右括号:`re.compile(...)` 已经闭合,但外层的 `findAll(` 没有闭合,需要在行尾再补一个 `)`。