from bs4 import BeautifulSoup
def getHtml(url):
    """Download *url* and return the response body decoded as UTF-8.

    The request is sent with a ``user-agent`` header of ``Mozilla/5.0``.
    The status code reported by the response is printed before the body
    is read.

    Args:
        url: Address of the page to fetch.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    import urllib.request

    print("第二种方法")
    request = urllib.request.Request(url)
    request.add_header("user-agent", "Mozilla/5.0")
    # Open the prepared Request object rather than the bare URL; otherwise
    # the user-agent header added above is never sent. The context manager
    # closes the connection once the body has been read.
    with urllib.request.urlopen(request) as response2:
        print(response2.getcode())
        return response2.read().decode("utf-8")
def urllist():
    """Return the URLs of listing pages 1 through 10 in ascending order.

    Returns:
        A list of ten URL strings, one per page number.
    """
    prefix = "http://www.sanye.cx/?cate=69" + "&page="
    return [prefix + str(page_no) for page_no in range(1, 11)]
def logtext(content):
    """Append *content* followed by ``"\\r\\n"`` to ``sanye.md``.

    The path is relative, so the file is created in (or appended to within)
    the current working directory.

    Args:
        content: Text to append to the file.
    """
    # The context manager flushes and closes the handle on every call, so
    # repeated logging does not leak open file descriptors.
    with open('sanye.md', 'a+', encoding="utf-8") as f:
        f.write(content)
        f.write("\r\n")
def parsedata(data):
    """Parse one listing page and log the title of every entry on it.

    Prints the page title, then walks each ``<li>`` of the first ``<ul>``
    inside the first element with class ``list``. For every item, the text
    of the link inside its ``<h2>`` is printed and appended, prefixed with
    ``##``, through ``logtext``.

    A page without the expected list, or an item without an ``<h2><a>``
    link, is skipped rather than raising ``AttributeError``.

    Args:
        data: HTML source of the page.
    """
    soup = BeautifulSoup(data, 'html.parser')
    if soup.title is not None:
        print(soup.title.text)

    # find() returns None when nothing matches, so every step of the
    # lookup chain is checked before it is dereferenced.
    div = soup.find(class_='list')
    ul = div.find('ul') if div is not None else None
    if ul is None:
        return

    for li in ul.find_all('li'):
        h2 = li.find('h2')
        a = h2.find('a') if h2 is not None else None
        if a is None:
            # Item does not carry a heading link; nothing to log for it.
            continue
        name = a.get_text()
        print(name)
        logtext("##" + name)
def run():
    """Fetch every listing page and log the entry titles found on each.

    The page URLs come from ``urllist``; the list is printed first, then
    each page is downloaded with ``getHtml`` and handed to ``parsedata``.
    """
    page_urls = urllist()
    print(page_urls)
    for page_url in page_urls:
        parsedata(getHtml(page_url))
# Start the scrape. This call sits at module level, so it also runs whenever
# the module is imported, not only when it is executed as a script.
run()
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用。
你还可以使用 @ 来通知其他用户。