# NOTE: if crawling shows "[WinError 10061] 由于目标计算机积极拒绝,无法连接" (connection
# actively refused), a local/system proxy is usually configured but not running —
# check the OS proxy settings before blaming the target site.
import csv
import random
import time
# import pandas as pd
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
# Configure matplotlib to render CJK text (SimHei) and to draw the minus sign
# correctly, which a CJK font would otherwise display as a missing glyph.
plt.rcParams["font.sans-serif"] = ["SimHei"]
plt.rcParams["axes.unicode_minus"] = False
class Spider(object):
    """Crawl daily air-quality (AQI) tables from tianqihoubao.com into CSV files."""

    def __init__(self):
        pass

    @staticmethod
    def get_data_and_save(city, year, month):
        """Fetch one month of AQI data for *city* and append it to '{city}_数据.csv'.

        The CSV is opened in append mode so successive (year, month) calls
        accumulate into one file per city; the header row is written only when
        the file is empty.  (The original 'w' mode re-created the file on every
        call, so only the last crawled month survived.)

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        # Zero-pad the month: the site uses URLs like .../beijing-202201.html.
        url = f'https://tianqihoubao.com/aqi/{city}-{year}{month:02d}.html'
        # A browser-like User-Agent avoids trivial bot blocking; the explicit
        # timeout prevents a dead server from hanging the whole crawl.
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
        res = requests.get(url, headers=headers, timeout=30, verify=True)
        res.raise_for_status()  # fail loudly instead of parsing an error page
        soup = BeautifulSoup(res.text, 'html.parser')
        with open(f'{city}_数据.csv', 'a', encoding='utf-8', newline='') as f:
            csv_writer = csv.writer(f)
            if f.tell() == 0:  # empty file → write the header exactly once
                csv_writer.writerow(
                    ['日期', '质量等级', 'AQI指数', '当天AQI排名', 'PM2.5', 'PM10', 'So2', 'No2', 'Co', 'O3'])
            # Skip the first <tr> (the table header row on the page).
            for row in soup.find_all('tr')[1:]:
                cells = row.find_all('td')  # hoisted: was re-queried per column
                csv_writer.writerow([cell.get_text().strip() for cell in cells[:10]])

    def crawl_air_quality(self, years, months, cities):
        """Crawl every (city, year, month) combination, rate-limited per request.

        Args:
            years:  iterable of years, e.g. [2022].
            months: iterable of month numbers 1-12.
            cities: iterable of city slugs as used in the site's URLs.
        """
        for city in cities:
            for year in years:
                for month in months:
                    self.get_data_and_save(city, year, month)
                    # Random 2-3 s pause between requests to stay polite and
                    # reduce the chance of being throttled or banned.
                    time.sleep(2 + random.random())
                    print(city, year, month)
if __name__ == '__main__':
    # Entry point: fetch and persist AQI data for the configured cities/months.
    Spider().crawl_air_quality(
        years=[2022],
        months=[1, 2, 3],
        cities=['beijing', 'shanghai', 'guangzhou'],
    )