import requests, time
from threading import Thread
class MyThread(Thread):
def __init__(self, url):
Thread.__init__(self)
self.url = url
def run(self):
open_url(self.url)
def open_url(url):
r = requests.get(url[:-1])
print(r.status_code)
print(url)
# return url
if __name__ == '__main__':
with open("E:/all_domain.txt",'r') as f:
t_start = time.time()
threads = []
for url in f:
t = MyThread(url)
threads.append(t)
t.start()
print(len(threads))
for i in threads:
i.join()
t_end = time.time()
print('the thread way take %s s' % (t_end - t_start))
上面是我写的一个多进程。f文件里面大概有18万条数据,执行多线程的时候难道打开18万个线程吗?要怎么改写控制线程数量?譬如说先100个线程,100个线程结束后在开100个线程。
线程是在一个cpu核心里面运行,有办法做到多核心多线程吗?要如何改写呢?
谢谢!
在开头加
threadmax = threading.BoundedSemaphore(100)
在open_url方法最后一行加入
threadmax.release()
在for url in f:和第一行t = MyThread(url)之间加入
threadmax.acquire()