整理以前写的代码时，发现了一个下载高清桌面图片的小工具，它通过多线程调用 wget 的方式进行下载。
"""Download wallpaper images from a paginated gallery site.

Each listing page is fetched in its own thread.  Every image URL found in a
``data-progressive="..."`` attribute of the page is downloaded with ``wget``
into an ``images`` directory located next to this script.

Usage:
    python <this script> 'https://example.com/?p={}'

The single argument is a ``str.format`` template; its ``{}`` placeholder
receives the page number.
"""

import os
import re
import subprocess
import sys
from threading import Thread

# Listing pages to crawl, inclusive on both ends.
FIRST_PAGE = 1
LAST_PAGE = 101

# Seconds to wait for the server before giving up on a listing page.
REQUEST_TIMEOUT = 30

# Image URLs are read from the data-progressive attribute of the listing HTML.
IMAGE_URL_PATTERN = re.compile(r'data-progressive="(.*?)"')


def request_head(site):
    """Build the HTTP request headers used to fetch *site*.

    Args:
        site: Absolute URL of the page, including the scheme
            (``scheme://host/...``).

    Returns:
        A dict of header names to values.  ``Host`` is the third
        ``/``-separated component of *site* and ``Referer`` is *site* itself.

    Raises:
        IndexError: If *site* has fewer than three ``/``-separated
            components (for example, a URL without a scheme).
    """
    host = site.split("/")[2]
    heads = {
        "Content-Type": "text/html",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
        # NOTE(review): "br" is advertised here; requests presumably only
        # decodes Brotli bodies when a brotli package is installed -- confirm
        # the target server never answers with Brotli, or drop "br".
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9",
        "Host": host,
        "Referer": site,
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36",
    }
    return heads


def request_spider(url):
    """Fetch *url* and return its body decoded as UTF-8.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The response text when the server answers with status 200;
        ``None`` when the status is anything else or the request fails
        (the failure is printed, not raised).
    """
    # Imported here rather than at module level so the pure helpers in this
    # module remain importable where the third-party package is missing.
    import requests

    try:
        res = requests.get(
            url=url,
            headers=request_head(url),
            timeout=REQUEST_TIMEOUT,  # without it a stalled server hangs the thread forever
        )
        if res.status_code == 200:
            res.encoding = "utf-8"
            return res.text
    except Exception as e:
        # Deliberate best-effort boundary: one bad page (network error,
        # malformed URL, ...) must not take down its worker thread.
        print("发起请求出错:", e)
    return None


def dowload_spider(url):
    """Download every image referenced by the listing page at *url*.

    Images are saved by ``wget`` into ``<script dir>/images``; an image whose
    file name already exists there is skipped.

    Args:
        url: Absolute URL of one listing page.
    """
    file_path = os.path.dirname(os.path.abspath(__file__))
    images_dir = os.path.join(file_path, "images")

    res = request_spider(url)
    if res is None:
        # The page could not be fetched; request_spider already reported it.
        return

    # exist_ok avoids the check-then-create race between worker threads.
    os.makedirs(images_dir, exist_ok=True)

    for item in IMAGE_URL_PATTERN.findall(res):
        filename = item.rsplit("/", 1)[-1]
        if filename and os.path.exists(os.path.join(images_dir, filename)):
            continue
        try:
            # The URL comes from a remote page, so it is passed as a single
            # argument (no shell) and after "--" so it cannot be parsed as a
            # wget option.  cwd= replaces os.chdir, which is process-wide and
            # therefore unsafe to call from several threads.
            subprocess.run(["wget", "--", item], cwd=images_dir, check=False)
        except OSError as e:
            # wget itself could not be started; retrying for the remaining
            # images of this page would fail the same way.
            print("调用 wget 出错:", e)
            return


def main(argv=None):
    """Crawl pages FIRST_PAGE..LAST_PAGE concurrently, one thread per page.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.  Must hold exactly one item: the page-URL
            template containing a ``{}`` placeholder for the page number.

    Returns:
        Process exit status: 0 when all workers have finished, 2 when the
        arguments are wrong.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 1:
        # NOTE(review): the original script formatted an undefined global
        # ``url``; the real template is not recoverable from the source, so
        # it has to be supplied by the caller.
        prog = os.path.basename(sys.argv[0]) if sys.argv else "spider.py"
        print(f"用法: python {prog} 'https://example.com/?p={{}}'", file=sys.stderr)
        return 2

    url = args[0]
    works = []
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        thread = Thread(target=dowload_spider, args=(url.format(page),))
        thread.start()
        works.append(thread)
    for work in works:
        work.join()
    print("程序完成!")
    return 0


if __name__ == "__main__":
    sys.exit(main())