#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Tongwudi

import requests
from bs4 import BeautifulSoup
import time
import os
import urllib.request
import threading

# Module-level state shared by the functions below.
#
# ``time.clock`` was removed in Python 3.8. Use it only where it still exists
# (so arithmetic against other ``time.clock()`` readings stays consistent) and
# fall back to ``time.perf_counter`` everywhere else.
_timer = getattr(time, "clock", time.perf_counter)
start = _timer()  # timestamp taken when the script is loaded
urls = []  # image URLs collected by main() and consumed by async_run()
# Collect image URLs and store them in the module-level ``urls`` list.
def main():
    """Scrape the doutula.com photo list pages for image URLs.

    Requests list pages 0 through 1469. For every ``<img>`` tag whose class
    is ``img-responsive lazy image_dta`` the value of its ``data-original``
    attribute is appended to the module-level ``urls`` list.

    A page that cannot be fetched (connection error, timeout, HTTP error
    status) is reported on stdout and skipped so one bad page does not abort
    the whole crawl. Tags without a ``data-original`` attribute are ignored.
    """
    ad = 'http://www.doutula.com/photo/list/?page='
    # The headers never change, so build them once instead of once per page.
    heads = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        'Upgrade-Insecure-Requests': '1',
    }
    img_filter = {'class': 'img-responsive lazy image_dta'}

    for i in range(0, 1470):
        url = ad + str(i)
        try:
            # requests has no default timeout; without one a stalled
            # connection would hang the crawl forever.
            doutu = requests.get(url, headers=heads, timeout=10)
            doutu.raise_for_status()
        except requests.RequestException as err:
            print('skipping %s: %s' % (url, err))
            continue

        soup = BeautifulSoup(doutu.content, "lxml")
        for link in soup.find_all('img', attrs=img_filter):
            # .get() avoids a KeyError on tags lacking the lazy-load attribute.
            src = link.get('data-original')
            if src:
                urls.append(src)

# Download one image into the target directory.
def xiazai(url):
    """Download ``url`` into the ``images`` directory.

    The file name is the part of ``url`` after its last ``/``. The
    ``images`` directory (relative to the current working directory) is
    created if it does not exist yet; ``exist_ok=True`` keeps this safe when
    several download threads reach this point at the same time.

    Args:
        url: Address of the file to download.

    Raises:
        OSError: If the download or the write to disk fails (this includes
            ``urllib.error.URLError``, which is an ``OSError`` subclass).
    """
    ha = url.split('/')[-1]
    # urlretrieve does not create missing parent directories.
    os.makedirs('images', exist_ok=True)
    path = os.path.join('images', ha)
    print("正在下载%s" % ha)
    urllib.request.urlretrieve(url, filename=path)

# Multithreaded download.
def async_run():
    """Download every URL in ``urls`` on its own thread and log the URLs.

    For each entry of the module-level ``urls`` list a thread running
    ``xiazai(url)`` is started, followed by a 0.2 second pause that paces the
    requests. Each URL is also written, one per line, to ``斗图.txt`` in the
    current working directory.

    The function returns only after all download threads have finished, so a
    caller that measures elapsed time gets a figure that includes the
    downloads themselves.
    """
    workers = []
    # ``with`` guarantees the log file is closed even if a thread fails to
    # start; the explicit encoding keeps the output platform-independent.
    with open('斗图.txt', 'w', encoding='utf-8') as f:
        for url in urls:
            th = threading.Thread(target=xiazai, args=(url,))
            th.start()
            workers.append(th)
            time.sleep(0.2)
            f.write(url + '\n')

    for th in workers:
        th.join()

if __name__ == '__main__':
    # Script entry point: collect the URLs, download them, report the
    # elapsed wall-clock time. time.perf_counter replaces time.clock, which
    # was removed in Python 3.8 (and measured CPU time, not wall time, on
    # Unix).
    began = time.perf_counter()
    main()
    print(urls)
    async_run()
    print('运行时间为: %s 秒' % (time.perf_counter() - began))

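# (Residue from the blog page this script was copied from; not part of the program.)
# Leave a comment
#
# Your email address will not be published. Required fields are marked with *.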