Images from both sites can be downloaded, but each site needs its own script.

If the sites won't open, you need a proxy/VPN to reach them.

Site 1

TIPS

1. The site puts hotlink protection on its images, so after obtaining the real image URL you need to add a referer to the request headers to get around the restriction, as shown in the sketch below.
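A minimal sketch of the idea (the image URL here is only a placeholder, not a real address from the site):

import requests

headers = {
    'user-agent': 'Mozilla/5.0',
    # set the referer to the post page the image belongs to;
    # without it the server rejects the request as a hotlink
    'referer': 'https://www.tuao8.xyz/post/1316.html',
}

# placeholder image URL for illustration only
img = requests.get('https://example.com/some-image.jpg', headers=headers)
with open('some-image.jpg', 'wb') as f:
    f.write(img.content)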

Result screenshot

Python code


'''
@author=lthero
'''
import re
import time
from bs4 import BeautifulSoup
import requests
import threading
import random
import string

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/98.0.4758.102 Safari/537.36',
    'referer': '',
    'cookie': 'ftwwwtuao8xyz=1; HstCfa4220059=1644384603290; HstCmu4220059=1644384603290; '
              'c_ref_4220059=https%3A%2F%2Fwww.google.com%2F; timezone=8; HstCnv4220059=3; ftwwwtuao8xyz=1; '
              'HstCns4220059=8; HstCla4220059=1646142002976; HstPn4220059=13; HstPt4220059=169'}


class myThread(threading.Thread):
    def __init__(self, url, theNum, file_path):
        threading.Thread.__init__(self)
        self.url = url
        self.file_path = file_path
        self.theNum = theNum

    def ranstr(self, num):
        # random file name so downloads never overwrite each other
        salt = ''.join(random.sample(string.ascii_letters + string.digits, num))
        return salt

    def open_url(self, url):
        response = requests.get(url, headers=headers)
        return response.text

    def run(self):
        # each page holds exactly one image; pull its real URL from the src attribute
        soup = BeautifulSoup(self.open_url(self.url + str(self.theNum)), 'lxml')
        pattern = re.compile(r'https://.*[1-9]\d*\.jpg')
        obj_images = soup.find_all(src=pattern)
        each_download_url = pattern.findall(str(obj_images[0]))[0]
        # the referer set in picDown gets past the hotlink protection
        img = requests.get(each_download_url, headers=headers)
        with open('%s/%s.jpg' % (self.file_path, self.ranstr(6)), 'wb') as f:
            f.write(img.content)


# site 1 paginates each post: one image per ?page=N
class picDown():
    def __init__(self, url, save_path):
        headers['referer'] = url
        self.page = '?page='
        self.url = url + self.page
        self.save_path = save_path
        # page count is hard-coded; one thread per page
        for i in range(1, 35):
            thread1 = myThread(self.url, i, self.save_path)
            thread1.start()
            time.sleep(0.2)


if __name__ == '__main__':
    # test URLs
    # https://www.tuao8.xyz/post/1316.html
    # https://www.tuao8.xyz/post/2254.html
    picDown(url=input("Enter the post URL: "),
            save_path=input("Enter the folder to save to: "))

Packaged executable

Link: https://wwa.lanzouy.com/iw9M000v8ani

Password: dtrc

When running it, enter one of the "test URLs" listed in the code above.

Alternatively, open the site in a browser and pick another post; any other post on the same site can be downloaded too.

The save folder must already exist, otherwise the program will fail.
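If you don't want to create the folder by hand, a small sketch of an optional tweak (not part of the packaged program) that creates it first:

import os

save_path = input("Enter the folder to save to: ")
# create the folder (and any missing parents) if it does not exist yet
os.makedirs(save_path, exist_ok=True)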

If the cookie no longer works, replace it with your own……
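To swap it out, copy the Cookie request header from your browser's developer tools and paste it into the headers dict; roughly (the value below is just a placeholder):

headers['cookie'] = 'paste-the-cookie-string-copied-from-your-browser-here'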


Site 2

Unlike site 1, all of site 2's images live on a single URL.

For site 2 there is only the script; no packaged executable was built.

When running it, enter the "test URL" shown in the code below.

Alternatively, open the site in a browser and pick another post; any other post on the same site can be downloaded too.

The code is as follows:

import re
import time
from bs4 import BeautifulSoup
import requests
import threading
import random
import string

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'}


class myThread(threading.Thread):
    def __init__(self, the_url, file_path):
        threading.Thread.__init__(self)
        self.url = the_url
        self.file_path = file_path

    def ranstr(self, num):
        # random file name so downloads never overwrite each other
        salt = ''.join(random.sample(string.ascii_letters + string.digits, num))
        return salt

    def run(self):
        img = requests.get(self.url, headers=headers)
        with open('%s/%s.jpg' % (self.file_path, self.ranstr(6)), 'wb') as f:
            f.write(img.content)


class picDown():
    def __init__(self, url, save_path):
        self.url = url
        self.url_images = []
        self.save_path = save_path
        soup = BeautifulSoup(self.open_url(), 'lxml')

        # all image links sit on the one post page, inside href attributes
        pattern = re.compile(r'https://.*\.jpg')
        obj_images = soup.find_all(href=pattern)
        for i in obj_images:
            each_url = pattern.findall(str(i))
            self.url_images.append(each_url[0])
        self.download()

    def open_url(self):
        response = requests.get(self.url, headers=headers)
        print(response.status_code)
        return response.text

    def download(self):
        # one thread per image, with a short pause between requests
        for each_url in self.url_images:
            thread1 = myThread(each_url, self.save_path)
            thread1.start()
            time.sleep(0.1)


if __name__ == '__main__':
    # test URL
    # http://wushengguang.club/index.php/archives/%5BYuzuki%5D+E+Cup+Natural+Beauty+Girl+-+%2866P%29.html
    picDown(url=input("Enter the post URL: "),
            save_path=input("Enter the folder to save to: "))
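
One optional tweak, sketched here as an assumption rather than part of the original script: collect the download threads and join them, so the program only exits once every image has been written.

    def download(self):
        threads = []
        for each_url in self.url_images:
            t = myThread(each_url, self.save_path)
            t.start()
            threads.append(t)
            time.sleep(0.1)
        # block until every download thread has finished
        for t in threads:
            t.join()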