import ctypes
import os
import random
import re
import shutil
from urllib.parse import unquote, urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
# Gallery page listing Wikipedia's featured pictures of paintings.
SOURCE = "https://en.wikipedia.org/wiki/Wikipedia:Featured_pictures/Artwork/Paintings"
# Absolute URLs of the file pages linked from SOURCE; (re)filled by main().
BASE = []
# Not used by this script; kept so existing references keep working.
IMAGE_SOURCES = []
HEADERS = {"User-Agent": "wikipedia-art-scraper/0.0.1 - https://basicoctopus.com"}

# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 30
# Win32 SystemParametersInfo action code for setting the desktop wallpaper.
SPI_SETDESKWALLPAPER = 20


def resolve_url(base, href):
    """Return *href* as an absolute URL, resolved against *base*.

    Handles site-relative links ("/wiki/..."), protocol-relative links
    ("//upload.wikimedia.org/...") and links that are already absolute,
    which are returned unchanged.
    """
    return urljoin(base, href)


def safe_filename(uri):
    """Derive a local file name from the last path segment of *uri*.

    The segment is percent-decoded, every character of the stem that is
    not an ASCII letter is dropped, and the extension is kept so the file
    type stays recognisable. If nothing is left of the stem, "wallpaper"
    is used instead.
    """
    last_segment = unquote(urlsplit(uri).path.rsplit("/", 1)[-1])
    stem, ext = os.path.splitext(last_segment)
    stem = re.sub(r"[^a-zA-Z]", "", stem) or "wallpaper"
    ext = re.sub(r"[^a-zA-Z0-9.]", "", ext)
    return stem + ext


def fetch_soup(url):
    """Download *url* and return its HTML parsed with BeautifulSoup.

    Raises requests.HTTPError when the server answers with an error status.
    """
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")


def download(uri, filename):
    """Stream the resource at *uri* into the local file *filename*."""
    with requests.get(
        uri, stream=True, headers=HEADERS, timeout=REQUEST_TIMEOUT
    ) as response:
        # Fail here rather than save an HTML error page as the image.
        response.raise_for_status()
        with open(filename, "wb") as fd:
            for chunk in response.iter_content(chunk_size=8192):
                fd.write(chunk)


def set_wallpaper(filepath):
    """Set *filepath* as the desktop wallpaper; return True on success.

    Only Windows is supported; on other systems a notice is printed and
    False is returned.
    """
    if os.name != "nt":
        print(f"Wallpaper not changed (Windows only); image saved to {filepath}")
        return False
    ok = ctypes.windll.user32.SystemParametersInfoW(
        SPI_SETDESKWALLPAPER, 0, filepath, 0
    )
    if not ok:
        print(f"Windows refused to set the wallpaper to {filepath}")
    return bool(ok)


def main():
    """Pick a random featured painting, download it and set it as wallpaper."""
    gallery = fetch_soup(SOURCE)
    BASE[:] = [
        resolve_url(SOURCE, link.get("href"))
        for link in gallery.find_all("a", "image")
        if link.get("href")
    ]
    if not BASE:
        raise SystemExit(
            "No links with class 'image' found on the gallery page; "
            "the page markup may have changed."
        )

    base = random.choice(BASE)
    print(base)

    file_page = fetch_soup(base)
    thumbnails = file_page.find_all("a", "mw-thumbnail-link")
    if not thumbnails:
        raise SystemExit(f"No 'mw-thumbnail-link' links found on {base}")

    # The last thumbnail link is used; presumably the largest preview
    # offered on the file page -- TODO confirm.
    href = thumbnails[-1].get("href")
    print(href)

    uri = resolve_url(base, href)
    filename = safe_filename(uri)
    print(f"test: {filename}")

    download(uri, filename)
    set_wallpaper(os.path.abspath(filename))


if __name__ == "__main__":
    main()