136 lines
4.3 KiB
Python
136 lines
4.3 KiB
Python
import json
|
|
import requests
|
|
import os
|
|
import time
|
|
import random
|
|
import shutil
|
|
from urllib.parse import urlparse
|
|
|
|
# Settings that control how the program runs
json_file = r"/home/sergio/zx/zxart/picture.json"
destination_path = r"/home/sergio/zx/zxart/pictures/"
cache_path = r"/home/sergio/zx/zxart/cache/pictures/"

# When true, pause for a random interval between downloads
wait = False
# Shortest pause between downloads, in seconds
min_wait = 1
# Longest pause between downloads, in seconds
max_wait = 3

# Tags of the pictures to select; leave empty to select every picture
tags = ["Loading Screen", "Game"]
|
|
|
|
|
|
# Gets the list of URLs from a JSON file
|
|
def get_urls(path=None, selected_tags=None):
    """Return the sorted, unique picture URLs ending in ``scr`` from a JSON file.

    The JSON document must hold a list of picture records under the
    ``"zxPicture"`` key.  A record contributes its ``"originalUrl"`` when
    that URL ends in ``scr`` and, if tag filtering is active, when at least
    one wanted tag is found in the record's ``"tags"`` value.

    Args:
        path: JSON file to read.  Defaults to the module-level ``json_file``.
        selected_tags: Tags to filter by; an empty sequence selects every
            picture.  Defaults to the module-level ``tags``.

    Returns:
        A list of URL strings, without duplicates, in ascending order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``"zxPicture"`` key.
    """
    if path is None:
        path = json_file
    if selected_tags is None:
        selected_tags = tags

    # The context manager closes the file even when parsing raises.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    # A set removes duplicates as they are found, so a picture matching
    # several tags is still recorded only once.
    unique_urls = set()
    for picture in data["zxPicture"]:
        url = picture.get("originalUrl")
        # Skip records with a missing or null URL, or with another file type.
        if not isinstance(url, str) or not url.endswith("scr"):
            continue
        if selected_tags:
            picture_tags = picture.get("tags")
            # Records without tags can never satisfy a tag filter.
            if not picture_tags or not any(
                tag in picture_tags for tag in selected_tags
            ):
                continue
        unique_urls.add(url)

    return sorted(unique_urls)
|
|
|
|
|
|
# Gets the file name from a full URL
|
|
def url_filename(url):
    """Return the file name held in the last path segment of *url*.

    Everything after the final ``/`` is kept (the whole string when there is
    no ``/``), and each ``filename:`` occurrence in it is replaced by ``pic_``.
    """
    last_segment = url.rsplit("/", 1)[-1]
    return last_segment.replace("filename:", "pic_")
|
|
|
|
|
|
# Downloads a file from a URL to a specific destination
|
|
def download_file(url, dest, timeout=30):
    """Download *url* and write the response body to the file *dest*.

    Nothing is written when the request times out, redirects too many
    times, or the server answers with an HTTP error status; a message is
    printed and the function returns normally so the caller can carry on
    with the next file.

    Args:
        url: Address of the file to fetch.
        dest: Path of the local file to create.
        timeout: Value passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the run forever.

    Raises:
        SystemExit: On any other ``requests`` failure.
        OSError: If *dest* cannot be written.
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx answers into exceptions so that an error page is
        # never stored as if it were the requested file.
        r.raise_for_status()
    except requests.exceptions.Timeout:
        # Maybe set up for a retry, or continue in a retry loop
        print("Timeout: {}".format(url))
    except requests.exceptions.TooManyRedirects:
        # Tell the user their URL was bad and try a different one
        print("Bad URL: {}".format(url))
    except requests.exceptions.HTTPError as e:
        # The server replied, but with an error status: skip this file.
        print("HTTP error: {} ({})".format(url, e))
    except requests.exceptions.RequestException as e:
        # Catastrophic error: bail out.
        raise SystemExit(e)
    else:
        with open(dest, "wb") as f:
            f.write(r.content)
|
|
|
|
|
|
# Downloads the files from a list of URLs
|
|
def _report(status, filename, count, total):
    """Print one progress line: status, padded file name and position."""
    print(
        "{} : {:{width}} ({} / {})".format(
            status, filename, count, total, width=50
        )
    )


def get_files(urls):
    """Make every file in *urls* available in the destination directory.

    For each URL the file is skipped when it already exists in
    ``destination_path``, copied from ``cache_path`` when it is cached, and
    otherwise downloaded into the cache and then copied to the destination.
    One progress line is printed per URL.  When ``wait`` is true, a random
    pause of ``min_wait`` to ``max_wait`` seconds follows each download
    attempt.

    Args:
        urls: Sequence of file URLs to process.
    """
    # Create both directories up front so that copies and downloads cannot
    # fail merely because a folder is missing.
    os.makedirs(destination_path, exist_ok=True)
    os.makedirs(cache_path, exist_ok=True)

    total = len(urls)
    for count, url in enumerate(urls, start=1):
        filename = url_filename(url)
        destination_file = os.path.join(destination_path, filename)
        cache_file = os.path.join(cache_path, filename)

        # Already present at the destination: nothing to do.
        if os.path.isfile(destination_file):
            _report("skipping", filename, count, total)
            continue

        # Present in the cache: a local copy is enough.
        if os.path.isfile(cache_file):
            shutil.copyfile(cache_file, destination_file)
            _report("cached", filename, count, total)
            continue

        # Not cached: download into the cache, then copy to the destination.
        download_file(url, cache_file)
        # download_file can return without writing anything (for example on
        # a timeout), so success is reported only when the file exists.
        if os.path.isfile(cache_file):
            shutil.copyfile(cache_file, destination_file)
            _report("downloaded", filename, count, total)
        else:
            _report("failed", filename, count, total)

        if wait:
            time.sleep(random.randint(min_wait, max_wait))
|
|
|
|
|
|
def main():
    """Collect the picture URLs and fetch every file they point to."""
    get_files(get_urls())
|
|
|
|
|
|
# Start the download only when this file is executed as a script.
if __name__ == "__main__":
    main()
|