Reubicados los archivos en carpetas

2023-10-28 13:24:05 +02:00
parent e8efb7c4d0
commit 7c7bca5667
45 changed files with 93 additions and 80 deletions
@@ -0,0 +1,135 @@
+import json
+import requests
+import os
+import time
+import random
+import shutil
+from urllib.parse import urlparse
+
+# Settings that control how the program runs
+json_file = r"/home/sergio/zx/zxart/picture.json"
+destination_path = r"/home/sergio/zx/zxart/pictures/"
+cache_path = r"/home/sergio/zx/zxart/cache/pictures/"
+wait = False  # When True, pause for a random interval between downloads
+min_wait = 1  # Minimum number of seconds to wait between downloads
+max_wait = 3  # Maximum number of seconds to wait between downloads
+tags = [
+    "Loading Screen",
+    "Game",
+]  # Tags of the pictures to select. Leave empty to select all of them
+
+
+# Build the list of URLs from a JSON file
+def get_urls():
+    """Return the sorted, de-duplicated picture URLs listed in ``json_file``.
+
+    Walks the ``"zxPicture"`` entries of the JSON document and keeps the
+    ``"originalUrl"`` of every entry whose URL ends in ``scr``. When the
+    module-level ``tags`` list is non-empty, only entries that have a
+    ``"tags"`` field containing at least one of those tags are considered;
+    when it is empty, every entry is considered.
+
+    Returns:
+        list: The selected URLs, without duplicates, in ascending order.
+
+    Raises:
+        OSError: If ``json_file`` cannot be opened.
+        json.JSONDecodeError: If the file does not contain valid JSON.
+        KeyError: If the document has no ``"zxPicture"`` key.
+    """
+    # The context manager closes the file even when parsing fails.
+    with open(json_file, encoding="utf-8") as f:
+        data = json.load(f)
+
+    selected = set()
+    for entry in data["zxPicture"]:
+        # With a tag filter active, skip entries that carry no tags or whose
+        # tags match none of the requested ones.
+        if tags and not (
+            "tags" in entry and any(tag in entry["tags"] for tag in tags)
+        ):
+            continue
+        if "originalUrl" in entry and entry["originalUrl"].endswith("scr"):
+            selected.add(entry["originalUrl"])
+
+    # A set removes duplicates; sorted() returns them as an ordered list.
+    return sorted(selected)
+
+
+# Derive the local file name from a full URL
+def url_filename(url):
+    """Return the last segment of *url* with "filename:" turned into "pic_".
+
+    Everything after the final "/" is taken (the whole string when there is
+    no "/"), then every occurrence of "filename:" in it is replaced by
+    "pic_".
+    """
+    _, _, last_segment = url.rpartition("/")
+    return last_segment.replace("filename:", "pic_")
+
+
+# Download a file from a URL to a specific destination
+def download_file(url, dest, timeout=30):
+    """Download *url* and write the response body to the file *dest*.
+
+    The file is only written when the server answers with a success status,
+    so an HTTP error page is never stored under the picture's name.
+    Timeouts, redirect loops and HTTP error statuses are reported on stdout
+    and leave *dest* untouched, letting the caller move on to the next URL.
+
+    Args:
+        url: Address of the file to download.
+        dest: Path of the local file to create or overwrite.
+        timeout: Seconds to wait for the server before giving up; passed
+            straight to ``requests.get``. Without it a stalled connection
+            would block the program indefinitely.
+
+    Raises:
+        SystemExit: On any other ``requests`` failure (for example, a
+            connection error), which aborts the whole run.
+        OSError: If *dest* cannot be written.
+    """
+    try:
+        r = requests.get(url, timeout=timeout)
+        # Turn 4xx/5xx answers into HTTPError instead of saving their body.
+        r.raise_for_status()
+
+    except requests.exceptions.Timeout:
+        print("Timeout: {}".format(url))
+        return
+
+    except requests.exceptions.TooManyRedirects:
+        print("Bad URL: {}".format(url))
+        return
+
+    except requests.exceptions.HTTPError as e:
+        # The exception text already includes the status and the URL.
+        print("HTTP error: {}".format(e))
+        return
+
+    except requests.exceptions.RequestException as e:
+        # Anything else is treated as unrecoverable.
+        raise SystemExit(e)
+
+    with open(dest, "wb") as f:
+        f.write(r.content)
+
+
+# Fetch the files for a list of URLs
+def get_files(urls):
+    """Make sure every URL in *urls* has a copy in ``destination_path``.
+
+    For each URL, in order:
+
+    * if the file is already in the destination, it is reported as skipped;
+    * otherwise, if it is in ``cache_path``, it is copied from there;
+    * otherwise it is downloaded into the cache and, if the download
+      produced a file, copied to the destination. When ``wait`` is true the
+      program then sleeps a random whole number of seconds between
+      ``min_wait`` and ``max_wait``.
+
+    One progress line is printed for every file that is skipped, copied
+    from the cache or downloaded.
+
+    Args:
+        urls: Sequence of URLs to process.
+    """
+    # Create both folders up front so the first copy or download does not
+    # fail just because a directory is missing.
+    os.makedirs(destination_path, exist_ok=True)
+    os.makedirs(cache_path, exist_ok=True)
+
+    total = len(urls)
+    for count, url in enumerate(urls, start=1):
+        name = url_filename(url)
+        destination_file = os.path.join(destination_path, name)
+        cache_file = os.path.join(cache_path, name)
+
+        if os.path.isfile(destination_file):
+            status = "skipping"
+        elif os.path.isfile(cache_file):
+            shutil.copyfile(cache_file, destination_file)
+            status = "cached"
+        else:
+            download_file(url, cache_file)
+            if not os.path.isfile(cache_file):
+                # The download did not produce a file; download_file has
+                # already reported why, so move on to the next URL.
+                continue
+            shutil.copyfile(cache_file, destination_file)
+            status = "downloaded"
+
+        # The status label is padded to 11 columns so the colons line up.
+        print(
+            "{:<11}: {:{width}} ({} / {})".format(
+                status, name, count, total, width=50
+            )
+        )
+
+        # Only pause after an actual network download.
+        if wait and status == "downloaded":
+            time.sleep(random.randint(min_wait, max_wait))
+
+
+def main():
+    """Collect the picture URLs and fetch the corresponding files."""
+    get_files(get_urls())
+
+
+# Run the downloader only when this file is executed as a script
+if __name__ == "__main__":
+    main()