zxdb.py: la puta merda esta fa lo que vol en el request.get

2023-09-04 13:51:22 +02:00
parent e0576c7579
commit ee3fcad28d
1 changed files with 174 additions and 38 deletions
@@ -5,16 +5,29 @@ import mysql.connector
 import requests
 import time
 import random
+import zipfile
 import shutil
 from mysql.connector import errorcode
 from urllib.parse import urlparse
 from urllib.request import urlretrieve


-destination_path = r'/home/sergio/zx/loading_screens/'
-url_prefix = r'https://spectrumcomputing.co.uk/'
-cache_path = r'/home/sergio/zx/cache/'
-files = []
+# Base URLs used by add_prefix(): index 0 is prepended to links that start
+# with "/zxdb"; index 1 replaces the leading "/pub" of the other links.
+# NOTE(review): the second URL ends in "Mirrorb" - confirm this is intended.
+url_prefix = [
+    r"https://spectrumcomputing.co.uk",
+    r"https://archive.org/download/World_of_Spectrum_June_2017_Mirror/World%20of%20Spectrum%20June%202017%20Mirror.zip/World%20of%20Spectrum%20June%202017%20Mirrorb",
+]
+# Root folder where get_files() places the final files, one folder per game.
+destination_path = r"/home/sergio/zx/zxdb/games/"
+# Root folder where downloads are stored before being copied or extracted.
+cache_path = r"/home/sergio/zx/zxdb/cache/games/"
+wait = False  # Enables a random pause between downloads
+min_wait = 3  # Minimum number of seconds to wait between downloads
+max_wait = 5  # Maximum number of seconds to wait between downloads
+# Rows produced by the database query; filled by the select* functions.
+elements = []
+filetypes = [
+    "Tape image",
+    "Disk image",
+    "Snapshot image",
+    "POK pokes file",
+]  # File types that are stored in the game's root folder


 def select1(cursor):
@@ -30,7 +43,37 @@ def select2(cursor):
    query = "select file_link from downloads where filetype_id=1"
    cursor.execute(query)
    for file_link in cursor:
-        files.append(url_prefix + str(file_link)[3:-3])
+        elements.append(url_prefix[0] + str(file_link)[3:-3])
+
+
+def select3(cursor):
+    """Run the download query and store every result row in ``elements``.
+
+    Each row is stored as a list in the column order of the SELECT clause:
+    [title, label name, release year, file link, file type text]. The query
+    is restricted to the title 'Afterburner', release_seq 0 and genre texts
+    containing "Game".
+
+    Args:
+        cursor: Open database cursor used to execute the query and iterated
+            to read the result rows.
+    """
+    sql = """
+        SELECT DISTINCT
+            e.title, l.name, r.release_year, d.file_link, f.text
+        FROM
+            ((((((publishers p
+                INNER JOIN entries e ON
+                    p.entry_id = e.id)
+                INNER JOIN labels l ON
+                    p.label_id = l.id)
+                INNER JOIN genretypes g ON
+                    e.genretype_id = g.id)
+                INNER JOIN downloads d ON
+                    e.id = d.entry_id)
+                INNER JOIN filetypes f ON
+                    d.filetype_id = f.id)                    
+                INNER JOIN releases r ON
+                    e.id = r.entry_id AND
+                    p.release_seq = r.release_seq)
+        WHERE
+            e.title = 'Afterburner' AND
+            r.release_seq = 0 AND
+            g.text like '%Game%'
+        ORDER BY
+            e.title;"""
+    cursor.execute(sql)
+    # Rows are converted to lists so add_prefix() can rewrite the link in place.
+    elements.extend(list(record) for record in cursor)


 def connect():
@@ -45,7 +88,7 @@ def connect():
    try:
        connection = mysql.connector.connect(**config)
        cursor = connection.cursor()
-        select2(cursor)
+        select3(cursor)

    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
@@ -61,6 +104,15 @@ def connect():
            cursor.close()


+def add_prefix():
+    """Turn the relative file link of every row in ``elements`` into a URL.
+
+    Links starting with "/zxdb" get ``url_prefix[0]`` prepended. Links
+    starting with "/pub" have those four leading characters replaced by
+    ``url_prefix[1]``. Any other link is left unchanged.
+    """
+    global elements
+    for row in elements:
+        link = row[3]
+        if link.startswith("/zxdb"):
+            row[3] = url_prefix[0] + str(link)
+        elif link.startswith("/pub"):
+            # Drop the leading "/pub" before appending to the mirror URL.
+            row[3] = url_prefix[1] + str(link[4:])
+
+
 def url_filename(url):
    parsed_url = urlparse(url)
    path = parsed_url.path
@@ -68,48 +120,132 @@ def url_filename(url):
    return filename


-def download_file(file, dest):
-    r = requests.get(file)
-    with open(dest, "wb") as f:
-        f.write(r.content)
+def download_file(url, dest, timeout=30):
+    """Download ``url`` and write the response body to ``dest``.
+
+    Args:
+        url: Address of the file to fetch.
+        dest: Path of the local file to write (opened in binary mode).
+        timeout: Seconds to wait for the server before giving up. Without a
+            timeout ``requests.get`` may block indefinitely and the Timeout
+            handler below would never run.
+
+    Returns:
+        True when the server answered with status 200 and the file was
+        written; False on any other status, on timeout, or on too many
+        redirects.
+
+    Raises:
+        SystemExit: on any other ``requests`` error.
+    """
+    print("")
+    print("URL        : {}".format(url))
+    try:
+        r = requests.get(url, timeout=timeout)
+
+    except requests.exceptions.Timeout:
+        # Maybe set up for a retry, or continue in a retry loop
+        print("Timeout: {}".format(url))
+        return False
+
+    except requests.exceptions.TooManyRedirects:
+        # Tell the user their URL was bad and try a different one
+        print("Bad URL: {}".format(url))
+        return False
+
+    except requests.exceptions.RequestException as e:
+        # catastrophic error. bail.
+        raise SystemExit(e)
+
+    print("STATUS_CODE: {}".format(r.status_code))
+    print("RESPONSE   : {}".format(r.reason))
+    if r.status_code != 200:
+        # Nothing is written, so the caller never caches an error page.
+        return False
+    with open(dest, "wb") as f:
+        f.write(r.content)
+    return True


-def download_all():
-    cont = 0
-    total = len(files)
-    for file in files:
-        cont = cont + 1
-        downloaded_file = url_filename(file)
-        destination_filename = os.path.join(destination_path, downloaded_file)
-        if not os.path.isfile(destination_filename):
-            download_file(file, destination_filename)
-            print("downloaded : {:{width}} ({} / {})".format(downloaded_file, cont, total, width=50))
-            time.sleep(random.randint(4, 8))
-        else:
-            print("skipping   : {:{width}} ({} / {})".format(downloaded_file, cont, total, width=50))
+def unzip_file(src, dst):
+    """Extract every member of the zip archive ``src`` into folder ``dst``."""
+    archive = zipfile.ZipFile(src, "r")
+    with archive:
+        archive.extractall(dst)
+

 def get_files():
-    cont = 0
-    total = len(files)
-    for file in files:
-        cont = cont + 1
-        downloaded_file = url_filename(file)
-        destination_filename = os.path.join(destination_path, downloaded_file)
-        cache_filename = os.path.join(cache_path, downloaded_file)
-        if not os.path.isfile(destination_filename):
-            if os.path.isfile(cache_filename):
-                shutil.copyfile(cache_filename, destination_filename)
-                print("cached     : {:{width}} ({} / {})".format(downloaded_file, cont, total, width=50))
+    """Place every file listed in ``elements`` under ``destination_path``.
+
+    For each row a "<title> (<year>)(<label>)" folder is created in both
+    ``destination_path`` and ``cache_path``. Rows whose file type text is
+    not in ``filetypes`` go into an extra sub-folder named after the type.
+    A file missing from the destination is taken from the cache when it is
+    there, otherwise it is downloaded into the cache first. Cached ".zip"
+    files are extracted into the destination folder; any other file is
+    copied. One progress line is printed per row.
+    """
+    # NOTE(review): os.mkdir does not create missing parent folders, so
+    # destination_path and cache_path presumably must exist beforehand.
+    # NOTE(review): a ".zip" is extracted rather than copied, so the
+    # destination_file checked below is likely never created for zips and
+    # they are extracted again on every run - confirm.
+    # Variables used for the on-screen download progress display
+    current_file = 0
+    total_files = len(elements)
+    last_game_folder = ""
+    for element in elements:
+        # File to download
+        downloaded_file = url_filename(element[3])
+
+        # Game folder in the destination and in the cache
+        game_folder = element[0] + " (" + str(element[2]) + ")(" + element[1] + ")"
+        destination_folder = os.path.join(destination_path, game_folder)
+        if not os.path.isdir(destination_folder):
+            os.mkdir(destination_folder)
+        cache_folder = os.path.join(cache_path, game_folder)
+        if not os.path.isdir(cache_folder):
+            os.mkdir(cache_folder)
+
+        # File-type folder in the destination and in the cache
+        if element[4] not in filetypes:
+            filetype_folder = normalize_path(element[4])
+            destination_folder = os.path.join(
+                destination_path, game_folder, filetype_folder
+            )
+            if not os.path.isdir(destination_folder):
+                os.mkdir(destination_folder)
+            cache_folder = os.path.join(cache_path, game_folder, filetype_folder)
+            if not os.path.isdir(cache_folder):
+                os.mkdir(cache_folder)
+
+        # Full path to the destination file and to the cache file
+        destination_file = os.path.join(destination_folder, downloaded_file)
+        cache_file = os.path.join(cache_folder, downloaded_file)
+
+        # Update the display variables
+        current_file = current_file + 1
+
+        if game_folder != last_game_folder:
+            print("\n{}".format(game_folder))
+            last_game_folder = game_folder
+
+        # Check whether the file to download already exists
+        if not os.path.isfile(destination_file):
+            # Check whether the file already exists in the cache
+            if os.path.isfile(cache_file):
+                if cache_file.endswith(".zip"):
+                    unzip_file(cache_file, destination_folder)
+                else:
+                    shutil.copyfile(cache_file, destination_file)
+                print(
+                    "({:{width}} / {}) : cached     : {} ({})".format(
+                        current_file, total_files, downloaded_file, element[4], width=2
+                    )
+                )
+            # The file is not in the cache
             else:
-                download_file(file, destination_filename)
-                print("downloaded : {:{width}} ({} / {})".format(downloaded_file, cont, total, width=50))
-                time.sleep(random.randint(4, 8))
+                status = "not found "
+                if download_file(element[3], cache_file):
+                    status = "downloaded"
+                print(
+                    "({:{width}} / {}) : {} : {} ({})".format(
+                        current_file, total_files, status, downloaded_file, element[4], width=2
+                    )
+                )
+                if os.path.isfile(cache_file):
+                    if cache_file.endswith(".zip"):
+                        unzip_file(cache_file, destination_folder)
+                    else:
+                        shutil.copyfile(cache_file, destination_file)
+                if wait:
+                    time.sleep(random.randint(min_wait, max_wait))
+        # The file already exists in the destination
         else:
-            print("skipping   : {:{width}} ({} / {})".format(downloaded_file, cont, total, width=50))
+            print(
+                "({:{width}} / {}) : skipping   : {} ({})".format(
+                    current_file, total_files, downloaded_file, element[4], width=2
+                )
+            )
+
+
+def normalize_path(path):
+    """Return ``path`` with each of the characters < > : " / \\ | ? * replaced by "_"."""
+    substitutions = str.maketrans({symbol: "_" for symbol in '<>:"/\\|?*'})
+    return path.translate(substitutions)


 def main():
    connect()
+    add_prefix()
+
+    #for element in elements:
+    #    print(element)
+
    get_files()