me-suzy · December 29, 2025 17:24
diff --git a/CĂUTARE ÎN WAYBACK MACHINE.py b/CĂUTARE ÎN WAYBACK MACHINE.py
 import requests
 import sys
 import os
 from urllib.parse import quote, unquote

 def cauta_in_wayback(url_pattern):
    """Query the Wayback Machine CDX API for captures matching a URL pattern.

    Args:
        url_pattern: URL or wildcard pattern to look up. It is fully
            percent-encoded before being placed in the ``url`` query
            parameter.

    Returns:
        The JSON result rows with the leading header row removed (the
        request asks for the fields timestamp, original, statuscode,
        mimetype and length), or an empty list when nothing matched or
        the request failed.
    """
    print(f"\nCaut: {url_pattern}")

    cdx_url = (
        "https://web.archive.org/cdx/search/cdx"
        f"?url={quote(url_pattern, safe='')}"
        "&output=json&fl=timestamp,original,statuscode,mimetype,length"
    )

    try:
        response = requests.get(cdx_url, timeout=60)
        if response.status_code != 200:
            # Report the failure so it is not mistaken for "no captures".
            print(f"  Eroare HTTP: {response.status_code}")
            return []
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        print(f"  Eroare: {e}")
        return []

    # Row 0 is the header naming the fields; a lone header means no hits.
    if isinstance(data, list) and len(data) > 1:
        return data[1:]
    return []

 def descarca_din_wayback(timestamp, original_url, output_filename=None):
    """Download one archived capture from the Wayback Machine to a local file.

    The body is streamed into ``<output_filename>.part`` and renamed to
    ``output_filename`` only after the transfer finished, so an interrupted
    download never leaves a truncated file under the final name.

    Args:
        timestamp: capture timestamp, inserted into the Wayback URL.
        original_url: the archived URL, appended after the timestamp.
        output_filename: destination path. When omitted, the last path
            segment of ``original_url`` is percent-decoded and sanitised.

    Returns:
        True when the file was saved, False on a non-200 response or on a
        network / file-system error.
    """
    # The "if_" modifier asks Wayback for the raw capture, without its banner.
    wayback_url = f"https://web.archive.org/web/{timestamp}if_/{original_url}"

    print(f"\nDescarc de la:\n{wayback_url}")

    partial_path = None
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(wayback_url, timeout=300, stream=True) as response:
            if response.status_code != 200:
                print(f"Eroare HTTP: {response.status_code}")
                return False

            if not output_filename:
                output_filename = unquote(original_url.split("/")[-1])
                # Replace characters that are invalid in file names.
                output_filename = "".join(c if c not in '<>:"/\\|?*' else '_' for c in output_filename)
                # A URL ending in "/" yields an empty name; use a fixed one.
                output_filename = output_filename or "download"

            # A missing or malformed Content-Length only disables the progress line.
            length_header = response.headers.get('content-length', '')
            total_size = int(length_header) if length_header.isdigit() else 0
            downloaded = 0

            partial_path = output_filename + ".part"
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if not chunk:
                        continue
                    f.write(chunk)
                    downloaded += len(chunk)
                    if total_size > 0:
                        pct = (downloaded / total_size) * 100
                        print(f"\r  Progres: {pct:.1f}% ({downloaded:,} / {total_size:,} bytes)", end='')

        # Publish the file under its final name only once it is complete.
        os.replace(partial_path, output_filename)
        partial_path = None

        print(f"\n\n✓ SALVAT: {output_filename}")
        print(f"  Dimensiune: {os.path.getsize(output_filename):,} bytes")
        return True

    except (requests.RequestException, OSError) as e:
        print(f"Eroare la descărcare: {e}")
        return False

    finally:
        # Never leave a truncated ".part" file behind after a failure.
        if partial_path is not None:
            try:
                os.remove(partial_path)
            except OSError:
                pass

 def _patternuri_url(item_name, filename=None):
    """Build the de-duplicated list of CDX URL patterns to query for an item."""
    patterns = [
        f"archive.org/details/{item_name}*",
        f"archive.org/download/{item_name}*",
        f"ia800*.us.archive.org/*/{item_name}*",
        f"ia600*.us.archive.org/*/{item_name}*",
    ]
    if filename:
        patterns.extend([
            f"archive.org/download/{item_name}/{quote(filename)}",
            f"archive.org/download/{item_name}/{filename}",
        ])
    # quote() leaves plain names unchanged, which would repeat the same query.
    return list(dict.fromkeys(patterns))


 def _snapshoturi_unice(all_snapshots):
    """Keep the newest capture per original URL, sorted newest first."""
    newest = {}
    for snap in all_snapshots:
        timestamp, original = snap[0], snap[1]
        # Rows shorter than the requested field list get neutral defaults.
        status = snap[2] if len(snap) > 2 else "200"
        mimetype = snap[3] if len(snap) > 3 else ""
        size = snap[4] if len(snap) > 4 else "0"

        if original not in newest or timestamp > newest[original][0]:
            newest[original] = (timestamp, original, status, mimetype, size)

    return sorted(newest.values(), key=lambda s: s[0], reverse=True)


 def recupereaza_de_pe_archive_org(item_name, filename=None):
    """Search the Wayback Machine for an archive.org item and download its PDF.

    Args:
        item_name: archive.org item identifier
            (e.g. matei-horia-enciclopedia-statelor-lumii).
        filename: optional name of the file inside the item
            (e.g. Matei Horia Enciclopedia statelor lumii.pdf). When given,
            it adds exact-URL searches and a capture whose decoded file
            name equals it is preferred for the download.

    Returns:
        The result of descarca_din_wayback for the chosen PDF capture, or
        False when no capture at all, or no PDF capture with status 200,
        was found.
    """
    print("=" * 70)
    print("RECUPERARE FIȘIER DIN WAYBACK MACHINE")
    print("=" * 70)
    print(f"\nItem: {item_name}")
    if filename:
        print(f"Fișier: {filename}")

    print("\n" + "-" * 70)
    print("CĂUTARE ÎN WAYBACK MACHINE...")
    print("-" * 70)

    all_snapshots = []
    for pattern in _patternuri_url(item_name, filename):
        results = cauta_in_wayback(pattern)
        if results:
            print(f"  → Găsite {len(results)} rezultate")
            all_snapshots.extend(results)

    if not all_snapshots:
        print("\n❌ Nu s-a găsit nimic în Wayback Machine.")
        print("\nÎncearcă să cauți manual:")
        print(f"  https://web.archive.org/web/*/archive.org/download/{item_name}/*")
        return False

    snapshots = _snapshoturi_unice(all_snapshots)

    # Keep only status-200 captures that look like PDFs by URL or MIME type.
    pdf_snapshots = [
        s for s in snapshots
        if s[2] == "200" and ('.pdf' in s[1].lower() or 'pdf' in s[3].lower())
    ]

    print("\n" + "-" * 70)
    print(f"REZULTATE: {len(snapshots)} snapshot-uri totale, {len(pdf_snapshots)} PDF-uri valide")
    print("-" * 70)

    if not pdf_snapshots:
        # Nothing downloadable: list everything found so it can be checked by hand.
        print("\nToate snapshot-urile găsite:")
        for i, (ts, orig, status, mime, size) in enumerate(snapshots[:30], 1):
            date_str = f"{ts[:4]}-{ts[4:6]}-{ts[6:8]}"
            print(f"  [{i}] {date_str} | Status:{status} | {mime} | {orig[:60]}")

        print("\n⚠ Nu s-au găsit PDF-uri cu status 200.")
        print("Verifică manual URL-urile de mai sus.")
        return False

    print("\nPDF-uri găsite:")
    for i, (ts, orig, status, mime, size) in enumerate(pdf_snapshots[:20], 1):
        date_str = f"{ts[:4]}-{ts[4:6]}-{ts[6:8]}"
        fname = unquote(orig.split("/")[-1])[:50]
        size_mb = f"{int(size)/1024/1024:.1f}MB" if size.isdigit() and int(size) > 0 else "?"
        print(f"  [{i}] {date_str} | {size_mb:>8} | {fname}")

    # Default to the newest PDF, but when a file name was requested prefer the
    # newest capture of exactly that file over an unrelated PDF of the item.
    chosen = pdf_snapshots[0]
    if filename:
        chosen = next(
            (s for s in pdf_snapshots if unquote(s[1].split("/")[-1]) == filename),
            chosen,
        )

    print("\n" + "-" * 70)
    ts, orig = chosen[0], chosen[1]
    print(f"Descarc cel mai recent snapshot ({ts[:4]}-{ts[4:6]}-{ts[6:8]})...")

    return descarca_din_wayback(ts, orig)


 if __name__ == "__main__":
    # ============================================
    # CONFIGURATION - edit your values here:
    # ============================================

    ITEM_NAME = "matei-horia-enciclopedia-statelor-lumii"
    FILE_NAME = "Matei Horia Enciclopedia statelor lumii.pdf"

    # Or the variant with .pdf in the item name, if that is how it was:
    # ITEM_NAME = "matei-horia-enciclopedia-statelor-lumii.pdf"
    # FILE_NAME = None

    # ============================================

    recuperat = recupereaza_de_pe_archive_org(ITEM_NAME, FILE_NAME)

    print("\n" + "=" * 70)
    try:
        # Wait for Enter before exiting (the prompt reads "press Enter to close").
        input("Apasă Enter pentru a închide...")
    except EOFError:
        # No interactive stdin (e.g. piped input): there is nothing to wait for.
        pass

    # Report the outcome to the calling shell instead of always exiting with 0.
    sys.exit(0 if recuperat else 1)
	import requests
	import sys
	import os
	from urllib.parse import quote, unquote

	def cauta_in_wayback(url_pattern):
	    """Query the Wayback Machine CDX API for captures matching a URL pattern.

	    Args:
	        url_pattern: URL or wildcard pattern to look up. It is fully
	            percent-encoded before being placed in the ``url`` query
	            parameter.

	    Returns:
	        The JSON result rows with the leading header row removed (the
	        request asks for the fields timestamp, original, statuscode,
	        mimetype and length), or an empty list when nothing matched or
	        the request failed.
	    """
	    print(f"\nCaut: {url_pattern}")

	    cdx_url = (
	        "https://web.archive.org/cdx/search/cdx"
	        f"?url={quote(url_pattern, safe='')}"
	        "&output=json&fl=timestamp,original,statuscode,mimetype,length"
	    )

	    try:
	        response = requests.get(cdx_url, timeout=60)
	        if response.status_code != 200:
	            # Report the failure so it is not mistaken for "no captures".
	            print(f"  Eroare HTTP: {response.status_code}")
	            return []
	        data = response.json()
	    except (requests.RequestException, ValueError) as e:
	        # ValueError covers a response body that is not valid JSON.
	        print(f"  Eroare: {e}")
	        return []

	    # Row 0 is the header naming the fields; a lone header means no hits.
	    if isinstance(data, list) and len(data) > 1:
	        return data[1:]
	    return []

	def descarca_din_wayback(timestamp, original_url, output_filename=None):
	    """Download one archived capture from the Wayback Machine to a local file.

	    The body is streamed into ``<output_filename>.part`` and renamed to
	    ``output_filename`` only after the transfer finished, so an interrupted
	    download never leaves a truncated file under the final name.

	    Args:
	        timestamp: capture timestamp, inserted into the Wayback URL.
	        original_url: the archived URL, appended after the timestamp.
	        output_filename: destination path. When omitted, the last path
	            segment of ``original_url`` is percent-decoded and sanitised.

	    Returns:
	        True when the file was saved, False on a non-200 response or on a
	        network / file-system error.
	    """
	    # The "if_" modifier asks Wayback for the raw capture, without its banner.
	    wayback_url = f"https://web.archive.org/web/{timestamp}if_/{original_url}"

	    print(f"\nDescarc de la:\n{wayback_url}")

	    partial_path = None
	    try:
	        # The context manager releases the streamed connection on every path.
	        with requests.get(wayback_url, timeout=300, stream=True) as response:
	            if response.status_code != 200:
	                print(f"Eroare HTTP: {response.status_code}")
	                return False

	            if not output_filename:
	                output_filename = unquote(original_url.split("/")[-1])
	                # Replace characters that are invalid in file names.
	                output_filename = "".join(c if c not in '<>:"/\\|?*' else '_' for c in output_filename)
	                # A URL ending in "/" yields an empty name; use a fixed one.
	                output_filename = output_filename or "download"

	            # A missing or malformed Content-Length only disables the progress line.
	            length_header = response.headers.get('content-length', '')
	            total_size = int(length_header) if length_header.isdigit() else 0
	            downloaded = 0

	            partial_path = output_filename + ".part"
	            with open(partial_path, 'wb') as f:
	                for chunk in response.iter_content(chunk_size=8192):
	                    if not chunk:
	                        continue
	                    f.write(chunk)
	                    downloaded += len(chunk)
	                    if total_size > 0:
	                        pct = (downloaded / total_size) * 100
	                        print(f"\r  Progres: {pct:.1f}% ({downloaded:,} / {total_size:,} bytes)", end='')

	        # Publish the file under its final name only once it is complete.
	        os.replace(partial_path, output_filename)
	        partial_path = None

	        print(f"\n\n✓ SALVAT: {output_filename}")
	        print(f"  Dimensiune: {os.path.getsize(output_filename):,} bytes")
	        return True

	    except (requests.RequestException, OSError) as e:
	        print(f"Eroare la descărcare: {e}")
	        return False

	    finally:
	        # Never leave a truncated ".part" file behind after a failure.
	        if partial_path is not None:
	            try:
	                os.remove(partial_path)
	            except OSError:
	                pass

	def _patternuri_url(item_name, filename=None):
	    """Build the de-duplicated list of CDX URL patterns to query for an item."""
	    patterns = [
	        f"archive.org/details/{item_name}*",
	        f"archive.org/download/{item_name}*",
	        f"ia800*.us.archive.org/*/{item_name}*",
	        f"ia600*.us.archive.org/*/{item_name}*",
	    ]
	    if filename:
	        patterns.extend([
	            f"archive.org/download/{item_name}/{quote(filename)}",
	            f"archive.org/download/{item_name}/{filename}",
	        ])
	    # quote() leaves plain names unchanged, which would repeat the same query.
	    return list(dict.fromkeys(patterns))


	def _snapshoturi_unice(all_snapshots):
	    """Keep the newest capture per original URL, sorted newest first."""
	    newest = {}
	    for snap in all_snapshots:
	        timestamp, original = snap[0], snap[1]
	        # Rows shorter than the requested field list get neutral defaults.
	        status = snap[2] if len(snap) > 2 else "200"
	        mimetype = snap[3] if len(snap) > 3 else ""
	        size = snap[4] if len(snap) > 4 else "0"

	        if original not in newest or timestamp > newest[original][0]:
	            newest[original] = (timestamp, original, status, mimetype, size)

	    return sorted(newest.values(), key=lambda s: s[0], reverse=True)


	def recupereaza_de_pe_archive_org(item_name, filename=None):
	    """Search the Wayback Machine for an archive.org item and download its PDF.

	    Args:
	        item_name: archive.org item identifier
	            (e.g. matei-horia-enciclopedia-statelor-lumii).
	        filename: optional name of the file inside the item
	            (e.g. Matei Horia Enciclopedia statelor lumii.pdf). When given,
	            it adds exact-URL searches and a capture whose decoded file
	            name equals it is preferred for the download.

	    Returns:
	        The result of descarca_din_wayback for the chosen PDF capture, or
	        False when no capture at all, or no PDF capture with status 200,
	        was found.
	    """
	    print("=" * 70)
	    print("RECUPERARE FIȘIER DIN WAYBACK MACHINE")
	    print("=" * 70)
	    print(f"\nItem: {item_name}")
	    if filename:
	        print(f"Fișier: {filename}")

	    print("\n" + "-" * 70)
	    print("CĂUTARE ÎN WAYBACK MACHINE...")
	    print("-" * 70)

	    all_snapshots = []
	    for pattern in _patternuri_url(item_name, filename):
	        results = cauta_in_wayback(pattern)
	        if results:
	            print(f"  → Găsite {len(results)} rezultate")
	            all_snapshots.extend(results)

	    if not all_snapshots:
	        print("\n❌ Nu s-a găsit nimic în Wayback Machine.")
	        print("\nÎncearcă să cauți manual:")
	        print(f"  https://web.archive.org/web/*/archive.org/download/{item_name}/*")
	        return False

	    snapshots = _snapshoturi_unice(all_snapshots)

	    # Keep only status-200 captures that look like PDFs by URL or MIME type.
	    pdf_snapshots = [
	        s for s in snapshots
	        if s[2] == "200" and ('.pdf' in s[1].lower() or 'pdf' in s[3].lower())
	    ]

	    print("\n" + "-" * 70)
	    print(f"REZULTATE: {len(snapshots)} snapshot-uri totale, {len(pdf_snapshots)} PDF-uri valide")
	    print("-" * 70)

	    if not pdf_snapshots:
	        # Nothing downloadable: list everything found so it can be checked by hand.
	        print("\nToate snapshot-urile găsite:")
	        for i, (ts, orig, status, mime, size) in enumerate(snapshots[:30], 1):
	            date_str = f"{ts[:4]}-{ts[4:6]}-{ts[6:8]}"
	            print(f"  [{i}] {date_str} | Status:{status} | {mime} | {orig[:60]}")

	        print("\n⚠ Nu s-au găsit PDF-uri cu status 200.")
	        print("Verifică manual URL-urile de mai sus.")
	        return False

	    print("\nPDF-uri găsite:")
	    for i, (ts, orig, status, mime, size) in enumerate(pdf_snapshots[:20], 1):
	        date_str = f"{ts[:4]}-{ts[4:6]}-{ts[6:8]}"
	        fname = unquote(orig.split("/")[-1])[:50]
	        size_mb = f"{int(size)/1024/1024:.1f}MB" if size.isdigit() and int(size) > 0 else "?"
	        print(f"  [{i}] {date_str} | {size_mb:>8} | {fname}")

	    # Default to the newest PDF, but when a file name was requested prefer the
	    # newest capture of exactly that file over an unrelated PDF of the item.
	    chosen = pdf_snapshots[0]
	    if filename:
	        chosen = next(
	            (s for s in pdf_snapshots if unquote(s[1].split("/")[-1]) == filename),
	            chosen,
	        )

	    print("\n" + "-" * 70)
	    ts, orig = chosen[0], chosen[1]
	    print(f"Descarc cel mai recent snapshot ({ts[:4]}-{ts[4:6]}-{ts[6:8]})...")

	    return descarca_din_wayback(ts, orig)


	if __name__ == "__main__":
	    # ============================================
	    # CONFIGURATION - edit your values here:
	    # ============================================

	    ITEM_NAME = "matei-horia-enciclopedia-statelor-lumii"
	    FILE_NAME = "Matei Horia Enciclopedia statelor lumii.pdf"

	    # Or the variant with .pdf in the item name, if that is how it was:
	    # ITEM_NAME = "matei-horia-enciclopedia-statelor-lumii.pdf"
	    # FILE_NAME = None

	    # ============================================

	    recuperat = recupereaza_de_pe_archive_org(ITEM_NAME, FILE_NAME)

	    print("\n" + "=" * 70)
	    try:
	        # Wait for Enter before exiting (the prompt reads "press Enter to close").
	        input("Apasă Enter pentru a închide...")
	    except EOFError:
	        # No interactive stdin (e.g. piped input): there is nothing to wait for.
	        pass

	    # Report the outcome to the calling shell instead of always exiting with 0.
	    sys.exit(0 if recuperat else 1)
No results found