Skip to content

Instantly share code, notes, and snippets.

@me-suzy
Created February 12, 2026 11:55
Show Gist options
  • Select an option

  • Save me-suzy/1852f2f85d2b355427b5e32a55d88b47 to your computer and use it in GitHub Desktop.

Select an option

Save me-suzy/1852f2f85d2b355427b5e32a55d88b47 to your computer and use it in GitHub Desktop.
gaseste-html-care-lipsesc-din-categorii.py
# -*- coding: utf-8 -*-
r"""
Script care scaneaza folderul Principal\ro\ si subfolderele DESPRE, Python Files,
si gaseste fisierele HTML care NU sunt referite/linkate in niciun fisier de tip categorii.
"""
import os
import re
from urllib.parse import unquote
# Folders to scan: the site root plus two of its sub-folders.
ROOT_FOLDER = r"e:\Carte\BB\17 - Site Leadership\Principal\ro"
SCAN_FOLDERS = [
    ROOT_FOLDER,
    *(os.path.join(ROOT_FOLDER, sub) for sub in ("DESPRE", "Python Files")),
]
# Category pages, given as file names relative to ROOT_FOLDER.
# Links found inside these pages define which HTML files count as "referenced".
CATEGORY_FILES = [
    "index.html",
    "lideri-si-atitudine.html",
    "leadership-magic.html",
    "leadership-de-succes.html",
    "hr-resurse-umane.html",
    "legile-conducerii.html",
    "leadership-total.html",
    "leadership-de-durata.html",
    "principiile-conducerii.html",
    "leadership-plus.html",
    "calitatile-unui-lider.html",
    "leadership-de-varf.html",
    "leadership-impact.html",
    "dezvoltare-personala.html",
    "aptitudini-si-abilitati-de-leadership.html",
    "leadership-real.html",
    "leadership-de-baza.html",
    "leadership-360.html",
    "leadership-pro.html",
    "leadership-expert.html",
    "leadership-know-how.html",
    "jurnal-de-leadership.html",
    "alpha-leadership.html",
    "leadership-on-off.html",
    "leadership-deluxe.html",
    "leadership-xxl.html",
    "leadership-50-extra.html",
    "leadership-fusion.html",
    "leadership-v8.html",
    "leadership-x3-silver.html",
    "leadership-q2-sensitive.html",
    "leadership-t7-hybrid.html",
    "leadership-n6-celsius.html",
    "leadership-s4-quartz.html",
    "leadership-gt-accent.html",
    "leadership-fx-intensive.html",
    "leadership-iq-light.html",
    "leadership-7th-edition.html",
    "leadership-xs-analytics.html",
    "leadership-z3-extended.html",
    "leadership-ex-elite.html",
    "leadership-w3-integra.html",
    "leadership-sx-experience.html",
    "leadership-y5-superzoom.html",
    "performance-ex-flash.html",
    "leadership-mindware.html",
    "leadership-r2-premiere.html",
    "leadership-y4-titanium.html",
    "leadership-quantum-xx.html",
    "python-scripts-examples.html",
]
# Regex for href attributes whose value contains ".html" (local path or URL).
HREF_HTML_PATTERN = re.compile(r'href\s*=\s*["\']([^"\']*\.html[^"\']*)["\']', re.IGNORECASE)


def extract_html_refs_from_content(content):
    """Return the lower-cased .html file names linked from *content*.

    Every ``href="..."`` / ``href='...'`` value matched by HREF_HTML_PATTERN
    is reduced to its final path segment. Fragment (``#...``) and query
    string (``?...``) are discarded and percent-escapes are decoded, so
    ``dir/My%20Page.html?v=2#top`` yields ``my page.html``.

    Args:
        content: HTML text to scan.

    Returns:
        set[str]: lower-cased file names ending in ``.html``; empty when
        nothing matches.
    """
    refs = set()
    for match in HREF_HTML_PATTERN.finditer(content):
        url = match.group(1).strip()
        # Cut the fragment and the query string BEFORE isolating the last
        # path segment: a fragment may itself contain "/" ("page.html#/intro"),
        # and a trailing "?v=2" would otherwise defeat the ".html" suffix test.
        path = url.split("#", 1)[0].split("?", 1)[0]
        # Decode after splitting on "/" so an escaped "%2F" cannot introduce
        # a new path separator.
        filename = unquote(path.rsplit("/", 1)[-1]).lower()
        if filename.endswith(".html"):
            refs.add(filename)
    return refs
def get_all_html_refs_from_category_files():
    """Collect the .html references found in all category files.

    Each name in CATEGORY_FILES is resolved against ROOT_FOLDER and read as
    UTF-8, with undecodable bytes dropped. A category file that does not
    exist is skipped silently. Any other I/O failure is printed and the scan
    continues with the next file.

    Returns:
        set: union of what extract_html_refs_from_content() returns for
        every category file that could be read.
    """
    all_refs = set()
    for category_name in CATEGORY_FILES:
        path = os.path.join(ROOT_FOLDER, category_name)
        # Only the I/O sits inside the try block, and only OSError is caught,
        # so a programming error in the parsing step is not mislabelled as a
        # read error and swallowed.
        try:
            with open(path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()
        except FileNotFoundError:
            # Category page not present on disk: nothing to scan.
            continue
        except OSError as e:
            print(f" EROARE citire {path}: {e}")
            continue
        all_refs.update(extract_html_refs_from_content(content))
    return all_refs
def get_all_html_files_in_scan_folders():
    """Walk every folder in SCAN_FOLDERS and list the .html files found.

    Folders that do not exist are skipped. A file reached through more than
    one scan folder is reported once, at its first occurrence.

    Returns:
        list: ``(full_path, path_relative_to_ROOT_FOLDER, file_name)`` tuples
        in discovery order.
    """
    collected = []
    visited_paths = set()
    existing_folders = (d for d in SCAN_FOLDERS if os.path.exists(d))
    for base in existing_folders:
        for current_dir, _subdirs, names in os.walk(base):
            html_names = [n for n in names if n.lower().endswith(".html")]
            for name in html_names:
                absolute = os.path.join(current_dir, name)
                if absolute not in visited_paths:
                    visited_paths.add(absolute)
                    relative = os.path.relpath(absolute, ROOT_FOLDER)
                    collected.append((absolute, relative, name))
    return collected
def main():
    """Print a report of HTML files that no category file links to.

    A scanned file counts as referenced when its lower-cased file name is in
    the set returned by get_all_html_refs_from_category_files(). Unreferenced
    files are listed by relative path, sorted case-insensitively.
    """
    separator = "=" * 70
    print(separator)
    print("Gaseste fisiere HTML care NU sunt referite in niciun fisier categorii")
    print(separator)

    print("\n1. Colectare referinte din fisierele categorii...")
    referenced_names = get_all_html_refs_from_category_files()
    print(f" Total referinte .html gasite in categorii: {len(referenced_names)}")

    print("\n2. Scanare fisiere HTML din foldere...")
    scanned = get_all_html_files_in_scan_folders()
    print(f" Total fisiere .html gasite: {len(scanned)}")

    print("\n3. Identificare fisiere NEREFERITE in categorii...")
    # Each entry is (full_path, rel_path, filename): filter on the file name,
    # then order by relative path.
    orphans = sorted(
        (entry for entry in scanned if entry[2].lower() not in referenced_names),
        key=lambda entry: entry[1].lower(),
    )

    print(f"\n Rezultat: {len(orphans)} fisiere HTML nu sunt referite in niciun fisier categorii.")
    print("\n" + "-" * 70)
    if not orphans:
        print(" (niciunul)")
    else:
        for _full, relative, _name in orphans:
            print(f" {relative}")
    print("\n" + separator)
    print("Gata.")
    print(separator)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment