ph33nx · December 8, 2025 18:32
diff --git a/lrc_to_srt.py b/lrc_to_srt.py
 """
 LRC to SRT Subtitle Converter in Python  
 ======================================

 Author: ph33nx  
 GitHub: https://github.com/ph33nx  
 License: MIT

 Description:
 ------------
 A clean and efficient Python script that converts **LRC files**  
 (including *aligned_words.lrc* format) into **proper SRT subtitle files** for  
 Final Cut Pro, DaVinci Resolve, Premiere Pro, YouTube, or any video editor.

 This tool fixes messy LRC formats such as:
 - Double tags like `[00:23.37][Verse 1]`
 - Line labels (Verse/Chorus/Bridge)  
 - Multi-line word-by-word timestamps  
 - Isolated fragments  
 - Empty metadata lines  

 The script:
 - Preserves the main timestamp  
 - Removes labels (Verse, Chorus, etc.)
 - Merges scattered lyric fragments into full lines  
 - Generates clean, properly timed SRT entries  
 - Optimizes subtitles for lyric videos, karaoke videos, and music reels

 Perfect for:
 ------------
 ✓ Suno AI music creators  
 ✓ YouTube lyric video editors  
 ✓ FCPX editors needing auto-generated subtitles  
 ✓ Anyone converting LRC → SRT in Python  
 ✓ Musicians looking to auto-sync lyrics with audio  
 ✓ Karaoke creators  
 ✓ Video editors wanting stylable captions  

 SEO Keywords:
 -------------
 python lrc to srt converter, suno ai subtitles, subtitle generator python,
 lyrics to srt python script, lrc parser python, auto subtitle sync,
 convert aligned_words.lrc, suno.ai caption generator, lrc cleanup script,
 subtitle timing python, srt builder python, karaoke timing python

 Usage:
 ------
 python lrc_to_srt.py input.lrc output.srt

 This repository is MIT licensed — feel free to fork, modify, and use commercially.
 """

 import re
 import sys
 from datetime import timedelta

 def parse_timestamp(ts):
    """Convert an LRC timestamp string into a ``timedelta``.

    Args:
        ts: Timestamp text without the surrounding brackets, in ``mm:ss``
            or ``mm:ss.fff`` form (for example ``"00:23.37"``).

    Returns:
        A ``timedelta`` equal to the given minutes, seconds and fraction.

    Raises:
        ValueError: If ``ts`` is not ``minutes:seconds`` or one of its
            fields is not an integer.
    """
    minutes, seconds = ts.split(":")
    # The digits after the dot are a decimal fraction of a second, not a
    # millisecond count: pad short fractions ("5" -> 0.5 s) and cut long
    # ones at microsecond resolution so "3456" means 0.3456 s, not 3.456 s.
    seconds, _, fraction = seconds.partition(".")
    micros = int(fraction[:6].ljust(6, "0"))
    return timedelta(
        minutes=int(minutes), seconds=int(seconds), microseconds=micros
    )

 def format_srt_timestamp(td):
    """Render a ``timedelta`` as an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        td: Offset from the start of the media.

    Returns:
        The offset cut down to whole milliseconds, with hours, minutes and
        seconds zero-padded to two digits and milliseconds to three.
        Negative offsets, which SRT cannot express, are rendered as
        ``00:00:00,000``.
    """
    # Integer floor division keeps the millisecond count exact; scaling
    # total_seconds() by 1000 goes through a float and can come out one
    # millisecond short (1.001 s -> 1000 ms).
    total_ms = max(td // timedelta(milliseconds=1), 0)
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{seconds:02},{ms:03}"

 def lrc_to_srt(lrc_path, srt_path):
    """Convert an LRC lyrics file into an SRT subtitle file.

    Each ``[mm:ss.xx]`` tag starts a cue. A tag with no lyric text of its
    own (bare, or followed only by a bracketed label such as ``[Verse 1]``)
    lends its time to the next untagged line; further untagged lines are
    skipped until another such tag appears. Lyrics that share a start time
    are joined with spaces into one cue. Every cue ends 20 ms before the
    next one starts (or exactly at the next start when the two are 20 ms or
    less apart); the last cue lasts three seconds.

    Args:
        lrc_path: Path of the LRC file to read (UTF-8, with or without BOM).
        srt_path: Path of the SRT file to write (UTF-8); an existing file
            is overwritten.

    Raises:
        OSError: If either file cannot be opened.
    """
    tagged_line = re.compile(r"^\[(\d{2}:\d{2}\.\d+)\](.*)")
    cue_gap = timedelta(milliseconds=20)
    last_cue_length = timedelta(seconds=3)

    # "utf-8-sig" also reads plain UTF-8 but strips a leading byte-order
    # mark, which would otherwise hide the first line's "[" from the pattern.
    with open(lrc_path, "r", encoding="utf-8-sig") as f:
        raw = f.readlines()

    # Pass 1: pair every lyric with the timestamp it belongs to.
    cleaned = []
    pending_ts = None  # tag seen without lyric text, waiting for a lyric
    for line in raw:
        line = line.strip()
        if not line:
            continue

        m = tagged_line.match(line)
        if m is None:
            # Untagged line: it is a lyric only if a tag is waiting for one.
            if pending_ts is not None:
                cleaned.append((pending_ts, line))
                pending_ts = None
            continue

        ts, text = m.groups()
        text = text.strip()
        # A bare tag, or a tag followed only by a label like "[Verse 1]"
        # (with or without a space in between), carries no lyric itself.
        if not text or (text.startswith("[") and text.endswith("]")):
            pending_ts = ts
        else:
            cleaned.append((ts, text))

    # Pass 2: merge lyrics that start at the same instant. Keying on the
    # parsed time rather than the raw text treats "00:12.5" and "00:12.50"
    # as the same cue.
    merged = {}
    for ts, text in cleaned:
        start = parse_timestamp(ts)
        if start in merged:
            merged[start] += " " + text
        else:
            merged[start] = text

    starts = sorted(merged)

    # Write SRT
    with open(srt_path, "w", encoding="utf-8") as out:
        for i, start in enumerate(starts, 1):
            if i < len(starts):
                # enumerate counts from 1, so starts[i] is the following cue.
                next_start = starts[i]
                end = next_start - cue_gap
                if end <= start:
                    # Cues closer together than the gap: end at the next
                    # start instead of at or before this cue's own start.
                    end = next_start
            else:
                end = start + last_cue_length

            out.write(f"{i}\n")
            out.write(f"{format_srt_timestamp(start)} --> {format_srt_timestamp(end)}\n")
            out.write(f"{merged[start]}\n\n")

    print(f"SRT created successfully: {srt_path}")

 if __name__ == "__main__":
    # Command-line entry point: exactly two arguments are expected, the LRC
    # file to read and the SRT file to write.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python lrc_to_srt.py input.lrc output.srt")
        sys.exit(1)

    source_path, target_path = cli_args
    lrc_to_srt(source_path, target_path)
	"""
	LRC to SRT Subtitle Converter in Python
	======================================

	Author: ph33nx
	GitHub: https://github.com/ph33nx
	License: MIT

	Description:
	------------
	A clean and efficient Python script that converts LRC files
	(including aligned_words.lrc format) into proper SRT subtitle files for
	Final Cut Pro, DaVinci Resolve, Premiere Pro, YouTube, or any video editor.

	This tool fixes messy LRC formats such as:
	- Double tags like `[00:23.37][Verse 1]`
	- Line labels (Verse/Chorus/Bridge)
	- Multi-line word-by-word timestamps
	- Isolated fragments
	- Empty metadata lines

	The script:
	- Preserves the main timestamp
	- Removes labels (Verse, Chorus, etc.)
	- Merges scattered lyric fragments into full lines
	- Generates clean, properly timed SRT entries
	- Optimizes subtitles for lyric videos, karaoke videos, and music reels

	Perfect for:
	------------
	✓ Suno AI music creators
	✓ YouTube lyric video editors
	✓ FCPX editors needing auto-generated subtitles
	✓ Anyone converting LRC → SRT in Python
	✓ Musicians looking to auto-sync lyrics with audio
	✓ Karaoke creators
	✓ Video editors wanting stylable captions

	SEO Keywords:
	-------------
	python lrc to srt converter, suno ai subtitles, subtitle generator python,
	lyrics to srt python script, lrc parser python, auto subtitle sync,
	convert aligned_words.lrc, suno.ai caption generator, lrc cleanup script,
	subtitle timing python, srt builder python, karaoke timing python

	Usage:
	------
	python lrc_to_srt.py input.lrc output.srt

	This repository is MIT licensed — feel free to fork, modify, and use commercially.
	"""

	import re
	import sys
	from datetime import timedelta

	def parse_timestamp(ts):
	    """Convert an LRC timestamp string into a ``timedelta``.

	    Args:
	        ts: Timestamp text without the surrounding brackets, in ``mm:ss``
	            or ``mm:ss.fff`` form (for example ``"00:23.37"``).

	    Returns:
	        A ``timedelta`` equal to the given minutes, seconds and fraction.

	    Raises:
	        ValueError: If ``ts`` is not ``minutes:seconds`` or one of its
	            fields is not an integer.
	    """
	    minutes, seconds = ts.split(":")
	    # The digits after the dot are a decimal fraction of a second, not a
	    # millisecond count: pad short fractions ("5" -> 0.5 s) and cut long
	    # ones at microsecond resolution so "3456" means 0.3456 s, not 3.456 s.
	    seconds, _, fraction = seconds.partition(".")
	    micros = int(fraction[:6].ljust(6, "0"))
	    return timedelta(
	        minutes=int(minutes), seconds=int(seconds), microseconds=micros
	    )

	def format_srt_timestamp(td):
	    """Render a ``timedelta`` as an SRT timestamp (``HH:MM:SS,mmm``).

	    Args:
	        td: Offset from the start of the media.

	    Returns:
	        The offset cut down to whole milliseconds, with hours, minutes and
	        seconds zero-padded to two digits and milliseconds to three.
	        Negative offsets, which SRT cannot express, are rendered as
	        ``00:00:00,000``.
	    """
	    # Integer floor division keeps the millisecond count exact; scaling
	    # total_seconds() by 1000 goes through a float and can come out one
	    # millisecond short (1.001 s -> 1000 ms).
	    total_ms = max(td // timedelta(milliseconds=1), 0)
	    total_seconds, ms = divmod(total_ms, 1000)
	    total_minutes, seconds = divmod(total_seconds, 60)
	    hours, minutes = divmod(total_minutes, 60)
	    return f"{hours:02}:{minutes:02}:{seconds:02},{ms:03}"

	def lrc_to_srt(lrc_path, srt_path):
	    """Convert an LRC lyrics file into an SRT subtitle file.

	    Each ``[mm:ss.xx]`` tag starts a cue. A tag with no lyric text of its
	    own (bare, or followed only by a bracketed label such as ``[Verse 1]``)
	    lends its time to the next untagged line; further untagged lines are
	    skipped until another such tag appears. Lyrics that share a start time
	    are joined with spaces into one cue. Every cue ends 20 ms before the
	    next one starts (or exactly at the next start when the two are 20 ms or
	    less apart); the last cue lasts three seconds.

	    Args:
	        lrc_path: Path of the LRC file to read (UTF-8, with or without BOM).
	        srt_path: Path of the SRT file to write (UTF-8); an existing file
	            is overwritten.

	    Raises:
	        OSError: If either file cannot be opened.
	    """
	    tagged_line = re.compile(r"^\[(\d{2}:\d{2}\.\d+)\](.*)")
	    cue_gap = timedelta(milliseconds=20)
	    last_cue_length = timedelta(seconds=3)

	    # "utf-8-sig" also reads plain UTF-8 but strips a leading byte-order
	    # mark, which would otherwise hide the first line's "[" from the pattern.
	    with open(lrc_path, "r", encoding="utf-8-sig") as f:
	        raw = f.readlines()

	    # Pass 1: pair every lyric with the timestamp it belongs to.
	    cleaned = []
	    pending_ts = None  # tag seen without lyric text, waiting for a lyric
	    for line in raw:
	        line = line.strip()
	        if not line:
	            continue

	        m = tagged_line.match(line)
	        if m is None:
	            # Untagged line: it is a lyric only if a tag is waiting for one.
	            if pending_ts is not None:
	                cleaned.append((pending_ts, line))
	                pending_ts = None
	            continue

	        ts, text = m.groups()
	        text = text.strip()
	        # A bare tag, or a tag followed only by a label like "[Verse 1]"
	        # (with or without a space in between), carries no lyric itself.
	        if not text or (text.startswith("[") and text.endswith("]")):
	            pending_ts = ts
	        else:
	            cleaned.append((ts, text))

	    # Pass 2: merge lyrics that start at the same instant. Keying on the
	    # parsed time rather than the raw text treats "00:12.5" and "00:12.50"
	    # as the same cue.
	    merged = {}
	    for ts, text in cleaned:
	        start = parse_timestamp(ts)
	        if start in merged:
	            merged[start] += " " + text
	        else:
	            merged[start] = text

	    starts = sorted(merged)

	    # Write SRT
	    with open(srt_path, "w", encoding="utf-8") as out:
	        for i, start in enumerate(starts, 1):
	            if i < len(starts):
	                # enumerate counts from 1, so starts[i] is the following cue.
	                next_start = starts[i]
	                end = next_start - cue_gap
	                if end <= start:
	                    # Cues closer together than the gap: end at the next
	                    # start instead of at or before this cue's own start.
	                    end = next_start
	            else:
	                end = start + last_cue_length

	            out.write(f"{i}\n")
	            out.write(f"{format_srt_timestamp(start)} --> {format_srt_timestamp(end)}\n")
	            out.write(f"{merged[start]}\n\n")

	    print(f"SRT created successfully: {srt_path}")

	if __name__ == "__main__":
	    # Command-line entry point: exactly two arguments are expected, the LRC
	    # file to read and the SRT file to write.
	    if len(sys.argv) != 3:
	        print("Usage: python lrc_to_srt.py input.lrc output.srt")
	        sys.exit(1)

	    lrc_to_srt(sys.argv[1], sys.argv[2])