BrianAker · August 13, 2024 04:51
diff --git a/decode_series_name.pl b/decode_series_name.pl
 #!/usr/bin/perl
 use strict;
 use warnings;
 use File::Path qw(make_path);
 use File::Copy qw(move);

 # Version string reported in the help output
 my $VERSION = "1.1.16-2024.08.12.00.00";

 # Group-size filter set by +N / -N: +N stores N, -N stores the negated value
 my $min_group_size = 2;
 my $create_shift = 0;  # Set by --create-shift: create directories and move files
 my $show_help = 0;     # Set by --help: print the usage text
 my $debug_mode = 0;    # Set by --debug: print the decoded fields of every file

 # Global hash collecting the series information
 my %series_groups;

 # Walk @ARGV once: recognized options update the settings above, and every
 # other argument is kept, in its original order, as the new @ARGV
 my %flag_for = (
    '--create-shift' => \$create_shift,
    '--help'         => \$show_help,
    '--debug'        => \$debug_mode,
 );
 my @remaining_args;
 foreach my $arg (@ARGV) {
    if ($arg =~ /^([+-])(\d+)$/) {
        $min_group_size = $1 eq '+' ? $2 : -$2;
    } elsif (exists $flag_for{$arg}) {
        ${ $flag_for{$arg} } = 1;
    } else {
        push @remaining_args, $arg;
    }
 }
 @ARGV = @remaining_args;

 # Print the usage line, the supported options, and the script version.
 # Returns whatever print returns.
 sub print_help {
    my $help_text = <<"END_HELP";
 Usage: $0 [options] *.cbz

 Options:
  +N              Only print groups with N or more files.
  -N              Only print groups with fewer than N files.
  --create-shift  Create directories based on the series name and move files into them.
  --debug         Print debug information such as Resolution, ScanInformation, Publisher, Title, Volume, Chapter, Publishing Date, and LanguageISO.
  --help          Display this help message.

 Version: $VERSION
 END_HELP
    return print $help_text;
 }

 # The usage text is shown both on request (--help) and when no arguments
 # are left to process; either way the script stops here
 my $wants_usage = $show_help || !@ARGV;
 if ($wants_usage) {
    print_help();
    exit;
 }

 # Decode a cbz filename into its metadata fields and the remaining series name.
 #
 # Parameters:
 #   $filename - filename to decode; only the local copy is modified
 #   $info     - hash reference that receives the decoded fields
 #
 # Keys written to %$info (all but series_name only when found):
 #   scan_info, resolution, language_iso, format, publisher_info,
 #   volume, chapter, title, publishing_date, series_name
 #
 # Returns nothing; every result is delivered through $info.
 sub extract_series_name {
    my ($filename, $info) = @_;

    # Extract and remove ScanInformation (e.g., [ScanGroup]) at the start of the filename
    if ($filename =~ s/^\[([^\]]+)\]\s*//) {
        $info->{scan_info} = $1;
    }

    # Remove the file extension (.cbz)
    $filename =~ s/\.cbz$//i;

    # Extract and remove resolution info (e.g., (x3200))
    if ($filename =~ s/\(x(\d+)\)//) {
        $info->{resolution} = $1;
    }

    # Extract and remove the exact string "ENGLISH" surrounded by [] or () and store in LanguageISO
    if ($filename =~ s/[\[\(]ENGLISH[\]\)]//i) {
        $info->{language_iso} = "EN";
    }

    # Extract and remove (PNG) or [PNG] in any case and store in format
    if ($filename =~ s/[\(\[]png[\)\]]//i) {
        $info->{format} = "PNG";
    }

    # Extract and remove publisher information anchored at the end of the string (either [] or ())
    if ($filename =~ s/\s*[\[\(]([^\]\)]+)[\]\)]\s*$//) {
        $info->{publisher_info} = $1;
    }

    # Extract and remove volume information (e.g., Vol. 5, Vol. 2.0, v02, v02.5)
    if ($filename =~ s/\b(?:Volume|Vol|Vol\.)\s*(\d+(\.\d+)?|v0{0,4}(\d{1,5}(?:\.\d+)?))\b//i) {
        # $3 is the number behind a "v" prefix. Test definedness, not truth:
        # a captured "0" is false and would otherwise fall back to the raw "v0" text.
        $info->{volume} = defined $3 ? $3 : $1;
    } elsif ($filename =~ s/\bv0{0,4}(\d{1,5}(\.\d+)?)\b//i) {
        $info->{volume} = $1;  # Store only the numeric part
    }

    # Remove chapter/episode/operation information before extracting the title
    if ($filename =~ s/\b(?:Chapter|Ch\.?|Part|Ep|Ep\.|Episode|Op|Op\.)\s*(\d+|EX\d+)\b//i) {
        $info->{chapter} = $1;
    } elsif ($filename =~ s/\b(EX\d+)\b//i) {
        $info->{chapter} = $1;
    }

    # Everything after the first " - " is the title; remove it from the series name
    if ($filename =~ s/\s-\s(.+)$//) {
        $info->{title} = $1;
    }

    # If there is still no title and the filename does not start with "re" followed by
    # the colon look-alike character (case-insensitive), take the text after that
    # character plus whitespace as the title
    if (!defined $info->{title} && $filename !~ /^re꞉/i && $filename =~ s/꞉\s(.+)$//) {
        $info->{title} = $1;
    }

    # If there is still no title, look for a title surrounded by ~ ~
    if (!defined $info->{title} && $filename =~ s/~([^~]+)~//) {
        $info->{title} = $1;
    }

    # Extract and remove publishing year/month (e.g., 2024-02) at the end of the name.
    # Earlier removals can leave trailing spaces behind, so allow them before the end.
    if ($filename =~ s/\b(20\d{2}-\d{2})\b\s*$//) {
        $info->{publishing_date} = $1;
    }

    # Whatever is left is the series name, minus trailing whitespace
    $filename =~ s/\s+$//;
    $info->{series_name} = $filename;

    return;
 }

 # Decode every filename left on the command line and file it under its
 # upper-cased series name
 foreach my $filename (@ARGV) {
    # Anything without a .cbz extension is reported and ignored
    unless ($filename =~ /\.cbz$/i) {
        print "Warning: '$filename' is not a .cbz file. Skipping...\n";
        next;
    }

    my %info;
    extract_series_name($filename, \%info);

    # The upper-cased series name is the grouping key
    my $group = $series_groups{ uc($info{series_name}) } ||= {};

    # Keep the filename together with everything decoded from it
    my %record = (filename => $filename, %info);
    push @{ $group->{files} }, \%record;

    # Tally how often each original spelling of the series name occurs
    $group->{count}{ $info{series_name} }++;
 }

 # Determine the most common original series name for each group.
 # Equal counts are broken alphabetically so the chosen name is the same on
 # every run; sorting by count alone leaves ties to the order in which keys
 # happens to return the spellings.
 foreach my $normalized_name (keys %series_groups) {
    my $count_of = $series_groups{$normalized_name}{count};
    my ($most_common_name) =
        sort { $count_of->{$b} <=> $count_of->{$a} or $a cmp $b } keys %$count_of;
    $series_groups{$normalized_name}{most_common_name} = $most_common_name;
 }

 # Debug fields as [label, file-info key] pairs, in the order they are printed
 my @debug_fields = (
    [ 'Resolution',      'resolution' ],
    [ 'ScanInformation', 'scan_info' ],
    [ 'Publisher',       'publisher_info' ],
    [ 'Title',           'title' ],
    [ 'Volume',          'volume' ],
    [ 'Chapter',         'chapter' ],
    [ 'Publishing Date', 'publishing_date' ],
    [ 'LanguageISO',     'language_iso' ],
 );

 # Visit the groups in sorted order of their normalized name, print the ones
 # that pass the min_group_size filter and, with --create-shift, move their
 # files into a directory named after the series
 foreach my $normalized_name (sort keys %series_groups) {
    my @files = @{$series_groups{$normalized_name}{files}};
    next if ($min_group_size > 0 && @files < $min_group_size);   # Skip groups with fewer than the specified minimum number of files
    next if ($min_group_size < 0 && @files >= abs($min_group_size));  # Skip groups with equal to or more than the specified negative minimum number of files

    my $most_common_name = $series_groups{$normalized_name}{most_common_name};

    print "Series: $most_common_name\n";
    foreach my $file_info (@files) {
        print "  File: $file_info->{filename}\n";
        next unless $debug_mode;

        foreach my $field (@debug_fields) {
            my ($label, $key) = @$field;
            my $value = $file_info->{$key};

            # Test for presence rather than truth so that a decoded "0"
            # (e.g. chapter 0 or volume 0) is still reported
            next unless defined $value && length $value;
            print "    $label: $value\n";
        }
    }
    print "\n";

    # If --create-shift is specified, create the directory and move the files
    if ($create_shift) {
        my $dir_name = $most_common_name;  # Use the most common series name directly as the directory name
        unless (-d $dir_name) {
            make_path($dir_name) or die "Failed to create directory '$dir_name': $!";
        }
        foreach my $file_info (@files) {
            my $file = $file_info->{filename};
            move($file, "$dir_name/") or die "Failed to move file '$file' to '$dir_name': $!";
        }
        print "Moved files to directory: $dir_name\n";
    }
 }
	#!/usr/bin/perl
	use strict;
	use warnings;
	use File::Path qw(make_path);
	use File::Copy qw(move);

	# Version string reported in the help output
	my $VERSION = "1.1.16-2024.08.12.00.00";

	# Group-size threshold: +N stores N, -N stores the negated value
	my $min_group_size = 2;
	my $create_shift = 0; # Becomes 1 when --create-shift is given
	my $show_help = 0; # Becomes 1 when --help is given
	my $debug_mode = 0; # Becomes 1 when --debug is given

	# Series information collected while decoding filenames
	my %series_groups;

	# Consume the options in a single pass over @ARGV; arguments that are
	# not options are collected in order and become the new @ARGV
	my @file_args;
	for my $arg (@ARGV) {
	    if    ($arg =~ /^\+(\d+)$/)      { $min_group_size = $1; }
	    elsif ($arg =~ /^-(\d+)$/)       { $min_group_size = -$1; }
	    elsif ($arg eq '--create-shift') { $create_shift = 1; }
	    elsif ($arg eq '--help')         { $show_help = 1; }
	    elsif ($arg eq '--debug')        { $debug_mode = 1; }
	    else                             { push @file_args, $arg; }
	}
	@ARGV = @file_args;

	# Print the usage line, the supported options, and the script version.
	# Returns whatever print returns.
	sub print_help {
	    my $help_text = <<"END_HELP";
	Usage: $0 [options] *.cbz

	Options:
	+N Only print groups with N or more files.
	-N Only print groups with fewer than N files.
	--create-shift Create directories based on the series name and move files into them.
	--debug Print debug information such as Resolution, ScanInformation, Publisher, Title, Volume, Chapter, Publishing Date, and LanguageISO.
	--help Display this help message.

	Version: $VERSION
	END_HELP
	    return print $help_text;
	}

	# Show the usage text and stop when --help was given or when no
	# arguments are left to process. The logical-or is written as a plain
	# "||"; a backslash-escaped form is not valid Perl.
	if ($show_help || !@ARGV) {
	    print_help();
	    exit;
	}

	# Decode a cbz filename into its metadata fields and the remaining series name.
	#
	# Parameters:
	#   $filename - filename to decode; only the local copy is modified
	#   $info     - hash reference that receives the decoded fields
	#
	# Keys written to %$info (all but series_name only when found):
	#   scan_info, resolution, language_iso, format, publisher_info,
	#   volume, chapter, title, publishing_date, series_name
	#
	# Returns nothing; every result is delivered through $info.
	sub extract_series_name {
	    my ($filename, $info) = @_;

	    # Extract and remove ScanInformation (e.g., [ScanGroup]) at the start of the filename
	    if ($filename =~ s/^\[([^\]]+)\]\s*//) {
	        $info->{scan_info} = $1;
	    }

	    # Remove the file extension (.cbz)
	    $filename =~ s/\.cbz$//i;

	    # Extract and remove resolution info (e.g., (x3200))
	    if ($filename =~ s/\(x(\d+)\)//) {
	        $info->{resolution} = $1;
	    }

	    # Extract and remove the exact string "ENGLISH" surrounded by [] or () and store in LanguageISO
	    if ($filename =~ s/[\[\(]ENGLISH[\]\)]//i) {
	        $info->{language_iso} = "EN";
	    }

	    # Extract and remove (PNG) or [PNG] in any case and store in format
	    if ($filename =~ s/[\(\[]png[\)\]]//i) {
	        $info->{format} = "PNG";
	    }

	    # Extract and remove publisher information anchored at the end of the string
	    # (either [] or ()). The surrounding whitespace is optional, so a name that
	    # ends directly in ")" or "]" is recognized as well.
	    if ($filename =~ s/\s*[\[\(]([^\]\)]+)[\]\)]\s*$//) {
	        $info->{publisher_info} = $1;
	    }

	    # Extract and remove volume information (e.g., Vol. 5, Vol. 2.0, v02, v02.5).
	    # The alternatives are separated by real "|" operators; an escaped "\|"
	    # would only match a literal pipe character.
	    if ($filename =~ s/\b(?:Volume|Vol|Vol\.)\s*(\d+(\.\d+)?|v0{0,4}(\d{1,5}(?:\.\d+)?))\b//i) {
	        # $3 is the number behind a "v" prefix. Test definedness, not truth:
	        # a captured "0" is false and would otherwise fall back to the raw "v0" text.
	        $info->{volume} = defined $3 ? $3 : $1;
	    } elsif ($filename =~ s/\bv0{0,4}(\d{1,5}(\.\d+)?)\b//i) {
	        $info->{volume} = $1; # Store only the numeric part
	    }

	    # Remove chapter/episode/operation information before extracting the title
	    if ($filename =~ s/\b(?:Chapter|Ch\.?|Part|Ep|Ep\.|Episode|Op|Op\.)\s*(\d+|EX\d+)\b//i) {
	        $info->{chapter} = $1;
	    } elsif ($filename =~ s/\b(EX\d+)\b//i) {
	        $info->{chapter} = $1;
	    }

	    # Everything after the first " - " is the title; remove it from the series name
	    if ($filename =~ s/\s-\s(.+)$//) {
	        $info->{title} = $1;
	    }

	    # If there is still no title and the filename does not start with "re" followed by
	    # the colon look-alike character (case-insensitive), take the text after that
	    # character plus whitespace as the title
	    if (!defined $info->{title} && $filename !~ /^re꞉/i && $filename =~ s/꞉\s(.+)$//) {
	        $info->{title} = $1;
	    }

	    # If there is still no title, look for a title surrounded by ~ ~
	    if (!defined $info->{title} && $filename =~ s/~([^~]+)~//) {
	        $info->{title} = $1;
	    }

	    # Extract and remove publishing year/month (e.g., 2024-02) at the end of the name.
	    # Earlier removals can leave trailing spaces behind, so allow them before the end.
	    if ($filename =~ s/\b(20\d{2}-\d{2})\b\s*$//) {
	        $info->{publishing_date} = $1;
	    }

	    # Whatever is left is the series name, minus trailing whitespace
	    $filename =~ s/\s+$//;
	    $info->{series_name} = $filename;

	    return;
	}

	# Decode every filename left on the command line and file it under its
	# upper-cased series name
	foreach my $filename (@ARGV) {
	    # Anything without a .cbz extension is reported and ignored
	    unless ($filename =~ /\.cbz$/i) {
	        print "Warning: '$filename' is not a .cbz file. Skipping...\n";
	        next;
	    }

	    my %info;
	    extract_series_name($filename, \%info);

	    # The upper-cased series name is the grouping key
	    my $group = $series_groups{ uc($info{series_name}) } ||= {};

	    # Keep the filename together with everything decoded from it
	    my %record = (filename => $filename, %info);
	    push @{ $group->{files} }, \%record;

	    # Tally how often each original spelling of the series name occurs
	    $group->{count}{ $info{series_name} }++;
	}

	# Determine the most common original series name for each group.
	# Equal counts are broken alphabetically so the chosen name is the same on
	# every run; sorting by count alone leaves ties to the order in which keys
	# happens to return the spellings.
	foreach my $normalized_name (keys %series_groups) {
	    my $count_of = $series_groups{$normalized_name}{count};
	    my ($most_common_name) =
	        sort { $count_of->{$b} <=> $count_of->{$a} or $a cmp $b } keys %$count_of;
	    $series_groups{$normalized_name}{most_common_name} = $most_common_name;
	}

	# Debug fields as [label, file-info key] pairs, in the order they are printed
	my @debug_fields = (
	    [ 'Resolution',      'resolution' ],
	    [ 'ScanInformation', 'scan_info' ],
	    [ 'Publisher',       'publisher_info' ],
	    [ 'Title',           'title' ],
	    [ 'Volume',          'volume' ],
	    [ 'Chapter',         'chapter' ],
	    [ 'Publishing Date', 'publishing_date' ],
	    [ 'LanguageISO',     'language_iso' ],
	);

	# Visit the groups in sorted order of their normalized name, print the ones
	# that pass the min_group_size filter and, with --create-shift, move their
	# files into a directory named after the series
	foreach my $normalized_name (sort keys %series_groups) {
	    my @files = @{$series_groups{$normalized_name}{files}};
	    next if ($min_group_size > 0 && @files < $min_group_size); # Skip groups with fewer than the specified minimum number of files
	    next if ($min_group_size < 0 && @files >= abs($min_group_size)); # Skip groups with equal to or more than the specified negative minimum number of files

	    my $most_common_name = $series_groups{$normalized_name}{most_common_name};

	    print "Series: $most_common_name\n";
	    foreach my $file_info (@files) {
	        print " File: $file_info->{filename}\n";
	        next unless $debug_mode;

	        foreach my $field (@debug_fields) {
	            my ($label, $key) = @$field;
	            my $value = $file_info->{$key};

	            # Test for presence rather than truth so that a decoded "0"
	            # (e.g. chapter 0 or volume 0) is still reported
	            next unless defined $value && length $value;
	            print " $label: $value\n";
	        }
	    }
	    print "\n";

	    # If --create-shift is specified, create the directory and move the files
	    if ($create_shift) {
	        my $dir_name = $most_common_name; # Use the most common series name directly as the directory name
	        unless (-d $dir_name) {
	            make_path($dir_name) or die "Failed to create directory '$dir_name': $!";
	        }
	        foreach my $file_info (@files) {
	            my $file = $file_info->{filename};
	            move($file, "$dir_name/") or die "Failed to move file '$file' to '$dir_name': $!";
	        }
	        print "Moved files to directory: $dir_name\n";
	    }
	}
No results found