joninvski · December 12, 2025 14:51
diff --git a/transform_csv.py b/transform_csv.py
 import csv
 import sys

 def transform_csv(input_file, output_file):
    """
    Transform CSV from wide format to long format.
    Converts column-based categories (Top, Middle, Bottom) into row-based format.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        csv_reader = csv.reader(f)
        all_rows = list(csv_reader)
    
    # Parse the three header rows using proper CSV parsing
    header_row_0 = all_rows[0]  # Top category (PGC, PI, PIP)
    header_row_1 = all_rows[1]  # Middle category (Primária, Secundária, Multipack)
    header_row_2 = all_rows[2]  # Bottom category (Material names)
    
    # Debug: print lengths
    print(f"Header row 0 length: {len(header_row_0)}")
    print(f"Header row 1 length: {len(header_row_1)}")
    print(f"Header row 2 length: {len(header_row_2)}")
    
    # Find the maximum column count
    max_cols = max(len(header_row_0), len(header_row_1), len(header_row_2))
    print(f"Max columns: {max_cols}")
    
    # Pad shorter rows with empty strings
    header_row_0.extend([''] * (max_cols - len(header_row_0)))
    header_row_1.extend([''] * (max_cols - len(header_row_1)))
    header_row_2.extend([''] * (max_cols - len(header_row_2)))
    
    # Strip whitespace
    header_row_0 = [cell.strip() for cell in header_row_0]
    header_row_1 = [cell.strip() for cell in header_row_1]
    header_row_2 = [cell.strip() for cell in header_row_2]
    
    # Create mapping for each value column (columns after the first 3)
    column_mapping = []
    for i in range(3, max_cols):
        column_mapping.append({
            'column_index': i,
            'top_category': header_row_0[i],
            'middle_category': header_row_1[i],
            'bottom_category': header_row_2[i]
        })
    
    print(f"Column mapping length: {len(column_mapping)}")

    # Parse data rows (starting from row 3, which is index 3)
    result_rows = []
    
    for row_idx in range(3, len(all_rows)):
        cells = all_rows[row_idx]
        
        # Skip empty rows
        if not cells or all(cell.strip() == '' for cell in cells):
            continue
        
        # Get identifier values (first 3 columns)
        if len(cells) < 3:
            continue
            
        sector = cells[0].strip()
        subsector = cells[1].strip()
        cae = cells[2].strip()
        
        # For each value column, create a new row
        for col_info in column_mapping:
            col_idx = col_info['column_index']
            
            # Get the value at this column index
            if col_idx < len(cells):
                value = cells[col_idx].strip()
            else:
                value = ''
            
            result_rows.append({
                'Sector': sector,
                'SubSector': subsector,
                'CAE Principal': cae,
                'Top_Category': col_info['top_category'],
                'Middle_Category': col_info['middle_category'],
                'Bottom_Category': col_info['bottom_category'],
                'Value': value
            })
    

    # Write output CSV
    with open(output_file, 'w', encoding='utf-8', newline='') as f:
        fieldnames = ['Sector', 'SubSector', 'CAE Principal', 'Top_Category', 'Middle_Category', 'Bottom_Category', 'Value']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(result_rows)
    
    print(f"Transformation complete! Output saved to: {output_file}")
    print(f"Total output rows: {len(result_rows)}")

 if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python transform_csv.py <input_file> <output_file>")
        print("Example: python transform_csv.py input.csv output.csv")
        sys.exit(1)
    
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    
    transform_csv(input_file, output_file)
	import csv
	import sys

	def transform_csv(input_file, output_file):
	"""
	Transform CSV from wide format to long format.
	Converts column-based categories (Top, Middle, Bottom) into row-based format.
	"""
	with open(input_file, 'r', encoding='utf-8') as f:
	csv_reader = csv.reader(f)
	all_rows = list(csv_reader)

	# Parse the three header rows using proper CSV parsing
	header_row_0 = all_rows[0] # Top category (PGC, PI, PIP)
	header_row_1 = all_rows[1] # Middle category (Primária, Secundária, Multipack)
	header_row_2 = all_rows[2] # Bottom category (Material names)

	# Debug: print lengths
	print(f"Header row 0 length: {len(header_row_0)}")
	print(f"Header row 1 length: {len(header_row_1)}")
	print(f"Header row 2 length: {len(header_row_2)}")

	# Find the maximum column count
	max_cols = max(len(header_row_0), len(header_row_1), len(header_row_2))
	print(f"Max columns: {max_cols}")

	# Pad shorter rows with empty strings
	header_row_0.extend([''] * (max_cols - len(header_row_0)))
	header_row_1.extend([''] * (max_cols - len(header_row_1)))
	header_row_2.extend([''] * (max_cols - len(header_row_2)))

	# Strip whitespace
	header_row_0 = [cell.strip() for cell in header_row_0]
	header_row_1 = [cell.strip() for cell in header_row_1]
	header_row_2 = [cell.strip() for cell in header_row_2]

	# Create mapping for each value column (columns after the first 3)
	column_mapping = []
	for i in range(3, max_cols):
	column_mapping.append({
	'column_index': i,
	'top_category': header_row_0[i],
	'middle_category': header_row_1[i],
	'bottom_category': header_row_2[i]
	})

	print(f"Column mapping length: {len(column_mapping)}")

	# Parse data rows (starting from row 3, which is index 3)
	result_rows = []

	for row_idx in range(3, len(all_rows)):
	cells = all_rows[row_idx]

	# Skip empty rows
	if not cells or all(cell.strip() == '' for cell in cells):
	continue

	# Get identifier values (first 3 columns)
	if len(cells) < 3:
	continue

	sector = cells[0].strip()
	subsector = cells[1].strip()
	cae = cells[2].strip()

	# For each value column, create a new row
	for col_info in column_mapping:
	col_idx = col_info['column_index']

	# Get the value at this column index
	if col_idx < len(cells):
	value = cells[col_idx].strip()
	else:
	value = ''

	result_rows.append({
	'Sector': sector,
	'SubSector': subsector,
	'CAE Principal': cae,
	'Top_Category': col_info['top_category'],
	'Middle_Category': col_info['middle_category'],
	'Bottom_Category': col_info['bottom_category'],
	'Value': value
	})


	# Write output CSV
	with open(output_file, 'w', encoding='utf-8', newline='') as f:
	fieldnames = ['Sector', 'SubSector', 'CAE Principal', 'Top_Category', 'Middle_Category', 'Bottom_Category', 'Value']
	writer = csv.DictWriter(f, fieldnames=fieldnames)
	writer.writeheader()
	writer.writerows(result_rows)

	print(f"Transformation complete! Output saved to: {output_file}")
	print(f"Total output rows: {len(result_rows)}")

	if __name__ == "__main__":
	if len(sys.argv) != 3:
	print("Usage: python transform_csv.py <input_file> <output_file>")
	print("Example: python transform_csv.py input.csv output.csv")
	sys.exit(1)

	input_file = sys.argv[1]
	output_file = sys.argv[2]

	transform_csv(input_file, output_file)
No results found