Skip to content

Instantly share code, notes, and snippets.

@mzhang77
Created April 25, 2025 21:24
Show Gist options
  • Select an option

  • Save mzhang77/eb7f42e180db5dd5d0dc0330b9da05bd to your computer and use it in GitHub Desktop.

Select an option

Save mzhang77/eb7f42e180db5dd5d0dc0330b9da05bd to your computer and use it in GitHub Desktop.
import mysql.connector
import time
from concurrent.futures import ThreadPoolExecutor
def prepare_test_data(cursor, conn, row_count=40000):
    """Drop and recreate the `institutions` table, then seed it with test rows.

    Args:
        cursor: an open DB-API cursor (e.g. mysql.connector cursor).
        conn: the connection owning `cursor`; committed once after the bulk insert.
        row_count: number of rows to insert (ids 1..row_count). Defaults to
            40000 to match the original benchmark.
    """
    cursor.execute("DROP TABLE IF EXISTS institutions")
    cursor.execute("""
CREATE TABLE institutions (
id INT PRIMARY KEY,
data VARCHAR(255)
)
""")
    # ids are 1-based; one executemany call keeps the seeding fast.
    insert_data = [(i, f"original_data_{i}") for i in range(1, row_count + 1)]
    cursor.executemany("INSERT INTO institutions (id, data) VALUES (%s, %s)", insert_data)
    conn.commit()
def run_update(chunk):
    """Apply one chunk of (id, value) updates in a single CASE-based UPDATE.

    Opens its own connection so the function is safe to call from worker
    threads (mysql.connector connections are not thread-safe to share).

    Args:
        chunk: non-empty sequence of (id, new_value) pairs.
    """
    conn = mysql.connector.connect(
        host="127.0.0.1",
        user="root",
        port="4000",
        database="test"
    )
    try:
        cursor = conn.cursor()
        try:
            # Build a parameterized statement instead of interpolating values
            # into the SQL text: avoids injection and quoting bugs.
            when_clauses = " ".join("WHEN id=%s THEN %s" for _ in chunk)
            in_placeholders = ",".join("%s" for _ in chunk)
            sql = (
                f"UPDATE institutions SET data = CASE {when_clauses} "
                f"ELSE data END WHERE id IN ({in_placeholders})"
            )
            # Parameters: (id, val) pairs for the CASE arms, then ids for IN.
            params = [p for pair in chunk for p in pair]
            params.extend(i for i, _ in chunk)
            cursor.execute(sql, params)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if execute/commit raises.
        conn.close()
def main():
    """Seed the test table, then benchmark chunked serial updates."""
    # Reset and reload the 40k-row test table.
    conn = mysql.connector.connect(
        host="127.0.0.1",
        user="root",
        port="4000",
        database="test"
    )
    cursor = conn.cursor()
    prepare_test_data(cursor, conn)
    cursor.close()
    conn.close()

    # Prepare update values and split them into fixed-size chunks.
    update_pairs = [(i, f"updated_data_{i}") for i in range(1, 40001)]
    chunk_size = 1000
    chunks = [update_pairs[i:i + chunk_size]
              for i in range(0, len(update_pairs), chunk_size)]

    # Optional parallel variant (each worker opens its own connection):
    #     with ThreadPoolExecutor(max_workers=8) as executor:
    #         executor.map(run_update, chunks)

    # Serial update benchmark.
    start_time_serial = time.time()
    for chunk in chunks:
        run_update(chunk)
    end_time_serial = time.time()
    print(f"Serial update completed in {end_time_serial - start_time_serial:.2f} seconds.")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment