Created
May 30, 2020 02:32
-
-
Save ianjmacintosh/7333ba5dd77c00c4e36dc3986124e4d6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sys | |
| import csv | |
| import re | |
# Shared module state, filled in by main() and generate_sample_object().
# STRs: the database header row in file order ("name" followed by each STR).
# profiles: one dict per database row, mapping each header to that row's value.
STRs, profiles = [], []
# sample_object: longest consecutive repeat count found in the sample, per STR.
sample_object = {}
def profile_matches_sample(profile, sample):
    """Return True when *profile* agrees with *sample* on every key of *sample*.

    Only the keys present in ``sample`` are compared, so extra entries in
    ``profile`` (such as ``"name"``) are ignored. A key that is missing from
    ``profile`` is looked up as ``None`` and therefore counts as a mismatch
    unless the sample value is also ``None``. An empty ``sample`` matches
    every profile.
    """
    return not any(
        profile.get(marker) != sample.get(marker) for marker in sample
    )
def generate_sample_object(sample, strs=None, counts=None):
    """Record the longest consecutive run of each STR found in *sample*.

    Args:
        sample: The DNA sequence text to scan.
        strs: Iterable of STR strings to look for. The literal entry
            ``"name"`` and empty strings are skipped. Defaults to the
            module-level ``STRs`` list.
        counts: Dict that receives ``{STR: longest run}``. Defaults to the
            module-level ``sample_object`` dict.

    Returns:
        The ``counts`` dict that was updated. Every processed STR gets an
        integer entry, ``0`` when it does not occur in ``sample``.
    """
    if strs is None:
        strs = STRs
    if counts is None:
        counts = sample_object

    for key in strs:
        # "name" is the database's label column, not an STR; an empty header
        # cannot repeat and would otherwise divide by zero below.
        if key == "name" or not key:
            continue

        # The lookahead tries a run at *every* offset. A plain "(key)+" scan
        # is non-overlapping, so an early short match can swallow the first
        # characters of a longer run that begins inside it (possible whenever
        # the STR's ending equals its beginning, e.g. "TTTTTTCT").
        # re.escape keeps regex metacharacters in a header literal.
        run_at_offset = re.compile("(?=((?:" + re.escape(key) + ")+))")
        counts[key] = max(
            (
                len(found.group(1)) // len(key)
                for found in run_at_offset.finditer(sample)
            ),
            default=0,
        )
    return counts
def main():
    """Identify whose DNA a sample is, given a database CSV and a sample file.

    Expects exactly two command-line arguments: the path of the database CSV
    and the path of the text file holding the DNA sequence. The CSV header
    row (first field ``"name"``) is appended to the module-level ``STRs``
    list; every other non-empty row becomes a dict in ``profiles`` with the
    first field kept as text and the remaining fields converted to ``int``.
    Only the first line of the sample file is analysed.

    Prints the name of the first profile whose counts all equal the
    sample's, or ``No match`` when none does.

    Raises:
        SystemExit: On a wrong argument count or an unreadable input file.
        ValueError: If a count field in the CSV is not an integer.
        IndexError: If a data row has fewer fields than the header.
    """
    if len(sys.argv) != 3:
        sys.exit("Usage: dna.py [database] [sample]")

    try:
        with open(sys.argv[1], newline="") as csvfile:
            for row in csv.reader(csvfile):
                # csv.reader yields [] for a blank line; there is nothing to
                # read from it and row[0] would raise IndexError.
                if not row:
                    continue
                if row[0] == "name":
                    # Header row: remember the column names in file order.
                    STRs.extend(row)
                else:
                    profiles.append({
                        field: row[i] if i == 0 else int(row[i])
                        for i, field in enumerate(STRs)
                    })

        with open(sys.argv[2], newline="") as txtfile:
            # readline() returns "" for an empty file, so the sample is still
            # analysed (all counts 0) instead of leaving sample_object empty,
            # which would make every profile look like a match.
            generate_sample_object(txtfile.readline())
    except OSError as err:
        sys.exit(f"Could not read input file: {err}")

    # Report the first profile whose STR counts all equal the sample's.
    for profile in profiles:
        if profile_matches_sample(profile, sample_object):
            print(profile.get("name"))
            # Return rather than call exit(): that name is injected by the
            # site module and is not guaranteed to exist.
            return

    # No profile matched the sample.
    print("No match")
# Run the program only when executed as a script, so importing this module
# (for reuse or testing) does not parse sys.argv or exit the interpreter.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment