"""Identify whose DNA a sequence belongs to by comparing STR repeat counts.

Usage: python dna.py database.csv sequence.txt

The database is a CSV file with a ``name`` column followed by one column per
STR (short tandem repeat); each cell holds the longest run of consecutive
repeats of that STR for that person. The script prints the name of the single
person whose counts all match the sequence, or ``No match`` otherwise.
"""

import csv
import sys


def longest_run(seq, dna_str):
    """Return the longest number of back-to-back repeats of dna_str in seq.

    Returns 0 when dna_str does not occur, and also when dna_str is empty
    (an empty pattern is "in" every string, which would otherwise never
    terminate).
    """
    if not dna_str:
        return 0
    count = 0
    # If k consecutive repeats exist, so do k - 1; grow k until it fails.
    while dna_str * (count + 1) in seq:
        count += 1
    return count


def str_repeat(seq, dna_str, dbcount):
    """Return True if the longest run of dna_str in seq equals dbcount.

    dbcount may be an int or a numeric string as read from the CSV database.
    Raises ValueError if dbcount cannot be converted to an integer.
    """
    return longest_run(seq, dna_str) == int(dbcount)


def load_database(filename):
    """Read the CSV database.

    Returns a tuple ``(rows, str_names)`` where rows is a list of dicts (one
    per person) and str_names lists every header column other than ``name``,
    in file order. Taking the STR names from the header avoids guessing them
    from the file name.
    """
    # newline="" is what the csv module expects for files it parses itself.
    with open(filename, newline="") as file:
        reader = csv.DictReader(file)
        rows = list(reader)
        # fieldnames is None for a completely empty file.
        fieldnames = reader.fieldnames or []
    str_names = [column for column in fieldnames if column != "name"]
    return rows, str_names


def find_matches(database, str_names, sequence):
    """Return the names of all people whose STR counts all match sequence.

    database is a list of row dicts as returned by load_database. An empty
    str_names yields no matches, since there is nothing to compare.
    Raises ValueError if a database count is not an integer.
    """
    if not str_names:
        return []
    # The sequence's profile does not depend on the person: compute it once.
    profile = {name: longest_run(sequence, name) for name in str_names}
    return [
        row["name"]
        for row in database
        if all(int(row[name]) == profile[name] for name in str_names)
    ]


def main(argv=None):
    """Run the command-line program; argv defaults to sys.argv."""
    argv = sys.argv if argv is None else argv

    # Ensure correct usage
    if len(argv) != 3:
        sys.exit("Usage: python dna.py database.csv sequence.txt")

    database, str_names = load_database(argv[1])

    with open(argv[2], "r") as file:
        sequence = file.read()

    matches = find_matches(database, str_names, sequence)

    # Only an unambiguous result counts as a match.
    if len(matches) == 1:
        print(matches[0])
    else:
        print("No match")


if __name__ == "__main__":
    main()