CS50_Labs/Lab6/dna/dna.py

61 lines
1.5 KiB
Python

import csv
import sys
# Ensure correct usage: exactly two arguments (database CSV, sequence text file).
if len(sys.argv) != 3:
    sys.exit("Usage: python dna.py database.csv sequence.txt")

# Fallback STR lists, used only when the database header yields no STR
# columns (see below).
str_search_big = ["AGATC", "TTTTTTCT", "AATG", "TCTAG", "GATA", "TATC", "GAAA", "TCTG"]
str_search_small = ["AGATC", "AATG", "TATC"]

# Take the STRs to search for from the database header itself: every column
# other than "name". Guessing the list from whether the path contains
# "small" breaks as soon as a database is renamed, lives in a directory
# whose name contains "small", or carries a different set of STR columns.
with open(sys.argv[1], newline="") as header_file:
    header = next(csv.reader(header_file), [])
str_search = [column for column in header if column != "name"]

if not str_search:
    # Empty file or header without STR columns: keep the previous
    # filename-based choice so the rest of the script still has a list.
    str_search = str_search_small if "small" in sys.argv[1] else str_search_big

search_len = len(str_search)
def str_repeat(seq, dna_str, dbcount, *, name=None, candidates=None):
    """Drop a person from the candidate list if their STR count does not match.

    Finds the longest run of back-to-back repeats of ``dna_str`` in ``seq``
    and compares it with ``dbcount``. On a mismatch the person is removed
    from the candidate list; on a match nothing happens.

    Args:
        seq: DNA sequence to search.
        dna_str: STR pattern whose consecutive repeats are counted.
        dbcount: Repeat count recorded for the person; converted with
            ``int()``, so a numeric string is accepted.
        name: Person to remove on a mismatch. Defaults to the module-level
            ``key["name"]``, which is what existing callers rely on.
        candidates: List to remove the person from. Defaults to the
            module-level ``match`` list.

    Returns:
        None. The candidate list is modified in place.

    Raises:
        ValueError: If ``dbcount`` cannot be converted to an integer.
    """
    pattern = dna_str
    match_count = 0
    # An empty pattern is "found" in every string and never grows, so the
    # search below would never terminate; treat it as zero repeats.
    if pattern:
        # Grow the probe by one repeat at a time; the first length that is
        # absent from the sequence ends the longest run.
        while pattern * (match_count + 1) in seq:
            match_count += 1

    if match_count == int(dbcount):
        return

    # Resolve the globals only when a removal is actually needed, so the
    # function touches module state no more often than before.
    if name is None:
        name = key["name"]
    if candidates is None:
        candidates = match

    # The person may already have been removed by an earlier STR; check
    # membership instead of swallowing every exception with a bare except.
    if name in candidates:
        candidates.remove(name)
# initialize variables
filename = sys.argv[1]
database = []
match = []

# Read database into memory from file
with open(filename, newline="") as file:
    reader = csv.DictReader(file)
    for row in reader:
        database.append(row)
        # populate match list with all possible names
        match.append(row["name"])

# get sequence string from txt; the with-block closes the file handle,
# which the previous bare open() never did
with open(sys.argv[2], "r") as sequence_file:
    sequence = sequence_file.read()

# loop through all searchable STRs
for dna_str in str_search:
    # loop through all database entries
    # NOTE: the loop variable must stay named `key`, and the candidate list
    # `match`: str_repeat reads both as module-level globals when it removes
    # a person whose count does not match.
    for key in database:
        str_repeat(sequence, dna_str, key[dna_str])

# if only one person is left at the end, print their name
if len(match) == 1:
    print(match[0])
else:
    print("No match")