61 lines
1.5 KiB
Python
61 lines
1.5 KiB
Python
import csv
|
|
import sys
|
|
|
|
# Require exactly two command-line arguments: the CSV database and the
# sequence text file.
if len(sys.argv) != 3:
    sys.exit("Usage: python dna.py database.csv sequence.txt")

# Two candidate lists of STR markers to search for.
str_search_small = ["AGATC", "AATG", "TATC"]
str_search_big = [
    "AGATC",
    "TTTTTTCT",
    "AATG",
    "TCTAG",
    "GATA",
    "TATC",
    "GAAA",
    "TCTG",
]

# A database path containing "small" selects the short list; any other
# path selects the long one.
str_search = str_search_small if "small" in sys.argv[1] else str_search_big

search_len = len(str_search)
|
|
|
|
|
|
# searches for longest repeated STR sequence
def str_repeat(seq, dna_str, dbcount):
    """Drop the current person from ``match`` when their STR count differs.

    Counts the longest run of back-to-back copies of ``dna_str`` found in
    ``seq`` and compares it with ``dbcount``.  On a mismatch, ``key["name"]``
    is removed from ``match`` if it is still present.

    Relies on two module-level names that the calling script must define
    before the call: ``match`` (list of candidate names) and ``key`` (the
    database row currently being checked).

    Args:
        seq: String to search.
        dna_str: STR pattern to look for.  An empty pattern counts as zero
            repeats.
        dbcount: Expected repeat count; converted with ``int()``.

    Returns:
        None.  The result is the side effect on ``match``.

    Raises:
        ValueError: If ``dbcount`` is a string that is not a valid integer.
    """
    match_count = 0

    # An empty pattern is a substring of every string, so the loop below
    # would never terminate; treat it as zero repeats instead.
    if dna_str:
        repeated = dna_str
        # Grow the candidate by one copy of the pattern per iteration; the
        # loop stops at the first run length that does not occur in seq.
        while repeated in seq:
            match_count += 1
            repeated += dna_str

    # if longest sequence doesn't match with DB, remove this person from match list
    if match_count != int(dbcount):
        name = key["name"]
        # The person may already have been removed by an earlier STR check.
        if name in match:
            match.remove(name)
|
|
|
|
|
|
# initialize variables
filename = sys.argv[1]
database = []
match = []

# Read database into memory from file.  newline="" is what the csv module
# documentation recommends for files handed to a csv reader.
with open(filename, newline="") as file:
    reader = csv.DictReader(file)
    for row in reader:
        database.append(row)
        # populate match list with all possible names
        match.append(row["name"])

# get sequence string from txt; the with-block closes the file once it
# has been read
with open(sys.argv[2], "r") as sequence_file:
    sequence = sequence_file.read()
|
|
# Check every searchable STR against every database entry.
for marker in str_search:
    # The inner loop variable must stay named `key`: str_repeat reads it
    # (together with `match`) as a module-level name.
    for key in database:
        str_repeat(sequence, marker, key[marker])

# Print the name only when exactly one candidate is left.
print(match[0] if len(match) == 1 else "No match")
|