61 lines
1.5 KiB
Python
61 lines
1.5 KiB
Python
|
import csv
|
||
|
import sys
|
||
|
|
||
|
# Ensure correct usage: exactly two arguments, the database CSV and the
# sequence text file.
if len(sys.argv) != 3:
    sys.exit("Usage: python dna.py database.csv sequence.txt")

# STR sets of the two reference databases. They are only used as a fallback
# when the database header itself lists no STR columns.
str_search_big = ["AGATC", "TTTTTTCT", "AATG", "TCTAG", "GATA", "TATC", "GAAA", "TCTG"]
str_search_small = ["AGATC", "AATG", "TATC"]

# Take the STRs to search for from the database header instead of guessing
# them from the file name: a path that merely contains "small" (for example
# in a directory name) would otherwise select the wrong STR set.
# Assumes every column other than "name" holds an STR repeat count.
with open(sys.argv[1], newline="") as header_file:
    header = csv.DictReader(header_file).fieldnames or []
str_search = [column for column in header if column != "name"]

if not str_search:
    # No STR columns in the header: keep the previous file-name heuristic.
    if "small" in sys.argv[1]:
        str_search = str_search_small
    else:
        str_search = str_search_big

search_len = len(str_search)
|
||
|
|
||
|
|
||
|
def str_repeat(seq, dna_str, dbcount, person=None, candidates=None):
    """Drop a person from the candidate list when an STR run length differs.

    Counts the longest run of back-to-back copies of ``dna_str`` inside
    ``seq`` and compares it with ``dbcount``. When the two differ, the
    person's name is removed from the candidate list (in place).

    Args:
        seq: DNA sequence to search in.
        dna_str: STR pattern whose longest consecutive run is measured.
        dbcount: Expected run length for the person; an int or a string
            accepted by ``int()``.
        person: Name to remove on a mismatch. When omitted, the module-level
            ``key["name"]`` is used.
        candidates: List of names to prune. When omitted, the module-level
            ``match`` list is used.

    Returns:
        None. The candidate list is modified in place.

    Raises:
        ValueError: If ``dbcount`` cannot be converted with ``int()``.
    """
    match_count = 0
    # An empty pattern is contained in every string and never grows when
    # appended to itself, so it must not enter the loop (it would never end).
    if dna_str:
        run = dna_str
        # Extend the probe by one repeat at a time; it stops matching as soon
        # as it is longer than the longest run present in the sequence.
        while run in seq:
            match_count += 1
            run += dna_str

    if match_count == int(dbcount):
        return

    # Fall back to the globals that the calling script maintains.
    if candidates is None:
        candidates = match
    if person is None:
        person = key["name"]

    # The person may already have been removed because of an earlier STR;
    # that is expected and not an error.
    if person in candidates:
        candidates.remove(person)
|
||
|
|
||
|
|
||
|
# Initialize variables.
filename = sys.argv[1]
database = []
# Names still consistent with the sequence; str_repeat() removes entries
# from this list as mismatches are found.
match = []

# Read database into memory from file. newline="" lets the csv module do its
# own newline handling, as its documentation recommends.
with open(filename, newline="") as file:
    reader = csv.DictReader(file)
    for row in reader:
        database.append(row)
        # Every person starts out as a possible match.
        match.append(row["name"])

# Get the sequence string from the text file; the with-block closes the
# handle, which the previous bare open() never did.
with open(sys.argv[2], "r") as sequence_file:
    sequence = sequence_file.read()

# Loop through all searchable STRs.
for dna_str in str_search:
    # Loop through all database entries.
    # NOTE: the loop variable must stay named `key` - str_repeat() reads the
    # module-level `key` and `match` to know whom to remove.
    for key in database:
        str_repeat(sequence, dna_str, key[dna_str])

# If exactly one person is left at the end, print that name.
if len(match) == 1:
    print(match[0])
else:
    print("No match")
|