#!/usr/bin/env python
"""Interactively count the A, C, G and T characters in sequence files.

The script repeatedly prompts for a file name.  For each file it removes the
newlines, counts the upper-case letters A, C, G and T, collects runs of other
upper-case letters, and writes a report to ``<filename>.count``.  Entering an
empty file name ends the program.
"""
import re
import sys

# Python 2 calls the line-reading prompt ``raw_input``; Python 3 renamed it to
# ``input``.  Bind whichever one exists so the script runs on both.
try:
    read_line = raw_input
except NameError:
    read_line = input

# Runs of upper-case letters that the report lists as errors.
# NOTE(review): 'W' and 'Y' are not in this class, so they are neither counted
# nor reported -- confirm whether that omission is intentional.
OTHER_LETTERS = re.compile('[BDEFHIJKLMNOPQRSUVXZ]+')


def read_sequence(filename):
    """Return the text of *filename* with every newline character removed.

    Raises:
        IOError / OSError: if the file cannot be opened or read.
    """
    # The context manager closes the handle even when reading fails.
    with open(filename, 'r') as handle:
        return handle.read().replace('\n', '')


def build_report(filename, sequence):
    """Return the full text of the count report for *sequence*.

    Args:
        filename: name shown in the report header.
        sequence: sequence text, already stripped of newlines.

    Returns:
        The report as a single string: a header line, one ``X = n`` line for
        each of A, C, G and T, then either the list of unexpected letter runs
        or ``No error found``.  The last line has no trailing newline.
    """
    parts = ['Count report for file ' + filename + '\n']
    for base in 'ACGT':
        parts.append(base + ' = ' + str(sequence.count(base)) + '\n')

    extra = OTHER_LETTERS.findall(sequence)
    if extra:
        # The reported number is the count of *runs*, not of single letters.
        parts.append('Also were found ' + str(len(extra)) + ' errors\n')
        parts.extend(run + ' ' for run in extra)
    else:
        parts.append('No error found')
    return ''.join(parts)


def main():
    """Prompt for file names until an empty one is entered, reporting on each."""
    while True:
        filename = read_line('Please enter a file to check: ')
        if not filename:
            # Nothing entered: the user is done.
            sys.exit()

        # Only I/O failures are handled; anything else (including Ctrl-C) is
        # allowed to propagate instead of being reported as a missing file.
        try:
            sequence = read_sequence(filename)
        except (IOError, OSError):
            print('File not found. Please try again.')
            continue

        report_name = filename + '.count'
        try:
            with open(report_name, 'w') as output:
                output.write(build_report(filename, sequence))
        except (IOError, OSError):
            print('Could not write ' + report_name + '. Please try again.')
            continue

        print('Result file saved on ' + report_name)


if __name__ == '__main__':
    main()