#!/usr/bin/env python
"""Simulate a set of random DNA sequences and print simple statistics.

Usage: <script> SETSIZE MINLENGTH MAXLENGTH

For each simulated sequence the script prints the sequence itself, its
identity to the next sequence in the set (for all but the last one), and
how many times each nucleotide occurs in it.
"""
import random
import sys

# Alphabet the simulated sequences are drawn from; also fixes the order in
# which nucleotide counts are reported.
NUCLEOTIDES = ('A', 'C', 'G', 'T')


def simulate_sequence(length):
    """Return a random DNA string made of ``length`` nucleotides.

    Each position is drawn independently with ``random.choice`` from
    ``NUCLEOTIDES``. A ``length`` of zero (or less) gives the empty string.
    """
    return ''.join(random.choice(NUCLEOTIDES) for _ in range(length))


def nucleotide_percentage(sequence):
    """Print, on one line, how often each nucleotide occurs in ``sequence``.

    Despite the name, the values printed are raw occurrence counts, not
    percentages; the name is kept so existing callers keep working.
    """
    fields = ['%d %ss' % (sequence.count(base), base) for base in NUCLEOTIDES]
    # Fields are separated by two spaces, matching the layout produced by
    # the original chain of comma-terminated print statements.
    print('  '.join(fields))


def sequence_identity(set):
    """Return the identity of each sequence to the one that follows it.

    ``set`` is an ordered list of sequences (the parameter name shadows the
    builtin but is kept for backward compatibility). The result has one
    entry per adjacent pair, i.e. ``len(set) - 1`` entries, or none when
    there are fewer than two sequences.

    The identity of a pair is the number of positions at which both
    sequences hold the same character, divided by the length of the first
    sequence of the pair. Only positions present in both sequences are
    compared, so a shorter following sequence no longer raises IndexError;
    the positions it lacks simply count as mismatches. An empty first
    sequence yields 0.0 instead of dividing by zero.
    """
    sequences = set
    identities = []
    for current, following in zip(sequences, sequences[1:]):
        if not current:
            identities.append(0.0)
            continue
        # zip() stops at the shorter sequence, which bounds the comparison.
        matches = sum(1 for a, b in zip(current, following) if a == b)
        # float() keeps true division on Python 2 as well.
        identities.append(float(matches) / len(current))
    return identities


def main(argv=None):
    """Run the simulation described by the command-line arguments.

    ``argv`` defaults to ``sys.argv``; positions 1-3 are the number of
    sequences to simulate, the minimum sequence length and the maximum
    sequence length. Returns the process exit status: 0 on success, 2 when
    arguments are missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        program = argv[0] if argv else 'simulate'
        sys.stderr.write('usage: %s SETSIZE MINLENGTH MAXLENGTH\n' % program)
        return 2

    setsize = int(argv[1])    # number of sequences to simulate
    minlength = int(argv[2])  # minimum sequence length
    maxlength = int(argv[3])  # maximum sequence length

    sequenceset = [
        simulate_sequence(random.randint(minlength, maxlength))
        for _ in range(setsize)
    ]

    # Trace of the index pairs being compared. This used to be printed from
    # inside sequence_identity(); it is emitted here so the script's output
    # stays the same while the function itself has no side effects.
    for index in range(len(sequenceset) - 1):
        print('%d %d' % (index, index + 1))
    identity = sequence_identity(sequenceset)

    # Print the results.
    for index, sequence in enumerate(sequenceset):
        print(sequence)
        # The last sequence has no successor and therefore no identity.
        if index < len(identity):
            print('sequence identity to next sequence : ' + str(identity[index]))
        nucleotide_percentage(sequence)
        print('')
    return 0


if __name__ == '__main__':
    sys.exit(main())