import sys defcalculateGC(sequence:str)->Tuple[int,int]: """Calculate the GC content of a DNA sequence""" gc = 0 allnumber = 0 for i in sequence: if i != 'N'and i != 'n': allnumber += 1 if i == 'G'or i == 'C'or i == 'g'or i == 'c': gc += 1 return gc, allnumber
defmain(file:str): """Main function""" gcNum = 0 allNum = 0 withopen(file,'r') as f: for line in f: line=line.strip() if line.startswith('>'): continue else: gc, allnumber = calculateGC(line) gcNum += gc allNum += allnumber print(f'GC content is: {gcNum/allNum:.3f}')
main(sys.argv[1])
然后运行
1
codon build --release -o calGC calGC.py
最后速度为:
1 2 3
time ./calGC ../../Project/DataBase/hg38.fa GC content is: 0.410 ./calGC ../../Project/DataBase/hg38.fa 22.30s user 2.75s system 111% cpu 22.421 total
function lineGC(seq::String) GCnumber=count(x->(x=='G'||x=='C'||x=='g'||x=='c'),seq) lineNum=count(x->(x!='N' && x!='n'),seq) (GCnumber,lineNum) end
function calGC(fs) GCnumber=zero(Int) lineNum=zero(Int) open(fs,"r") do IOstream for line in eachline(IOstream) if startswith(line,">") continue else GC,all=lineGC(line) GCnumber+=GC lineNum+=all end end end round(GCnumber/lineNum;digits=3) end
deflineGC(seq): tmp=[base for base in seq if base =="G"or base =="g"or base == "C"or base == "c"] gcNumber=len(tmp) tmp2=[base for base in seq if base !="N"and base !="n"] allNumber=len(tmp2) return (gcNumber,allNumber)
withopen(sys.argv[1],'r') as f: gcNum=0 allNum=0 for line in f: if line.startswith(">"): continue else: gc,alln=lineGC(line.strip("\n")) gcNum=gcNum+gc allNum=allNum+alln
withopen(sys.argv[1],'r') as f: gcNum=0 allNum=0 for line in f: if line.startswith(">"): continue else: gc,alln=lineGC(line.strip("\n")) gcNum=gcNum+gc allNum=allNum+alln
function lineGC(seq) GCnumber=count(x->(x==DNA_G||x==DNA_C),seq) lineNum=length(seq)-count(isambiguous,seq) GCnumber,lineNum end
function calGC(fs) GCnumber=zero(Int) lineNum=zero(Int) reader=open(FASTA.Reader,fs) for record in reader GC,all=lineGC(FASTA.sequence(record)) GCnumber+=GC lineNum+=all end close(reader) round(GCnumber/lineNum;digits=3) end