-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconsensus_sequence.py
More file actions
52 lines (39 loc) · 1.38 KB
/
consensus_sequence.py
File metadata and controls
52 lines (39 loc) · 1.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import sys
from Bio.Align import MultipleSeqAlignment
from Bio import AlignIO
def calculate_consensus_sequence(clustal_omega_file):
"""
Function to calculate consensus sequence
Parameters
----------
clustal_omega_file : str
path to clustal omega output file
Return
----------
None
"""
consensus = None
# Calculate consensus sequence and print out
# Since there is 1 alignment in the file, we can use AlignIO.read
# Clustal Omega typically produces a single alignment
alignment = AlignIO.read(clustal_omega_file, "fasta")
# Get consensus using modern BioPython API
# Use simple majority voting for each position
consensus_str = ""
for i in range(alignment.get_alignment_length()):
column = alignment[:, i]
# Count occurrences of each character
counts = {}
for char in column:
if char != '-': # Ignore gaps
counts[char] = counts.get(char, 0) + 1
# Get most common character (or 'X' if all gaps)
if counts:
consensus_str += max(counts, key=counts.get)
else:
consensus_str += 'X'
consensus = consensus_str
print(consensus)
if __name__ == "__main__":
calculate_consensus_sequence(sys.argv[1])
# Example: calculate_consensus_sequence("./msa_output.fasta")