bmlsa/src/bmlsa/cli.py
Harrison 78b4a74bc8
All checks were successful
ydeng/bmlsa/pipeline/head This commit looks good
Removed erroneous line from 'cli.py'
2023-04-28 11:08:51 -05:00

191 lines
5.4 KiB
Python

import os
import argparse
from Bio import SeqIO
import logging
from bmlsa.aligner import align_many_to_one_ssw
from bmlsa.io import read_annotations_from_csv, save_alignments_to_csv
logger = logging.getLogger(__name__)

# Built-in scoring presets selectable through "--behave-as". Each preset
# provides every scoring keyword that ``run`` forwards to the aligner.
DEFAULT_ALIGNMENT_PARAMETERS = {
    "BLASTp": {
        "extend_gap_score": -1,
        "open_gap_score": -11,
        "substitution_matrix": "BLOSUM62",
        "alignment_mode": "local",
    }
}


def run(args):
    """Align every annotated sequence against each reference sequence.

    The scoring parameters are resolved in three steps: the preset named by
    ``args.behave_as`` (if it is a key of ``DEFAULT_ALIGNMENT_PARAMETERS``)
    is used as the base, any explicitly supplied scoring argument overrides
    the preset value, and the substitution matrix falls back to "BLOSUM62"
    when neither source provides one.

    For each record in the FASTA file at ``args.sequence`` the annotations
    read from ``args.annotations`` are aligned and the result is written to
    ``<args.output>/<record id>.csv`` (with "|" replaced by "+" and "."
    replaced by "_" in the record id).

    Args:
        args: Namespace with the attributes ``annotations``, ``sequence``,
            ``output``, ``id_header``, ``seq_header``, ``name_header``,
            ``desc_header``, ``start_header``, ``end_header``,
            ``extend_gap_score``, ``open_gap_score``, ``alignment_mode``,
            ``substitution_matrix`` and ``behave_as``.

    Raises:
        SystemExit: With exit code 1 when the gap scores and the alignment
            mode cannot be fully determined from the arguments.
    """
    scoring_parameter = {}
    if args.behave_as is not None:
        if args.behave_as in DEFAULT_ALIGNMENT_PARAMETERS:
            scoring_parameter.update(DEFAULT_ALIGNMENT_PARAMETERS[args.behave_as])
        else:
            # Keep going: the explicit arguments may still describe a
            # complete scoring scheme. If they do not, the check below fails.
            logger.warning(
                'Unknown "--behave-as" preset "%s"; known presets are: %s.',
                args.behave_as,
                ", ".join(sorted(DEFAULT_ALIGNMENT_PARAMETERS)),
            )

    explicit = {
        "extend_gap_score": args.extend_gap_score,
        "open_gap_score": args.open_gap_score,
        "alignment_mode": args.alignment_mode,
        "substitution_matrix": args.substitution_matrix,
    }
    # Compare against None rather than relying on truthiness so that a
    # score of 0 counts as a supplied value.
    scoring_parameter.update(
        {key: value for key, value in explicit.items() if value is not None}
    )
    scoring_parameter.setdefault("substitution_matrix", "BLOSUM62")

    required = ("extend_gap_score", "open_gap_score", "alignment_mode")
    if any(key not in scoring_parameter for key in required):
        logger.error(
            'Must either specify all of "--extend-gap-score", "--open-gap-score", '
            '"--alignment-mode", or, at least "--behave-as". See help (-h) for '
            "more information."
        )
        # ``exit`` is injected by the ``site`` module and is not guaranteed
        # to exist; raising SystemExit is always available.
        raise SystemExit(1)

    queries = read_annotations_from_csv(
        args.annotations,
        args.id_header,
        args.seq_header,
        args.name_header,
        args.desc_header,
        args.start_header,
        args.end_header,
    )

    with open(args.sequence, "r") as sequence_fd:
        for sequence in SeqIO.parse(sequence_fd, "fasta"):
            aligned_annotations = align_many_to_one_ssw(
                str(sequence.seq), queries, **scoring_parameter
            )
            save_alignments_to_csv(
                aligned_annotations,
                os.path.join(
                    args.output,
                    sequence.id.replace("|", "+").replace(".", "_") + ".csv",
                ),
            )
def main():
    """Parse the command line arguments of ``bmlsa`` and hand them to ``run``.

    Three positional arguments (annotations CSV, reference FASTA, output
    location) are followed by options naming the CSV column headers and
    options controlling the alignment scoring.
    """
    arg_parser = argparse.ArgumentParser("bmlsa")

    # Positional arguments: input and output locations.
    arg_parser.add_argument(
        "annotations",
        type=str,
        help=(
            "Path to CSV containing the sequences to align as well as the "
            "annotations for the respective sequences."
        ),
        metavar="a",
    )
    arg_parser.add_argument(
        "sequence",
        type=str,
        help=(
            "Path to the sequence to use as reference in FASTA format. "
            "If multiple sequences are present in the same FASTA file, "
            "each will be used as a separate reference sequence for separate "
            "runs automatically."
        ),
        metavar="s",
    )
    arg_parser.add_argument(
        "output", type=str, help="Path to output location", metavar="o"
    )

    # Column headers of the annotations CSV.
    arg_parser.add_argument(
        "-I",
        "--id-header",
        type=str,
        help="The header of the column for the ID of the sequence to align to "
        "the reference sequence.",
        required=True,
    )
    arg_parser.add_argument(
        "-N",
        "--name-header",
        type=str,
        help="The header of the column for the name of the sequence to align to "
        "the reference sequence.",
        required=False,
    )
    arg_parser.add_argument(
        "-D",
        "--desc-header",
        type=str,
        help="The header of the column for the description of the sequence to "
        "align to the reference sequence.",
        required=False,
    )
    arg_parser.add_argument(
        "-T",
        "--start-header",
        type=str,
        help="The header of the column for the start position of the sequence to "
        "align to the reference sequence.",
        required=False,
    )
    arg_parser.add_argument(
        "-E",
        "--end-header",
        type=str,
        help="The header of the column for end position of the sequence to "
        "align to the reference sequence.",
        required=False,
    )
    arg_parser.add_argument(
        "-S",
        "--seq-header",
        type=str,
        help="The header of the column for the actual sequence to align to the "
        "reference sequence.",
        required=True,
    )

    # Alignment scoring options.
    arg_parser.add_argument(
        "-e",
        "--extend-gap-score",
        type=int,
        help="The scoring for extending a gap.",
        required=False,
        default=None,
    )
    arg_parser.add_argument(
        "-o",
        "--open-gap-score",
        type=int,
        help="The scoring for opening a gap.",
        required=False,
        default=None,
    )
    arg_parser.add_argument(
        "-M",
        "--alignment-mode",
        type=str,
        help="The alignment mode.",
        choices=["local", "global"],
        required=False,
    )
    arg_parser.add_argument(
        "-m",
        "--substitution-matrix",
        type=str,
        help="The name of the substitution matrix.",
        required=False,
        default=None,
    )
    arg_parser.add_argument(
        "-B",
        "--behave-as",
        type=str,
        help="Use built-in parameters for alignment scoring. If this is specified "
        "along either of the scoring arguments, the alignment parameter arguments "
        '("--open-gap-score" and/or "--extend-gap-score" and "--alignment-mode") '
        "will override any defaults set by this argument.",
        required=False,
        default=None,
    )

    args = arg_parser.parse_args()
    run(args)


if __name__ == "__main__":
    main()