From 43787805e09681a106a80d3efeb9bb043f245b0f Mon Sep 17 00:00:00 2001 From: Harrison Date: Fri, 21 Apr 2023 12:16:29 -0500 Subject: [PATCH] Restructured code and renamed project --- .gitignore | 25 +++++++++++++-- .vscode/launch.json | 5 +-- .vscode/settings.json | 3 ++ fasta_filter.py | 65 ------------------------------------- filterfasta/__init__.py | 0 filterfasta/filterfasta.py | 66 ++++++++++++++++++++++++++++++++++++++ tox.ini | 3 ++ 7 files changed, 97 insertions(+), 70 deletions(-) create mode 100644 .vscode/settings.json delete mode 100755 fasta_filter.py create mode 100644 filterfasta/__init__.py create mode 100755 filterfasta/filterfasta.py create mode 100644 tox.ini diff --git a/.gitignore b/.gitignore index 4934ad1..a31886c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,21 @@ -# Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode -# Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode +# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig +# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python +# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,linux,python + +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* ### Python ### # Byte-compiled / optimized / DLL files @@ -192,4 +208,7 @@ pyrightconfig.json .history .ionide -# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode +# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python + +# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option) + diff --git a/.vscode/launch.json b/.vscode/launch.json index 30abaec..bac4f26 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,11 +8,12 @@ "name": "Run Fasta Filter", "type": "python", "request": "launch", - "program": "fasta_filter.py", + "program": "${workspaceFolder}/filterfasta/filterfasta.py", "args": [ "reference_standards.fas", "reference_standards_filtered.fas", - "-c", "rt" + "-c", + "rt" ], "console": "integratedTerminal", "justMyCode": true diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..de288e1 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.formatting.provider": "black" +} \ No newline at end of file diff --git a/fasta_filter.py b/fasta_filter.py deleted file mode 100755 index 0268ed9..0000000 --- a/fasta_filter.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python3 - -import os -from Bio import SeqIO -import argparse - - -def main(args): - kept_records = [] - for s_record in SeqIO.parse(args.input, "fasta"): - if args.property != "all" and not ( - args.contains in getattr(s_record, args.property) - ): - kept_records.append(s_record) - elif args.property == "all" and not ( - args.contains in s_record.id - or args.contains in s_record.name - or args.contains in s_record.description): - kept_records.append(s_record) - SeqIO.write( - kept_records, - os.path.abspath(args.output), - "fasta" - ) - - -if __name__ == "__main__": - argparser = argparse.ArgumentParser( - "Removes sequences where id contains certain string.") - argparser.add_argument( - "input", - type=str, - metavar="i", - help="The input fasta file." - ) - - argparser.add_argument( - "output", - type=str, - metavar="o", - help="The output file path." - ) - - argparser.add_argument( - "-c", - "--contains", - type=str, - default=None, - required=False, - help="The string to search for." - ) - - argparser.add_argument( - "-p", - "--property", - type=str, - default="all", - required=False, - help=""" - The part of the fasta file to look through. Valid options are: id, - name, description, or all. - """ - ) - - main(argparser.parse_args()) diff --git a/filterfasta/__init__.py b/filterfasta/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/filterfasta/filterfasta.py b/filterfasta/filterfasta.py new file mode 100755 index 0000000..8f21616 --- /dev/null +++ b/filterfasta/filterfasta.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 + +import os +from Bio import SeqIO +import argparse + + +def filter(input: str, a_property: str, contains: str): + kept_records = [] + for s_record in SeqIO.parse(input, "fasta"): + if a_property != "all" and not (contains in getattr(s_record, a_property)): + kept_records.append(s_record) + elif a_property == "all" and not ( + contains in s_record.id + or contains in s_record.name + or contains in s_record.description + ): + kept_records.append(s_record) + return kept_records + + +def write_to_fasta(kept_records, output: str): + SeqIO.write(kept_records, os.path.abspath(output), "fasta") + + +def run(args): + kept = filter(args.input, args.property, args.contains) + write_to_fasta(kept, args.output) + + +def main(): + argparser = argparse.ArgumentParser( + "Removes sequences where id contains certain string." + ) + argparser.add_argument("input", type=str, metavar="i", help="The input fasta file.") + + argparser.add_argument( + "output", type=str, metavar="o", help="The output file path." + ) + + argparser.add_argument( + "-c", + "--contains", + type=str, + default=None, + required=False, + help="The string to search for.", + ) + + argparser.add_argument( + "-p", + "--property", + type=str, + default="all", + required=False, + help=""" + The part of the fasta file to look through. Valid options are: 'id', + 'name', 'description', or 'all'. + """, + ) + args = argparser.parse_args() + run(args) + + +if __name__ == "__main__": + main() diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..e0ea542 --- /dev/null +++ b/tox.ini @@ -0,0 +1,3 @@ +[flake8] +max-line-length = 88 +extend-ignore = E203 \ No newline at end of file