Restructured code and renamed project

This commit is contained in:
Harrison Deng 2023-04-21 12:16:29 -05:00
parent 5a034f59d2
commit 43787805e0
7 changed files with 97 additions and 70 deletions

25
.gitignore vendored
View File

@ -1,5 +1,21 @@
# Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode
# Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode
# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,linux,python
### Linux ###
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### Python ###
# Byte-compiled / optimized / DLL files
@ -192,4 +208,7 @@ pyrightconfig.json
.history
.ionide
# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode
# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)

5
.vscode/launch.json vendored
View File

@ -8,11 +8,12 @@
"name": "Run Fasta Filter",
"type": "python",
"request": "launch",
"program": "fasta_filter.py",
"program": "${workspaceFolder}/filterfasta/filterfasta.py",
"args": [
"reference_standards.fas",
"reference_standards_filtered.fas",
"-c", "rt"
"-c",
"rt"
],
"console": "integratedTerminal",
"justMyCode": true

3
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,3 @@
{
"python.formatting.provider": "black"
}

View File

@ -1,65 +0,0 @@
#!/usr/bin/env python3
import os
from Bio import SeqIO
import argparse
def main(args):
    """Filter a FASTA file, dropping records that contain ``args.contains``.

    Records are read from ``args.input``; those whose searched text does NOT
    contain ``args.contains`` are written to ``args.output`` in FASTA format.

    Args:
        args: Parsed command-line namespace providing ``input``, ``output``,
            ``contains`` and ``property``. When ``property`` is ``"all"`` the
            record's ``id``, ``name`` and ``description`` are searched;
            otherwise the attribute named by ``property`` is searched.
    """
    needle = args.contains

    def is_kept(record):
        # ``--contains`` defaults to None. Without a search string nothing can
        # match, so keep the record instead of failing on ``None in <str>``.
        if needle is None:
            return True
        if args.property == "all":
            searched = (record.id, record.name, record.description)
        else:
            searched = (getattr(record, args.property),)
        return not any(needle in text for text in searched)

    kept_records = [
        s_record for s_record in SeqIO.parse(args.input, "fasta") if is_kept(s_record)
    ]
    SeqIO.write(kept_records, os.path.abspath(args.output), "fasta")
if __name__ == "__main__":
    # The summary text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, which would put the whole
    # sentence in the usage line as if it were the program name.
    argparser = argparse.ArgumentParser(
        description="Removes sequences where id contains certain string."
    )
    argparser.add_argument(
        "input",
        type=str,
        metavar="i",
        help="The input fasta file."
    )
    argparser.add_argument(
        "output",
        type=str,
        metavar="o",
        help="The output file path."
    )
    # Optional: when omitted the value is None.
    argparser.add_argument(
        "-c",
        "--contains",
        type=str,
        default=None,
        required=False,
        help="The string to search for."
    )
    argparser.add_argument(
        "-p",
        "--property",
        type=str,
        default="all",
        required=False,
        help="""
        The part of the fasta file to look through. Valid options are: id,
        name, description, or all.
        """
    )
    main(argparser.parse_args())

0
filterfasta/__init__.py Normal file
View File

66
filterfasta/filterfasta.py Executable file
View File

@ -0,0 +1,66 @@
#!/usr/bin/env python3
import os
from Bio import SeqIO
import argparse
def filter(input: str, a_property: str, contains: str):
    """Return the records of a FASTA source that do not contain ``contains``.

    NOTE: the function and its ``input`` parameter shadow Python builtins; the
    names are kept so existing callers continue to work.

    Args:
        input: FASTA source handed to ``SeqIO.parse``.
        a_property: Record attribute to search. ``"all"`` searches ``id``,
            ``name`` and ``description``; any other value is looked up on each
            record with ``getattr``.
        contains: Substring that marks a record for removal. ``None`` disables
            filtering, so every record is kept.

    Returns:
        A list of the records whose searched text does not contain
        ``contains``, in the order ``SeqIO.parse`` yields them.
    """
    # No search string means nothing can match; return everything rather than
    # failing on ``None in <str>`` (TypeError).
    if contains is None:
        return list(SeqIO.parse(input, "fasta"))

    def matches(record):
        if a_property == "all":
            searched = (record.id, record.name, record.description)
        else:
            searched = (getattr(record, a_property),)
        return any(contains in text for text in searched)

    return [
        s_record for s_record in SeqIO.parse(input, "fasta") if not matches(s_record)
    ]
def write_to_fasta(kept_records, output: str):
    """Write ``kept_records`` to ``output`` in FASTA format.

    The output path is made absolute before it is handed to ``SeqIO.write``.
    """
    destination = os.path.abspath(output)
    SeqIO.write(kept_records, destination, "fasta")
def run(args):
    """Filter the input FASTA file and write the surviving records.

    Args:
        args: Parsed arguments exposing ``input``, ``property``, ``contains``
            and ``output``.
    """
    write_to_fasta(
        filter(args.input, args.property, args.contains),
        args.output,
    )
def main():
    """Parse the command-line arguments and run the FASTA filter."""
    # The summary text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, which would put the whole
    # sentence in the usage line as if it were the program name.
    argparser = argparse.ArgumentParser(
        description="Removes sequences where id contains certain string."
    )
    argparser.add_argument("input", type=str, metavar="i", help="The input fasta file.")
    argparser.add_argument(
        "output", type=str, metavar="o", help="The output file path."
    )
    # Optional: when omitted the value is None.
    argparser.add_argument(
        "-c",
        "--contains",
        type=str,
        default=None,
        required=False,
        help="The string to search for.",
    )
    argparser.add_argument(
        "-p",
        "--property",
        type=str,
        default="all",
        required=False,
        help="""
        The part of the fasta file to look through. Valid options are: 'id',
        'name', 'description', or 'all'.
        """,
    )
    args = argparser.parse_args()
    run(args)


# Only run the CLI when executed as a script, not when imported.
if __name__ == "__main__":
    main()

3
tox.ini Normal file
View File

@ -0,0 +1,3 @@
[flake8]
max-line-length = 88
extend-ignore = E203