Restructured code and renamed project
This commit is contained in:
parent
5a034f59d2
commit
43787805e0
25
.gitignore
vendored
25
.gitignore
vendored
@ -1,5 +1,21 @@
|
|||||||
# Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode
|
# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
|
||||||
# Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode
|
# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
|
||||||
|
# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,linux,python
|
||||||
|
|
||||||
|
### Linux ###
|
||||||
|
*~
|
||||||
|
|
||||||
|
# temporary files which can be created if a process still has a handle open of a deleted file
|
||||||
|
.fuse_hidden*
|
||||||
|
|
||||||
|
# KDE directory preferences
|
||||||
|
.directory
|
||||||
|
|
||||||
|
# Linux trash folder which might appear on any partition or disk
|
||||||
|
.Trash-*
|
||||||
|
|
||||||
|
# .nfs files are created when an open file is removed but is still being accessed
|
||||||
|
.nfs*
|
||||||
|
|
||||||
### Python ###
|
### Python ###
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
@ -192,4 +208,7 @@ pyrightconfig.json
|
|||||||
.history
|
.history
|
||||||
.ionide
|
.ionide
|
||||||
|
|
||||||
# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode
|
# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
|
||||||
|
|
||||||
|
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
|
||||||
|
|
||||||
|
5
.vscode/launch.json
vendored
5
.vscode/launch.json
vendored
@ -8,11 +8,12 @@
|
|||||||
"name": "Run Fasta Filter",
|
"name": "Run Fasta Filter",
|
||||||
"type": "python",
|
"type": "python",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"program": "fasta_filter.py",
|
"program": "${workspaceFolder}/filterfasta/filterfasta.py",
|
||||||
"args": [
|
"args": [
|
||||||
"reference_standards.fas",
|
"reference_standards.fas",
|
||||||
"reference_standards_filtered.fas",
|
"reference_standards_filtered.fas",
|
||||||
"-c", "rt"
|
"-c",
|
||||||
|
"rt"
|
||||||
],
|
],
|
||||||
"console": "integratedTerminal",
|
"console": "integratedTerminal",
|
||||||
"justMyCode": true
|
"justMyCode": true
|
||||||
|
3
.vscode/settings.json
vendored
Normal file
3
.vscode/settings.json
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"python.formatting.provider": "black"
|
||||||
|
}
|
@ -1,65 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
import os
|
|
||||||
from Bio import SeqIO
|
|
||||||
import argparse
|
|
||||||
|
|
||||||
|
|
||||||
def main(args):
    """Filter a FASTA file and write the surviving records to a new file.

    Every record read from ``args.input`` is kept unless the selected header
    field(s) contain ``args.contains``. The kept records are written in FASTA
    format to the absolute form of ``args.output``.

    Args:
        args: Parsed command-line namespace providing ``input``, ``output``,
            ``contains`` and ``property``. ``property`` is either ``"all"``
            (check ``id``, ``name`` and ``description``) or the name of a
            single record attribute to check.
    """
    records = SeqIO.parse(args.input, "fasta")

    if args.contains is None:
        # ``--contains`` is optional and defaults to None. ``None in <str>``
        # raises TypeError, so treat "no search string" as "remove nothing".
        kept_records = list(records)
    else:
        if args.property == "all":
            fields = ("id", "name", "description")
        else:
            fields = (args.property,)
        kept_records = [
            s_record
            for s_record in records
            if not any(
                args.contains in getattr(s_record, field) for field in fields
            )
        ]

    SeqIO.write(kept_records, os.path.abspath(args.output), "fasta")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # The summary must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so passing the sentence
    # positionally replaced the program name in the usage line and left the
    # description empty.
    argparser = argparse.ArgumentParser(
        description="Removes sequences where id contains certain string."
    )

    # Positional arguments: source FASTA file and destination path.
    argparser.add_argument(
        "input",
        type=str,
        metavar="i",
        help="The input fasta file."
    )

    argparser.add_argument(
        "output",
        type=str,
        metavar="o",
        help="The output file path."
    )

    # Optional search string; records whose header contains it are removed.
    argparser.add_argument(
        "-c",
        "--contains",
        type=str,
        default=None,
        required=False,
        help="The string to search for."
    )

    # Which record attribute(s) the search string is matched against.
    argparser.add_argument(
        "-p",
        "--property",
        type=str,
        default="all",
        required=False,
        help="""
        The part of the fasta file to look through. Valid options are: id,
        name, description, or all.
        """
    )

    main(argparser.parse_args())
|
|
0
filterfasta/__init__.py
Normal file
0
filterfasta/__init__.py
Normal file
66
filterfasta/filterfasta.py
Executable file
66
filterfasta/filterfasta.py
Executable file
@ -0,0 +1,66 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import os
|
||||||
|
from Bio import SeqIO
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
|
def filter(input: str, a_property: str, contains: str):
    """Return the FASTA records whose header does not contain a string.

    Args:
        input: Path of the FASTA file to read.
        a_property: ``"all"`` to check the ``id``, ``name`` and
            ``description`` of each record, or the name of a single record
            attribute to check.
        contains: Substring to look for. Records in which it is found are
            dropped. ``None`` means "no search string" and keeps every record.

    Returns:
        A list of the records that were kept, in file order.
    """
    records = SeqIO.parse(input, "fasta")

    if contains is None:
        # The CLI default for --contains is None, and ``None in <str>`` raises
        # TypeError. With nothing to search for, no record can match.
        return list(records)

    if a_property == "all":
        fields = ("id", "name", "description")
    else:
        fields = (a_property,)

    # Keep a record only when none of the selected fields contains the string.
    return [
        s_record
        for s_record in records
        if not any(contains in getattr(s_record, field) for field in fields)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def write_to_fasta(kept_records, output: str):
    """Write records in FASTA format to the absolute form of ``output``.

    Args:
        kept_records: Records to write.
        output: Destination path; resolved with ``os.path.abspath`` first.
    """
    destination = os.path.abspath(output)
    SeqIO.write(kept_records, destination, "fasta")
|
||||||
|
|
||||||
|
|
||||||
|
def run(args):
    """Filter the input FASTA file and write the kept records to the output.

    Args:
        args: Namespace providing ``input``, ``property``, ``contains`` and
            ``output``.
    """
    write_to_fasta(
        filter(args.input, args.property, args.contains),
        args.output,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse the command-line arguments and run the FASTA filter."""
    # The summary must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so passing the sentence
    # positionally replaced the program name in the usage line and left the
    # description empty.
    argparser = argparse.ArgumentParser(
        description="Removes sequences where id contains certain string."
    )

    # Positional arguments: source FASTA file and destination path.
    argparser.add_argument("input", type=str, metavar="i", help="The input fasta file.")

    argparser.add_argument(
        "output", type=str, metavar="o", help="The output file path."
    )

    # Optional search string; records whose header contains it are removed.
    argparser.add_argument(
        "-c",
        "--contains",
        type=str,
        default=None,
        required=False,
        help="The string to search for.",
    )

    # Which record attribute(s) the search string is matched against.
    argparser.add_argument(
        "-p",
        "--property",
        type=str,
        default="all",
        required=False,
        help="""
        The part of the fasta file to look through. Valid options are: 'id',
        'name', 'description', or 'all'.
        """,
    )

    args = argparser.parse_args()
    run(args)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user