Compare commits
6 Commits
Author | SHA1 | Date | |
---|---|---|---|
ae96f2f9df | |||
15ad241303 | |||
b56c75849c | |||
09cc9787fd | |||
8e1b30ae7d | |||
a3189c6f3d |
1
Jenkinsfile
vendored
1
Jenkinsfile
vendored
@ -40,7 +40,6 @@ pipeline {
|
||||
when {
|
||||
branch '**/main'
|
||||
}
|
||||
|
||||
environment {
|
||||
CREDS = credentials('username-password-rs-git')
|
||||
}
|
||||
|
@ -35,13 +35,13 @@ This CLI can be installed with `pip`. Please ensure [pip is installed](https://p
|
||||
|
||||
Let's say you have a FASTA file called `seq.fasta` which contains several sequences. You know all sequences in `seq.fasta` are Bordetella pertussis sequences, and you know you have the sequences for the necessary targets of your scheme in each of them. You want to retrieve MLST profiles for all of them. This can be done by:
|
||||
|
||||
1. Running `autobigs info -l` to list all available `seqdef` databases and find the database associated with Bordetella (you should see one called `pubmlst_bordetella_seqdef`).
|
||||
1. Running `autoBIGS info -l` to list all available `seqdef` databases and find the database associated with Bordetella (you should see one called `pubmlst_bordetella_seqdef`).
|
||||
|
||||
2. Then, run `autobigs info -lscheme pubmlst_bordetella_seqdef` to get the available typing schemes and their associated IDs. In this example, let's assume we want a normal MLST scheme. In this case, we would pay attention to the number next to `MLST` (it should be `3`).
|
||||
2. Then, run `autoBIGS info -lschemes pubmlst_bordetella_seqdef` to get the available typing schemes and their associated IDs. In this example, let's assume we want a normal MLST scheme. In this case, we would pay attention to the number next to `MLST` (it should be `3`). Alternatively, we can look at the names of the schemes and use those instead (in this case, it is simply `MLST`).
|
||||
|
||||
3. Then, run `autobigs st -h` and familiarize yourself with the parameters needed for sequence typing.
|
||||
3. Then, run `autoBIGS st -h` and familiarize yourself with the parameters needed for sequence typing.
|
||||
|
||||
4. Namely, you should find that you will need to run `autobigs st seq.fasta pubmlst_bordetella_seqdef 3 output.csv`. You can optionally include multiple `FASTA` files, and `--stop-on-fail` to stop typing if one of your sequences fails to retrieve any type.
|
||||
4. Namely, you should find that you will need to run `autoBIGS st seq.fasta pubmlst_bordetella_seqdef -sid 3 output.csv` (alternatively, `-sid 3` may be replaced with `-sn MLST`). You can optionally include multiple `FASTA` files, and `--stop-on-fail` to stop typing if one of your sequences fails to retrieve any type.
|
||||
|
||||
5. Sit tight, and wait. The `output.csv` will contain your results once completed.
|
||||
|
||||
|
@ -8,7 +8,7 @@ dynamic = ["version"]
|
||||
readme = "README.md"
|
||||
license = {text = "GPL-3.0-or-later"}
|
||||
dependencies = [
|
||||
"autoBIGS-engine==0.13.*"
|
||||
"autoBIGS-engine==0.14.*"
|
||||
]
|
||||
requires-python = ">=3.12"
|
||||
description = "A CLI tool to rapidly fetch MLST profiles given sequences for various diseases."
|
||||
|
@ -53,18 +53,19 @@ async def run(args: Namespace):
|
||||
writer.writerows(sorted_seqdef_dbs)
|
||||
print("\nDatabase output written to {0}".format(args.csv_output))
|
||||
|
||||
csv_scheme_rows = []
|
||||
for bigsdb_scheme_name in args.list_bigsdb_schemes:
|
||||
schemes = await bigsdb_index.get_schemes_for_seqdefdb(bigsdb_scheme_name)
|
||||
csv_scheme_rows.extend([(name, id, bigsdb_scheme_name) for name, id in sorted(schemes.items())])
|
||||
print("The following are the known schemes for \"{0}\", and their associated IDs:".format(bigsdb_scheme_name))
|
||||
print("\n".join(["{0}: {1}".format(name, id) for name, id, database in csv_scheme_rows]))
|
||||
if args.csv_output:
|
||||
with open(args.csv_output, "w") as csv_out_handle:
|
||||
writer = csv.writer(csv_out_handle)
|
||||
writer.writerow(("Name", "ID", "Database Name"))
|
||||
writer.writerows(csv_scheme_rows)
|
||||
print("\nscheme list output written to {0}".format(args.csv_output))
|
||||
if args.list_bigsdb_schemes:
|
||||
csv_scheme_rows = []
|
||||
for bigsdb_scheme_name in args.list_bigsdb_schemes:
|
||||
schemes = await bigsdb_index.get_schemes_for_seqdefdb(bigsdb_scheme_name)
|
||||
csv_scheme_rows.extend([(name, id, bigsdb_scheme_name) for name, id in sorted(schemes.items())])
|
||||
print("The following are the known schemes for \"{0}\", and their associated IDs:".format(bigsdb_scheme_name))
|
||||
print("\n".join(["{0}: {1}".format(name, id) for name, id, database in csv_scheme_rows]))
|
||||
if args.csv_output:
|
||||
with open(args.csv_output, "w") as csv_out_handle:
|
||||
writer = csv.writer(csv_out_handle)
|
||||
writer.writerow(("Name", "ID", "Database Name"))
|
||||
writer.writerows(csv_scheme_rows)
|
||||
print("\nscheme list output written to {0}".format(args.csv_output))
|
||||
|
||||
def run_asynchronously(args: Namespace):
|
||||
asyncio.run(run(args))
|
||||
|
@ -79,7 +79,7 @@ async def run(args: Namespace):
|
||||
if not isinstance(mlst_profiler, BIGSdbMLSTProfiler):
|
||||
raise TypeError("MLST profiler type invalid")
|
||||
mlst_profiles = mlst_profiler.profile_multiple_strings(gen_strings, args.stop_on_fail)
|
||||
failed = await write_mlst_profiles_as_csv(mlst_profiles, args.out)
|
||||
failed = await write_mlst_profiles_as_csv(mlst_profiles, args.out, await bigsdb_index.get_scheme_loci(args.seqdefdb, selected_scheme_id))
|
||||
if len(failed) > 0:
|
||||
print(f"A total of {len(failed)} IDs failed (no profile found):\n{"\n".join(failed)}")
|
||||
print(f"Completed fetching from {args.seqdefdb} for {scheme_name_lookup[selected_scheme_id]}s for {len(fastas)} sequences.")
|
||||
|
Loading…
x
Reference in New Issue
Block a user