generated from ydeng/python-program
Basic filtering system and tests completed
Some checks failed
ydeng/modvcfsamples/pipeline/head There was a failure building this commit
Some checks failed
ydeng/modvcfsamples/pipeline/head There was a failure building this commit
This commit is contained in:
parent
f2bdbaa0b0
commit
515d790844
17
.vscode/settings.json
vendored
Normal file
17
.vscode/settings.json
vendored
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
{
|
||||||
|
"python.testing.pytestArgs": [],
|
||||||
|
"python.testing.unittestEnabled": false,
|
||||||
|
"python.testing.pytestEnabled": true,
|
||||||
|
"python.linting.pylintArgs": [
|
||||||
|
"--rcfile=setup.cfg"
|
||||||
|
],
|
||||||
|
"python.analysis.autoSearchPaths": true,
|
||||||
|
"python.analysis.extraPaths": [
|
||||||
|
"src"
|
||||||
|
],
|
||||||
|
"cSpell.words": [
|
||||||
|
"pytest",
|
||||||
|
"pyvcf",
|
||||||
|
"vcfs"
|
||||||
|
],
|
||||||
|
}
|
5
Jenkinsfile
vendored
5
Jenkinsfile
vendored
@ -9,7 +9,6 @@ pipeline {
|
|||||||
}
|
}
|
||||||
stage("unit tests") {
|
stage("unit tests") {
|
||||||
steps {
|
steps {
|
||||||
// TODO Update the
|
|
||||||
sh returnStatus: true, script: "python -m pytest --junitxml=unit_tests.xml --cov-report xml:test_coverage.xml --cov=program"
|
sh returnStatus: true, script: "python -m pytest --junitxml=unit_tests.xml --cov-report xml:test_coverage.xml --cov=program"
|
||||||
xunit checksName: '', tools: [JUnit(excludesPattern: '', pattern: 'test_results.xml', stopProcessingIfError: true)]
|
xunit checksName: '', tools: [JUnit(excludesPattern: '', pattern: 'test_results.xml', stopProcessingIfError: true)]
|
||||||
cobertura autoUpdateHealth: false, autoUpdateStability: false, coberturaReportFile: 'test_coverage.xml', failUnhealthy: false, failUnstable: false, maxNumberOfBuilds: 64, lineCoverageTargets: '50, 0, 0', methodCoverageTargets: '50, 0, 0', onlyStable: false, sourceEncoding: 'ASCII', zoomCoverageChart: false
|
cobertura autoUpdateHealth: false, autoUpdateStability: false, coberturaReportFile: 'test_coverage.xml', failUnhealthy: false, failUnstable: false, maxNumberOfBuilds: 64, lineCoverageTargets: '50, 0, 0', methodCoverageTargets: '50, 0, 0', onlyStable: false, sourceEncoding: 'ASCII', zoomCoverageChart: false
|
||||||
@ -18,7 +17,7 @@ pipeline {
|
|||||||
stage("build") {
|
stage("build") {
|
||||||
steps {
|
steps {
|
||||||
sh "python -m build"
|
sh "python -m build"
|
||||||
// Additional build steps go here
|
// TODO Additional build steps go here
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("test installation") {
|
stage("test installation") {
|
||||||
@ -30,7 +29,7 @@ pipeline {
|
|||||||
stage("archive") {
|
stage("archive") {
|
||||||
steps {
|
steps {
|
||||||
archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl'
|
archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl'
|
||||||
// Additional archival or documentation steps go here
|
// TODO Additional archival or documentation steps go here
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("publish") {
|
stage("publish") {
|
||||||
|
@ -8,3 +8,4 @@ dependencies:
|
|||||||
- pytest
|
- pytest
|
||||||
- twine
|
- twine
|
||||||
- sphinx
|
- sphinx
|
||||||
|
- pyvcf
|
||||||
|
14
setup.cfg
14
setup.cfg
@ -1,17 +1,17 @@
|
|||||||
[metadata]
|
[metadata]
|
||||||
name = sample
|
name = modvcfsamples
|
||||||
version = 0.0.1
|
version = 0.0.1
|
||||||
|
|
||||||
[options]
|
[options]
|
||||||
package_dir =
|
package_dir =
|
||||||
= src
|
= ./src
|
||||||
# install_requires =
|
install_requires =
|
||||||
# cachier ==2.1
|
pyvcf ==0.6.8
|
||||||
|
|
||||||
|
|
||||||
# [options.entry_points]
|
[options.entry_points]
|
||||||
# console_scripts =
|
console_scripts =
|
||||||
# avariantas = program.program:entry_function
|
modvcfsamples = modvcfsamples.cli:main
|
||||||
|
|
||||||
[tool:pytest]
|
[tool:pytest]
|
||||||
pythonpath = src
|
pythonpath = src
|
||||||
|
39
src/modvcfsamples/cli.py
Normal file
39
src/modvcfsamples/cli.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
from modvcfsamples import sample
|
||||||
|
|
||||||
|
def run(vcfs: list[str], only: list[str], output_dir: str):
    """Filter the sample data of each VCF and write the result to ``output_dir``.

    Args:
        vcfs: Paths of the VCF files to process.
        only: Sample datatypes (for example ``"GT"``) to keep. ``None`` or an
            empty list (what argparse's ``append`` action produces when the
            option is never given) leaves the records unfiltered.
        output_dir: Directory that receives one output file per input, named
            after the input file's basename.
    """
    for vcf in vcfs:
        vcf_records = sample.get_records_from_vcf(vcf)
        if only:
            # filter_all_sample_datatypes is a generator that yields modified
            # copies. It has to be consumed and its output written; calling it
            # and dropping the result performs no filtering at all.
            vcf_records = list(sample.filter_all_sample_datatypes(vcf_records, *only))
        sample.write_records_to_vcf(vcf_records, os.path.join(output_dir, os.path.basename(vcf)))
|
||||||
|
|
||||||
|
def main():
    """Parse the command line and pass the parsed values on to ``run``."""
    cli = argparse.ArgumentParser()

    # Positionals: one or more input VCFs, followed by the output directory.
    cli.add_argument("vcfs", metavar="I", nargs="+", type=str, help="The VCFs to run filtering on")
    cli.add_argument("output_dir", metavar="O", type=str, help="The output directory")

    # Repeatable option: every occurrence appends one datatype to the list.
    cli.add_argument(
        "--only",
        "-n",
        type=str,
        action="append",
        help="Remove everything but the sample datatype",
    )

    parsed = cli.parse_args()
    run(parsed.vcfs, parsed.only, parsed.output_dir)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
34
src/modvcfsamples/sample.py
Normal file
34
src/modvcfsamples/sample.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
import vcf
|
||||||
|
from collections import namedtuple
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
def get_records_from_vcf(path: str):
    """Read every record of the VCF file at ``path`` into a list.

    Args:
        path: Location of the VCF file; it is opened in text mode.

    Returns:
        A list with each record produced by ``vcf.Reader``, in file order.
    """
    with open(path, "r") as handle:
        # Drain the reader completely while the file is still open.
        loaded = list(vcf.Reader(handle))
    return loaded
|
||||||
|
|
||||||
|
def filter_sample_datatype(record, *datatypes: str):
    """Return a copy of ``record`` whose samples keep only ``datatypes``.

    Args:
        record: Object with a ``samples`` sequence; each sample must support
            lookup by datatype name (``sample["GT"]``).
        *datatypes: Names of the per-sample fields to keep.

    Returns:
        A deep copy of ``record`` whose ``samples`` attribute is a list of
        ``Data`` namedtuples holding only the requested fields, in the order
        given. ``record`` itself is left unmodified.

    Raises:
        ValueError: If a datatype is repeated or is not a valid namedtuple
            field name.
    """
    # The field names must be passed as ONE sequence: namedtuple() accepts only
    # typename and field_names positionally, so unpacking them with * raised
    # TypeError for two or more datatypes (and for none at all).
    call_data = namedtuple("Data", datatypes)
    modified_record = deepcopy(record)
    modified_record.samples = [
        call_data(*(call[datatype] for datatype in datatypes))
        for call in record.samples
    ]
    return modified_record
|
||||||
|
|
||||||
|
def filter_all_sample_datatypes(records: list, *datatypes: str):
    """Lazily yield a filtered copy of each record in ``records``.

    Each record is passed to ``filter_sample_datatype`` together with
    ``datatypes``. Nothing is processed until the returned generator is
    iterated.
    """
    yield from (filter_sample_datatype(entry, *datatypes) for entry in records)
|
||||||
|
|
||||||
|
def write_records_to_vcf(records: list, path: str, template=None):
    """Write ``records`` to a VCF file at ``path``.

    Args:
        records: Records to write, in order.
        path: Destination file; it is created or truncated.
        template: Object handed to ``vcf.Writer`` as its header template.
            When omitted, the first record is used, which is the previous
            behaviour.
            NOTE(review): PyVCF's documented usage passes the ``vcf.Reader``
            the records were read from as the template; a record probably
            lacks the header attributes the writer needs. Confirm, and have
            callers supply the reader here.

    Raises:
        IndexError: If ``records`` is empty and no ``template`` is given.
    """
    # Resolve the template BEFORE opening the file so that an empty record
    # list fails without leaving a truncated output file behind.
    header_source = records[0] if template is None else template
    with open(path, "w") as vcf_stream:
        writer = vcf.Writer(vcf_stream, header_source)
        for record in records:
            writer.write_record(record)
|
@ -1 +0,0 @@
|
|||||||
# TODO Do stuff!
|
|
38
tests/modvcfsamples/test_sample.py
Normal file
38
tests/modvcfsamples/test_sample.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
from modvcfsamples.sample import filter_sample_datatype, filter_all_sample_datatypes, get_records_from_vcf
|
||||||
|
import os
|
||||||
|
|
||||||
|
def test_get_records_from_vcf_not_none():
    """Loading the shortened fixture VCF yields at least one record."""
    fixture = os.path.abspath("tests/resources/test_files_shortened.vcf")
    loaded = get_records_from_vcf(fixture)
    assert len(loaded) > 0
|
||||||
|
|
||||||
|
def test_filter_sample_datatype_not_none():
    """Filtering the first fixture record down to GT returns an object."""
    fixture = os.path.abspath("tests/resources/test_files_shortened.vcf")
    first_record = get_records_from_vcf(fixture)[0]
    wanted = ["GT"]
    result = filter_sample_datatype(first_record, *wanted)
    assert result is not None
|
||||||
|
|
||||||
|
def test_filter_sample_datatype_only_filtered():
    """Every sample of a filtered record carries only the requested fields."""
    fixture = os.path.abspath("tests/resources/test_files_shortened.vcf")
    first_record = get_records_from_vcf(fixture)[0]
    wanted = ["GT"]
    result = filter_sample_datatype(first_record, *wanted)
    for entry in result.samples:
        # No more fields than were asked for ...
        assert len(entry) <= len(wanted)
        # ... and each remaining field is one of the requested names.
        for field in entry._asdict():
            assert field in wanted
|
||||||
|
|
||||||
|
def test_filter_all_sample_datatypes_not_empty():
    """Filtering the whole fixture yields one result per record (11 in total)."""
    fixture = os.path.abspath("tests/resources/test_files_shortened.vcf")
    loaded = get_records_from_vcf(fixture)
    wanted = ["GT"]
    results = [*filter_all_sample_datatypes(loaded, *wanted)]
    assert len(results) == 11
|
||||||
|
|
||||||
|
def test_filter_all_sample_datatypes_filtered():
    """Every sample of every filtered record carries only the requested fields."""
    fixture = os.path.abspath("tests/resources/test_files_shortened.vcf")
    loaded = get_records_from_vcf(fixture)
    wanted = ["GT"]
    # Materialize the generator first so all filtering runs before any assert.
    results = list(filter_all_sample_datatypes(loaded, *wanted))
    for result in results:
        for entry in result.samples:
            assert len(entry) <= len(wanted)
            for field in entry._asdict():
                assert field in wanted
|
||||||
|
|
@ -1,2 +0,0 @@
|
|||||||
# TODO Test program!
|
|
||||||
|
|
17
tests/resources/test_files_shortened.vcf
Normal file
17
tests/resources/test_files_shortened.vcf
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
##fileformat=VCFv4.1
|
||||||
|
##fileDate=10122015_22h01m13s
|
||||||
|
##source=SHAPEIT2.v837
|
||||||
|
##log_file=shapeit_10122015_22h01m13s_3f764d75-2fbb-42df-ab75-8c2dfd5731ce.log
|
||||||
|
##FORMAT=<ID=GT,Number=1,Type=String,Description="Phased Genotype">
|
||||||
|
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Gambian Chinese French Brazilian Nigerian Pakistani English Colombian Indian Japanese
|
||||||
|
1 846808 rs4475691 C T 100 . AC=1276;AN=5008;DP=20368 GT:GQ:DP 1|1:70:60 0|0:30:10 0|0:20:40 ./.:0:0 0|0:70:60 0|0:90:30 0|0:80:70 0|0:50:80 ./.:0:0 0|0:100:80
|
||||||
|
1 846854 rs111957712 G A 100 . AC=114;AN=5008;DP=20538 GT:GQ:DP ./.:0:0 0|0:40:30 0|0:80:30 0|0:0:20 0|0:40:80 0|0:10:30 ./.:0:0 ./.:0:0 0|0:70:0 0|0:90:30
|
||||||
|
1 846864 rs950122 G C 100 . AC=1116;AN=5008;DP=20582 GT:GQ:DP 1|1:60:0 0|0:20:0 0|0:90:70 0|0:80:50 0|0:80:30 0|0:10:80 0|0:100:20 0|0:30:40 ./.:0:0 0|0:30:90
|
||||||
|
1 847228 rs3905286 C T 100 . AC=1215;AN=5008;DP=20731 GT:GQ:DP 1|1:70:80 0|0:90:80 0|0:60:50 0|0:60:90 0|0:30:40 0|0:70:10 0|0:100:80 0|0:100:50 0|1:70:100 0|0:40:40
|
||||||
|
1 847297 rs11507768 G A 100 . AC=359;AN=5008;DP=20809 GT:GQ:DP 1|0:0:60 0|0:10:30 0|0:80:60 ./.:0:0 0|0:10:100 0|0:10:100 ./.:0:0 0|0:100:40 0|0:20:20 0|0:10:0
|
||||||
|
1 847491 rs28407778 G A 100 . AC=1262;AN=5008;DP=16939 GT:GQ:DP 1|1:0:100 0|0:70:40 0|0:0:60 0|0:90:90 0|0:90:90 ./.:0:0 0|0:70:60 0|0:70:0 0|1:90:100 0|0:60:60
|
||||||
|
1 848023 rs144407116 C A 100 . AC=52;AN=5008;DP=22562 GT:GQ:DP 0|1:10:60 0|0:20:80 0|0:20:50 0|0:20:90 0|0:90:10 0|0:70:60 0|0:40:30 0|0:60:0 0|0:40:40 0|0:10:10
|
||||||
|
1 848090 rs4246505 G A 100 . AC=857;AN=5008;DP=19301 GT:GQ:DP 0|0:70:70 0|0:90:30 0|0:40:10 0|0:80:20 0|0:50:50 0|0:10:30 ./.:0:0 0|0:20:60 ./.:0:0 0|0:90:0
|
||||||
|
1 848445 rs4626817 G A 100 . AC=1255;AN=5008;DP=18444 GT:GQ:DP 1|1:100:40 0|0:80:90 0|0:30:100 0|0:100:60 0|0:40:90 0|0:20:30 0|0:70:100 ./.:0:0 ./.:0:0 0|0:80:30
|
||||||
|
1 848456 rs11507767 A G 100 . AC=1266;AN=5008;DP=18137 GT:GQ:DP 1|1:40:30 ./.:0:0 0|0:60:90 0|0:60:40 0|0:100:80 0|0:50:50 0|0:0:10 0|0:60:0 0|1:100:100 ./.:0:0
|
||||||
|
1 848738 rs3829741 C T 100 . AC=855;AN=5008;DP=16663 GT:GQ:DP 0|0:50:90 0|0:50:50 0|0:50:30 0|0:60:60 0|0:80:40 0|0:50:80 0|0:0:80 0|0:0:30 0|1:10:0 0|0:70:30
|
Loading…
Reference in New Issue
Block a user