Completed basic CLI program
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
This commit is contained in:
parent
2f170e1088
commit
e64e1a038f
26
.vscode/launch.json
vendored
Normal file
26
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Use Test Resources",
|
||||||
|
"type": "python",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${workspaceFolder}/csvbyname/csvbyname.py",
|
||||||
|
"console": "integratedTerminal",
|
||||||
|
"args": [
|
||||||
|
"${workspaceFolder}/tests/resources",
|
||||||
|
"${workspaceFolder}/output.csv",
|
||||||
|
"-r",
|
||||||
|
"-p",
|
||||||
|
"group_num:group(\\d)-\\w-\\d+\\.txt",
|
||||||
|
"group(\\d)-(?P<sect>\\w)-(?P<patid>\\d+)\\.txt",
|
||||||
|
"-V",
|
||||||
|
"DEBUG"
|
||||||
|
],
|
||||||
|
"justMyCode": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
5
.vscode/settings.json
vendored
5
.vscode/settings.json
vendored
@ -1,6 +1,3 @@
|
|||||||
{
|
{
|
||||||
"python.formatting.provider": "none",
|
"python.formatting.provider": "black"
|
||||||
"[python]": {
|
|
||||||
"editor.defaultFormatter": "ms-python.black-formatter"
|
|
||||||
}
|
|
||||||
}
|
}
|
36
Jenkinsfile
vendored
Normal file
36
Jenkinsfile
vendored
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
// Build pipeline: clean old artifacts, prepare the mamba environment,
// build the distribution, smoke-install the wheel and, on master only,
// upload the artifacts to the package registry.
pipeline {
    agent any
    stages {
        stage("clean") {
            steps {
                sh 'rm -rf ./dist/*'
            }
        }
        stage("install") {
            steps {
                sh 'mamba env update --file environment.yml'
                // Append the activation line only when it is not already
                // present, so repeated builds do not keep growing ~/.bashrc.
                sh 'grep -qxF "mamba activate csvbyname" ~/.bashrc || echo "mamba activate csvbyname" >> ~/.bashrc'
            }
        }
        stage("build") {
            steps {
                sh "python -m build"
            }
        }
        stage("test") {
            steps {
                sh "pip install dist/*.whl"
            }
        }
        stage("publish") {
            when {
                branch '**/master'
            }
            steps {
                withCredentials([usernamePassword(credentialsId: 'rs-git-package-registry-ydeng', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
                    // Single-quoted on purpose: the shell expands $USER and
                    // $PASS from the environment provided by withCredentials.
                    // A double-quoted Groovy string would interpolate the
                    // secrets into the command text before it reaches the shell.
                    sh 'python -m twine upload --repository-url "https://git.reslate.systems/api/packages/$USER/pypi" -u "$USER" -p "$PASS" --non-interactive --disable-progress-bar --verbose dist/*'
                }
            }
        }
    }
}
|
0
csvbyname/__init__.py
Normal file
0
csvbyname/__init__.py
Normal file
@ -1,12 +1,48 @@
|
|||||||
import argparse
|
import argparse
|
||||||
|
import csv
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
from typing import Iterable
|
||||||
|
import exceptions
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
def matcher(full_path: str, use_full_path: bool, regex_groups: list[str]):
    """Extract properties from a path using the given patterns.

    Each entry of ``regex_groups`` is interpreted in one of two ways:

    1. The whole entry is tried as a regular expression. If it matches and
       contains named groups (``(?P<name>...)``), every named group becomes
       a property.
    2. Otherwise the entry is read as ``property-name:regex`` (split on the
       first ``:``). If ``regex`` matches, its first capture group becomes
       the value of ``property-name``; when ``regex`` has no capture group
       the whole match is used instead.

    Patterns are applied with ``re.match``, i.e. anchored at the start of
    the matched text.

    Args:
        full_path: Path of the item being catalogued.
        use_full_path: Match against ``full_path`` when true, otherwise
            only against its base name.
        regex_groups: Patterns in either of the forms described above.

    Returns:
        A dict mapping property names to the captured text. Patterns that
        do not match contribute nothing.

    Raises:
        exceptions.InvalidPropertiesException: If two patterns produce the
            same property name.
    """
    # Fetch the logger here rather than relying on the module-level
    # ``logger`` global, which is only assigned once ``main()`` has run.
    log = logging.getLogger(__name__)
    # The text to match does not depend on the pattern, so compute it once.
    target = full_path if use_full_path else os.path.basename(full_path)

    captured_properties = {}
    for regex_and_group in regex_groups:
        named_match = re.match(regex_and_group, target)
        if named_match and named_match.groupdict():
            found = named_match.groupdict()
        else:
            name, separator, regex = regex_and_group.partition(":")
            unnamed_match = re.match(regex, target) if separator else None
            if not unnamed_match:
                log.debug(
                    'File at "%s" could not be matched by regex "%s" '
                    "and will be skipped",
                    full_path,
                    regex_and_group,
                )
                continue
            # group(1) raises IndexError when the pattern defines no capture
            # group; fall back to the entire match in that case.
            index = 1 if unnamed_match.re.groups else 0
            found = {name: unnamed_match.group(index)}

        for group, val in found.items():
            if group in captured_properties:
                raise exceptions.InvalidPropertiesException(
                    f'Duplicate capture group names found: "{group}"'
                )
            captured_properties[group] = val
    return captured_properties
|
||||||
|
|
||||||
|
|
||||||
def collect_files(
    dir_path: str,
    include_folders: bool,
    entire_path: bool,
    recursive: bool,
    regex_groups: list[str],
):
    """Catalogue the entries of a directory and extract their properties.

    Args:
        dir_path: Directory whose entries are listed.
        include_folders: When true, sub-directories are catalogued as
            entries themselves.
        entire_path: When true, patterns are matched against the full path
            of each entry instead of only its base name.
        recursive: When true, sub-directories are descended into.
        regex_groups: Property patterns forwarded to ``matcher``.

    Returns:
        A ``(collected, pkeys)`` tuple: ``collected`` maps each catalogued
        path to its property dict, and ``pkeys`` is the set of all property
        names seen across those entries.
    """
    collected = {}
    pkeys = set()

    for item in os.listdir(dir_path):
        full_path = os.path.join(dir_path, item)
        if os.path.isdir(full_path):
            if include_folders:
                collected[full_path] = matcher(full_path, entire_path, regex_groups)
                pkeys.update(collected[full_path])
            if recursive:
                # The recursive call returns a (dict, set) tuple, so it must
                # be unpacked; merging the tuple directly into the dict
                # fails, and the nested property names must be kept too.
                sub_collected, sub_pkeys = collect_files(
                    full_path, include_folders, entire_path, recursive, regex_groups
                )
                collected.update(sub_collected)
                pkeys.update(sub_pkeys)
        elif os.path.isfile(full_path):
            collected[full_path] = matcher(full_path, entire_path, regex_groups)
            pkeys.update(collected[full_path])
    return collected, pkeys
|
||||||
|
|
||||||
|
|
||||||
def write_collected_to_csv(
    output_path: str, collected: dict[str, dict[str, str]], property_keys: Iterable[str]
):
    """Write the collected paths and their properties to a CSV file.

    The header row is ``path`` followed by the property names in sorted
    order. Every following row holds one collected path and its value for
    each property, with ``"N/A"`` where the path has no such property.

    Args:
        output_path: Destination file; it is created or overwritten.
        collected: Mapping of path to that path's property dict.
        property_keys: All property names that should become columns.
    """
    s_property_keys = sorted(property_keys)
    # newline="" hands line-ending control to the csv module; without it the
    # writer's "\r\n" terminator is translated again on Windows, producing
    # an empty line after every row.
    with open(output_path, "w", newline="") as output_fd:
        writer = csv.writer(output_fd)
        writer.writerow(["path", *s_property_keys])
        for full_path, properties in collected.items():
            writer.writerow(
                [full_path, *(properties.get(k, "N/A") for k in s_property_keys)]
            )
|
||||||
|
|
||||||
|
|
||||||
def run(args):
    """Collect files as described by the parsed arguments and write the CSV.

    Args:
        args: Parsed argument namespace providing ``directory``,
            ``include_folders``, ``entire_path``, ``recursive``,
            ``add_re_property`` and ``output``.
    """
    # Fetch the logger here rather than relying on the module-level
    # ``logger`` global, which is only assigned once ``main()`` has run.
    logging.getLogger(__name__).info('Collecting files from "%s"', args.directory)
    collected, pkeys = collect_files(
        args.directory,
        args.include_folders,
        args.entire_path,
        args.recursive,
        args.add_re_property,
    )
    write_collected_to_csv(args.output, collected, pkeys)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -74,7 +119,7 @@ def main():
|
|||||||
)
|
)
|
||||||
argparser.add_argument(
|
argparser.add_argument(
|
||||||
"-l",
|
"-l",
|
||||||
"--include-folder",
|
"--include-folders",
|
||||||
help="Include folders in the cataloguing process",
|
help="Include folders in the cataloguing process",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
required=False,
|
required=False,
|
||||||
@ -98,11 +143,28 @@ def main():
|
|||||||
)
|
)
|
||||||
argparser.add_argument(
|
argparser.add_argument(
|
||||||
"-p",
|
"-p",
|
||||||
"--add-regex-property",
|
"--add-re-property",
|
||||||
help="Add a property in the resulting CSV obtained from the first capture "
|
help="Add a property in the resulting CSV obtained from the first capture "
|
||||||
"group of the given REGEX in the following format:\n property-name:regex",
|
"group of the given REGEX in the following format:\n property-name:regex.\n"
|
||||||
|
"Alternatively, use named REGEX groups.",
|
||||||
nargs="+",
|
nargs="+",
|
||||||
|
type=str,
|
||||||
|
)
|
||||||
|
argparser.add_argument(
|
||||||
|
"-V",
|
||||||
|
"--verbosity",
|
||||||
|
help="Set the verbosity of the logging",
|
||||||
|
type=str,
|
||||||
|
required=False,
|
||||||
|
default="INFO",
|
||||||
)
|
)
|
||||||
|
|
||||||
args = argparser.parse_args()
|
args = argparser.parse_args()
|
||||||
|
logging.basicConfig(level=args.verbosity.upper())
|
||||||
|
global logger
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
run(args)
|
run(args)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
2
csvbyname/exceptions.py
Normal file
2
csvbyname/exceptions.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
class InvalidPropertiesException(Exception):
    """Raised when the requested properties are invalid.

    The matching code raises this when two patterns produce the same
    property name.
    """
|
@ -1,37 +0,0 @@
|
|||||||
name: /home/ydeng/csvbyname/env
|
|
||||||
channels:
|
|
||||||
- conda-forge
|
|
||||||
dependencies:
|
|
||||||
- _libgcc_mutex=0.1=conda_forge
|
|
||||||
- _openmp_mutex=4.5=2_gnu
|
|
||||||
- black=23.3.0=py311h38be061_0
|
|
||||||
- bzip2=1.0.8=h7f98852_4
|
|
||||||
- ca-certificates=2022.12.7=ha878542_0
|
|
||||||
- click=8.1.3=unix_pyhd8ed1ab_2
|
|
||||||
- ld_impl_linux-64=2.40=h41732ed_0
|
|
||||||
- libexpat=2.5.0=hcb278e6_1
|
|
||||||
- libffi=3.4.2=h7f98852_5
|
|
||||||
- libgcc-ng=12.2.0=h65d4601_19
|
|
||||||
- libgomp=12.2.0=h65d4601_19
|
|
||||||
- libnsl=2.0.0=h7f98852_0
|
|
||||||
- libsqlite=3.40.0=h753d276_0
|
|
||||||
- libuuid=2.38.1=h0b41bf4_0
|
|
||||||
- libzlib=1.2.13=h166bdaf_4
|
|
||||||
- mypy_extensions=1.0.0=pyha770c72_0
|
|
||||||
- ncurses=6.3=h27087fc_1
|
|
||||||
- openssl=3.1.0=h0b41bf4_0
|
|
||||||
- packaging=23.1=pyhd8ed1ab_0
|
|
||||||
- pathspec=0.11.1=pyhd8ed1ab_0
|
|
||||||
- pip=23.1=pyhd8ed1ab_0
|
|
||||||
- platformdirs=3.2.0=pyhd8ed1ab_0
|
|
||||||
- python=3.11.3=h2755cc3_0_cpython
|
|
||||||
- python_abi=3.11=3_cp311
|
|
||||||
- readline=8.2=h8228510_1
|
|
||||||
- setuptools=67.6.1=pyhd8ed1ab_0
|
|
||||||
- tk=8.6.12=h27826a3_0
|
|
||||||
- typing-extensions=4.5.0=hd8ed1ab_0
|
|
||||||
- typing_extensions=4.5.0=pyha770c72_0
|
|
||||||
- tzdata=2023c=h71feb2d_0
|
|
||||||
- wheel=0.40.0=pyhd8ed1ab_0
|
|
||||||
- xz=5.2.6=h166bdaf_0
|
|
||||||
prefix: /home/ydeng/csvbyname/env
|
|
8
environment.yml
Normal file
8
environment.yml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
name: csvbyname
|
||||||
|
channels:
|
||||||
|
- conda-forge
|
||||||
|
dependencies:
|
||||||
|
- build=0.7.0
|
||||||
|
- pytest=7.2.2
|
||||||
|
- twine=4.0.2
|
||||||
|
- python=3.9
|
3
pyproject.toml
Normal file
3
pyproject.toml
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
[build-system]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
requires = ["setuptools", "wheel"]
|
10
setup.cfg
Normal file
10
setup.cfg
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
[metadata]
|
||||||
|
name = csvbyname
|
||||||
|
version = 0.0.1
|
||||||
|
|
||||||
|
[options]
|
||||||
|
packages = csvbyname
|
||||||
|
|
||||||
|
[options.entry_points]
|
||||||
|
console_scripts =
|
||||||
|
csvbyname = csvbyname.csvbyname:main
|
Loading…
Reference in New Issue
Block a user