Completed basic CLI program
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good

This commit is contained in:
Harrison Deng 2023-04-21 11:03:28 -05:00
parent 2f170e1088
commit 39864b2ce6
11 changed files with 181 additions and 71 deletions

26
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,26 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Use Test Resources",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/csvbyname/csvbyname.py",
"console": "integratedTerminal",
"args": [
"${workspaceFolder}/tests/resources",
"${workspaceFolder}/output.csv",
"-r",
"-p",
"group_num:group(\\d)-\\w-\\d+\\.txt",
"group(\\d)-(?P<sect>\\w)-(?P<patid>\\d+)\\.txt",
"-V",
"DEBUG"
],
"justMyCode": true
}
]
}

View File

@ -1,6 +1,3 @@
{ {
"python.formatting.provider": "none", "python.formatting.provider": "black"
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
}
} }

36
Jenkinsfile vendored Normal file
View File

@ -0,0 +1,36 @@
// CI pipeline: clean old artifacts, prepare the conda environment, build the
// distribution, smoke-test the wheel by installing it, and publish on master.
pipeline {
    agent any
    stages {
        stage("clean") {
            steps {
                // Remove artifacts from previous builds so only fresh files are uploaded.
                sh 'rm -rf ./dist/*'
            }
        }
        stage("install") {
            steps {
                sh 'mamba env update --file environment.yml'
                // Append the activation line only once; the original appended it on
                // every build, growing ~/.bashrc without bound.
                sh 'grep -qxF "mamba activate csvbyname" ~/.bashrc 2>/dev/null || echo "mamba activate csvbyname" >> ~/.bashrc'
            }
        }
        stage("build") {
            steps {
                sh "python -m build"
            }
        }
        stage("test") {
            steps {
                // Smoke test: the built wheel must at least be installable.
                sh "pip install dist/*.whl"
            }
        }
        stage("publish") {
            when {
                branch '**/master'
            }
            steps {
                withCredentials([usernamePassword(credentialsId: 'rs-git-package-registry-ydeng', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
                    // Single-quoted so the shell, not Groovy, expands $USER/$PASS.
                    // Groovy interpolation would embed the secret in the command
                    // string itself, bypassing Jenkins' credential masking.
                    sh 'python -m twine upload --repository-url "https://git.reslate.systems/api/packages/$USER/pypi" -u "$USER" -p "$PASS" --non-interactive --disable-progress-bar --verbose dist/*'
                }
            }
        }
    }
}

0
csvbyname/__init__.py Normal file
View File

View File

@ -1,12 +1,48 @@
import argparse import argparse
import csv
import os import os
import re import re
from typing import Iterable
import exceptions
import logging
def matcher(full_path: str, use_full_path: bool, regex_groups: list[str]):
    """Extract properties from a path using the given patterns.

    Each entry of ``regex_groups`` is tried in two ways, in order:

    1. As a regular expression with named groups (``(?P<name>...)``); every
       named group of a successful match becomes a property.
    2. As ``property-name:regex`` (split on the first ``:``); the first
       capture group of ``regex`` becomes the value of ``property-name``.

    Args:
        full_path: Path of the file or folder being examined.
        use_full_path: Match against ``full_path`` when true, otherwise only
            against its base name.
        regex_groups: Patterns in either of the two forms described above.

    Returns:
        A dict mapping property names to the captured strings.

    Raises:
        exceptions.InvalidPropertiesException: If two patterns produce the
            same property name, or a ``property-name:regex`` pattern matches
            but contains no capture group.
    """
    # Resolve the logger here rather than relying on a global that is only
    # assigned inside main(); otherwise importing this module and calling
    # matcher() directly fails with NameError on the debug path.
    log = logging.getLogger(__name__)
    # The string being matched does not change between patterns.
    target = full_path if use_full_path else os.path.basename(full_path)
    captured_properties = {}
    for regex_and_group in regex_groups:
        named_match = re.match(regex_and_group, target)
        if named_match and named_match.groupdict():
            for group, val in named_match.groupdict().items():
                if group in captured_properties:
                    raise exceptions.InvalidPropertiesException(
                        f'Duplicate capture group names found: "{group}"'
                    )
                captured_properties[group] = val
            continue

        # Fall back to the "property-name:regex" form.
        name_and_regex = regex_and_group.split(":", 1)
        if len(name_and_regex) < 2:
            log.debug(
                'File at "%s" could not be matched by regex "%s" '
                "and will be skipped",
                full_path,
                regex_and_group,
            )
            continue
        group, regex = name_and_regex
        unnamed_match = re.match(regex, target)
        if not unnamed_match:
            continue
        if group in captured_properties:
            raise exceptions.InvalidPropertiesException(
                f'Duplicate capture group names found: "{group}"'
            )
        if unnamed_match.re.groups < 1:
            # group(1) would otherwise raise an opaque IndexError.
            raise exceptions.InvalidPropertiesException(
                f'Regex for property "{group}" has no capture group: "{regex}"'
            )
        captured_properties[group] = unnamed_match.group(1)
    return captured_properties
def collect_files(
    dir_path,
    include_folders,
    entire_path,
    recursive,
    regex_groups: list[str],
):
    """Walk ``dir_path`` and capture properties for every entry found.

    NOTE(review): the parameter list was reconstructed from the positional
    recursive call and the names used in the body; confirm annotations and
    defaults against the full file.

    Args:
        dir_path: Directory whose entries are listed.
        include_folders: When true, folders themselves are catalogued too.
        entire_path: Passed to ``matcher`` to choose between matching the
            full path and matching only the base name.
        recursive: When true, descend into sub-directories.
        regex_groups: Patterns forwarded to ``matcher``.

    Returns:
        A ``(collected, pkeys)`` tuple: ``collected`` maps each path to its
        captured properties, ``pkeys`` is the set of all property names seen.
    """
    collected = {}
    pkeys = set()

    for item in os.listdir(dir_path):
        full_path = os.path.join(dir_path, item)
        if os.path.isdir(full_path):
            if include_folders:
                collected[full_path] = matcher(full_path, entire_path, regex_groups)
                pkeys.update(collected[full_path])
            if recursive:
                # The recursive call returns a (dict, set) tuple; unpack it
                # instead of applying `dict | tuple`, which raises TypeError,
                # and keep the property names found in sub-directories.
                sub_collected, sub_pkeys = collect_files(
                    full_path, include_folders, entire_path, recursive, regex_groups
                )
                collected.update(sub_collected)
                pkeys.update(sub_pkeys)
        elif os.path.isfile(full_path):
            collected[full_path] = matcher(full_path, entire_path, regex_groups)
            pkeys.update(collected[full_path])
    return collected, pkeys
def write_collected_to_csv(
    output_path: str, collected: dict[str, dict[str, str]], property_keys: Iterable[str]
):
    """Write the collected paths and their properties to a CSV file.

    The first column is ``path``; the remaining columns are the property
    names in sorted order. A property missing for a path is written as
    ``"N/A"``.

    Args:
        output_path: Destination file; overwritten if it exists.
        collected: Mapping of path to its captured properties.
        property_keys: All property names that should appear as columns.
    """
    s_property_keys = sorted(property_keys)
    # newline="" lets the csv module control line endings; without it, rows
    # end in "\r\r\n" on platforms that translate "\n" on write.
    with open(output_path, "w", newline="") as output_fd:
        writer = csv.writer(output_fd)
        writer.writerow(["path", *s_property_keys])
        for full_path, properties in collected.items():
            writer.writerow(
                [full_path, *(properties.get(k, "N/A") for k in s_property_keys)]
            )
def run(args):
    """Collect files as described by the parsed CLI arguments and write the CSV.

    Args:
        args: Parsed arguments providing ``directory``, ``include_folders``,
            ``entire_path``, ``recursive``, ``add_re_property`` and ``output``.
    """
    # Look the logger up locally so run() also works when called without
    # main() having assigned the module-level global first.
    logging.getLogger(__name__).info(
        'Collecting files from "%s"', args.directory
    )
    collected, pkeys = collect_files(
        args.directory,
        args.include_folders,
        args.entire_path,
        args.recursive,
        # "-p" is optional with nargs="+", so it is None when omitted;
        # treat that as "no properties" instead of failing on iteration.
        args.add_re_property or [],
    )
    write_collected_to_csv(args.output, collected, pkeys)
def main(): def main():
@ -74,7 +119,7 @@ def main():
) )
argparser.add_argument( argparser.add_argument(
"-l", "-l",
"--include-folder", "--include-folders",
help="Include folders in the cataloguing process", help="Include folders in the cataloguing process",
action="store_true", action="store_true",
required=False, required=False,
@ -98,11 +143,28 @@ def main():
) )
argparser.add_argument( argparser.add_argument(
"-p", "-p",
"--add-regex-property", "--add-re-property",
help="Add a property in the resulting CSV obtained from the first capture " help="Add a property in the resulting CSV obtained from the first capture "
"group of the given REGEX in the following format:\n property-name:regex", "group of the given REGEX in the following format:\n property-name:regex.\n"
"Alternatively, use named REGEX groups.",
nargs="+", nargs="+",
type=str,
)
argparser.add_argument(
"-V",
"--verbosity",
help="Set the verbosity of the logging",
type=str,
required=False,
default="INFO",
) )
args = argparser.parse_args() args = argparser.parse_args()
logging.basicConfig(level=args.verbosity.upper())
global logger
logger = logging.getLogger(__name__)
run(args) run(args)
if __name__ == "__main__":
main()

2
csvbyname/exceptions.py Normal file
View File

@ -0,0 +1,2 @@
class InvalidPropertiesException(Exception):
    """Raised when the requested property patterns cannot be applied."""

View File

@ -1,37 +0,0 @@
name: /home/ydeng/csvbyname/env
channels:
- conda-forge
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_gnu
- black=23.3.0=py311h38be061_0
- bzip2=1.0.8=h7f98852_4
- ca-certificates=2022.12.7=ha878542_0
- click=8.1.3=unix_pyhd8ed1ab_2
- ld_impl_linux-64=2.40=h41732ed_0
- libexpat=2.5.0=hcb278e6_1
- libffi=3.4.2=h7f98852_5
- libgcc-ng=12.2.0=h65d4601_19
- libgomp=12.2.0=h65d4601_19
- libnsl=2.0.0=h7f98852_0
- libsqlite=3.40.0=h753d276_0
- libuuid=2.38.1=h0b41bf4_0
- libzlib=1.2.13=h166bdaf_4
- mypy_extensions=1.0.0=pyha770c72_0
- ncurses=6.3=h27087fc_1
- openssl=3.1.0=h0b41bf4_0
- packaging=23.1=pyhd8ed1ab_0
- pathspec=0.11.1=pyhd8ed1ab_0
- pip=23.1=pyhd8ed1ab_0
- platformdirs=3.2.0=pyhd8ed1ab_0
- python=3.11.3=h2755cc3_0_cpython
- python_abi=3.11=3_cp311
- readline=8.2=h8228510_1
- setuptools=67.6.1=pyhd8ed1ab_0
- tk=8.6.12=h27826a3_0
- typing-extensions=4.5.0=hd8ed1ab_0
- typing_extensions=4.5.0=pyha770c72_0
- tzdata=2023c=h71feb2d_0
- wheel=0.40.0=pyhd8ed1ab_0
- xz=5.2.6=h166bdaf_0
prefix: /home/ydeng/csvbyname/env

8
environment.yml Normal file
View File

@ -0,0 +1,8 @@
name: csvbyname
channels:
- conda-forge
dependencies:
- build=0.7.0
- pytest=7.2.2
- twine=4.0.2
- python=3.9

3
pyproject.toml Normal file
View File

@ -0,0 +1,3 @@
[build-system]
build-backend = "setuptools.build_meta"
requires = ["setuptools", "wheel"]

10
setup.cfg Normal file
View File

@ -0,0 +1,10 @@
[metadata]
name = csvbyname
version = 0.0.2
[options]
packages = csvbyname
[options.entry_points]
console_scripts =
csvbyname = csvbyname.csvbyname:main

3
setup.py Normal file
View File

@ -0,0 +1,3 @@
# Minimal setuptools entry point: setup() is invoked with no arguments here.
# NOTE(review): package metadata presumably comes from setup.cfg — confirm.
from setuptools import setup
setup()