Compare commits

...

20 Commits

Author SHA1 Message Date
21d060be6a Updated CI config to match server CI
All checks were successful
csvbyname/pipeline/head This commit looks good
2024-11-14 20:48:06 +00:00
be4d665301 Updated pipeline to take advantage of latest build container image
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-05-03 08:32:04 -05:00
9e59bc097c Fixed typo in argument help
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-23 15:28:10 -05:00
2edd8a2093 Bumped package version
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-23 14:56:48 -05:00
7a400457fe Fixed double line breaks in output on Windows
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-23 14:56:29 -05:00
59cfe486aa Bumped package version
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-23 14:44:03 -05:00
266a611fea Fixed inconsistent CSV writing function
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-23 14:42:44 -05:00
cb36b8adb3 Version bump
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 15:56:08 -05:00
ded60aa742 Added step to test if command is runnable
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 15:53:07 -05:00
adf734f3c1 Added feature to add basename column to output
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 15:52:20 -05:00
c579c172ef Bumped version number
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 13:55:57 -05:00
e5bab5b12d Fixed packaging
Some checks failed
ydeng/csvbyname/pipeline/head There was a failure building this commit
2023-04-21 13:51:12 -05:00
73ae49cb89 Bumped package version number
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 11:21:03 -05:00
958e2b12e3 Added 'archive' stage to Jenkins pipeline
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 11:20:03 -05:00
425ef96e9b Updated '.gitignore' to ignore 'output.csv' 2023-04-21 11:19:46 -05:00
95b60c87a5 Bumped python version in 'environment.yml' 2023-04-21 11:19:27 -05:00
ea374a971e Updated pipeline to use branch 'main' as publishing branch
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 11:11:54 -05:00
e64e1a038f Completed basic CLI program
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 11:08:46 -05:00
2f170e1088 Began implementing the file collection function 2023-04-20 23:40:39 -05:00
f09e0d27fd Added some CLI arguments and program structure 2023-04-20 16:28:54 -05:00
20 changed files with 321 additions and 0 deletions

View File

@@ -0,0 +1,22 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/python
{
"name": "Python 3",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye"
// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "pip3 install --user -r requirements.txt",
// Configure tool-specific properties.
// "customizations": {},
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}

1
.gitignore vendored
View File

@@ -212,3 +212,4 @@ pyrightconfig.json
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)
output.csv

27
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,27 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Use Test Resources",
"type": "python",
"request": "launch",
"console": "integratedTerminal",
"args": [
"${workspaceFolder}/tests/resources",
"${workspaceFolder}/output.csv",
"-r",
"-n",
"-p",
"group_num:group(\\d)-\\w-\\d+\\.txt",
"group(\\d)-(?P<sect>\\w)-(?P<patid>\\d+)\\.txt",
"-V",
"DEBUG"
],
"justMyCode": true,
"module": "csvbyname.cli"
}
]
}

42
Jenkinsfile vendored Normal file
View File

@@ -0,0 +1,42 @@
// CI pipeline: install deps, build sdist/wheel, smoke-test the console script,
// archive artifacts, and (on main) publish to the private PyPI registry.
pipeline {
    agent {
        kubernetes {
            cloud 'rsys-devel'
            defaultContainer 'pip'
            inheritFrom 'pip'
        }
    }
    stages {
        stage("install") {
            steps {
                sh 'pip install -r requirements.txt'
            }
        }
        stage("build") {
            steps {
                sh "python -m build"
            }
        }
        stage("test installation") {
            steps {
                sh "pip install dist/*.whl"
                // Smoke test: the entry point must at least print its help.
                sh "csvbyname -h"
            }
        }
        stage("archive") {
            steps {
                archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
            }
        }
        stage("publish package") {
            when {
                branch '**/main'
            }
            steps {
                withCredentials([usernamePassword(credentialsId: '4d6f64be-d26d-4f95-8de3-b6a9b0beb311', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
                    // SECURITY FIX: single-quoted so the shell, not Groovy, expands
                    // $USER/$PASS. Groovy interpolation ("${PASS}") injects the secret
                    // into the command string, exposing it to the build log and the
                    // process list (Jenkins flags this as insecure credential use).
                    sh 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/$USER/pypi -u $USER -p $PASS --non-interactive --disable-progress-bar --verbose dist/*'
                }
            }
        }
    }
}

0
csvbyname/__init__.py Normal file
View File

95
csvbyname/cli.py Normal file
View File

@@ -0,0 +1,95 @@
import argparse
import logging
from csvbyname.generate import collect_files, write_collected_to_csv
def run(args):
    """Collect files from ``args.directory`` and write the catalogue CSV.

    :param args: parsed CLI namespace produced by ``main()``; see the
        ``argparse`` definitions there for the meaning of each attribute.
    """
    # ROBUSTNESS FIX: the original read a ``logger`` module global that only
    # exists after main() assigns it, so importing and calling run() directly
    # raised NameError. getLogger(__name__) returns the very same logger object
    # main() creates, so behavior under the CLI is unchanged.
    logging.getLogger(__name__).info('Collecting files from "%s"', args.directory)
    collected, pkeys = collect_files(
        args.directory,
        args.include_folders,
        args.entire_path,
        args.recursive,
        args.add_re_property,
    )
    write_collected_to_csv(args.output, collected, pkeys, args.output_basename)
def main():
    """Entry point for the ``csvbyname`` console script.

    Parses the command line, configures logging at the requested verbosity,
    and hands the parsed namespace to :func:`run`.
    """
    parser = argparse.ArgumentParser(
        "csvbyname",
        description="Catalogue a directory of files by patterns in their names into a "
        "CSV.",
    )
    parser.add_argument(
        "directory",
        metavar="i",
        type=str,
        help="The directory containing the files to obtain catalogue names of",
    )
    parser.add_argument(
        "output", metavar="o", type=str, help="The path to the catalogued CSVs."
    )
    parser.add_argument(
        "-l",
        "--include-folders",
        action="store_true",
        default=False,
        required=False,
        help="Include folders in the cataloguing process",
    )
    parser.add_argument(
        "-e",
        "--entire-path",
        action="store_true",
        default=False,
        required=False,
        help="Include the full path when applying the groupings to find properties",
    )
    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        default=False,
        required=False,
        help="Catalogue recursively",
    )
    parser.add_argument(
        "-p",
        "--add-re-property",
        nargs="+",
        type=str,
        required=True,
        help="Add a property in the resulting CSV obtained from the first capture "
        "group of the given REGEX in the following format:\n property-name:regex.\n"
        "Alternatively, use named REGEX groups.",
    )
    parser.add_argument(
        "-n",
        "--output-basename",
        action="store_true",
        default=False,
        required=False,
        help='Adds a column called "basename" to the resulting CSV where it is just '
        "the base name of the path instead of the entire path. This is not guaranteed "
        "to be unique.",
    )
    parser.add_argument(
        "-V",
        "--verbosity",
        type=str,
        default="INFO",
        required=False,
        help="Set the verbosity of the logging",
    )
    args = parser.parse_args()
    logging.basicConfig(level=args.verbosity.upper())
    # run() historically reads a module global named ``logger`` that is
    # created here; keep that contract intact for callers of run().
    global logger
    logger = logging.getLogger(__name__)
    run(args)


if __name__ == "__main__":
    main()

2
csvbyname/exceptions.py Normal file
View File

@@ -0,0 +1,2 @@
class InvalidPropertiesException(Exception):
    """Raised when two property regexes capture the same group name for one path."""

    pass

95
csvbyname/generate.py Normal file
View File

@@ -0,0 +1,95 @@
import csv
import os
import re
from typing import Iterable
from csvbyname import exceptions
import logging
logger = logging.getLogger(__name__)
def matcher(full_path: str, use_full_path: bool, regex_groups: list[str]):
    """Extract named properties from a path using the configured regexes.

    Each entry of ``regex_groups`` is tried in one of two modes, in order:

    1. As a regex with *named* capture groups (``(?P<name>...)``) — if it
       matches and defines at least one named group, each group becomes a
       property.
    2. Otherwise as a ``property-name:regex`` pair, where the regex's first
       (positional) capture group supplies the value.

    :param full_path: path of the file/folder being catalogued.
    :param use_full_path: match against the whole path instead of its basename.
    :param regex_groups: raw ``-p``/``--add-re-property`` CLI values.
    :returns: dict mapping property name -> captured string.
    :raises exceptions.InvalidPropertiesException: if two regexes produce the
        same property name for this path.
    """
    captured_properties = {}
    for regex_and_group in regex_groups:
        # Mode 1: assume the pattern itself carries named capture groups.
        match_assume_named = re.match(
            regex_and_group, full_path if use_full_path else os.path.basename(full_path)
        )
        if match_assume_named and len(match_assume_named.groupdict()) > 0:
            for group, val in match_assume_named.groupdict().items():
                if group not in captured_properties:
                    captured_properties[group] = val
                else:
                    raise exceptions.InvalidPropertiesException(
                        f'Duplicate capture group names found: "{group}"'
                    )
        else:
            # Mode 2: "name:regex" form. Split on the first ":" only, so the
            # regex part may itself contain colons.
            unnamed_split = regex_and_group.split(":", 1)
            if len(unnamed_split) < 2:
                # Neither mode applies to this pattern/path pair; skip quietly.
                logger.debug(
                    'File at "%s" could not be matched by regex "%s" '
                    "and will be skipped",
                    full_path,
                    regex_and_group,
                )
                continue
            group, regex = unnamed_split
            unnamed_match = re.match(
                regex, full_path if use_full_path else os.path.basename(full_path)
            )
            if unnamed_match:
                if group not in captured_properties:
                    # NOTE(review): assumes the regex has at least one capture
                    # group — group(1) raises IndexError otherwise.
                    captured_properties[group] = unnamed_match.group(1)
                else:
                    raise exceptions.InvalidPropertiesException(
                        f'Duplicate capture group names found: "{group}"'
                    )
    return captured_properties
def collect_files(
    dir_path: str,
    include_folders: bool,
    entire_path: bool,
    recursive: bool,
    regex_groups: list[str],
):
    """Catalogue ``dir_path``, mapping each entry to its extracted properties.

    :param dir_path: directory to scan.
    :param include_folders: also catalogue sub-folders themselves.
    :param entire_path: match regexes against the full path, not the basename.
    :param recursive: descend into sub-directories.
    :param regex_groups: property regexes passed through to :func:`matcher`.
    :returns: tuple of (``{path: {property: value}}``, set of all property keys).
    """
    collected: dict[str, dict[str, str]] = {}
    pkeys: set[str] = set()
    for item in os.listdir(dir_path):
        full_path = os.path.join(dir_path, item)
        if os.path.isdir(full_path):
            if include_folders:
                collected[full_path] = matcher(full_path, entire_path, regex_groups)
                pkeys.update(collected[full_path])
            if recursive:
                # BUG FIX: collect_files returns a (dict, set) tuple, but the
                # original did ``collected = collected | collect_files(...)``,
                # which raises TypeError (dict | tuple) on the first subdirectory
                # and would also have discarded the subtree's property keys.
                sub_collected, sub_pkeys = collect_files(
                    full_path, include_folders, entire_path, recursive, regex_groups
                )
                collected |= sub_collected
                pkeys.update(sub_pkeys)
        elif os.path.isfile(full_path):
            collected[full_path] = matcher(full_path, entire_path, regex_groups)
            pkeys.update(collected[full_path])
    return collected, pkeys
def write_collected_to_csv(
    output_path: str,
    collected: dict[str, dict[str, str]],
    property_keys: Iterable[str],
    output_basename: bool,
):
    """Write the collected catalogue to a CSV file.

    Columns are ``path``, optionally ``basename``, then the property keys in
    sorted order; missing properties are filled with ``"N/A"``.

    :param output_path: destination CSV path (overwritten if present).
    :param collected: mapping of path -> {property: value} from collect_files.
    :param property_keys: union of all property names seen during collection.
    :param output_basename: include a ``basename`` column (may not be unique).
    """
    sorted_keys = sorted(property_keys)
    columns = ["path"]
    if output_basename:
        columns.append("basename")
    columns.extend(sorted_keys)
    # newline="" lets the csv module control line endings (avoids doubled
    # line breaks on Windows).
    with open(output_path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle)
        writer.writerow(columns)
        for path, props in collected.items():
            record = [path]
            if output_basename:
                record.append(os.path.basename(path))
            record.extend(props.get(key, "N/A") for key in sorted_keys)
            writer.writerow(record)

9
environment.yml Normal file
View File

@@ -0,0 +1,9 @@
name: csvbyname
channels:
- conda-forge
dependencies:
- build=0.7.0
- pytest=7.2.2
- twine=4.0.2
- python=3.11
prefix: ./env

3
pyproject.toml Normal file
View File

@@ -0,0 +1,3 @@
[build-system]
build-backend = "setuptools.build_meta"
requires = ["setuptools", "wheel"]

3
requirements.txt Normal file
View File

@@ -0,0 +1,3 @@
build
pytest
twine

11
setup.cfg Normal file
View File

@@ -0,0 +1,11 @@
[metadata]
name = csvbyname
version = 0.0.6
author = Harrison
[options]
packages = csvbyname
[options.entry_points]
console_scripts =
csvbyname = csvbyname.cli:main

3
setup.py Normal file
View File

@@ -0,0 +1,3 @@
# Shim for legacy tooling; all package metadata lives in setup.cfg.
from setuptools import setup

setup()

1
tests/resources/foo.txt Normal file
View File

@@ -0,0 +1 @@
Text

View File

@@ -0,0 +1 @@
Text

View File

@@ -0,0 +1,2 @@
Text

View File

@@ -0,0 +1 @@
Text

View File

@@ -0,0 +1 @@
Text

View File

@@ -0,0 +1 @@
Text

View File

@@ -0,0 +1 @@
Text