Compare commits
20 Commits
a338f78d07
...
develop
Author | SHA1 | Date | |
---|---|---|---|
21d060be6a | |||
be4d665301 | |||
9e59bc097c | |||
2edd8a2093 | |||
7a400457fe | |||
59cfe486aa | |||
266a611fea | |||
cb36b8adb3 | |||
ded60aa742 | |||
adf734f3c1 | |||
c579c172ef | |||
e5bab5b12d | |||
73ae49cb89 | |||
958e2b12e3 | |||
425ef96e9b | |||
95b60c87a5 | |||
ea374a971e | |||
e64e1a038f | |||
2f170e1088 | |||
f09e0d27fd |
22
.devcontainer/devcontainer.json
Normal file
22
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||||
|
// README at: https://github.com/devcontainers/templates/tree/main/src/python
|
||||||
|
{
|
||||||
|
"name": "Python 3",
|
||||||
|
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
|
||||||
|
"image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye"
|
||||||
|
|
||||||
|
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||||
|
// "features": {},
|
||||||
|
|
||||||
|
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||||
|
// "forwardPorts": [],
|
||||||
|
|
||||||
|
// Use 'postCreateCommand' to run commands after the container is created.
|
||||||
|
// "postCreateCommand": "pip3 install --user -r requirements.txt",
|
||||||
|
|
||||||
|
// Configure tool-specific properties.
|
||||||
|
// "customizations": {},
|
||||||
|
|
||||||
|
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
||||||
|
// "remoteUser": "root"
|
||||||
|
}
|
1
.gitignore
vendored
1
.gitignore
vendored
@@ -212,3 +212,4 @@ pyrightconfig.json
|
|||||||
|
|
||||||
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
|
# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
|
||||||
|
|
||||||
|
output.csv
|
||||||
|
27
.vscode/launch.json
vendored
Normal file
27
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Use Test Resources",
|
||||||
|
"type": "python",
|
||||||
|
"request": "launch",
|
||||||
|
"console": "integratedTerminal",
|
||||||
|
"args": [
|
||||||
|
"${workspaceFolder}/tests/resources",
|
||||||
|
"${workspaceFolder}/output.csv",
|
||||||
|
"-r",
|
||||||
|
"-n",
|
||||||
|
"-p",
|
||||||
|
"group_num:group(\\d)-\\w-\\d+\\.txt",
|
||||||
|
"group(\\d)-(?P<sect>\\w)-(?P<patid>\\d+)\\.txt",
|
||||||
|
"-V",
|
||||||
|
"DEBUG"
|
||||||
|
],
|
||||||
|
"justMyCode": true,
|
||||||
|
"module": "csvbyname.cli"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
42
Jenkinsfile
vendored
Normal file
42
Jenkinsfile
vendored
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
// Build, smoke-test, archive and (on main only) publish the csvbyname package.
pipeline {
    agent {
        kubernetes {
            cloud 'rsys-devel'
            defaultContainer 'pip'
            inheritFrom 'pip'
        }
    }
    stages {
        stage("install") {
            steps {
                sh 'pip install -r requirements.txt'
            }
        }
        stage("build") {
            steps {
                sh "python -m build"
            }
        }
        stage("test installation") {
            steps {
                // Install the freshly built wheel and make sure the console
                // script entry point resolves.
                sh "pip install dist/*.whl"
                sh "csvbyname -h"
            }
        }
        stage("archive") {
            steps {
                archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
            }
        }
        stage("publish package") {
            when {
                branch '**/main'
            }
            steps {
                // Bind the credentials to the variables twine reads from the
                // environment, so the password never appears on the command
                // line and the shell's own USER variable is not shadowed.
                withCredentials([usernamePassword(credentialsId: '4d6f64be-d26d-4f95-8de3-b6a9b0beb311', passwordVariable: 'TWINE_PASSWORD', usernameVariable: 'TWINE_USERNAME')]) {
                    // Single-quoted on purpose: the shell, not Groovy, expands
                    // ${TWINE_USERNAME}. Groovy interpolation of secrets leaks
                    // them into the generated script and process arguments.
                    sh 'python -m twine upload --repository-url "https://git.reslate.systems/api/packages/${TWINE_USERNAME}/pypi" --non-interactive --disable-progress-bar --verbose dist/*'
                }
            }
        }
    }
}
|
0
csvbyname/__init__.py
Normal file
0
csvbyname/__init__.py
Normal file
95
csvbyname/cli.py
Normal file
95
csvbyname/cli.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
import argparse
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from csvbyname.generate import collect_files, write_collected_to_csv
|
||||||
|
|
||||||
|
|
||||||
|
def run(args):
    """Collect the files described by ``args`` and write the catalogue CSV.

    Args:
        args: Parsed command-line namespace. The attributes read are
            ``directory``, ``output``, ``include_folders``, ``entire_path``,
            ``recursive``, ``add_re_property`` and ``output_basename``.
    """
    # Resolve the logger here rather than relying on the module global that
    # only ``main()`` assigns; otherwise calling ``run()`` directly fails
    # with NameError before any work is done.
    log = logging.getLogger(__name__)
    log.info('Collecting files from "%s"', args.directory)
    collected, pkeys = collect_files(
        args.directory,
        args.include_folders,
        args.entire_path,
        args.recursive,
        args.add_re_property,
    )
    write_collected_to_csv(args.output, collected, pkeys, args.output_basename)
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv=None):
    """Parse the command line, configure logging and run the cataloguing.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default,
            and what the console-script entry point uses) argparse reads
            ``sys.argv[1:]``.
    """
    # ``run()`` reads this module-level name, so it is (re)bound below.
    global logger

    argparser = argparse.ArgumentParser(
        "csvbyname",
        description="Catalogue a directory of files by patterns in their names into a "
        "CSV.",
    )
    argparser.add_argument(
        "directory",
        type=str,
        help="The directory containing the files to obtain catalogue names of",
        metavar="i",
    )
    argparser.add_argument(
        "output", type=str, help="The path to the catalogued CSVs.", metavar="o"
    )
    argparser.add_argument(
        "-l",
        "--include-folders",
        help="Include folders in the cataloguing process",
        action="store_true",
        required=False,
        default=False,
    )
    argparser.add_argument(
        "-e",
        "--entire-path",
        help="Include the full path when applying the groupings to find properties",
        action="store_true",
        required=False,
        default=False,
    )
    argparser.add_argument(
        "-r",
        "--recursive",
        help="Catalogue recursively",
        action="store_true",
        required=False,
        default=False,
    )
    argparser.add_argument(
        "-p",
        "--add-re-property",
        help="Add a property in the resulting CSV obtained from the first capture "
        "group of the given REGEX in the following format:\n property-name:regex.\n"
        "Alternatively, use named REGEX groups.",
        nargs="+",
        type=str,
        required=True,
    )
    argparser.add_argument(
        "-n",
        "--output-basename",
        help='Adds a column called "basename" to the resulting CSV where it is just '
        "the base name of the path instead of the entire path. This is not guaranteed "
        "to be unique.",
        default=False,
        required=False,
        action="store_true",
    )
    argparser.add_argument(
        "-V",
        "--verbosity",
        help="Set the verbosity of the logging",
        # Normalise case first, then let argparse reject unknown level names
        # with a usage error instead of a ValueError traceback from logging.
        type=str.upper,
        choices=(
            "CRITICAL",
            "FATAL",
            "ERROR",
            "WARNING",
            "WARN",
            "INFO",
            "DEBUG",
            "NOTSET",
        ),
        metavar="LEVEL",
        required=False,
        default="INFO",
    )

    args = argparser.parse_args(argv)
    logging.basicConfig(level=args.verbosity)
    logger = logging.getLogger(__name__)
    run(args)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
2
csvbyname/exceptions.py
Normal file
2
csvbyname/exceptions.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
class InvalidPropertiesException(Exception):
    """Signal that the requested property definitions are invalid.

    Within this package it is raised when two patterns produce the same
    property name for one path.
    """
|
95
csvbyname/generate.py
Normal file
95
csvbyname/generate.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
import csv
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from typing import Iterable
|
||||||
|
from csvbyname import exceptions
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def matcher(full_path: str, use_full_path: bool, regex_groups: list[str]):
    """Extract properties from a path using the given pattern specs.

    Each spec is first tried as a regex; if it matches and defines named
    groups, every named group becomes a property. Otherwise the spec is read
    as ``property-name:regex`` and the regex's first capture group supplies
    the value for ``property-name``.

    Args:
        full_path: Path of the file or folder being catalogued.
        use_full_path: Match against ``full_path`` when true, otherwise
            against its base name only.
        regex_groups: Pattern specs in either of the two forms above.

    Returns:
        A dict mapping property name to captured text. Specs that do not
        match contribute nothing.

    Raises:
        exceptions.InvalidPropertiesException: If two specs yield the same
            property name, or a ``property-name:regex`` spec matches but its
            regex has no capture group.
    """
    # Loop-invariant: every spec is matched against the same text.
    target = full_path if use_full_path else os.path.basename(full_path)
    captured_properties = {}
    for regex_and_group in regex_groups:
        named_match = re.match(regex_and_group, target)
        named_values = named_match.groupdict() if named_match else {}
        if named_values:
            for group, val in named_values.items():
                if group in captured_properties:
                    raise exceptions.InvalidPropertiesException(
                        f'Duplicate capture group names found: "{group}"'
                    )
                captured_properties[group] = val
            continue

        # Fall back to the "property-name:regex" form.
        group, separator, regex = regex_and_group.partition(":")
        if not separator:
            logger.debug(
                'File at "%s" could not be matched by regex "%s" '
                "and will be skipped",
                full_path,
                regex_and_group,
            )
            continue
        unnamed_match = re.match(regex, target)
        if not unnamed_match:
            # Previously skipped silently; log it like the other skip path.
            logger.debug(
                'File at "%s" could not be matched by regex "%s" '
                "and will be skipped",
                full_path,
                regex_and_group,
            )
            continue
        if group in captured_properties:
            raise exceptions.InvalidPropertiesException(
                f'Duplicate capture group names found: "{group}"'
            )
        if unnamed_match.re.groups < 1:
            # ``group(1)`` would raise a bare IndexError; report the
            # misconfigured spec through the package's own exception.
            raise exceptions.InvalidPropertiesException(
                f'Regex for property "{group}" has no capture group: "{regex}"'
            )
        captured_properties[group] = unnamed_match.group(1)
    return captured_properties
|
||||||
|
|
||||||
|
|
||||||
|
def collect_files(
    dir_path: str,
    include_folders: bool,
    entire_path: bool,
    recursive: bool,
    regex_groups: list[str],
):
    """Catalogue the entries of a directory by the properties in their names.

    Args:
        dir_path: Directory whose entries are listed.
        include_folders: Also catalogue sub-directories themselves.
        entire_path: Match patterns against the full path instead of the
            base name.
        recursive: Descend into sub-directories.
        regex_groups: Pattern specs passed through to ``matcher``.

    Returns:
        A ``(collected, pkeys)`` tuple: ``collected`` maps each catalogued
        path to its property dict, and ``pkeys`` is the set of every property
        name seen.
    """
    collected = {}
    pkeys = set()
    for item in os.listdir(dir_path):
        full_path = os.path.join(dir_path, item)
        if os.path.isdir(full_path):
            if include_folders:
                collected[full_path] = matcher(full_path, entire_path, regex_groups)
                pkeys.update(collected[full_path])
            if recursive:
                # The recursive call returns a (dict, set) pair. Merging it
                # with ``collected | ...`` raised TypeError and dropped the
                # nested property names, so unpack and merge both parts.
                nested, nested_keys = collect_files(
                    full_path, include_folders, entire_path, recursive, regex_groups
                )
                collected.update(nested)
                pkeys.update(nested_keys)
        elif os.path.isfile(full_path):
            collected[full_path] = matcher(full_path, entire_path, regex_groups)
            pkeys.update(collected[full_path])
    return collected, pkeys
|
||||||
|
|
||||||
|
|
||||||
|
def write_collected_to_csv(
    output_path: str,
    collected: dict[str, dict[str, str]],
    property_keys: Iterable[str],
    output_basename: bool,
):
    """Write the catalogued paths and their properties to a CSV file.

    The header is ``path``, optionally ``basename``, then the property names
    in sorted order. Each catalogued path gets one row; a property the path
    does not have is written as ``N/A``.

    Args:
        output_path: Destination CSV file, overwritten if it exists.
        collected: Mapping of path to that path's property dict.
        property_keys: All property names to emit as columns.
        output_basename: Add a ``basename`` column after ``path``.
    """
    with open(output_path, "w", newline="", encoding="utf-8") as csv_file:
        columns = sorted(property_keys)
        leading = ["path", "basename"] if output_basename else ["path"]
        out = csv.writer(csv_file)
        out.writerow(leading + columns)
        for path, props in collected.items():
            record = [path]
            if output_basename:
                record.append(os.path.basename(path))
            for column in columns:
                record.append(props.get(column, "N/A"))
            out.writerow(record)
|
9
environment.yml
Normal file
9
environment.yml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
name: csvbyname
|
||||||
|
channels:
|
||||||
|
- conda-forge
|
||||||
|
dependencies:
|
||||||
|
- build=0.7.0
|
||||||
|
- pytest=7.2.2
|
||||||
|
- twine=4.0.2
|
||||||
|
- python=3.11
|
||||||
|
prefix: ./env
|
3
pyproject.toml
Normal file
3
pyproject.toml
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
[build-system]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
requires = ["setuptools", "wheel"]
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
build
|
||||||
|
pytest
|
||||||
|
twine
|
11
setup.cfg
Normal file
11
setup.cfg
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
[metadata]
|
||||||
|
name = csvbyname
|
||||||
|
version = 0.0.6
|
||||||
|
author = Harrison
|
||||||
|
|
||||||
|
[options]
|
||||||
|
packages = csvbyname
|
||||||
|
|
||||||
|
[options.entry_points]
|
||||||
|
console_scripts =
|
||||||
|
csvbyname = csvbyname.cli:main
|
1
tests/resources/foo.txt
Normal file
1
tests/resources/foo.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Text
|
1
tests/resources/group1-a-11.txt
Normal file
1
tests/resources/group1-a-11.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Text
|
2
tests/resources/group1-a-12.txt
Normal file
2
tests/resources/group1-a-12.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
|
||||||
|
Text
|
1
tests/resources/group1-a-13.txt
Normal file
1
tests/resources/group1-a-13.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Text
|
1
tests/resources/group1-b-10.txt
Normal file
1
tests/resources/group1-b-10.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Text
|
1
tests/resources/group1-b-11.txt
Normal file
1
tests/resources/group1-b-11.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Text
|
1
tests/resources/group1-b-9.txt
Normal file
1
tests/resources/group1-b-9.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Text
|
Reference in New Issue
Block a user