Compare commits

...

20 Commits

Author SHA1 Message Date
21d060be6a Updated CI config to match server CI
All checks were successful
csvbyname/pipeline/head This commit looks good
2024-11-14 20:48:06 +00:00
be4d665301 Updated pipeline to take advantage of latest build container image
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-05-03 08:32:04 -05:00
9e59bc097c Fixed typo in argument help
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-23 15:28:10 -05:00
2edd8a2093 Bumped package version
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-23 14:56:48 -05:00
7a400457fe Fixed double line breaks in output on Windows
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-23 14:56:29 -05:00
59cfe486aa Bumped package version
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-23 14:44:03 -05:00
266a611fea Fixed inconsistent CSV writing function
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-23 14:42:44 -05:00
cb36b8adb3 Version bump
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 15:56:08 -05:00
ded60aa742 Added step to test if command is runnable
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 15:53:07 -05:00
adf734f3c1 Added feature to add basename column to output
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 15:52:20 -05:00
c579c172ef Bumped version number
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 13:55:57 -05:00
e5bab5b12d Fixed packaging
Some checks failed
ydeng/csvbyname/pipeline/head There was a failure building this commit
2023-04-21 13:51:12 -05:00
73ae49cb89 Bumped package version number
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 11:21:03 -05:00
958e2b12e3 Added 'archive' stage to Jenkins pipeline
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 11:20:03 -05:00
425ef96e9b Updated '.gitignore' to ignore 'output.csv' 2023-04-21 11:19:46 -05:00
95b60c87a5 Bumped python version in 'environment.yml' 2023-04-21 11:19:27 -05:00
ea374a971e Updated pipeline to use branch 'main' as publishing branch
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 11:11:54 -05:00
e64e1a038f Completed basic CLI program
All checks were successful
ydeng/csvbyname/pipeline/head This commit looks good
2023-04-21 11:08:46 -05:00
2f170e1088 Began implementing the file collection function 2023-04-20 23:40:39 -05:00
f09e0d27fd Added some CLI arguments and program structure 2023-04-20 16:28:54 -05:00
20 changed files with 321 additions and 0 deletions

View File

@@ -0,0 +1,22 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/python
{
"name": "Python 3",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye"
// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "pip3 install --user -r requirements.txt",
// Configure tool-specific properties.
// "customizations": {},
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}

1
.gitignore vendored
View File

@@ -212,3 +212,4 @@ pyrightconfig.json
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)
output.csv

27
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,27 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Use Test Resources",
"type": "python",
"request": "launch",
"console": "integratedTerminal",
"args": [
"${workspaceFolder}/tests/resources",
"${workspaceFolder}/output.csv",
"-r",
"-n",
"-p",
"group_num:group(\\d)-\\w-\\d+\\.txt",
"group(\\d)-(?P<sect>\\w)-(?P<patid>\\d+)\\.txt",
"-V",
"DEBUG"
],
"justMyCode": true,
"module": "csvbyname.cli"
}
]
}

42
Jenkinsfile vendored Normal file
View File

@@ -0,0 +1,42 @@
// CI pipeline: install deps, build sdist/wheel, smoke-test the console script,
// archive artifacts, and (on main) publish to the private PyPI registry.
pipeline {
    agent {
        kubernetes {
            cloud 'rsys-devel'
            defaultContainer 'pip'
            inheritFrom 'pip'
        }
    }
    stages {
        stage("install") {
            steps {
                sh 'pip install -r requirements.txt'
            }
        }
        stage("build") {
            steps {
                sh "python -m build"
            }
        }
        stage("test installation") {
            steps {
                sh "pip install dist/*.whl"
                // Smoke test: the entry point must at least print its help.
                sh "csvbyname -h"
            }
        }
        stage("archive") {
            steps {
                archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
            }
        }
        stage("publish package") {
            when {
                branch '**/main'
            }
            steps {
                withCredentials([usernamePassword(credentialsId: '4d6f64be-d26d-4f95-8de3-b6a9b0beb311', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
                    // SECURITY FIX: single-quoted so the shell, not Groovy, expands
                    // $USER/$PASS. Groovy interpolation ("${PASS}") injects the secret
                    // into the command string, exposing it to the build log and the
                    // process list (Jenkins flags this as insecure credential use).
                    sh 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/$USER/pypi -u $USER -p $PASS --non-interactive --disable-progress-bar --verbose dist/*'
                }
            }
        }
    }
}

0
csvbyname/__init__.py Normal file
View File

95
csvbyname/cli.py Normal file
View File

@@ -0,0 +1,95 @@
import argparse
import logging
from csvbyname.generate import collect_files, write_collected_to_csv
def run(args):
    """Collect files from ``args.directory`` and write the catalogue CSV.

    :param args: parsed CLI namespace produced by ``main()``; see the
        ``argparse`` definitions there for the meaning of each attribute.
    """
    # ROBUSTNESS FIX: the original read a ``logger`` module global that only
    # exists after main() assigns it, so importing and calling run() directly
    # raised NameError. getLogger(__name__) returns the very same logger object
    # main() creates, so behavior under the CLI is unchanged.
    logging.getLogger(__name__).info('Collecting files from "%s"', args.directory)
    collected, pkeys = collect_files(
        args.directory,
        args.include_folders,
        args.entire_path,
        args.recursive,
        args.add_re_property,
    )
    write_collected_to_csv(args.output, collected, pkeys, args.output_basename)
def main():
    """Entry point for the ``csvbyname`` console script.

    Parses the command line, configures logging at the requested verbosity,
    and hands the parsed namespace to :func:`run`.
    """
    parser = argparse.ArgumentParser(
        "csvbyname",
        description="Catalogue a directory of files by patterns in their names into a "
        "CSV.",
    )
    parser.add_argument(
        "directory",
        metavar="i",
        type=str,
        help="The directory containing the files to obtain catalogue names of",
    )
    parser.add_argument(
        "output", metavar="o", type=str, help="The path to the catalogued CSVs."
    )
    parser.add_argument(
        "-l",
        "--include-folders",
        action="store_true",
        default=False,
        required=False,
        help="Include folders in the cataloguing process",
    )
    parser.add_argument(
        "-e",
        "--entire-path",
        action="store_true",
        default=False,
        required=False,
        help="Include the full path when applying the groupings to find properties",
    )
    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        default=False,
        required=False,
        help="Catalogue recursively",
    )
    parser.add_argument(
        "-p",
        "--add-re-property",
        nargs="+",
        type=str,
        required=True,
        help="Add a property in the resulting CSV obtained from the first capture "
        "group of the given REGEX in the following format:\n property-name:regex.\n"
        "Alternatively, use named REGEX groups.",
    )
    parser.add_argument(
        "-n",
        "--output-basename",
        action="store_true",
        default=False,
        required=False,
        help='Adds a column called "basename" to the resulting CSV where it is just '
        "the base name of the path instead of the entire path. This is not guaranteed "
        "to be unique.",
    )
    parser.add_argument(
        "-V",
        "--verbosity",
        type=str,
        default="INFO",
        required=False,
        help="Set the verbosity of the logging",
    )
    args = parser.parse_args()
    logging.basicConfig(level=args.verbosity.upper())
    # run() historically reads a module global named ``logger`` that is
    # created here; keep that contract intact for callers of run().
    global logger
    logger = logging.getLogger(__name__)
    run(args)


if __name__ == "__main__":
    main()

2
csvbyname/exceptions.py Normal file
View File

@@ -0,0 +1,2 @@
class InvalidPropertiesException(Exception):
    """Raised when two property regexes capture the same group name for one path."""

    pass

95
csvbyname/generate.py Normal file
View File

@@ -0,0 +1,95 @@
import csv
import os
import re
from typing import Iterable
from csvbyname import exceptions
import logging
logger = logging.getLogger(__name__)
def matcher(full_path: str, use_full_path: bool, regex_groups: list[str]):
    """Extract named properties from a path using the configured regexes.

    Each entry of ``regex_groups`` is tried in one of two modes, in order:

    1. As a regex with *named* capture groups (``(?P<name>...)``) — if it
       matches and defines at least one named group, each group becomes a
       property.
    2. Otherwise as a ``property-name:regex`` pair, where the regex's first
       (positional) capture group supplies the value.

    :param full_path: path of the file/folder being catalogued.
    :param use_full_path: match against the whole path instead of its basename.
    :param regex_groups: raw ``-p``/``--add-re-property`` CLI values.
    :returns: dict mapping property name -> captured string.
    :raises exceptions.InvalidPropertiesException: if two regexes produce the
        same property name for this path.
    """
    captured_properties = {}
    for regex_and_group in regex_groups:
        # Mode 1: assume the pattern itself carries named capture groups.
        match_assume_named = re.match(
            regex_and_group, full_path if use_full_path else os.path.basename(full_path)
        )
        if match_assume_named and len(match_assume_named.groupdict()) > 0:
            for group, val in match_assume_named.groupdict().items():
                if group not in captured_properties:
                    captured_properties[group] = val
                else:
                    raise exceptions.InvalidPropertiesException(
                        f'Duplicate capture group names found: "{group}"'
                    )
        else:
            # Mode 2: "name:regex" form. Split on the first ":" only, so the
            # regex part may itself contain colons.
            unnamed_split = regex_and_group.split(":", 1)
            if len(unnamed_split) < 2:
                # Neither mode applies to this pattern/path pair; skip quietly.
                logger.debug(
                    'File at "%s" could not be matched by regex "%s" '
                    "and will be skipped",
                    full_path,
                    regex_and_group,
                )
                continue
            group, regex = unnamed_split
            unnamed_match = re.match(
                regex, full_path if use_full_path else os.path.basename(full_path)
            )
            if unnamed_match:
                if group not in captured_properties:
                    # NOTE(review): assumes the regex has at least one capture
                    # group — group(1) raises IndexError otherwise.
                    captured_properties[group] = unnamed_match.group(1)
                else:
                    raise exceptions.InvalidPropertiesException(
                        f'Duplicate capture group names found: "{group}"'
                    )
    return captured_properties
def collect_files(
    dir_path: str,
    include_folders: bool,
    entire_path: bool,
    recursive: bool,
    regex_groups: list[str],
):
    """Catalogue ``dir_path``, mapping each entry to its extracted properties.

    :param dir_path: directory to scan.
    :param include_folders: also catalogue sub-folders themselves.
    :param entire_path: match regexes against the full path, not the basename.
    :param recursive: descend into sub-directories.
    :param regex_groups: property regexes passed through to :func:`matcher`.
    :returns: tuple of (``{path: {property: value}}``, set of all property keys).
    """
    collected: dict[str, dict[str, str]] = {}
    pkeys: set[str] = set()
    for item in os.listdir(dir_path):
        full_path = os.path.join(dir_path, item)
        if os.path.isdir(full_path):
            if include_folders:
                collected[full_path] = matcher(full_path, entire_path, regex_groups)
                pkeys.update(collected[full_path])
            if recursive:
                # BUG FIX: collect_files returns a (dict, set) tuple, but the
                # original did ``collected = collected | collect_files(...)``,
                # which raises TypeError (dict | tuple) on the first subdirectory
                # and would also have discarded the subtree's property keys.
                sub_collected, sub_pkeys = collect_files(
                    full_path, include_folders, entire_path, recursive, regex_groups
                )
                collected |= sub_collected
                pkeys.update(sub_pkeys)
        elif os.path.isfile(full_path):
            collected[full_path] = matcher(full_path, entire_path, regex_groups)
            pkeys.update(collected[full_path])
    return collected, pkeys
def write_collected_to_csv(
    output_path: str,
    collected: dict[str, dict[str, str]],
    property_keys: Iterable[str],
    output_basename: bool,
):
    """Write the collected catalogue to a CSV file.

    Columns are ``path``, optionally ``basename``, then the property keys in
    sorted order; missing properties are filled with ``"N/A"``.

    :param output_path: destination CSV path (overwritten if present).
    :param collected: mapping of path -> {property: value} from collect_files.
    :param property_keys: union of all property names seen during collection.
    :param output_basename: include a ``basename`` column (may not be unique).
    """
    sorted_keys = sorted(property_keys)
    columns = ["path"]
    if output_basename:
        columns.append("basename")
    columns.extend(sorted_keys)
    # newline="" lets the csv module control line endings (avoids doubled
    # line breaks on Windows).
    with open(output_path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle)
        writer.writerow(columns)
        for path, props in collected.items():
            record = [path]
            if output_basename:
                record.append(os.path.basename(path))
            record.extend(props.get(key, "N/A") for key in sorted_keys)
            writer.writerow(record)

9
environment.yml Normal file
View File

@@ -0,0 +1,9 @@
name: csvbyname
channels:
- conda-forge
dependencies:
- build=0.7.0
- pytest=7.2.2
- twine=4.0.2
- python=3.11
prefix: ./env

3
pyproject.toml Normal file
View File

@@ -0,0 +1,3 @@
[build-system]
build-backend = "setuptools.build_meta"
requires = ["setuptools", "wheel"]

3
requirements.txt Normal file
View File

@@ -0,0 +1,3 @@
build
pytest
twine

11
setup.cfg Normal file
View File

@@ -0,0 +1,11 @@
[metadata]
name = csvbyname
version = 0.0.6
author = Harrison
[options]
packages = csvbyname
[options.entry_points]
console_scripts =
csvbyname = csvbyname.cli:main

3
setup.py Normal file
View File

@@ -0,0 +1,3 @@
# Shim for legacy tooling; all package metadata lives in setup.cfg.
from setuptools import setup

setup()

1
tests/resources/foo.txt Normal file
View File

@@ -0,0 +1 @@
Text

View File

@@ -0,0 +1 @@
Text

View File

@@ -0,0 +1,2 @@
Text

View File

@@ -0,0 +1 @@
Text

View File

@@ -0,0 +1 @@
Text

View File

@@ -0,0 +1 @@
Text

View File

@@ -0,0 +1 @@
Text