Completed basic CLI program

2023-04-21 11:03:28 -05:00
parent 2f170e1088
commit e64e1a038f
11 changed files with 181 additions and 71 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -0,0 +1,26 @@
 {
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Use Test Resources",
            "type": "python",
            "request": "launch",
            "program": "${workspaceFolder}/csvbyname/csvbyname.py",
            "console": "integratedTerminal",
            "args": [
                "${workspaceFolder}/tests/resources",
                "${workspaceFolder}/output.csv",
                "-r",
                "-p",
                "group_num:group(\\d)-\\w-\\d+\\.txt",
                "group(\\d)-(?P<sect>\\w)-(?P<patid>\\d+)\\.txt",
                "-V",
                "DEBUG"
            ],
            "justMyCode": true
        }
    ]
 }
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,6 +1,3 @@
 {
-    "python.formatting.provider": "none",
+    "python.formatting.provider": "black"
    "[python]": {
        "editor.defaultFormatter": "ms-python.black-formatter"
    }
 }
--- a/36
+++ b/36
@@ -0,0 +1,36 @@
 pipeline {
    agent any
    stages {
        stage("clean") {
            steps {
                sh 'rm -rf ./dist/*'
            }
        }
        stage("install") {
            steps {
                sh 'mamba env update --file environment.yml'
                sh 'echo "mamba activate csvbyname" >> ~/.bashrc'
            }
        }
        stage("build") {
            steps {
                sh "python -m build"
            }
        }
        stage("test") {
            steps {
                sh "pip install dist/*.whl"
            }
        }
        stage("publish") {
            when {
                branch '**/master'
            }
            steps {
                withCredentials([usernamePassword(credentialsId: 'rs-git-package-registry-ydeng', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
                    sh "python -m twine upload --repository-url https://git.reslate.systems/api/packages/${USER}/pypi -u ${USER} -p ${PASS} --non-interactive --disable-progress-bar --verbose dist/*"
                }
            }
        }
    }
 }
--- a/csvbyname/__init__.py
+++ b/csvbyname/__init__.py
--- a/csvbyname/csvbyname.py
+++ b/csvbyname/csvbyname.py
@@ -1,12 +1,48 @@
 import argparse
 import csv
 import os
 import re
 from typing import Iterable
 import exceptions
 import logging
-def matcher(path: str, regex_groups: list[str]):
+def matcher(full_path: str, use_full_path: bool, regex_groups: list[str]):
-    matches = []
+    captured_properties = {}
-    for regex in regex_groups:
+    for regex_and_group in regex_groups:
-        matches[path]
+        match_assume_named = re.match(
            regex_and_group, full_path if use_full_path else os.path.basename(full_path)
        )
        if match_assume_named and len(match_assume_named.groupdict()) > 0:
            for group, val in match_assume_named.groupdict().items():
                if group not in captured_properties:
                    captured_properties[group] = val
                else:
                    raise exceptions.InvalidPropertiesException(
                        f'Duplicate capture group names found: "{group}"'
                    )
        else:
            unnamed_split = regex_and_group.split(":", 1)
            if len(unnamed_split) < 2:
                logger.debug(
                    'File at "%s" could not be matched by regex "%s" '
                    "and will be skipped",
                    full_path,
                    regex_and_group,
                )
                continue
            group, regex = unnamed_split
            unnamed_match = re.match(
                regex, full_path if use_full_path else os.path.basename(full_path)
            )
            if unnamed_match:
                if group not in captured_properties:
                    captured_properties[group] = unnamed_match.group(1)
                else:
                    raise exceptions.InvalidPropertiesException(
                        f'Duplicate capture group names found: "{group}"'
                    )
    return captured_properties
 def collect_files(
@@ -17,44 +53,53 @@ def collect_files(
    regex_groups: list[str],
 ):
    collected = {}
-
+    pkeys = set()
    def matcher(full_path, use_full_path):
        return [
            re.match(
                regex, full_path if use_full_path else os.path.basename(full_path)
            ).groups(1)
            for regex in regex_groups
        ]
    for item in os.listdir(dir_path):
        full_path = os.path.join(dir_path, item)
        if os.path.isdir(full_path):
            if include_folders:
-                if full_path not in collected:
+                collected[full_path] = matcher(full_path, entire_path, regex_groups)
-                    collected[full_path] = set()
+                pkeys.update(collected[full_path])
-                collected = collected[full_path] | matcher(full_path, entire_path)
+            if recursive:
-            collected = collected | collect_files(
+                collected = collected | collect_files(
-                full_path, include_folders, entire_path, recursive, regex_groups
+                    full_path, include_folders, entire_path, recursive, regex_groups
-            )
+                )
        elif os.path.isfile(full_path):
-            if full_path not in collected:
+            collected[full_path] = matcher(full_path, entire_path, regex_groups)
-                collected[full_path] = set()
+            pkeys.update(collected[full_path])
-            collected = collected[full_path] | matcher(full_path, entire_path)
+    return collected, pkeys
-def write_collected_to_csv(output_path: str, collected: dict[str, dict[str, str]]):
+def write_collected_to_csv(
-    # TODO Finish writing collected files/paths to CSV.
+    output_path: str, collected: dict[str, dict[str, str]], property_keys: Iterable[str]
-    pass
+):
    with open(output_path, "w") as output_fd:
        s_property_keys = sorted(property_keys)
        header = ["path", *s_property_keys]
        writer = csv.writer(output_fd)
        writer.writerow(header)
        for full_path, properties in collected.items():
            writer.writerow(
                [
                    full_path,
                    *(
                        properties[k] if k in properties else "N/A"
                        for k in s_property_keys
                    ),
                ]
            )
 def run(args):
-    collect_files(
+    logger.info('Collecting files from "%s"', args.directory)
    collected, pkeys = collect_files(
        args.directory,
        args.include_folders,
        args.entire_path,
        args.recursive,
-        args.add_regex_property,
+        args.add_re_property,
    )
    write_collected_to_csv(args.output, collected, pkeys)
 def main():
@@ -74,7 +119,7 @@ def main():
    )
    argparser.add_argument(
        "-l",
-        "--include-folder",
+        "--include-folders",
        help="Include folders in the cataloguing process",
        action="store_true",
        required=False,
@@ -98,11 +143,28 @@ def main():
    )
    argparser.add_argument(
        "-p",
-        "--add-regex-property",
+        "--add-re-property",
        help="Add a property in the resulting CSV obtained from the first capture "
-        "group of the given REGEX in the following format:\n property-name:regex",
+        "group of the given REGEX in the following format:\n property-name:regex.\n"
        "Alternatively, use named REGEX groups.",
        nargs="+",
        type=str,
    )
    argparser.add_argument(
        "-V",
        "--verbosity",
        help="Set the verbosity of the logging",
        type=str,
        required=False,
        default="INFO",
    )
    args = argparser.parse_args()
    logging.basicConfig(level=args.verbosity.upper())
    global logger
    logger = logging.getLogger(__name__)
    run(args)
 if __name__ == "__main__":
    main()
--- a/csvbyname/exceptions.py
+++ b/csvbyname/exceptions.py
@@ -0,0 +1,2 @@
 class InvalidPropertiesException(Exception):
    pass
--- a/environment.yaml
+++ b/environment.yaml
@@ -1,37 +0,0 @@
 name: /home/ydeng/csvbyname/env
 channels:
  - conda-forge
 dependencies:
  - _libgcc_mutex=0.1=conda_forge
  - _openmp_mutex=4.5=2_gnu
  - black=23.3.0=py311h38be061_0
  - bzip2=1.0.8=h7f98852_4
  - ca-certificates=2022.12.7=ha878542_0
  - click=8.1.3=unix_pyhd8ed1ab_2
  - ld_impl_linux-64=2.40=h41732ed_0
  - libexpat=2.5.0=hcb278e6_1
  - libffi=3.4.2=h7f98852_5
  - libgcc-ng=12.2.0=h65d4601_19
  - libgomp=12.2.0=h65d4601_19
  - libnsl=2.0.0=h7f98852_0
  - libsqlite=3.40.0=h753d276_0
  - libuuid=2.38.1=h0b41bf4_0
  - libzlib=1.2.13=h166bdaf_4
  - mypy_extensions=1.0.0=pyha770c72_0
  - ncurses=6.3=h27087fc_1
  - openssl=3.1.0=h0b41bf4_0
  - packaging=23.1=pyhd8ed1ab_0
  - pathspec=0.11.1=pyhd8ed1ab_0
  - pip=23.1=pyhd8ed1ab_0
  - platformdirs=3.2.0=pyhd8ed1ab_0
  - python=3.11.3=h2755cc3_0_cpython
  - python_abi=3.11=3_cp311
  - readline=8.2=h8228510_1
  - setuptools=67.6.1=pyhd8ed1ab_0
  - tk=8.6.12=h27826a3_0
  - typing-extensions=4.5.0=hd8ed1ab_0
  - typing_extensions=4.5.0=pyha770c72_0
  - tzdata=2023c=h71feb2d_0
  - wheel=0.40.0=pyhd8ed1ab_0
  - xz=5.2.6=h166bdaf_0
 prefix: /home/ydeng/csvbyname/env
--- a/environment.yml
+++ b/environment.yml
@@ -0,0 +1,8 @@
 name: csvbyname
 channels:
  - conda-forge
 dependencies:
  - build=0.7.0
  - pytest=7.2.2
  - twine=4.0.2
  - python=3.9
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
 [build-system]
 build-backend = "setuptools.build_meta"
 requires = ["setuptools", "wheel"]
--- a/setup.cfg
+++ b/setup.cfg
@@ -0,0 +1,10 @@
 [metadata]
 name = csvbyname
 version = 0.0.1
 [options]
 packages = csvbyname
 [options.entry_points]
 console_scripts =
    csvbyname = csvbyname.csvbyname:main
--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,3 @@
 from setuptools import setup
 setup()
		`@@ -0,0 +1,2 @@`
							`class InvalidPropertiesException(Exception):`
							`pass`