commit 867686fe9856b4df746efe02095afdc7fbfb0a81 Author: Harrison Date: Wed Apr 5 12:24:58 2023 -0500 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a31886c --- /dev/null +++ b/.gitignore @@ -0,0 +1,214 @@ +# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig +# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python +# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,linux,python + +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +### VisualStudioCode ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python + +# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option) + diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..de288e1 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.formatting.provider": "black" +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 
#!/usr/bin/env python3
"""renamebycsv: rename files by using a CSV spreadsheet as a dictionary.

A simple program that renames files by using a spreadsheet in CSV format as a
dictionary for the files to be renamed.

Features:

- Rename files (optionally recursively) within a directory to a desired string.
- The desired string is set by a CSV where one column holds the original
  string and another column holds the string that replaces it.
- A regex selects the files: its first capture group is the key looked up in
  the CSV, and the text matched by the regex is what gets replaced.
"""

# Style: the project formats with black and configures flake8 in tox.ini with
# max-line-length = 88 and extend-ignore = E203.

import argparse
import csv
import logging
import os
import re
from typing import Iterable, Optional, Sequence


def find_all_candidates(
    input_dir: str, regex: str, recursive: bool
) -> list[tuple[str, str, re.Match]]:
    """Collect the entries of *input_dir* whose name matches *regex*.

    Args:
        input_dir: Directory to scan.
        regex: Pattern applied with ``re.match`` (anchored at the start of the
            entry name, not the full path).
        recursive: When true, sub-directories are descended into instead of
            being tested against the pattern themselves.

    Returns:
        A list of ``(path, name, match)`` tuples, one per matching entry.
        Entries are visited in sorted name order so that results are
        deterministic across file systems.
    """
    results = []
    for subitem in sorted(os.listdir(input_dir)):
        subitem_path = os.path.join(input_dir, subitem)
        if recursive and os.path.isdir(subitem_path):
            logging.debug('Checking directory "%s"...', subitem)
            results.extend(find_all_candidates(subitem_path, regex, recursive))
            continue
        match = re.match(regex, subitem)
        if not match:
            logging.debug('Ignoring "%s"...', subitem)
            continue
        results.append((subitem_path, subitem, match))
        logging.debug('Collecting "%s"...', subitem)
    return results


def _load_replacements(csv_path: str, current: str, become: str) -> dict[str, str]:
    """Read the CSV and map every *current* cell to its *become* cell."""
    replacements: dict[str, str] = {}
    # newline="" is what the csv module expects so that it can handle quoted
    # and platform-specific line endings itself.
    with open(csv_path, "r", newline="") as csv_fd:
        reader = csv.reader(csv_fd)
        header = next(reader, None)
        if header is None:
            # Empty file: nothing to map.
            return replacements
        missing = [name for name in (current, become) if name not in header]
        if missing:
            raise ValueError(
                f'Column(s) {missing} not found in the header of "{csv_path}".'
            )
        current_col = header.index(current)
        become_col = header.index(become)
        needed = max(current_col, become_col) + 1
        for row in reader:
            if not row:
                # Blank lines are yielded as empty rows; they carry no data.
                continue
            if len(row) < needed:
                logging.warning(
                    "Skipping CSV line %d: expected at least %d columns, got %d.",
                    reader.line_num,
                    needed,
                    len(row),
                )
                continue
            key, value = row[current_col], row[become_col]
            # Repeating a key is fine as long as it maps to the same value.
            if replacements.get(key, value) != value:
                raise ValueError("Duplicate current key.")
            replacements[key] = value
    return replacements


def rename(
    csv_path: str,
    candidates: Iterable[tuple[str, str, re.Match]],
    current: str,
    become: str,
    dry: bool,
    force: bool = False,
):
    """Rename every candidate according to the CSV dictionary.

    For each candidate, the first capture group of its match is looked up in
    the *current* column; the text matched by the regex inside the file name
    is then replaced by the corresponding *become* cell (taken literally).

    Args:
        csv_path: Path of the CSV file holding the dictionary.
        candidates: ``(path, name, match)`` tuples as produced by
            ``find_all_candidates``.
        current: Header of the column holding the text captured by the regex.
        become: Header of the column holding the replacement text.
        dry: When true, only log what would happen.
        force: When true, overwrite an existing destination instead of
            stopping.

    Raises:
        ValueError: If a header is missing from the CSV or a key maps to two
            different replacements.
        SystemExit: With status 1 if a destination exists and *force* is
            false.
    """
    replacement_dict = _load_replacements(csv_path, current, become)
    for subitem_path, subitem, match in candidates:
        key = match.group(1)
        if key not in replacement_dict:
            logging.warning(
                'No replacement for "%s" (from "%s") in the CSV, skipping.',
                key,
                subitem_path,
            )
            continue
        replacement = replacement_dict[key]
        # A callable replacement keeps the CSV text literal; a plain string
        # would be parsed as a template (backslashes, \1, \g<name>, ...).
        new_name = match.re.sub(lambda _match: replacement, subitem)
        objective = os.path.join(os.path.dirname(subitem_path), new_name)
        if objective == subitem_path:
            logging.debug('"%s" already has its target name.', subitem_path)
            continue
        logging.info(
            'Will rename "%s" to "%s"', subitem_path, os.path.basename(objective)
        )
        if os.path.exists(objective):
            if not force:
                logging.error(
                    'Path at "%s" exists, not continuing. '
                    "Use -f to overwrite instead of stopping.",
                    objective,
                )
                raise SystemExit(1)
            logging.warning('Path at "%s" exists and will be overwritten.', objective)
        if dry:
            continue
        if force:
            # os.replace overwrites an existing file on every platform.
            os.replace(subitem_path, objective)
        else:
            os.rename(subitem_path, objective)
    if dry:
        logging.info("No file names were modified.")


def main(argv: Optional[Sequence[str]] = None):
    """Parse the command line and run the renaming.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.
    """
    program_name = "renamebycsv"
    # Keyword arguments matter here: the second positional parameter of
    # ArgumentParser is `usage`, not `description`.
    argparser = argparse.ArgumentParser(
        prog=program_name,
        description="Rename all files by using a CSV as a dictionary.",
    )
    argparser.add_argument(
        "input_dir",
        help="The directory containing the items that is to be renamed.",
        metavar="I",
    )
    argparser.add_argument(
        "regex",
        help="The regex to apply to each file name. The first capture group is used to "
        "perform the replacement.",
        metavar="R",
    )
    argparser.add_argument(
        "csv",
        help="The CSV to use as the dictionary for the substitutions in file name.",
        metavar="C",
    )
    argparser.add_argument(
        "current",
        help="The column header to look for the text matched by the regex.",
        metavar="F",
    )
    argparser.add_argument(
        "become", help="The column header to replace the regex match.", metavar="T"
    )
    argparser.add_argument(
        "-r",
        "--recursive",
        help="Perform renaming action recursively",
        action="store_true",
    )
    argparser.add_argument(
        "-f",
        "--force",
        help="Overwrite files if file already exists",
        action="store_true",
    )
    argparser.add_argument(
        "-d", "--dry", help="Do not perform any renames", action="store_true"
    )
    argparser.add_argument(
        "-V",
        "--verbosity",
        help="Set the logging verbosity",
        required=False,
        type=str,
        default="INFO",
    )

    args = argparser.parse_args(argv)
    try:
        logging.basicConfig(
            format="[%(filename)s %(asctime)s - %(levelname)s] %(message)s",
            level=args.verbosity.upper(),
        )
    except ValueError:
        argparser.error(f'Unknown verbosity level "{args.verbosity}".')

    # Fail early with a usage error rather than a traceback mid-run.
    try:
        pattern = re.compile(args.regex)
    except re.error as err:
        argparser.error(f"Invalid regex: {err}")
    if pattern.groups < 1:
        argparser.error("The regex must contain at least one capture group.")

    candidates = find_all_candidates(args.input_dir, args.regex, args.recursive)
    rename(args.csv, candidates, args.current, args.become, args.dry, args.force)


if __name__ == "__main__":
    main()