Compare commits
13 Commits
a3bb168c14
...
develop
Author | SHA1 | Date | |
---|---|---|---|
3e85185b1a | |||
eacb730961 | |||
844cf4b2de | |||
434f969556 | |||
d98801dd66 | |||
83639a10e2 | |||
34e5b107ff | |||
70af81ed84 | |||
b745915e49 | |||
c6d79c9eb1 | |||
682503a24a | |||
4bf334c9d5 | |||
90a1db4f0c |
22
.devcontainer/devcontainer.json
Normal file
22
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||||
|
// README at: https://github.com/devcontainers/templates/tree/main/src/python
|
||||||
|
{
|
||||||
|
"name": "Python 3",
|
||||||
|
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
|
||||||
|
"image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye"
|
||||||
|
|
||||||
|
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||||
|
// "features": {},
|
||||||
|
|
||||||
|
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||||
|
// "forwardPorts": [],
|
||||||
|
|
||||||
|
// Use 'postCreateCommand' to run commands after the container is created.
|
||||||
|
// "postCreateCommand": "pip3 install --user -r requirements.txt",
|
||||||
|
|
||||||
|
// Configure tool-specific properties.
|
||||||
|
// "customizations": {},
|
||||||
|
|
||||||
|
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
||||||
|
// "remoteUser": "root"
|
||||||
|
}
|
26
Jenkinsfile
vendored
26
Jenkinsfile
vendored
@@ -1,19 +1,20 @@
|
|||||||
pipeline {
|
pipeline {
|
||||||
agent any
|
agent {
|
||||||
|
kubernetes {
|
||||||
|
cloud 'rsys-devel'
|
||||||
|
defaultContainer 'pip'
|
||||||
|
inheritFrom 'pip'
|
||||||
|
}
|
||||||
|
}
|
||||||
stages {
|
stages {
|
||||||
stage("clean") {
|
|
||||||
steps {
|
|
||||||
sh 'rm -rf ./dist/*'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stage("install") {
|
stage("install") {
|
||||||
steps {
|
steps {
|
||||||
sh 'mamba env update --file environment.yml'
|
sh 'pip install -r requirements.txt'
|
||||||
sh 'echo "mamba activate renamebycsv" >> ~/.bashrc'
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("build") {
|
stage("build") {
|
||||||
steps {
|
steps {
|
||||||
|
sh 'rm -rf ./dist/*'
|
||||||
sh "python -m build"
|
sh "python -m build"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -25,17 +26,18 @@ pipeline {
|
|||||||
}
|
}
|
||||||
stage("archive") {
|
stage("archive") {
|
||||||
steps {
|
steps {
|
||||||
archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl'
|
archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage("publish package") {
|
stage("publish package") {
|
||||||
|
environment {
|
||||||
|
CREDS = credentials('rs-git-package-registry-ydeng')
|
||||||
|
}
|
||||||
when {
|
when {
|
||||||
branch '**/master'
|
branch '**/master'
|
||||||
}
|
}
|
||||||
steps {
|
steps {
|
||||||
withCredentials([usernamePassword(credentialsId: 'rs-git-package-registry-ydeng', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
|
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${CREDS_USR}/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
|
||||||
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${USER}/pypi -u ${USER} -p ${PASS} --non-interactive --disable-progress-bar --verbose dist/*'
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
12
README.md
12
README.md
@@ -24,11 +24,19 @@ This program makes heavy use of REGEX, also known as Regular Expression to give
|
|||||||
- Where it differs is the ability to use one REGEX string to match many strings.
|
- Where it differs is the ability to use one REGEX string to match many strings.
|
||||||
- i.e, the REGEX "`abc\d+`" will match with "`abc1`", "`abc2`", "`abc12`", but not "`ac12`" or "`abc`".
|
- i.e, the REGEX "`abc\d+`" will match with "`abc1`", "`abc2`", "`abc12`", but not "`ac12`" or "`abc`".
|
||||||
- Many characters can be used as normal and will match a string literally (character for character), but some will be treated as special characters (such as the previously used `\`, which indicates that the letter afterwards should be treated specially, such as a token)
|
- Many characters can be used as normal and will match a string literally (character for character), but some will be treated as special characters (such as the previously used `\`, which indicates that the letter afterwards should be treated specially, such as a token)
|
||||||
- Common tokens to be aware of: `.` for any character, `\d` for single digits, `\w` for word characters, `\s` for space characters (tabs, spaces, linebreaks, etc.). Tokens can be repeated by using `+`, indicating "one or more", `*` indicating "none or more".
|
- Common tokens to be aware of: `.` for any character, `\d` for single digits, `\w` for word characters, `\s` for space characters (tabs, spaces, linebreaks, etc.). Tokens can be repeated by using `+`, indicating "one or more", or `*`, indicating "zero or more". If you want to match something that is read as a token by default, such as `.` or `+`, putting a `\` in front of it will cause it to be matched literally, e.g., `1\.3` matches `1.3`, but not `123`, `1a3`, etc.
|
||||||
- A capture group is a way of "selecting" a part of a text and is formed by using `(` and `)` around the REGEX that should be selected.
|
- A capture group is a way of "selecting" a part of a text and is formed by using `(` and `)` around the REGEX that should be selected.
|
||||||
|
|
||||||
Now for a few examples:
|
Now for a few examples:
|
||||||
|
|
||||||
Let's say we have files `run325-a-1.vcf`, `run326-b-2.vcf`, and `run327-b-3.vcf`. If we know that all that matters is the `1` after the `run[numbers]-[character]-`, we can write `run\d+-\w-(\d).vcf` which will match with all 3 of the above examples, and select the last digit. The program can then use a given CSV to look up the selected digits and replace the name with what is given by the CSV.
|
Let's say we have files `run325-a-1.vcf`, `run326-b-2.vcf`, and `run327-b-3.vcf`. If we know that all that matters is the `1` after the `run[numbers]-[character]-`, we can write `run\d+-\w-(\d)\.vcf` which will match with all 3 of the above examples, and select the last digit. The program can then use a given CSV to look up the selected digits and replace the name with what is given by the CSV.
|
||||||
|
|
||||||
For learning and testing your own REGEX, check out [regex101.com](https://regex101.com/), which allows you to write the strings that you're trying to match, and the REGEX. It will show you live which parts of the strings match what, if any parts match.
|
For learning and testing your own REGEX, check out [regex101.com](https://regex101.com/), which allows you to write the strings that you're trying to match, and the REGEX. It will show you live which parts of the strings match what, if any parts match.
|
||||||
|
|
||||||
|
## Not Working?
|
||||||
|
|
||||||
|
If the program is not working the way you would like it to, try running it in `-v DEBUG` mode, which increases verbosity. Typically, files not being renamed can be attributed to one of two problems:
|
||||||
|
|
||||||
|
1. It's looking in the wrong directory. The solution would be to double check that the directory it's looking in (printed by the program each run) is correct. If not, try adding quotes around the path in the command line.
|
||||||
|
|
||||||
|
2. The provided REGEX pattern isn't matching any of the files. In this case, test one or two of the file names at [regex101.com](https://regex101.com/) with your pattern.
|
@@ -9,3 +9,4 @@ dependencies:
|
|||||||
- setuptools=67.6
|
- setuptools=67.6
|
||||||
- twine=4.0
|
- twine=4.0
|
||||||
- cryptography=38.0.4
|
- cryptography=38.0.4
|
||||||
|
prefix: ./env
|
@@ -1,88 +1,15 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import csv
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from typing import Iterable
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from renamebycsv.renamer import find_all_candidates, rename_by_csv
|
||||||
def find_all_candidates(input_dir: str, regex: str, recursive: bool):
    """Collect directory entries whose names match ``regex``.

    Args:
        input_dir: Directory whose entries are listed with ``os.listdir``.
        regex: Pattern applied with ``match`` semantics (anchored at the
            start of the entry name, not the full path).
        recursive: When true, sub-directories are descended into instead of
            being tested against the pattern.

    Returns:
        A list of ``(path, name, match)`` tuples, one per matching entry, in
        the order the entries were encountered.
    """
    # Compile once so the pattern is not looked up again for every entry;
    # re.compile() returns an already-compiled pattern unchanged, which lets
    # the recursive call below pass it straight through.
    pattern = re.compile(regex)
    results = []
    for subitem in os.listdir(input_dir):
        subitem_path = os.path.join(input_dir, subitem)
        if recursive and os.path.isdir(subitem_path):
            # Directories are only walked in recursive mode, so there is no
            # point matching their names against the pattern.
            logging.debug(f'Checking directory "{subitem}"...')
            results.extend(find_all_candidates(subitem_path, pattern, recursive))
            continue
        match = pattern.match(subitem)
        if not match:
            logging.debug(f'Ignoring "{subitem}"...')
            continue
        results.append((subitem_path, subitem, match))
        logging.debug(f'Collecting "{subitem}"...')
    return results
|
|
||||||
|
|
||||||
|
|
||||||
def rename(
    csv_path: str,
    candidates: Iterable[tuple[str, str, re.Match]],
    current: str,
    become: str,
    dry: bool,
    extension: str,
    keep_extension: bool,
):
    """Rename matched files using a lookup table read from a CSV file.

    The first CSV row is treated as the header. The text captured by group 1
    of each candidate's match is looked up in the ``current`` column and the
    matched part of the file name is replaced by the value found in the
    ``become`` column of the same row.

    Args:
        csv_path: Path of the CSV file holding the lookup table.
        candidates: ``(path, name, match)`` tuples describing the files.
        current: Header of the column holding the captured values.
        become: Header of the column holding the replacement names.
        dry: When true, only log what would happen; nothing is renamed.
        extension: Optional extension appended to the new name; a leading
            ``.`` is added when missing.
        keep_extension: When true, the original file extension is appended
            after any ``extension``.

    Raises:
        ValueError: If ``current`` or ``become`` is not in the CSV header.
        Exception: If one ``current`` value maps to two different names.
        SystemExit: With code 1 when the destination path already exists.
    """
    replacement_dict = {}
    # newline="" is what the csv module expects so that it can deal with
    # quoted fields containing line breaks on its own.
    with open(csv_path, "r", newline="") as csv_fd:
        reader = csv.reader(csv_fd)
        header = next(reader, None)
        if header is not None:
            missing = [name for name in (current, become) if name not in header]
            if missing:
                raise ValueError(
                    f"Column(s) {missing} not found in CSV header {list(header)}."
                )
            current_col_ind = header.index(current)
            become_col_ind = header.index(become)
            widest = max(current_col_ind, become_col_ind)
            for row in reader:
                if len(row) <= widest:
                    # Blank or truncated lines (e.g. a trailing empty line)
                    # carry no mapping; skip them instead of crashing.
                    logging.debug("Skipping incomplete CSV row %s.", row)
                    continue
                key = row[current_col_ind]
                value = row[become_col_ind]
                if key in replacement_dict and replacement_dict[key] != value:
                    raise Exception("Duplicate current key.")
                replacement_dict[key] = value

    for subitem_path, subitem, match in candidates:
        captured = match.group(1)
        if captured not in replacement_dict:
            logging.warning(
                'Group "%s" was not matched to any row in the provided CSV. '
                "Skipping...",
                captured,
            )
            continue
        replacement = replacement_dict[captured]
        # A callable replacement inserts the CSV value literally; passing the
        # string itself would make re interpret backslashes as a template.
        new_name = match.re.sub(lambda _found: replacement, subitem.strip())
        original = subitem_path
        objective = os.path.join(os.path.dirname(subitem_path), new_name)
        if extension:
            objective += ("" if extension.startswith(".") else ".") + extension
        if keep_extension:
            objective += os.path.splitext(subitem_path)[1]
        logging.info(f'Will rename "{original}" to "{os.path.basename(objective)}"')
        if os.path.exists(objective):
            logging.error(
                f'Path at "{objective}" exists, not continuing. '
                "Use -f to overwrite instead of stopping."
            )
            # The exit() builtin is injected by the site module and is not
            # guaranteed to exist; raising SystemExit has the same effect.
            raise SystemExit(1)
        if not dry:
            os.rename(original, objective)
    if dry:
        logging.info("No file names were modified.")
|
|
||||||
|
|
||||||
|
|
||||||
def run(args):
|
def run(args):
|
||||||
candidates = find_all_candidates(args.input_dir, args.regex, args.recursive)
|
candidates = find_all_candidates(args.input_dir, args.pattern, args.recursive)
|
||||||
rename(
|
if len(candidates):
|
||||||
|
rename_by_csv(
|
||||||
args.csv,
|
args.csv,
|
||||||
candidates,
|
candidates,
|
||||||
args.current,
|
args.current,
|
||||||
@@ -104,7 +31,7 @@ def main():
|
|||||||
metavar="I",
|
metavar="I",
|
||||||
)
|
)
|
||||||
argparser.add_argument(
|
argparser.add_argument(
|
||||||
"regex",
|
"pattern",
|
||||||
help="The regex to apply to each file name. The first capture group is used to "
|
help="The regex to apply to each file name. The first capture group is used to "
|
||||||
"perform the replacement.",
|
"perform the replacement.",
|
||||||
metavar="R",
|
metavar="R",
|
||||||
|
97
renamebycsv/renamer.py
Normal file
97
renamebycsv/renamer.py
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
import csv
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
|
||||||
|
def find_all_candidates(input_dir: str, regex: str, recursive: bool):
    """Collect directory entries whose names match ``regex``.

    The search summary ("Searching ...", then either "No results found ..."
    or "Collected N files ...") is logged once for the whole search rather
    than once per visited directory, so a recursive run over a tree with an
    empty sub-directory no longer reports "No results found" while files
    were in fact collected elsewhere.

    Args:
        input_dir: Directory whose entries are listed with ``os.listdir``.
        regex: Pattern applied with ``match`` semantics (anchored at the
            start of the entry name, not the full path).
        recursive: When true, sub-directories are descended into instead of
            being tested against the pattern.

    Returns:
        A list of ``(path, name, match)`` tuples, one per matching entry, in
        the order the entries were encountered.
    """
    logging.info(
        'Searching "%s" for files that match "%s" %s',
        input_dir,
        regex,
        "recursively" if recursive else "non-recursively",
    )
    results = _collect_candidates(input_dir, re.compile(regex), recursive)
    if not results:
        logging.info(
            'No results found matching "%s" in "%s". Please double check your REGEX '
            "pattern and directory being searched.",
            regex,
            input_dir,
        )
    else:
        logging.info("Collected %d files to rename.", len(results))
    return results


def _collect_candidates(input_dir: str, pattern: "re.Pattern", recursive: bool):
    """Walk ``input_dir`` and return ``(path, name, match)`` for each match."""
    found = []
    for subitem in os.listdir(input_dir):
        subitem_path = os.path.join(input_dir, subitem)
        if recursive and os.path.isdir(subitem_path):
            # Directories are only walked in recursive mode; their own names
            # are never tested against the pattern in that case.
            logging.debug(f'Checking directory "{subitem}"...')
            found.extend(_collect_candidates(subitem_path, pattern, recursive))
            continue
        match = pattern.match(subitem)
        if not match:
            logging.debug(f'Ignoring "{subitem}"...')
            continue
        found.append((subitem_path, subitem, match))
        logging.debug(f'Collecting "{subitem}"...')
    return found
|
||||||
|
|
||||||
|
|
||||||
|
def rename_by_csv(
    csv_path: str,
    candidates: Iterable[tuple[str, str, re.Match]],
    current: str,
    become: str,
    dry: bool,
    extension: str,
    keep_extension: bool,
):
    """Rename matched files using a lookup table read from a CSV file.

    The text captured by group 1 of each candidate's match is looked up in
    the ``current`` column and the matched part of the file name is replaced
    by the value found in the ``become`` column of the same row.

    Args:
        csv_path: Path of the CSV file holding the lookup table.
        candidates: ``(path, name, match)`` tuples describing the files.
        current: Header of the column holding the captured values.
        become: Header of the column holding the replacement names.
        dry: When true, only log what would happen; nothing is renamed.
        extension: Optional extension appended to the new name; a leading
            ``.`` is added when missing.
        keep_extension: When true, the original file extension is appended
            after any ``extension``.

    Raises:
        ValueError: If ``current`` or ``become`` is not in the CSV header.
        Exception: If one ``current`` value maps to two different names.
        SystemExit: With code 1 when the destination path already exists.
    """
    replacement_dict = _load_replacements(csv_path, current, become)

    for subitem_path, subitem, match in candidates:
        captured = match.group(1)
        if captured not in replacement_dict:
            logging.warning(
                'Group "%s" was not matched to any row in the provided CSV. '
                "Skipping...",
                captured,
            )
            continue
        replacement = replacement_dict[captured]
        # A callable replacement inserts the CSV value literally; passing the
        # string itself would make re interpret backslashes as a template.
        new_name = match.re.sub(lambda _found: replacement, subitem.strip())
        original = subitem_path
        objective = os.path.join(os.path.dirname(subitem_path), new_name)
        if extension:
            objective += ("" if extension.startswith(".") else ".") + extension
        if keep_extension:
            objective += os.path.splitext(subitem_path)[1]
        logging.info(f'Will rename "{original}" to "{os.path.basename(objective)}"')
        if os.path.exists(objective):
            logging.error(
                f'Path at "{objective}" already exists, not continuing. '
                "Use -f to overwrite instead of stopping."
            )
            # The exit() builtin is injected by the site module and is not
            # guaranteed to exist; raising SystemExit has the same effect.
            raise SystemExit(1)
        if not dry:
            os.rename(original, objective)
    if dry:
        logging.info("No file names were modified.")


def _load_replacements(csv_path: str, current: str, become: str) -> dict:
    """Read the CSV and return a ``{current value: become value}`` mapping."""
    replacements = {}
    # newline="" is what the csv module expects so that it can deal with
    # quoted fields containing line breaks on its own.
    with open(csv_path, "r", newline="") as csv_fd:
        reader = csv.reader(csv_fd)
        header = next(reader, None)
        if header is None:
            # A completely empty file simply yields no replacements.
            return replacements
        missing = [name for name in (current, become) if name not in header]
        for name in missing:
            logging.error('"%s" not in header %s.', name, list(header))
        if missing:
            # Stop with an explicit message instead of letting list.index()
            # fail later with an unrelated-looking error.
            raise ValueError(
                f"Column(s) {missing} not found in CSV header {list(header)}."
            )
        current_col_ind = header.index(current)
        become_col_ind = header.index(become)
        widest = max(current_col_ind, become_col_ind)
        for row in reader:
            if len(row) <= widest:
                # Blank or truncated lines (e.g. a trailing empty line) carry
                # no mapping; skip them instead of raising IndexError.
                logging.debug("Skipping incomplete CSV row %s.", row)
                continue
            key = row[current_col_ind]
            value = row[become_col_ind]
            if key in replacements and replacements[key] != value:
                # The same key pointing at two different names is ambiguous.
                raise Exception("Duplicate current key.")
            replacements[key] = value
    return replacements
|
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
build
|
||||||
|
pytest
|
||||||
|
setuptools
|
||||||
|
twine
|
||||||
|
cryptography
|
Reference in New Issue
Block a user