Compare commits

..

5 Commits

Author SHA1 Message Date
eacb730961 Added a comment
All checks were successful
ydeng/renamebycsv/pipeline/head This commit looks good
2023-09-11 21:55:30 +00:00
844cf4b2de Changed steps to use native credential manager
All checks were successful
ydeng/renamebycsv/pipeline/head This commit looks good
2023-09-11 20:52:34 +00:00
434f969556 Improved logging and reduced exceptions.
All checks were successful
ydeng/renamebycsv/pipeline/head This commit looks good
Also bumped version.
2023-09-11 07:59:56 +00:00
d98801dd66 Updated pipeline to use latest build container image features
All checks were successful
ydeng/renamebycsv/pipeline/head This commit looks good
2023-05-03 08:37:35 -05:00
83639a10e2 Added info to the 'README.md' regarding escape character
All checks were successful
ydeng/renamebycsv/pipeline/head This commit looks good
2023-04-26 13:59:58 -05:00
6 changed files with 30 additions and 25 deletions

18
Jenkinsfile vendored
View File

@@ -1,19 +1,14 @@
pipeline {
agent any
stages {
stage("clean") {
steps {
sh 'rm -rf ./dist/*'
}
}
stage("install") {
steps {
sh 'mamba env update --file environment.yml'
sh 'echo "mamba activate renamebycsv" >> ~/.bashrc'
sh 'mamba env update --file environment.yml --prefix ./env || mamba env create --force --file environment.yml --prefix ./env'
}
}
stage("build") {
steps {
sh 'rm -rf ./dist/*'
sh "python -m build"
}
}
@@ -25,17 +20,18 @@ pipeline {
}
stage("archive") {
steps {
archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl'
archiveArtifacts artifacts: 'dist/*.tar.gz, dist/*.whl', fingerprint: true, followSymlinks: false, onlyIfSuccessful: true
}
}
stage("publish package") {
environment {
CREDS = credentials('rs-git-package-registry-ydeng')
}
when {
branch '**/master'
}
steps {
withCredentials([usernamePassword(credentialsId: 'rs-git-package-registry-ydeng', passwordVariable: 'PASS', usernameVariable: 'USER')]) {
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${USER}/pypi -u ${USER} -p ${PASS} --non-interactive --disable-progress-bar --verbose dist/*'
}
sh returnStatus: true, script: 'python -m twine upload --repository-url https://git.reslate.systems/api/packages/${CREDS_USR}/pypi -u ${CREDS_USR} -p ${CREDS_PSW} --non-interactive --disable-progress-bar --verbose dist/*'
}
}
}

View File

@@ -24,12 +24,12 @@ This program makes heavy use of REGEX, also known as Regular Expression to give
- Where it differs is the ability to use one REGEX string to match many strings.
- e.g., the REGEX "`abc\d+`" will match "`abc1`", "`abc2`", and "`abc12`", but not "`ac12`" or "`abc`".
- Many characters can be used as normal and will match a string literally (character for character), but some will be treated as special characters (such as the previously used `\`, which indicates that the letter afterwards should be treated specially, such as a token)
- Common tokens to be aware of: `.` for any character, `\d` for single digits, `\w` for word characters, `\s` for space characters (tabs, spaces, linebreaks, etc.). Tokens can be repeated by using `+`, indicating "one or more", `*` indicating "none or more".
- Common tokens to be aware of: `.` for any character, `\d` for single digits, `\w` for word characters, `\s` for space characters (tabs, spaces, linebreaks, etc.). Tokens can be repeated by using `+`, indicating "one or more", or `*`, indicating "zero or more". If you want to match a character that is read as a token by default, such as `.` or `+`, put a `\` in front of it to match it literally, e.g., `1\.3` matches `1.3`, but not `123`, `1a3`, etc.
- A capture group is a way of "selecting" a part of a text and is formed by using `(` and `)` around the REGEX that should be selected.
Now for a few examples:
Let's say we have files `run325-a-1.vcf`, `run326-b-2.vcf`, and `run327-b-3.vcf`. If we know that all that matters is the `1` after the `run[numbers]-[character]-`, we can write `run\d+-\w-(\d).vcf` which will match with all 3 of the above examples, and select the last digit. The program can then use a given CSV to look up the selected digits and replace the name with what is given by the CSV.
Let's say we have files `run325-a-1.vcf`, `run326-b-2.vcf`, and `run327-b-3.vcf`. If we know that all that matters is the digit after the `run[numbers]-[character]-` part (e.g., the `1` in the first file), we can write `run\d+-\w-(\d)\.vcf`, which will match all 3 of the above examples and select the last digit. The program can then use a given CSV to look up the selected digits and replace the name with what is given by the CSV.
For learning and testing your own REGEX, check out [regex101.com](https://regex101.com/), which allows you to write the strings that you're trying to match, and the REGEX. It will show you live which parts of the strings match what, if any parts match.

View File

@@ -8,4 +8,5 @@ dependencies:
- python=3.11
- setuptools=67.6
- twine=4.0
- cryptography=38.0.4
- cryptography=38.0.4
prefix: ./env

View File

@@ -8,15 +8,16 @@ from renamebycsv.renamer import find_all_candidates, rename_by_csv
def run(args):
candidates = find_all_candidates(args.input_dir, args.pattern, args.recursive)
rename_by_csv(
args.csv,
candidates,
args.current,
args.become,
args.dry,
args.extension,
args.keep_extension,
)
if len(candidates):
rename_by_csv(
args.csv,
candidates,
args.current,
args.become,
args.dry,
args.extension,
args.keep_extension,
)
def main():

View File

@@ -32,6 +32,8 @@ def find_all_candidates(input_dir: str, regex: str, recursive: bool):
regex,
input_dir,
)
else:
logging.info("Collected %d files to rename.", len(results))
return results
@@ -51,6 +53,10 @@ def rename_by_csv(
become_col_ind = None
for row in reader:
if current_col_ind is None and become_col_ind is None:
if current not in row:
logging.error("\"%s\" not in header %s.", current, list(row))
if become not in row:
logging.error("\"%s\" not in header %s.", become, list(row))
current_col_ind = row.index(current)
become_col_ind = row.index(become)
continue
@@ -58,6 +64,7 @@ def rename_by_csv(
row[current_col_ind] in replacement_dict
and replacement_dict[row[current_col_ind]] != row[become_col_ind]
):
# Check if there's a duplicate key for different values.
raise Exception("Duplicate current key.")
replacement_dict[row[current_col_ind]] = row[become_col_ind]
for subitem_path, subitem, match in candidates:
@@ -80,7 +87,7 @@ def rename_by_csv(
logging.info(f'Will rename "{original}" to "{os.path.basename(objective)}"')
if os.path.exists(objective):
logging.error(
f'Path at "{objective}" exists, not continuing. '
f'Path at "{objective}" already exists, not continuing. '
"Use -f to overwrite instead of stopping."
)
exit(1)

View File

@@ -1,6 +1,6 @@
[metadata]
name = renamebycsv
version = 0.0.7
version = 0.0.8
[options]
packages = renamebycsv