96 lines
3.4 KiB
Python
96 lines
3.4 KiB
Python
import csv
|
|
import os
|
|
import re
|
|
from typing import Iterable
|
|
from csvbyname import exceptions
|
|
import logging
|
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def matcher(full_path: str, use_full_path: bool, regex_groups: list[str]):
    """Extract properties from a path using a list of regex specifications.

    Each entry of ``regex_groups`` takes one of two forms:

    * a regex containing named capture groups (``(?P<name>...)``); every
      named group becomes a property (a group that did not participate in
      the match yields ``None``), or
    * a ``"name:regex"`` pair, split on the first colon; the first capture
      group of ``regex`` becomes the value of property ``name``.

    Patterns are applied with ``re.match``, i.e. anchored at the start of the
    matched text.  A specification that declares named groups is always
    treated as the first form, so a colon inside the regex itself (for
    example a ``(?:...)`` non-capturing group) is never mistaken for the
    ``name:regex`` separator when the pattern fails to match.

    Args:
        full_path: Path of the file or folder being inspected.
        use_full_path: Match against ``full_path`` when true, otherwise
            against its basename only.
        regex_groups: Regex specifications in either form described above.

    Returns:
        A dict mapping property name to the captured value.  Specifications
        that do not match contribute nothing.

    Raises:
        exceptions.InvalidPropertiesException: If two specifications produce
            the same property name.
        re.error: If a specification is not a valid regular expression.
        IndexError: If a ``name:regex`` specification matches but its regex
            has no capture group.
    """
    # The text to match is the same for every specification; compute it once.
    target = full_path if use_full_path else os.path.basename(full_path)
    captured_properties = {}

    def ensure_unused(group):
        # A property may only be produced once across all specifications.
        if group in captured_properties:
            raise exceptions.InvalidPropertiesException(
                f'Duplicate capture group names found: "{group}"'
            )

    for regex_and_group in regex_groups:
        pattern = re.compile(regex_and_group)
        named_match = pattern.match(target)
        if named_match and named_match.groupdict():
            for group, val in named_match.groupdict().items():
                ensure_unused(group)
                captured_properties[group] = val
            continue

        group, separator, regex = regex_and_group.partition(":")
        # Skip when there is no "name:regex" separator, or when the pattern
        # declares named groups: in the latter case it simply did not match,
        # and splitting it on ":" would cut the regex in half.
        if not separator or pattern.groupindex:
            logger.debug(
                'File at "%s" could not be matched by regex "%s" '
                "and will be skipped",
                full_path,
                regex_and_group,
            )
            continue

        unnamed_match = re.match(regex, target)
        if unnamed_match:
            ensure_unused(group)
            captured_properties[group] = unnamed_match.group(1)

    return captured_properties
|
|
|
|
|
|
def collect_files(
    dir_path: str,
    include_folders: bool,
    entire_path: bool,
    recursive: bool,
    regex_groups: list[str],
) -> tuple[dict[str, dict[str, str]], set[str]]:
    """Collect properties for the entries of a directory.

    Every regular file in ``dir_path`` is passed to ``matcher``; folders are
    passed too when ``include_folders`` is true.  With ``recursive`` set,
    sub-directories are walked as well and their results are merged into
    this level's results.

    Args:
        dir_path: Directory whose entries are listed.
        include_folders: Also record properties for directories themselves.
        entire_path: Forwarded to ``matcher`` as ``use_full_path``.
        recursive: Descend into sub-directories.
        regex_groups: Regex specifications forwarded to ``matcher``.

    Returns:
        A ``(collected, pkeys)`` tuple: ``collected`` maps each visited path
        to its property dict, and ``pkeys`` is the set of every property name
        seen in any of those dicts.
    """
    collected = {}
    pkeys = set()
    for item in os.listdir(dir_path):
        full_path = os.path.join(dir_path, item)
        if os.path.isdir(full_path):
            if include_folders:
                collected[full_path] = matcher(full_path, entire_path, regex_groups)
                pkeys.update(collected[full_path])
            if recursive:
                # The recursive call returns a (dict, set) tuple; merge both
                # parts so nested paths and their property names are kept.
                nested_collected, nested_pkeys = collect_files(
                    full_path, include_folders, entire_path, recursive, regex_groups
                )
                collected.update(nested_collected)
                pkeys.update(nested_pkeys)
        elif os.path.isfile(full_path):
            collected[full_path] = matcher(full_path, entire_path, regex_groups)
            pkeys.update(collected[full_path])
    return collected, pkeys
|
|
|
|
|
|
def write_collected_to_csv(
    output_path: str,
    collected: dict[str, dict[str, str]],
    property_keys: Iterable[str],
    output_basename: bool,
):
    """Write the collected properties to a UTF-8 CSV file.

    The header is ``path``, optionally ``basename``, then the property names
    in sorted order.  One row is written per entry of ``collected``; a
    property that an entry lacks is written as ``"N/A"``.

    Args:
        output_path: Destination file; it is created or overwritten.
        collected: Mapping of path to that path's property dict.
        property_keys: Property names that become the CSV columns.
        output_basename: Add a ``basename`` column after ``path``.
    """
    with open(output_path, "w", newline="", encoding="utf-8") as csv_file:
        columns = sorted(property_keys)

        def build_row(path, props):
            # Leading cells mirror the header: path, then optional basename.
            leading = [path, os.path.basename(path)] if output_basename else [path]
            return leading + [props.get(column, "N/A") for column in columns]

        fixed_columns = ["path", "basename"] if output_basename else ["path"]
        out = csv.writer(csv_file)
        out.writerow(fixed_columns + columns)
        out.writerows(build_row(path, props) for path, props in collected.items())
|