# Source code for ielearn.extract.extract
"""
Extract a data set from NEF and XMP files.
"""
from os import path
import logging
import numpy as np
import pandas as pd
from tqdm import tqdm
from functools import partial
from multiprocessing import Pool
from itertools import filterfalse
from argparse import ArgumentParser
from ielearn.util import (
imap_unordered_bar,
get_lines,
fn_has_ext,
xmp_nef_pairs,
base_fn_add,
raise_after_logging,
remove_extension,
mask_rejected_photos
)
from ielearn.extract import (
embedding,
xmp
)
logger = logging.getLogger("IMG-EDIT-LEARN")
logging.basicConfig(level=logging.INFO)
def is_exif_descriptor(d: str) -> bool:
    """Return whether a descriptor name begins with the ``exif:`` prefix.

    Args:
        d: Descriptor name to test.

    Returns:
        ``True`` if ``d`` starts with the literal (case-sensitive) string
        ``"exif:"``, ``False`` otherwise.
    """
    return d.startswith("exif:")
# merge the two DataFrames by their file name (with extension removed)
# merge_col = 'fn_trunc'
# xmp_df[merge_col] = xmp_df['fn'].map(remove_extension)
# embedding_df[merge_col] = embedding_df['fn'].map(remove_extension)
# del embedding_df['fn']
# main_df = xmp_df.merge(embedding_df, how='inner', on=merge_col)
# del main_df[merge_col]
# return main_df
def parse_args(argv=None):
    """Parse the command-line arguments for the extraction script.

    Two positional arguments are expected, in this order:

    * ``input_fn`` -- path to a file listing the NEF and XMP files to parse,
      one per line.
    * ``base_fn`` -- base path the parsed data sets are written to.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the arguments are taken from ``sys.argv[1:]``,
            which is argparse's standard behavior.

    Returns:
        The ``argparse.Namespace`` holding ``input_fn`` and ``base_fn``.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "input_fn",
        help="Path to a file which contains a list of NEF and XMP files to parse (one per line).",
    )
    parser.add_argument(
        "base_fn",
        help="Base path to where the parsed data sets should be written to.",
    )
    return parser.parse_args(argv)
def cli():
    """Command-line entry point: extract features and labels and write CSVs.

    Parses the command-line arguments, obtains the XMP and NEF file names
    from the input list via ``xmp_nef_pairs``, passes them to ``extract``,
    and writes the two resulting tables with ``to_csv`` to the paths
    returned by ``base_fn_add`` for the ``".features"`` and ``".labels"``
    suffixes.
    """
    args = parse_args()
    xmp_fns, nef_fns = xmp_nef_pairs(args.input_fn)
    # NOTE(review): `extract` is neither defined nor imported in the visible
    # part of this module -- confirm it is in scope before relying on this.
    features, labels = extract(xmp_fns, nef_fns)
    features.to_csv(base_fn_add(args.base_fn, ".features"))
    labels.to_csv(base_fn_add(args.base_fn, ".labels"))


# The guard lives at module level so that importing the module has no side
# effects, while calling cli() from an importer or entry point does the work.
if __name__ == "__main__":
    cli()