from pathlib import Path

from cv2 import imread, imwrite
import numpy as np

from .movie import Movie

# Default extraction plan: view -> contrast -> list of methods.
SUMMARY = {'seqmean': {'monochromatic': ['luminance'],
                       'saturation': ['saturation', 'colourfulness'],
                       'hue': ['hsV', 'labHcl']},
           'distribution': {'monochromatic': ['luminance'],
                            'saturation': ['saturation', 'colourfulness'],
                            'hue': ['hsV', 'labHcl']}}


class Corpus(object):
    """Main class for working with a corpus of movies and movie data

    Attributes:
        basedir (Path) -- The root directory in which all movie folders
            together with their data reside.
        summary (dict) -- Nested mapping view -> contrast -> [methods]
            describing which contrast data is wanted. Falls back to the
            module level SUMMARY when no (or an empty) summary is given.
        status (dict) -- Index of the resources found on disk for each
            movie, as built by _report().
        movies (dict) -- One Movie instance per movie folder, keyed by the
            folder name.
    """

    def __init__(self, path='./', summary=False):
        self.basedir = Path(path)
        self.summary = summary if summary else SUMMARY
        # NOTE: the index is a snapshot taken at construction time; it has
        # to be rebuilt after every digitization run.
        self.status = self._report()
        self.movies = self._instantiate_movies()

    def _report(self):
        """Indexes the available resources for each movie in the corpus.

        Every direct sub-directory of the corpus directory is treated as a
        movie folder, except those whose name starts with '.' or '_'
        (hidden and temporary folders).

        Returns:
            dict -- a dictionary in which each key corresponds with one
                movie title holding another 4 keys: 'video' (path to the
                *.mkv file or None), 'frames' (number of files in
                frames/240p30 or None if there are none), 'data' and
                'visuals' (nested dictionaries
                shape -> contrast -> method -> True).
        """
        status = {}
        corpus_dir = Path(self.basedir).resolve()

        # filter dot paths and tmp folders
        movie_dirs = [d for d in corpus_dir.iterdir()
                      if d.is_dir() and not d.name.startswith(('.', '_'))]

        for m in movie_dirs:
            movie = m.name
            video = m / ('movie/' + movie + '.mkv')
            frames = sum(1 for _ in (m / 'frames/240p30/').glob('*'))

            status[movie] = {
                'video': video if video.is_file() else None,
                'frames': frames if frames > 0 else None,
                # data pickles live in the movie's data/ folder
                'data': self._index_files((m / 'data/').glob('*.pkl')),
                # diagram images live directly in the movie folder
                'visuals': self._index_files(m.glob('*.png')),
            }

        return status

    @staticmethod
    def _index_files(files):
        """Builds a shape -> contrast -> method index from file names

        The file name is split at '_' and the fourth-, third- and
        second-to-last parts are used as shape, contrast and method.
        Counting from the end keeps this working for movie titles that
        contain underscores themselves. Files with fewer than four parts
        cannot be interpreted and are skipped.

        Arguments:
            files {iterable of pathlib.Path} -- The files to index.

        Returns:
            dict -- Nested dictionary shape -> contrast -> method -> True.
        """
        index = {}
        for f in files:
            parts = f.name.split('_')
            if len(parts) < 4:
                continue
            shape, contrast, method = parts[-4:-1]
            index.setdefault(shape, {}).setdefault(contrast, {})[method] = True
        return index

    def _instantiate_movies(self):
        """Creates a dictionary with one Movie instance for each movie

        Returns:
            dict() -- A dictionary in which the movie slugs are the keys and
                the values are corresponding instances of the Movie class.
        """
        movies = {}
        for m in self.status:
            prefix = m + '_'
            folder = str(self.basedir / (m + '/frames/240p30/'))
            movies[m] = Movie(prefix, folder, fps=4)
        return movies

    def extract(self):
        """Extracts missing frame images and contrast data for each movie

        The method looks for missing contrast data in the object's status
        dictionary. It uses the summary dictionary in order to decide which
        contrast values should be extracted.

        NOTE: the extraction itself is not implemented yet
        (_extract_frames and _extract_contrast are stubs); currently only
        the list of open tasks is computed.

        Returns:
            dict -- The open extraction tasks, see _extraction_tasks().
        """
        tasks = self._extraction_tasks()
        return tasks

    def _extraction_tasks(self):
        """Collects the contrast data that is missing in the corpus

        The result is the union over all movies: an entry is listed as soon
        as at least one movie lacks it. All containers in the result are
        fresh copies, so the summary dictionary is never modified.

        Returns:
            dict -- Nested dictionary view -> contrast -> [methods] with
                everything from the summary that is not available for at
                least one movie.
        """
        tasks = {}
        for resources in self.status.values():
            available = resources['data']
            for view, contrasts in self.summary.items():
                if view not in available:
                    # the whole view is missing: schedule all of it
                    tasks[view] = {c: list(ms) for c, ms in contrasts.items()}
                    continue
                for contrast, methods in contrasts.items():
                    if contrast not in available[view]:
                        tasks.setdefault(view, {})[contrast] = list(methods)
                        continue
                    for method in methods:
                        if method in available[view][contrast]:
                            continue
                        pending = tasks.setdefault(
                            view, {}).setdefault(contrast, [])
                        # another movie may have scheduled it already
                        if method not in pending:
                            pending.append(method)
        return tasks

    def _extract_frames(self):
        """Placeholder for the frame extraction (not implemented yet)"""
        pass

    def _extract_contrast(self):
        """Placeholder for the contrast extraction (not implemented yet)"""
        pass

    def tableau(self, shape='distribution', ctrst='monochromatic',
                meth='luminance', write=True):
        """Creates a tableau of available diagrams for a given contrast

        Diagrams for the tableau will not be created in case some of the
        movies in the corpus lack the required diagram. Consequently, the
        tableau will only show diagrams from movies in the corpus that exist
        already.

        Keyword Arguments:
            shape {str} -- The shape of the data in the diagram. Can be one
                of seqmean or distribution (default: {'distribution'})
            ctrst {str} -- The name of the contrast.
                (default: {'monochromatic'})
            meth {str} -- The method used in order to calculate the contrast
                (default: {'luminance'})
            write {bool} -- Decide, if the tableau image should be written
                to disk as 'tableau.png' in the corpus folder or not.
                Nothing is written if no matching diagram exists.
                (default: {True})

        Returns:
            numpy.ndarray -- A numpy ndarray with the data type uint8
                showing the tableau as an image. The array is empty if no
                matching diagram exists.
        """
        components = self._filter_diagrams(shape=shape, ctrst=ctrst,
                                           meth=meth)
        layout = self.layout_tableau(len(components))
        tableau = self._fit_components(components, layout)

        # an empty image cannot be encoded, so there is nothing to write
        if write and tableau.size > 0:
            imwrite(str(self.basedir / 'tableau.png'), tableau)

        return tableau

    def _filter_diagrams(self, shape='distribution', ctrst='monochromatic',
                         meth='luminance'):
        """Filters which diagrams in a corpus belong to a specific contrast

        The filter looks for diagrams represented in *.png files only

        Keyword Arguments:
            shape {str} -- The shape of the data in the diagram. Can be one
                of seqmean or distribution (default: {'distribution'})
            ctrst {str} -- The name of the contrast.
                (default: {'monochromatic'})
            meth {str} -- The method used in order to calculate the contrast
                (default: {'luminance'})

        Returns:
            [pathlib.Path] -- A list of file paths to the diagrams that
                match the selected contrast visualization.

        Todo:
            * FEATURE Parametrize the file-format instead of looking at png
              files only.
        """
        diagrams = []
        for slug, resources in self.status.items():
            methods = resources['visuals'].get(shape, {}).get(ctrst, {})
            if meth in methods:
                filename = '_'.join((slug, shape, ctrst, meth, '4fps.png'))
                diagrams.append(self.basedir / slug / filename)
        return diagrams

    @staticmethod
    def layout_tableau(n, ratio=12):
        """Calculates the shape of a tableau for a given number of diagrams

        One column is used for every full group of ratio diagrams (but at
        least one column), and as many rows as are needed to place all
        diagrams in these columns.

        Arguments:
            n {int} -- The number of diagrams that should fit into the
                tableau

        Keyword Arguments:
            ratio {int} -- The number of rows that should be created before
                a new column is created (default: {12})

        Returns:
            (int, int, int) -- A tuple with three values describing the
                number of rows and the number of columns so that the
                diagrams fit into it considering the given row/column ratio
                as well as the number of placeholder images required in
                order to fill-up the whole tableau.
        """
        # fewer diagrams than ratio still need one column
        cols = max(1, n // ratio)
        # ceiling division: the smallest number of rows holding n diagrams
        rows = -(-n // cols)
        return (rows, cols, rows * cols - n)

    @staticmethod
    def _fit_components(components, layout):
        """Builds a tableau out of diagrams in a given layout

        First, the method creates a list of col diagrams for one row. Then
        this list is stacked to create one row image. This is repeated for
        each row so that the result is a list of row images. Again this is
        stacked to one image which is the tableau image. Places for which no
        diagram is left are filled with white placeholder images.

        Arguments:
            components {[pathlib.Path]} -- A list of file paths to the
                diagram image files.
            layout {(int, int, int)} -- A tuple describing the number of
                rows and columns of the tableau as well as the difference
                of available diagrams and places in the tableau. Only the
                first two values are used here.

        Returns:
            numpy.ndarray -- A numpy ndarray with the data type uint8
                showing the tableau as an image. An empty array of shape
                (0, 0, 3) is returned for a layout without rows or columns.

        Raises:
            ValueError -- If one of the diagram files cannot be read as an
                image.
        """
        rows, cols = layout[0], layout[1]
        if rows < 1 or cols < 1:
            return np.empty((0, 0, 3), dtype=np.uint8)

        # Fallback placeholder size; presumably the size of the generated
        # diagrams -- TODO confirm. It is replaced by the shape of the
        # first diagram so that placeholders always stack with real images.
        placeholder_shape = (1200, 16000, 3)

        tableau = []
        for row in range(rows):
            cells = []
            for col in range(cols):
                n = row * cols + col
                if n < len(components):
                    img = imread(str(components[n]))
                    # imread signals an unreadable file by returning None
                    if img is None:
                        raise ValueError(
                            'Cannot read diagram image: '
                            + str(components[n]))
                    if n == 0:
                        placeholder_shape = img.shape
                else:
                    # Create dummy images to fill-up the remaining space in
                    # the tableau
                    img = np.full(placeholder_shape, 255, dtype=np.uint8)
                cells.append(img)
            tableau.append(np.hstack(cells))

        return np.vstack(tableau)