from pathlib import Path

from cv2 import imread, imwrite
import numpy as np

from .movie import Movie

# Default selection of contrast data, organised as view -> contrast ->
# list of methods.
_summary = {
    'seqmean': {
        'monochromatic': ['luminance'],
        'saturation': ['saturation', 'colourfulness'],
        'hue': ['hsV', 'labHcl'],
    },
    'distribution': {
        'monochromatic': ['luminance'],
        'saturation': ['saturation', 'colourfulness'],
        'hue': ['hsV', 'labHcl'],
    },
}


class Corpus(object):
    """Main class for working with a corpus of movies and movie data

    Attributes:
        basedir (Path) -- The root directory in which all movie folders
            together with their data reside.
        summary (dict) -- view -> contrast -> [methods] selection; the
            module default is used when none is passed in.
        status (dict) -- Resource index built by _report() at
            construction time.
        movies (dict) -- One Movie instance per movie slug.
    """

    def __init__(self, path='./', summary=False):
        self.basedir = Path(path)
        self.summary = summary if summary else _summary
        # NOTE: the index is a snapshot; it should be refreshed after each
        # digitisation run.
        self.status = self._report()
        self.movies = self._instantiate_movies()

    def _report(self):
        """Indexes the available resources for each movie in the corpus.

        Directories whose name starts with '.' or '_' are ignored. Data
        (*.pkl in <movie>/data) and diagram (*.png in <movie>) files are
        indexed by the last underscore separated parts of their file
        name (..._<view>_<contrast>_<method>_<suffix>); files that do not
        follow this pattern are skipped.

        Returns:
            dict -- a dictionary in which each key corresponds with one
            movie title holding another 4 keys with information about the
            availability of the 'video' file, extracted 'frames', 'data'
            and 'visuals'. 'video' is a Path or None, 'frames' the number
            of frame files or None.
        """
        status = {}
        corpus_dir = Path(self.basedir).resolve()

        # Sorted, so that the order of movies (and therefore of the
        # diagrams in a tableau) does not depend on the file system.
        movie_dirs = sorted(
            d for d in corpus_dir.iterdir()
            if d.is_dir() and not d.name.startswith(('.', '_')))

        for m in movie_dirs:
            movie = m.name
            video = m / 'movie' / (movie + '.mkv')
            frames = sum(1 for _ in (m / 'frames' / '240p30').glob('*'))
            status[movie] = {
                'video': video if video.is_file() else None,
                'frames': frames if frames > 0 else None,
                'data': self._index_files((m / 'data').glob('*.pkl')),
                'visuals': self._index_files(m.glob('*.png')),
            }

        return status

    @staticmethod
    def _index_files(files):
        """Builds a view -> contrast -> method index from file names

        Arguments:
            files {iterable of pathlib.Path} -- The files to index.

        Returns:
            dict -- Nested dictionary {view: {contrast: {method: True}}}.
        """
        index = {}
        for f in files:
            parts = f.name.split('_')
            # Names with fewer than four parts cannot carry view,
            # contrast and method; skip them instead of failing.
            if len(parts) < 4:
                continue
            view, contrast, method = parts[-4:-1]
            index.setdefault(view, {}).setdefault(contrast, {})[method] = True
        return index

    def _instantiate_movies(self):
        """Creates a dictionary with one Movie instance for each movie

        Returns:
            dict() -- A dictionary in which the movie slugs are the keys
            and the values are corresponding instances of the Movie class.
        """
        movies = {}
        for slug in self.status:
            folder = str(self.basedir / slug / 'frames' / '240p30')
            movies[slug] = Movie(slug + '_', folder, fps=4)
        return movies

    def extract(self):
        """Determines missing frame images and contrast data per movie

        The method looks for missing frame images and contrast data in the
        object's status dictionary. It uses the summary dictionary in
        order to decide which contrast values are required.

        Returns:
            dict -- The outstanding tasks (see _extraction_tasks).

        Todo:
            FEATURE The extraction itself (_extract_frames and
            _extract_contrast) is not implemented yet; currently the
            outstanding tasks are only collected.
        """
        return self._extraction_tasks()

    def _extraction_tasks(self):
        """Collects what still has to be extracted for each movie

        Returns:
            dict -- {slug: {'frames': bool, 'data': {view: {contrast:
            [methods]}}}}. 'frames' is True if the status index lists no
            frame images for the movie, 'data' holds the methods from the
            summary for which no data file was indexed. Movies without
            outstanding work are left out.
        """
        tasks = {}
        for slug, state in self.status.items():
            available = state.get('data') or {}
            missing = {}
            for view, contrasts in self.summary.items():
                for contrast, methods in contrasts.items():
                    done = available.get(view, {}).get(contrast, {})
                    # A fresh list, so that the summary is never aliased
                    # (and later mutated) through the task dictionary.
                    todo = [meth for meth in methods if meth not in done]
                    if todo:
                        missing.setdefault(view, {})[contrast] = todo

            needs_frames = not state.get('frames')
            if needs_frames or missing:
                tasks[slug] = {'frames': needs_frames, 'data': missing}
        return tasks

    def _extract_frames(self):
        """Placeholder for the frame extraction (not implemented)"""
        pass

    def _extract_contrast(self):
        """Placeholder for the contrast extraction (not implemented)"""
        pass

    def tableau(self, mode='contrast',
                select=('distribution', 'monochromatic', 'luminance'),
                write=True):
        """Creates a tableau of available diagrams for a given contrast

        Diagrams for the tableau will not be created in case some of the
        movies in the corpus lack the required diagram. Consequently, the
        tableau will only show diagrams from movies in the corpus that
        exist already.

        Keyword Arguments:
            mode {str} -- Defines the type of components that are drawn
                together in the tableau. 'contrast' selects diagrams from
                all the movies in the corpus which represent the contrast
                defined in the select argument. 'movie' creates one
                tableau with all diagrams for each movie defined in the
                select argument. (default: {'contrast'})
            select {[str]} -- Describes the components selected for the
                tableau. If the mode is 'contrast' the argument requires a
                sequence of 3 strings, referring to the moment, contrast
                and method ([moment, contrast, method]). If mode 'movie'
                is given the select argument requires one to many movie
                slugs of movies in the corpus; a single slug may be given
                as a plain string.
                (default: {('distribution', 'monochromatic', 'luminance')})
            write {bool} -- Decide, if the tableau image should be written
                to disk in the corpus folder or not. (default: {True})

        Returns:
            numpy.ndarray -- A numpy ndarray with the data type uint8
            showing the tableau as an image. In 'movie' mode the tableau
            of the last selected movie is returned (None for an empty
            selection).

        Raises:
            ValueError -- If mode is unknown or no diagram matches the
                selection.

        Todo:
            FIXME Instead of an if/else statement, create tableau class
            in the visuals module and subclass it for movies and
            contrasts. Especially the movie table should also be provided
            as a method for the Movie class.
        """
        if mode == 'contrast':
            components = self._filter_diagrams(mode=mode, select=select)
            layout = self.layout_tableau(len(components))
            tableau = self._fit_components(components, layout)
            if write:
                file_name = self.basedir / ('_'.join(select[:3]) + '.png')
                imwrite(str(file_name), tableau)
        elif mode == 'movie':
            if isinstance(select, str):
                select = [select]
            tableau = None
            for movie in select:
                components = self._filter_diagrams(mode=mode, select=movie)
                layout = self.layout_tableau(len(components))
                tableau = self._fit_components(components, layout)
                if write:
                    folder = self.basedir / movie / 'visuals' / 'tableau'
                    folder.mkdir(parents=True, exist_ok=True)
                    file_name = folder / (movie + '_tableau.png')
                    imwrite(str(file_name), tableau)
        else:
            raise ValueError(
                "unknown tableau mode {!r}; expected 'contrast' or "
                "'movie'".format(mode))

        return tableau

    def _filter_diagrams(self, mode, select):
        """Filters which diagrams in a corpus belong to a selection

        The filter looks for diagrams represented in *.png files only.
        The paths are derived from the status index, not from a fresh
        directory listing.

        Keyword Arguments:
            mode {str} -- Where to look for diagrams (see tableau)
            select {[str] or str} -- [moment, contrast, method] in mode
                'contrast', a single movie slug in mode 'movie'

        Returns:
            [pathlib.Path] -- A list of file paths to the diagrams that
            match the selection.

        Todo:
            FEATURE Parametrize the file-format instead of looking at png
            files only.
        """
        suffix = '_4fps.png'
        diagrams = []

        if mode == 'contrast':
            shape, ctrst, meth = select
            for slug, state in self.status.items():
                methods = state['visuals'].get(shape, {}).get(ctrst, {})
                if meth in methods:
                    name = '_'.join((slug, shape, ctrst, meth)) + suffix
                    diagrams.append(self.basedir / slug / name)
        elif mode == 'movie':
            visuals = self.status[select]['visuals']
            for moment, contrasts in visuals.items():
                for ctrst, methods in contrasts.items():
                    for meth in methods:
                        name = '_'.join((select, moment, ctrst, meth))
                        diagrams.append(
                            self.basedir / select / (name + suffix))

        return diagrams

    @staticmethod
    def layout_tableau(n, ratio=12):
        """Calculates the shape of a tableau for a given number of diagrams

        Arguments:
            n {int} -- The number of diagrams that should fit into the
                tableau

        Keyword Arguments:
            ratio {int} -- The number of rows that should be created
                before a new column is created (default: {12})

        Returns:
            (int, int, int) -- A tuple with the number of rows, the number
            of columns and the number of placeholder images required to
            fill up the whole tableau (rows * columns - n).
        """
        cols = max(1, n // ratio)
        # Ceiling division: just enough rows to hold all n diagrams.
        rows = -(-n // cols)
        return (rows, cols, rows * cols - n)

    @staticmethod
    def _fit_components(components, layout):
        """Builds a tableau out of diagrams in a given layout

        First, the method creates a list of diagrams for one row. Then
        this list is stacked to create one row image. This is repeated for
        each row so that the result is a list of row images. Again this is
        stacked to one image which is the tableau image. Cells without a
        diagram are filled with white placeholder images.

        Arguments:
            components {[pathlib.Path]} -- A list of file paths to the
                diagram image files. All images need to have the same
                size.
            layout {(int, int, int)} -- A tuple describing the number of
                rows and columns as well as the difference of available
                places in the tableau and diagrams (the last value is not
                used here).

        Returns:
            numpy.ndarray -- A numpy ndarray with the data type uint8
            showing the tableau as an image.

        Raises:
            ValueError -- If there is nothing to draw or a diagram file
                can not be read.
        """
        rows, cols = layout[0], layout[1]
        if not components or rows < 1 or cols < 1:
            raise ValueError('cannot build a tableau without diagrams')

        # Fallback size; replaced by the size of the loaded diagrams so
        # that the placeholders always fit.
        placeholder_shape = (1200, 16000, 3)
        tableau = []
        for row in range(rows):
            cells = []
            for col in range(cols):
                n = row * cols + col
                if n < len(components):
                    img = imread(str(components[n]))
                    # imread signals an unreadable file with None
                    if img is None:
                        raise ValueError(
                            'could not read diagram image: {}'.format(
                                components[n]))
                    placeholder_shape = img.shape
                else:
                    # uint8 keeps the stacked result a regular 8-bit image
                    img = np.full(placeholder_shape, 255, dtype=np.uint8)
                cells.append(img)
            tableau.append(np.hstack(cells))

        return np.vstack(tableau)