Commit caab3092 authored by Niels-Oliver Walkowski
Browse files

feat(mod): Add init and status report for corpus

Add a corpus class which holds information about available movie files,
frames, data and visualizations in a corpus of movie files, following
the cittena movie corpus scheme.
Add a method to report the status of the movie corpus.
parent beabd565
import pathlib
from .movie import Movie
# Default extraction plan, nested as view -> contrast -> list of methods.
# Both views currently request the same contrasts; the comprehension builds a
# fresh inner dict (and fresh lists) per view, so the two entries never share
# mutable state.
SUMMARY = {
    view: {
        'monochromatic': ['luminance'],
        'saturation': ['saturation', 'colourfulness'],
        'hue': ['hsV', 'labHcl'],
    }
    for view in ('seqmean', 'distribution')
}
class Corpus(object):
    """Index of the resources available for each movie in a corpus folder.

    The corpus directory is expected to hold one sub-directory per movie,
    laid out as::

        <basedir>/<movie>/movie/<movie>.mkv
        <basedir>/<movie>/frames/240p30/*
        <basedir>/<movie>/data/*.pkl

    Attributes:
        basedir: ``pathlib.Path`` of the corpus directory.
        summary: nested dict ``view -> contrast -> [methods]`` describing
            which contrast data should exist for every movie.
        status: result of :meth:`_report`.
        movies: result of :meth:`_instantiate_movies`.
    """

    def __init__(self, path='./', summary=False):
        """Scan the corpus at ``path`` and create the Movie instances.

        Args:
            path: location of the corpus directory.
            summary: optional ``view -> contrast -> [methods]`` dict; any
                falsy value selects the module level ``SUMMARY`` default.
        """
        self.basedir = pathlib.Path(path)
        self.summary = summary if summary else SUMMARY
        # The status has to be refreshed after every digitisation run.
        self.status = self._report()
        self.movies = self._instantiate_movies()

    def _report(self):
        """Index the available resources for each movie in the corpus.

        Returns:
            dict -- a dictionary in which each key corresponds with one
            movie title holding another 4 keys with information about the
            availability of the 'video' file (path or None), the number of
            extracted 'frames' (or None if there are none), the 'data'
            (``view -> contrast -> method -> True``) and the
            'visual'izations.
        """
        status = {}
        corpus_dir = pathlib.Path(self.basedir).resolve()
        # Filter dot paths and tmp folders: only directories whose name does
        # not start with '.' or '_' are treated as movies.  Sorted so the
        # report does not depend on the file system's listing order.
        movie_dirs = sorted(
            d for d in corpus_dir.iterdir()
            if d.is_dir() and not d.name.startswith(('.', '_'))
        )

        for movie_dir in movie_dirs:
            movie = movie_dir.name
            entry = status[movie] = {}

            video = movie_dir / 'movie' / (movie + '.mkv')
            entry['video'] = video if video.is_file() else None

            frames = sum(1 for _ in (movie_dir / 'frames' / '240p30').glob('*'))
            entry['frames'] = frames if frames > 0 else None

            data = entry['data'] = {}
            for data_file in (movie_dir / 'data').glob('*.pkl'):
                # NOTE(review): assumes data files are named
                # ``..._<view>_<contrast>_<method>_<suffix>.pkl`` -- confirm
                # against the code that writes them.
                parts = data_file.name.split('_')
                if len(parts) < 4:
                    # Not enough fields to identify view/contrast/method.
                    continue
                view, contrast, method = parts[-4:-1]
                data.setdefault(view, {}).setdefault(contrast, {})[method] = True

            entry['visual'] = 'NEEDS IMPLEMENTATION'

        return status

    def _instantiate_movies(self):
        """Create one Movie instance for each movie listed in the status.

        Returns:
            dict -- a dictionary in which the movie slugs are the keys and
            the values are corresponding instances of the Movie class, each
            pointed at the movie's ``frames/240p30`` folder.
        """
        movies = {}
        for slug in self.status:
            prefix = slug + '_'
            folder = str(self.basedir / (slug + '/frames/240p30/'))
            movies[slug] = Movie(prefix, folder, fps=4)
        return movies

    def extract(self):
        """Extract missing frame images and contrast data for each movie.

        The method looks for missing frame images and contrast data in the
        object's status dictionary and is meant to extract them from the
        movie file and the frame images. It uses the summary dictionary in
        order to decide which contrast values should be extracted.
        """
        # TODO: only the task list is computed so far; the extraction itself
        # (_extract_frames / _extract_contrast) is not implemented yet.
        tasks = self._extraction_tasks()

    def _extraction_tasks(self):
        """Compare the status with the summary and list what is missing.

        Returns:
            dict -- with two keys: 'frames' holds the list of movies that
            have no extracted frames, 'data' maps each movie with missing
            contrast data to a ``view -> contrast -> [methods]`` dict of the
            entries of ``self.summary`` that are absent from its status.
        """
        # _report() stores None (not 0) when no frames exist, so test for
        # falsiness rather than equality with 0.
        without_frames = [movie for movie, info in self.status.items()
                          if not info['frames']]

        missing_data = {}
        for movie, info in self.status.items():
            available = info['data']
            for view, contrasts in self.summary.items():
                for contrast, methods in contrasts.items():
                    present = available.get(view, {}).get(contrast, {})
                    # A fresh list is built so the summary is never aliased
                    # or mutated through the task dictionary.
                    missing = [m for m in methods if m not in present]
                    if missing:
                        (missing_data.setdefault(movie, {})
                                     .setdefault(view, {}))[contrast] = missing

        return {'frames': without_frames, 'data': missing_data}

    def _extract_frames(self):
        """Placeholder for the frame extraction; not implemented yet."""

    def _extract_contrast(self):
        """Placeholder for the contrast extraction; not implemented yet."""
import sys
from import Movie
from itten.views import UnivariateSequence # , MultivariateSequence
# from itten.visuals import UnivariatePlot # , MultivariatePlot
import pytest
import numpy as np
# TODO A general function for testing the Frames class, the contrast
# computation and the visualisation, each parametrised for ...
# (the original comment is cut off at this point)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment