corpus.py 4.31 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import pathlib
from .movie import Movie

# Default extraction plan, nested as {view: {contrast: [methods]}}.
# Both views request the same contrasts and methods; the comprehension
# evaluates the inner literal once per view, so each view owns its own
# dicts and lists.
SUMMARY = {
    view: {
        'monochromatic': ['luminance'],
        'saturation': ['saturation', 'colourfulness'],
        'hue': ['hsV', 'labHcl'],
    }
    for view in ('seqmean', 'distribution')
}


class Corpus(object):
    """Index of a directory of movies and the resources derived from them.

    Every sub-directory of the base directory whose name does not start
    with '.' or '_' is treated as one movie.

    Attributes:
        basedir -- pathlib.Path of the corpus directory
        summary -- nested dict ``{view: {contrast: [methods]}}`` naming the
                   contrast data that should exist for every movie
        status -- resource index produced by ``_report``
        movies -- dict mapping each movie slug to its Movie instance
    """

    def __init__(self, path='./', summary=False):
        """Indexes the corpus found at ``path``.

        Arguments:
            path -- directory that contains one sub-directory per movie
            summary -- nested dict ``{view: {contrast: [methods]}}``; any
                       falsy value selects the module-level SUMMARY
        """
        self.basedir = pathlib.Path(path)
        self.summary = summary or SUMMARY
        # Snapshot taken at construction time; it has to be refreshed after
        # every digitization run.
        self.status = self._report()
        self.movies = self._instantiate_movies()

    def _report(self):
        """Indexes the available resources for each movie in the corpus.

        Returns:
            dict -- a dictionary in which each key corresponds with one movie
                    title holding another 4 keys: 'video' (path of the .mkv
                    file or None), 'frames' (number of entries in the
                    'frames/240p30' folder or None if there are none), 'data'
                    (nested dict ``{view: {contrast: {method: True}}}`` built
                    from the names of the .pkl files) and 'visual' (not
                    implemented yet, holds a placeholder string).
        """
        status = {}
        corpus_dir = pathlib.Path(self.basedir).resolve()
        # keep directories only and filter dot paths and tmp folders
        movie_dirs = [d for d in corpus_dir.iterdir()
                      if d.is_dir() and not d.name.startswith(('.', '_'))]

        for movie_dir in movie_dirs:
            movie = movie_dir.name

            video = movie_dir / 'movie' / (movie + '.mkv')
            frames = sum(1 for _ in
                         (movie_dir / 'frames' / '240p30').glob('*'))

            data = {}
            for data_file in (movie_dir / 'data').glob('*.pkl'):
                parts = data_file.name.split('_')
                # View, contrast and method are read from the 4th-, 3rd- and
                # 2nd-last '_'-separated parts of the file name; names that
                # are too short to carry them are skipped instead of raising
                # an IndexError.
                if len(parts) < 4:
                    continue
                view, contrast, method = parts[-4:-1]
                data.setdefault(view, {}).setdefault(
                    contrast, {})[method] = True

            status[movie] = {
                'video': video if video.is_file() else None,
                'frames': frames if frames > 0 else None,
                'data': data,
                'visual': 'NEEDS IMPLEMENTATION',
            }

        return status

    def _instantiate_movies(self):
        """Creates a dictionary with one Movie instance for each movie

        Each Movie is constructed with the prefix '<slug>_', the path of the
        movie's 'frames/240p30' folder (as a string) and fps=4.

        Returns:
            dict() -- A dictionary in which the movie slugs are the keys and
                      the values are corresponding instances of the Movie
                      class.
        """
        movies = {}
        for slug in self.status:
            frame_dir = self.basedir / slug / 'frames' / '240p30'
            movies[slug] = Movie(slug + '_', str(frame_dir), fps=4)
        return movies

    def extract(self):
        """Extracts missing frame images and contrast data for each movie

           The method looks for missing frame images and contrast data in the
           object's status dictionary and extracts them from the movie file and
           the frame images. It uses the summary dictionary in order to decide
           which contrast values should be extracted.
        """
        # TODO: only the outstanding tasks are determined so far; the
        # extraction itself (_extract_frames / _extract_contrast) is not
        # implemented yet.
        tasks = self._extraction_tasks()

    def _extraction_tasks(self):
        """Determines which contrast data is still missing for each movie.

        The requested data in ``self.summary`` is compared with the 'data'
        entry of every movie in ``self.status``. The result is built from
        fresh lists and dicts, so ``self.summary`` is never modified.

        Returns:
            dict -- ``{movie: {view: {contrast: [methods]}}}`` listing the
                    missing methods; movies, views and contrasts for which
                    nothing is missing are left out.
        """
        tasks = {}
        for movie, resources in self.status.items():
            available = resources.get('data') or {}
            missing = {}
            for view, contrasts in self.summary.items():
                available_view = available.get(view, {})
                for contrast, methods in contrasts.items():
                    available_methods = available_view.get(contrast, {})
                    todo = [method for method in methods
                            if method not in available_methods]
                    if todo:
                        missing.setdefault(view, {})[contrast] = todo
            if missing:
                tasks[movie] = missing
        return tasks

    def _extract_frames(self):
        """Placeholder for the frame extraction; not implemented yet."""
        pass

    def _extract_contrast(self):
        """Placeholder for the contrast extraction; not implemented yet."""
        pass