# corpus.py -- Corpus class for working with a corpus of movies and movie data.
from pathlib import Path
from cv2 import imread, imwrite
import numpy as np
# Project-local imports
from .movie import Movie

# Default summary: for each shape of data ('seqmean', 'distribution') the
# contrasts that should be available, and for each contrast the methods used
# to calculate it. Used by Corpus whenever no custom summary is passed in.
_summary = {
    'seqmean': {
        'monochromatic': ['luminance'],
        'saturation': ['saturation', 'colourfulness'],
        'hue': ['hsV', 'labHcl'],
    },
    'distribution': {
        'monochromatic': ['luminance'],
        'saturation': ['saturation', 'colourfulness'],
        'hue': ['hsV', 'labHcl'],
    },
}


class Corpus(object):
    """Main class for working with a corpus of movies and movie data.

    Attributes:
        basedir (Path) -- The root directory in which all movie folders
            together with their data reside.
        summary (dict) -- Nested dictionary (shape -> contrast -> list of
            methods) naming the contrast data the corpus should provide.
        status (dict) -- Index of the resources found on disk for each
            movie, as built by _report().
        movies (dict) -- One Movie instance per movie folder, keyed by the
            folder name.
    """

    def __init__(self, path='./', summary=False):
        """Indexes the corpus folder and instantiates its movies.

        Keyword Arguments:
            path {str} -- Root directory of the corpus (default: {'./'})
            summary {dict} -- Custom summary dictionary. Any falsy value
                selects the module-level default. (default: {False})
        """
        self.basedir = Path(path)
        self.summary = summary if summary else _summary
        # Snapshot of the folder contents; update after each digitization.
        self.status = self._report()
        self.movies = self._instantiate_movies()

    def _report(self):
        """Indexes the available resources for each movie in the corpus.

        Folders whose name starts with '.' or '_' are ignored. Movie folders
        are visited in sorted order so that the index (and everything derived
        from it) does not depend on the file system's listing order.

        Returns:
            dict -- a dictionary in which each key corresponds with one movie
                    title holding another 4 keys with information about the
                    availability of the 'video' file (its path or None), the
                    extracted 'frames' (their number or None), 'data' and
                    'visuals' (nested shape -> contrast -> method -> True).
        """
        status = {}
        korpus_dir = Path(self.basedir).resolve()
        # filter dot paths and tmp folders
        movie_dirs = sorted(d for d in korpus_dir.iterdir()
                            if d.is_dir()
                            and not d.name.startswith(('.', '_')))

        for m in movie_dirs:
            movie = m.name
            video = m / 'movie' / (movie + '.mkv')
            frames = sum(1 for _ in (m / 'frames' / '240p30').glob('*'))
            status[movie] = {
                'video': video if video.is_file() else None,
                'frames': frames if frames > 0 else None,
                # data pickles live in the data folder, diagram *.png files
                # directly in the movie folder
                'data': self._index_files((m / 'data').glob('*.pkl')),
                'visuals': self._index_files(m.glob('*.png')),
            }

        return status

    @staticmethod
    def _index_files(files):
        """Builds a nested index from '_'-separated file names.

        The second-to-last three name parts are read as shape, contrast and
        method (e.g. <movie>_<shape>_<contrast>_<method>_4fps.png). Counting
        from the end keeps movie names containing '_' intact.

        Arguments:
            files {iterable of pathlib.Path} -- The files to index.

        Returns:
            dict -- shape -> contrast -> method -> True
        """
        index = {}
        for f in files:
            parts = f.name.split('_')
            # Names with fewer than four parts do not follow the naming
            # scheme; skip them instead of failing with an IndexError.
            if len(parts) < 4:
                continue
            shape, ctrst, meth = parts[-4:-1]
            index.setdefault(shape, {}).setdefault(ctrst, {})[meth] = True
        return index

    def _instantiate_movies(self):
        """Creates a dictionary with one Movie instance for each movie

        Returns:
            dict() -- A dictionary in which the movie slugs are the keys and
                      the values are corresponding instances of the Movie
                      class.
        """
        movies = {}
        for m in self.status:
            prefix = m + '_'
            folder = str(self.basedir / m / 'frames' / '240p30')
            movies[m] = Movie(prefix, folder, fps=4)
        return movies

    def extract(self):
        """Extracts missing frame images and contrast data for each movie

           The method looks for missing frame images and contrast data in the
           object's status dictionary and extracts them from the movie file and
           the frame images. It uses the summary dictionary in order to decide
           which contrast values should be extracted.

           Note: only the list of open tasks is computed so far. The
           extraction itself is not implemented yet.
        """
        # TODO: hand the tasks over to _extract_frames/_extract_contrast
        tasks = self._extraction_tasks()

    def _extraction_tasks(self):
        """Collects the contrast data that is still missing for each movie.

        Every shape/contrast/method combination named in self.summary is
        compared with the data indexed in self.status. Missing frame images
        are not reported here.

        Returns:
            dict -- movie -> shape -> contrast -> list of missing methods.
                Movies for which nothing is missing are left out. The lists
                are freshly built, so changing them never alters the summary
                dictionary.
        """
        tasks = {}
        for movie, resources in self.status.items():
            available = resources.get('data', {})
            missing = {}
            for view, contrasts in self.summary.items():
                for contrast, methods in contrasts.items():
                    done = available.get(view, {}).get(contrast, {})
                    todo = [meth for meth in methods if meth not in done]
                    if todo:
                        missing.setdefault(view, {})[contrast] = todo
            if missing:
                tasks[movie] = missing
        return tasks

    def _extract_frames(self):
        """Placeholder, not implemented yet."""
        pass

    def _extract_contrast(self):
        """Placeholder, not implemented yet."""
        pass

    def tableau(self,
                shape='distribution',
                ctrst='monochromatic',
                meth='luminance',
                write=True):
        """Creates a tableau of available diagrams for a given contrast

        Diagrams for the tableau will not be created in case some of the movies
        in the corpus lack the required diagram. Consequently, the tableau
        will only show diagrams from movies in the corpus that exist already.

        Keyword Arguments:
            shape {str} -- The shape of the data in the diagram. Can be one of
                seqmean or distribution (default: {'distribution'})
            ctrst {str} -- The name of the contrast.
                (default: {'monochromatic'})
            meth {str} -- The method used in order to calculate the contrast
                (default: {'luminance'})
            write {bool} -- Decide, if the tableau image should be written to
                disk in the corpus folder or not. (default: {True})

        Returns:
            numpy.ndarray -- A numpy ndarray with the data type uint8
                showing the tableau as an image.

        Raises:
            ValueError -- If no movie in the corpus has the requested
                diagram, so there is nothing to build a tableau from.

        Todo:
            FEATURE Allow to create tableaus for all diagrams available for a
                specific movie
            FIXME Move tableau methods to the visuals module
        """
        components = self._filter_diagrams(shape=shape, ctrst=ctrst, meth=meth)
        if not components:
            raise ValueError('No diagrams available for ' +
                             '_'.join((shape, ctrst, meth)))
        layout = self.layout_tableau(len(components))
        tableau = self._fit_components(components, layout)
        if write:
            file_name = self.basedir / ('_'.join((shape, ctrst, meth)) +
                                        '.png')
            imwrite(str(file_name), tableau)
        return tableau

    def _filter_diagrams(self,
                         shape='distribution',
                         ctrst='monochromatic',
                         meth='luminance'):
        """Filters which diagrams in a corpus belong to a specific contrast

        The filter looks for diagrams represented in *.png files only

        Keyword Arguments:
            shape {str} -- The shape of the data in the diagram. Can be one of
                seqmean or distribution (default: {'distribution'})
            ctrst {str} -- The name of the contrast.
                (default: {'monochromatic'})
            meth {str} -- The method used in order to calculate the contrast
                (default: {'luminance'})

        Returns:
            [pathlib.Path] -- A list of file paths to the diagrams that match
                the selected contrast visualization.

        Todo:
            * FEATURE Parametize the file-format instead of looking at png
                files only.
        """
        diagrams = []
        for movie, resources in self.status.items():
            methods = (resources.get('visuals', {})
                       .get(shape, {})
                       .get(ctrst, {}))
            if meth in methods:
                file_name = '_'.join((movie, shape, ctrst, meth, '4fps.png'))
                diagrams.append(self.basedir / movie / file_name)
        return diagrams

    @staticmethod
    def layout_tableau(n, ratio=12):
        """Calculates the shape of a tableau for a given number of diagrams

        One column is used for every full group of `ratio` diagrams (at least
        one column as soon as there is a diagram), and as many rows as are
        needed to place all diagrams in these columns.

        Arguments:
            n {int} -- The number of diagrams that should fit into the tableau

        Keyword Arguments:
            ratio {int} -- The number of rows that should be created before a
                new column is created (default: {12})

        Returns:
            (int, int, int) -- A tuple with three values describing the number
                of rows, the number of columns and the number of placeholder
                images needed to fill up the whole tableau. (0, 0, 0) if
                there are no diagrams.

        Raises:
            ValueError -- If n is negative or ratio is smaller than 1.
        """
        if n < 0 or ratio < 1:
            raise ValueError('n must be >= 0 and ratio must be >= 1')
        if n == 0:
            return (0, 0, 0)
        # At least one column, otherwise fewer than `ratio` diagrams would
        # lead to a division by zero below.
        cols = max(1, n // ratio)
        rows = -(-n // cols)  # ceiling division
        return (rows, cols, rows * cols - n)

    @staticmethod
    def _fit_components(components, layout):
        """Builds a tableau out of diagrams in a given layout

        First, the method creates a list of col diagrams for one row. Then
        this list is stacked to create one row image. This is repeated for
        each row so that the result is a list of row images. Again this
        is stacked to one image which is the tableau image.

        Arguments:
            components {[pathlib.Path]} -- A list of file paths to the
                diagram image files.
            layout {(int, int, int)} -- A tuple describing the number of
                diagrams for each row, column as well as the difference of
                available diagrams and places in the tableau.

        Returns:
            numpy.ndarray -- A numpy ndarray with the data type uint8
                showing the tableau as an image. An empty array if the
                layout has no rows.

        Raises:
            IOError -- If one of the diagram files cannot be read.
        """
        rows, cols = layout[0], layout[1]
        filler = None
        tableau = []
        for row in range(rows):
            tiles = []
            for col in range(cols):
                n = row * cols + col
                if n < len(components):
                    img = imread(str(components[n]))
                    # imread signals an unreadable file with None instead
                    # of raising.
                    if img is None:
                        raise IOError('Could not read diagram image: ' +
                                      str(components[n]))
                    if filler is None:
                        # White dummy image with the size and data type of
                        # the diagrams so that it can be stacked with them.
                        filler = np.full(img.shape, 255, dtype=img.dtype)
                else:
                    # Fill up the remaining space in the tableau
                    if filler is None:
                        filler = np.full((1200, 16000, 3), 255,
                                         dtype=np.uint8)
                    img = filler
                tiles.append(img)
            tableau.append(np.hstack(tiles))
        if not tableau:
            return np.zeros((0, 0, 3), dtype=np.uint8)
        return np.vstack(tableau)