# corpus.py
1
2
3
from pathlib import Path
from cv2 import imread, imwrite
import numpy as np
4
5
from .movie import Movie

6
# Default extraction plan: for every view ('seqmean', 'distribution') it maps
# each contrast to the list of methods that should be computed for it.
# Corpus falls back to this mapping when no custom summary is passed in.
_summary = {'seqmean': {'monochromatic': ['luminance'],
                        'saturation': ['saturation', 'colourfulness'],
                        'hue': ['hsV', 'labHcl']},
            'distribution': {'monochromatic': ['luminance'],
                             'saturation': ['saturation', 'colourfulness'],
                             'hue': ['hsV', 'labHcl']}}


class Corpus(object):
    """Main class for working with a corpus of movies and movie data

    Attributes:
        basedir (Path) -- The root directory in which all moviefolders
            together with their data reside.
        summary (dict) -- Nested mapping view -> contrast -> [methods] that
            describes which contrast values are wanted for every movie.
        status (dict) -- Index of the resources found on disk for every
            movie (see _report).
        movies (dict) -- One Movie instance per movie slug (see
            _instantiate_movies).
    """
    def __init__(self, path='./', summary=False):
        self.basedir = Path(path)
        # Any falsy value (the default False, None, {}) selects the
        # module-level default summary.
        self.summary = summary if summary else _summary
        # Snapshot of what is on disk; refresh after every digitization.
        self.status = self._report()
        self.movies = self._instantiate_movies()

    def _report(self):
        """Indexes the available resources for each movie in the corpus.

        Every direct sub-directory of the corpus directory whose name does
        not start with '.' or '_' is treated as one movie.

        Returns:
            dict -- a dictionary in which each key corresponds with one movie
                    title holding another 4 keys with information about the
                    availability of the 'video' file (path or None), extracted
                    'frames' (count or None), 'data' and 'visuals' (both
                    nested dicts moment -> contrast -> method -> True).
        """
        status = {}
        corpus_dir = Path(self.basedir).resolve()
        # Skip dot paths and tmp folders; sort for a reproducible order.
        movie_dirs = sorted(d for d in corpus_dir.iterdir()
                            if d.is_dir()
                            and not d.name.startswith(('.', '_')))

        for folder in movie_dirs:
            slug = folder.name
            video = folder / 'movie' / (slug + '.mkv')
            n_frames = sum(1 for _ in (folder / 'frames' / '240p30').glob('*'))
            status[slug] = {
                'video': video if video.is_file() else None,
                'frames': n_frames if n_frames > 0 else None,
                # data pickles live in <movie>/data/
                'data': self._index_files((folder / 'data').glob('*.pkl')),
                # diagram images live directly in <movie>/
                'visuals': self._index_files(folder.glob('*.png')),
            }

        return status

    @staticmethod
    def _index_files(files):
        """Builds the moment -> contrast -> method index from file names

        The file names are split at '_'; the 4th-, 3rd- and 2nd-to-last parts
        are used as moment, contrast and method. Indexing from the end keeps
        this working for movie slugs that contain underscores themselves.
        Files with fewer than four parts cannot follow that scheme and are
        ignored instead of raising an IndexError.

        Arguments:
            files {iterable of pathlib.Path} -- The files to index.

        Returns:
            dict -- Nested dictionary moment -> contrast -> method -> True.
        """
        index = {}
        for path in files:
            parts = path.name.split('_')
            if len(parts) < 4:
                continue
            moment, contrast, method = parts[-4:-1]
            index.setdefault(moment, {}).setdefault(contrast, {})[method] = True
        return index

    def _instantiate_movies(self):
        """Creates a dictionary with one Movie instance for each movie

        Returns:
            dict() -- A dictionary in which the movie slugs are the keys and
                      the values are corresponding instances of the Movie
                      class.
        """
        return {slug: Movie(slug + '_',
                            str(self.basedir / slug / 'frames' / '240p30'),
                            fps=4)
                for slug in self.status}

    def extract(self):
        """Exctracts missing frame images and contrast data for each movie

           The method looks for missing frame images and contrast data in the
           object's status dictionary and extracts them from the movie file and
           the frame images. It uses the summary dictionary in order to decide
           which contrast values should be extracted.

           Todo:
               FEATURE The extraction itself is not implemented yet; so far
                   only the open tasks are collected.
        """
        tasks = self._extraction_tasks()

    def _extraction_tasks(self):
        """Collects the contrast data that is still missing for each movie

        The wanted values are taken from self.summary and compared with the
        'data' index of every movie in self.status. Only new lists and dicts
        are created, so neither the summary nor the status is modified.

        Returns:
            dict -- Nested dictionary movie -> view -> contrast -> [methods]
                holding only what is missing; movies without open tasks are
                left out.
        """
        tasks = {}
        for movie, state in self.status.items():
            available = state.get('data', {})
            missing = {}
            for view, contrasts in self.summary.items():
                for contrast, methods in contrasts.items():
                    done = available.get(view, {}).get(contrast, {})
                    todo = [meth for meth in methods if meth not in done]
                    if todo:
                        missing.setdefault(view, {})[contrast] = todo
            if missing:
                tasks[movie] = missing
        return tasks

    def _extract_frames(self):
        pass

    def _extract_contrast(self):
        pass

    def tableau(self, mode='contrast',
                select=('distribution', 'monochromatic', 'luminance'),
                write=True):
        """Creates a tableau of available diagrams for a given contrast

        Diagrams for the tableau will not be created in case some of the movies
        in the corpus lack the required diagram. Consequently, the tableau
        will only show diagrams from movies in the corpus that exist already.

        Keyword Arguments:
            mode {str} -- Defines the type of components that are drawn
                together in the tableau. 'contrast' selects diagrams from all
                the movies in the corpus which represents the contrast, defined
                in the select argument. 'movie' creates one tableau with all
                diagrams for each movie,  defined in the select argument.
                (default: {'contrast'})
            select {[str]} -- Describes the components selected for the
                tableau. If the mode is 'contrast' the argument requires a list
                of 3 strings, refering to the moment, contrast and method in
                order to count the contrast ([moment, contrast, method]). If
                mode 'movie' is given the select argument requires a list of
                1 to many strings, referring to the movie slugs of movies in
                the corpus. (default: {('distribution', 'monochromatic',
                'luminance')}
            write {bool} -- Decide, if the tableau image should be written to
                disk in the corpus folder or not. An empty tableau (no
                diagrams found) is never written. (default: {True})

        Returns:
            numpy.ndarray -- A numpy ndarry with the data type uint8
                showing the tableau as an image. In 'movie' mode the tableau
                of the last selected movie is returned, or None if select is
                empty.

        Raises:
            ValueError -- If mode is neither 'contrast' nor 'movie'.

        Todo:
            FIXME Instead of an if/else statement, create tableau class in the
                visuals module and subclass it for movies and contrasts.
                Especially the movie table should also be provided as a method
                for the Movie class.
        """
        if mode == 'contrast':
            components = self._filter_diagrams(mode=mode, select=select)
            layout = self.layout_tableau(len(components))
            tableau = self._fit_components(components, layout)
            if write and tableau.size:
                file_name = self.basedir / ('_'.join(select) + '.png')
                imwrite(str(file_name), tableau)
            return tableau

        if mode == 'movie':
            tableau = None
            for movie in select:
                components = self._filter_diagrams(mode=mode, select=movie)
                layout = self.layout_tableau(len(components))
                tableau = self._fit_components(components, layout)
                if write and tableau.size:
                    folder_name = self.basedir / movie / 'visuals' / 'tableau'
                    folder_name.mkdir(parents=True, exist_ok=True)
                    file_name = folder_name / (movie + '_tableau.png')
                    imwrite(str(file_name), tableau)
            return tableau

        raise ValueError("mode must be 'contrast' or 'movie', got "
                         + repr(mode))

    def _filter_diagrams(self, mode, select):
        """Filters which diagrams in a corpus belong to a specific contrast

        The filter looks for diagrams represented in *.png files only. The
        paths are derived from the 'visuals' index in self.status; the file
        system is not touched.

        Keyword Arguments:
            mode {str} -- Where to look for diagrams (see tableau)
            select {[str]} -- For which movies or contrasts should diagrams be
                selected (see tableau). In 'movie' mode this is a single
                movie slug.

        Returns:
            [pathlib.Path] -- A list of file paths to the diagrams that match
                the selected contrast visualization. The list is empty for an
                unknown mode.

        Todo:
            FEATURE Parametize the file-format instead of looking at png
                files only.
        """
        if mode == 'contrast':
            moment, contrast, method = select
            suffix = '_'.join(('', moment, contrast, method, '4fps.png'))
            return [self.basedir / slug / (slug + suffix)
                    for slug, state in self.status.items()
                    if method in state['visuals'].get(moment, {})
                                                 .get(contrast, {})]

        if mode == 'movie':
            visuals = self.status[select]['visuals']
            return [self.basedir / select /
                    '_'.join((select, moment, contrast, method, '4fps.png'))
                    for moment, contrasts in visuals.items()
                    for contrast, methods in contrasts.items()
                    for method in methods]

        return []

    @staticmethod
    def layout_tableau(n, ratio=12):
        """Calculates the shape of a tableau for a given number of diagrams

        One column is used for every full group of `ratio` diagrams (but at
        least one column). The number of rows is the smallest one that gives
        every diagram a place in that many columns.

        Arguments:
            n {int} -- The number of diagrams that should fit into the tableau

        Keyword Arguments:
            ratio {int} -- The number of rows that should be created before a
                new column is created (default: {12})

        Returns:
            (int, int, int) -- A tuple with three values describing the number
                of rows, the number of columns and the number of places that
                stay empty (rows * columns - n) and have to be filled with
                placeholder images.
        """
        cols = max(1, n // ratio)
        rows = -(-n // cols)  # ceiling division
        return (rows, cols, rows * cols - n)

    @staticmethod
    def _fit_components(components, layout, placeholder_shape=(1200, 16000, 3)):
        """Builds a tableau out of diagrams in a given layout

        The diagrams are placed row by row: the images of one row are stacked
        horizontally, the resulting row images are stacked vertically. Places
        without a diagram are filled with white placeholder images that take
        over shape and data type of the first diagram, so that they can be
        stacked with the diagrams.

        Arguments:
            components {[pathlib.Path]} -- A list of file paths to the
                diagram image files.
            layout {(int, int, int)} -- A tuple describing the number of
                rows and columns of the tableau as well as the difference of
                available diagrams and places in the tableau. Only the first
                two values are used.

        Keyword Arguments:
            placeholder_shape {(int, int, int)} -- The shape of the
                placeholder images if there is no diagram to take the shape
                from. (default: {(1200, 16000, 3)})

        Returns:
            numpy.ndarray -- A numpy ndarry with the data type uint8
                showing the tableau as an image. An empty array of the shape
                (0, 0, 3) is returned if the layout has no places.

        Raises:
            IOError -- If one of the diagram files can not be read.
        """
        rows, cols = layout[0], layout[1]
        if rows < 1 or cols < 1:
            return np.zeros((0, 0, 3), dtype=np.uint8)

        places = rows * cols
        cells = []
        for path in components[:places]:
            img = imread(str(path))
            # imread signals an unreadable file with None, not an exception
            if img is None:
                raise IOError('Could not read diagram image: ' + str(path))
            cells.append(img)

        # Create dummy images to fill-up the remaining space in the tableau
        if len(cells) < places:
            if cells:
                blank = np.full(cells[0].shape, 255, dtype=cells[0].dtype)
            else:
                blank = np.full(placeholder_shape, 255, dtype=np.uint8)
            cells.extend([blank] * (places - len(cells)))

        row_images = [np.hstack(cells[r * cols:(r + 1) * cols])
                      for r in range(rows)]
        return np.vstack(row_images)