1 | #if 0 |
---|
2 | # ----------------------------------------------------------------------- |
---|
3 | # mkvinfo.py - Matroska Streaming Video Files |
---|
4 | # ----------------------------------------------------------------------- |
---|
5 | # $Id: mkvinfo.py,v 1.3 2004/04/18 17:55:26 dischi Exp $ |
---|
6 | # |
---|
7 | # $Log: mkvinfo.py,v $ |
---|
8 | # Revision 1.3 2004/04/18 17:55:26 dischi |
---|
9 | # update, including subtitle support |
---|
10 | # |
---|
11 | # Revision 1.2 2004/03/21 08:57:31 dischi |
---|
12 | # major bugfix |
---|
13 | # |
---|
14 | # Revision 1.1 2004/01/31 12:24:15 dischi |
---|
15 | # add basic matroska info |
---|
16 | # |
---|
17 | # ----------------------------------------------------------------------- |
---|
18 | # MMPython - Media Metadata for Python |
---|
19 | # Copyright (C) 2003 Thomas Schueppel, Dirk Meyer |
---|
20 | # |
---|
21 | # This program is free software; you can redistribute it and/or modify |
---|
22 | # it under the terms of the GNU General Public License as published by |
---|
23 | # the Free Software Foundation; either version 2 of the License, or |
---|
24 | # (at your option) any later version. |
---|
25 | # |
---|
26 | # This program is distributed in the hope that it will be useful, but |
---|
27 | # WITHOUT ANY WARRANTY; without even the implied warranty of MER- |
---|
28 | # CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General |
---|
29 | # Public License for more details. |
---|
30 | # |
---|
31 | # You should have received a copy of the GNU General Public License along |
---|
32 | # with this program; if not, write to the Free Software Foundation, Inc., |
---|
33 | # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
---|
34 | # |
---|
35 | # ----------------------------------------------------------------------- |
---|
36 | #endif |
---|
37 | |
---|
38 | |
---|
39 | from mmpython import mediainfo |
---|
40 | import mmpython |
---|
41 | import struct |
---|
42 | import re |
---|
43 | import stat |
---|
44 | import os |
---|
45 | import math |
---|
46 | from types import * |
---|
47 | from struct import * |
---|
48 | from string import * |
---|
49 | |
---|
# Module-local shorthand for mediainfo's debug output function; the parser
# classes below call it with a single pre-formatted message string.
_print = mediainfo._debug
---|
51 | |
---|
# Track type values, as stored in the payload of a TrackType element
# (MATROSKA_TRACK_TYPE_ID).
MATROSKA_VIDEO_TRACK = 0x01
MATROSKA_AUDIO_TRACK = 0x02
MATROSKA_SUBTITLES_TRACK = 0x11

# EBML element IDs.  The values include the leading length-marker bits,
# i.e. they are the raw ID bytes read as one big-endian integer, which is
# how EbmlEntity.compute_id() builds the IDs it reports.

# Top-level / container elements
MATROSKA_HEADER_ID = 0x1A45DFA3
MATROSKA_TRACKS_ID = 0x1654AE6B
MATROSKA_SEGMENT_ID = 0x18538067
MATROSKA_SEGMENT_INFO_ID = 0x1549A966
MATROSKA_CLUSTER_ID = 0x1F43B675

# Global elements
MATROSKA_VOID_ID = 0xEC
MATROSKA_CRC_ID = 0xBF
MATROSKA_CRC32_ID = 0xBF  # same element as MATROSKA_CRC_ID, kept for callers

# Segment information children.
# MATROSKA_TIMECODESCALE_ID must be defined exactly once: 0x4489 is the
# Duration ID, so assigning it to the scale as well would make a lookup of
# the scale return the duration element.
MATROSKA_TIMECODESCALE_ID = 0x2AD7B1
MATROSKA_DURATION_ID = 0x4489
MATROSKA_MUXING_APP_ID = 0x4D80
MATROSKA_WRITING_APP_ID = 0x5741

# Track entry children
MATROSKA_TRACK_TYPE_ID = 0x83
MATROSKA_TRACK_LANGUAGE_ID = 0x22B59C
MATROSKA_CODEC_ID = 0x86
MATROSKA_CODEC_NAME_ID = 0x258688
MATROSKA_FRAME_DURATION_ID = 0x23E383
MATROSKA_TRACK_UID_ID = 0x73C5
MATROSKA_TRACK_NUMBER_ID = 0xD7

# Video settings and children
MATROSKA_VIDEO_SETTINGS_ID = 0xE0
MATROSKA_VID_WIDTH_ID = 0xB0
MATROSKA_VID_HEIGHT_ID = 0xBA

# Audio settings and children
MATROSKA_AUDIO_SETTINGS_ID = 0xE1
MATROSKA_AUDIO_SAMPLERATE_ID = 0xB5
MATROSKA_AUDIO_CHANNELS_ID = 0x9F
---|
83 | |
---|
# Decodes one EBML element (ID, size field and payload) as described in the
# Matroska/EBML specification.
class EbmlEntity:
    """A single EBML element parsed from the start of a byte buffer.

    The buffer may be a Python 2 ``str`` or any sequence whose items are
    byte values (``bytes`` / ``bytearray``).  Leading CRC-32 elements are
    skipped; their combined size is reported by get_crc_len() so that a
    caller walking a buffer can advance by
    ``get_total_len() + get_crc_len()``.

    Attributes set by parsing:
      valid       -- 1 when an element was decoded, 0 otherwise
      entity_id   -- numeric element ID, marker bits included (0 if invalid)
      entity_str  -- the raw ID bytes
      id_len      -- number of bytes used by the ID
      len_size    -- number of bytes used by the size field
      entity_data -- the payload, cut at the end of the buffer when the
                     element is longer than what was read
      entity_len  -- number of payload bytes actually available
      value       -- payload read as a big-endian unsigned integer when it
                     is 1 to 8 bytes long, otherwise 0
      crc_len     -- bytes taken by the CRC-32 elements that were skipped

    When nothing can be decoded the entity is flagged invalid and made to
    cover the whole remaining buffer, so loops that advance by
    get_total_len() always make progress.
    """

    def __init__(self, inbuf):
        """Parse the first non-CRC element found at the start of inbuf."""
        self.crc_len = 0
        self.build_entity(inbuf)
        # CRC-32 elements carry no metadata: step over them and remember how
        # many bytes they used.  Stop as soon as parsing fails.
        while self.valid and self.get_id() == MATROSKA_CRC32_ID:
            self.crc_len += self.get_total_len()
            inbuf = inbuf[self.get_total_len():]
            self.build_entity(inbuf)

    def _octets(self, chunk):
        """Return the items of chunk as a list of integers (0-255).

        Indexing a Python 2 str yields one-character strings while bytes
        objects yield integers; normalising here keeps the parser
        independent of the buffer type.
        """
        octets = []
        for item in chunk:
            if not isinstance(item, int):
                item = ord(item)
            octets.append(item)
        return octets

    def _mark_invalid(self, inbuf):
        """Flag the entity as invalid and give every field a safe value."""
        self.valid = 0
        _print("EBML entity not found, bad file format")
        self.id_len = 0
        self.len_size = 0
        self.entity_id = 0
        self.entity_str = inbuf[0:0]
        # Swallow the rest of the buffer: get_total_len() is then only zero
        # for an empty buffer, so callers cannot spin on the same offset.
        self.entity_data = inbuf
        self.entity_len = len(inbuf)
        self.value = 0

    def build_entity(self, inbuf):
        """Decode ID, size and payload of the element starting at inbuf[0]."""
        self.compute_id(inbuf)
        size = None
        if self.id_len:
            size = self.compute_len(inbuf[self.id_len:])
        if size is None:
            self._mark_invalid(inbuf)
            return
        self.valid = 1
        start = self.id_len + self.len_size
        if size == -1:
            # Unknown size: the element extends to the end of what was read.
            stop = len(inbuf)
        else:
            # A segment can span the whole file, so never trust the declared
            # size beyond the end of the read buffer.
            stop = min(start + size, len(inbuf))
        self.entity_data = inbuf[start:stop]
        self.entity_len = len(self.entity_data)
        # Short payloads may be unsigned integers; decode them eagerly.
        self.value = 0
        if 1 <= self.entity_len <= 8:
            for octet in self._octets(self.entity_data):
                self.value = (self.value << 8) | octet

    def compute_id(self, inbuf):
        """Decode the element ID at the start of inbuf.

        Sets id_len, entity_id and entity_str.  id_len is left at 0 when the
        buffer is empty, the first byte has none of its four top bits set,
        or the buffer ends before the ID is complete.
        """
        self.id_len = 0
        self.entity_id = 0
        head = self._octets(inbuf[:4])
        if head:
            # The position of the first set bit gives the ID width (1-4).
            width = 0
            marker = 0x80
            for candidate in (1, 2, 3, 4):
                if head[0] & marker:
                    width = candidate
                    break
                marker >>= 1
            if width and len(head) >= width:
                ident = 0
                for octet in head[:width]:
                    ident = (ident << 8) | octet
                self.id_len = width
                self.entity_id = ident
        self.entity_str = inbuf[0:self.id_len]
        return

    def compute_len(self, inbuf):
        """Decode the size field at the start of inbuf.

        Sets len_size to the width of the field (1-8 bytes) and returns the
        payload size, -1 when the field holds the reserved "unknown size"
        value (all data bits set), or None when no size can be decoded
        (empty buffer, zero first byte or truncated field); len_size is 0
        in that last case.
        """
        self.len_size = 0
        head = self._octets(inbuf[:8])
        if not head or head[0] == 0:
            return None
        # The position of the first set bit gives the field width.
        width = 1
        marker = 0x80
        while not head[0] & marker:
            marker >>= 1
            width += 1
        if len(head) < width:
            return None
        self.len_size = width
        # Drop the marker bit, then append the remaining bytes.
        size = head[0] & (marker - 1)
        for octet in head[1:width]:
            size = (size << 8) | octet
        if size == (1 << (7 * width)) - 1:
            return -1
        return size

    def get_crc_len(self):
        """Return the number of bytes of CRC-32 elements skipped."""
        return self.crc_len

    def get_value(self):
        """Return the payload decoded as an unsigned integer (0 if n/a)."""
        return self.value

    def get_data(self):
        """Return the raw payload bytes."""
        return self.entity_data

    def get_id(self):
        """Return the numeric element ID (0 for an invalid entity)."""
        return self.entity_id

    def get_str_id(self):
        """Return the raw ID bytes."""
        return self.entity_str

    def get_len(self):
        """Return the number of payload bytes available."""
        return self.entity_len

    def get_total_len(self):
        """Return ID + size field + payload length, CRC elements excluded."""
        return self.entity_len + self.id_len + self.len_size
---|
195 | |
---|
196 | |
---|
# This is the main Matroska object
class MkvInfo(mediainfo.AVInfo):
    """Metadata extracted from the beginning of a Matroska file.

    Only the first 80000 bytes of the file are read.  When they start with
    an EBML header the object is marked valid; the segment that follows is
    then scanned for the segment information (used to set ``length``, in
    minutes) and for the track list (used to fill ``video``, ``audio`` and
    ``subtitles``, which are presumably lists created by
    mediainfo.AVInfo.__init__ -- confirm against mediainfo).
    """

    def __init__(self, file):
        """Parse the Matroska headers of the open, readable object file."""
        mediainfo.AVInfo.__init__(self)
        self.samplerate = 1

        head = file.read(80000)
        if len(head) == 0:
            # Regular File end
            return

        # Check the Matroska header
        header = EbmlEntity(head)
        if header.get_id() != MATROSKA_HEADER_ID:
            self.valid = 0
            return
        _print("HEADER ID found %08X" % header.get_id())
        self.valid = 1
        self.mime = 'application/mkv'
        self.type = 'Matroska'

        # Now get the segment, which follows the header (and any CRC
        # elements that were skipped in front of it).
        offset = header.get_total_len() + header.get_crc_len()
        segment = EbmlEntity(head[offset:])
        if segment.get_id() != MATROSKA_SEGMENT_ID:
            _print("SEGMENT ID not found %08X" % segment.get_id())
            return
        _print("SEGMENT ID found %08X" % segment.get_id())

        segtab = self.process_one_level(segment)

        seginfo = segtab.get(MATROSKA_SEGMENT_INFO_ID)
        if seginfo is not None:
            self._read_segment_info(seginfo)

        _print("Searching for id : %X" % MATROSKA_TRACKS_ID)
        tracks = segtab.get(MATROSKA_TRACKS_ID)
        if tracks is None:
            _print("TRACKS ID not found !!")
            return
        try:
            self.process_tracks(tracks)
        except (KeyError, IndexError, TypeError, ValueError, struct.error):
            # Track parsing is best effort: keep whatever was collected
            # before the damaged element instead of failing the whole file.
            _print("Error while parsing the TRACKS element")

    def _decode_float(self, elem):
        """Return the payload of elem as a float, or None if not decodable.

        Only 4-byte and 8-byte big-endian IEEE payloads are handled.
        """
        data = elem.get_data()
        if len(data) == 4:
            return struct.unpack('!f', data)[0]
        if len(data) == 8:
            return struct.unpack('!d', data)[0]
        return None

    def _element_data(self, tabelem, key, default):
        """Return the raw payload of tabelem[key], or default if absent."""
        elem = tabelem.get(key)
        if elem is None:
            return default
        return elem.get_data()

    def _read_segment_info(self, seginfo):
        """Set self.length (in minutes) from the segment information."""
        infotab = self.process_one_level(seginfo)

        # Nanoseconds per duration tick; 1000000 (1 ms) is the default when
        # the file does not store a scale.
        scale = 1000000
        # The scale can only be looked up if its ID is distinct from the
        # Duration ID, otherwise the lookup would return the duration
        # element itself and its bytes would be misread as the scale.
        if MATROSKA_TIMECODESCALE_ID != MATROSKA_DURATION_ID:
            scale_elem = infotab.get(MATROSKA_TIMECODESCALE_ID)
            if scale_elem is not None and scale_elem.get_value() > 0:
                scale = scale_elem.get_value()

        duration_elem = infotab.get(MATROSKA_DURATION_ID)
        if duration_elem is None:
            return
        ticks = self._decode_float(duration_elem)
        if ticks is None:
            return
        seconds = ticks * scale / 1e9
        try:
            # Express the time in minutes
            self.length = int(seconds / 60)
        except (ValueError, OverflowError):
            # NaN or infinite duration: leave length untouched
            pass

    def process_tracks(self, tracks):
        """Run process_one_track() on every child of the Tracks element."""
        tracksbuf = tracks.get_data()
        # Never walk past the bytes that were actually read.
        limit = min(tracks.get_len(), len(tracksbuf))
        indice = 0
        while indice < limit:
            trackelem = EbmlEntity(tracksbuf[indice:])
            _print("ELEMENT %X found" % trackelem.get_id())
            self.process_one_track(trackelem)
            step = trackelem.get_total_len() + trackelem.get_crc_len()
            if step <= 0:
                break
            indice += step

    def process_one_level(self, item):
        """Return a dict mapping element ID to the direct children of item.

        When an ID occurs more than once the last occurrence wins.
        """
        buf = item.get_data()
        # Never walk past the bytes that were actually read.
        limit = min(item.get_len(), len(buf))
        indice = 0
        tabelem = {}
        while indice < limit:
            elem = EbmlEntity(buf[indice:])
            tabelem[elem.get_id()] = elem
            step = elem.get_total_len() + elem.get_crc_len()
            if step <= 0:
                break
            indice += step
        return tabelem

    def process_one_track(self, track):
        """Record one child of Tracks as a video, audio or subtitle stream.

        Children without a track type, or with a type other than video,
        audio or subtitles, are ignored.
        """
        # Process all the items at the track level
        tabelem = self.process_one_level(track)
        type_elem = tabelem.get(MATROSKA_TRACK_TYPE_ID)
        if type_elem is None:
            # Not a usable track entry; skip it so the remaining tracks are
            # still processed.
            _print("Element without track type, skipped")
            return
        mytype = type_elem.get_value()
        _print("Track type found with UID %d" % mytype)

        if mytype == MATROSKA_VIDEO_TRACK:
            _print("VIDEO TRACK found !!")
            self.video.append(self._build_video(tabelem))
        elif mytype == MATROSKA_AUDIO_TRACK:
            _print("AUDIO TRACK found !!")
            self.audio.append(self._build_audio(tabelem))
        elif mytype == MATROSKA_SUBTITLES_TRACK:
            language = self._element_data(
                tabelem, MATROSKA_TRACK_LANGUAGE_ID, None)
            if language is None:
                language = "en"  # By default
            else:
                _print("Subtitle language found : %s" % language)
            self.subtitles.append(language)

    def _build_video(self, tabelem):
        """Return a mediainfo.VideoInfo filled from a track's children."""
        vi = mediainfo.VideoInfo()
        vi.codec = self._element_data(tabelem, MATROSKA_CODEC_ID, 'Unknown')

        # The frame duration is stored in nanoseconds.
        vi.fps = 0
        frame = tabelem.get(MATROSKA_FRAME_DURATION_ID)
        if frame is not None and frame.get_value() > 0:
            vi.fps = 1e9 / float(frame.get_value())

        width = None
        height = None
        vinfo = tabelem.get(MATROSKA_VIDEO_SETTINGS_ID)
        if vinfo is not None:
            vidtab = self.process_one_level(vinfo)
            width = vidtab.get(MATROSKA_VID_WIDTH_ID)
            height = vidtab.get(MATROSKA_VID_HEIGHT_ID)
        if width is not None:
            vi.width = width.get_value()
        if height is not None:
            vi.height = height.get_value()
        if width is None or height is None:
            _print("No other info about video track !!!")
        return vi

    def _build_audio(self, tabelem):
        """Return a mediainfo.AudioInfo filled from a track's children."""
        ai = mediainfo.AudioInfo()
        language = self._element_data(
            tabelem, MATROSKA_TRACK_LANGUAGE_ID, 'en')
        ai.language = language
        ai['language'] = language
        ai.codec = self._element_data(tabelem, MATROSKA_CODEC_ID, "Unknown")

        rate = None
        channels = None
        ainfo = tabelem.get(MATROSKA_AUDIO_SETTINGS_ID)
        if ainfo is not None:
            audtab = self.process_one_level(ainfo)
            rate_elem = audtab.get(MATROSKA_AUDIO_SAMPLERATE_ID)
            if rate_elem is not None:
                # The sample rate is a float payload, not an integer.
                rate = self._decode_float(rate_elem)
            channels = audtab.get(MATROSKA_AUDIO_CHANNELS_ID)
        if rate is not None:
            ai.samplerate = rate
        if channels is not None:
            ai.channels = channels.get_value()
        if rate is None or channels is None:
            _print("No other info about audio track !!!")
        return ai
---|
331 | |
---|
# Register MkvInfo with mmpython as the audio/video parser for files with
# the 'mkv' and 'mka' extensions.
mmpython.registertype(
    'application/mkv',
    ('mkv', 'mka',),
    mediainfo.TYPE_AV,
    MkvInfo
)
---|