[4] | 1 | #if 0 |
---|
| 2 | # ----------------------------------------------------------------------- |
---|
| 3 | # mkvinfo.py - Matroska Streaming Video Files |
---|
| 4 | # ----------------------------------------------------------------------- |
---|
| 5 | # $Id: mkvinfo.py,v 1.3 2004/04/18 17:55:26 dischi Exp $ |
---|
| 6 | # |
---|
| 7 | # $Log: mkvinfo.py,v $ |
---|
| 8 | # Revision 1.3 2004/04/18 17:55:26 dischi |
---|
| 9 | # update, including subtitle support |
---|
| 10 | # |
---|
| 11 | # Revision 1.2 2004/03/21 08:57:31 dischi |
---|
| 12 | # major bugfix |
---|
| 13 | # |
---|
| 14 | # Revision 1.1 2004/01/31 12:24:15 dischi |
---|
| 15 | # add basic matroska info |
---|
| 16 | # |
---|
| 17 | # ----------------------------------------------------------------------- |
---|
| 18 | # MMPython - Media Metadata for Python |
---|
| 19 | # Copyright (C) 2003 Thomas Schueppel, Dirk Meyer |
---|
| 20 | # |
---|
| 21 | # This program is free software; you can redistribute it and/or modify |
---|
| 22 | # it under the terms of the GNU General Public License as published by |
---|
| 23 | # the Free Software Foundation; either version 2 of the License, or |
---|
| 24 | # (at your option) any later version. |
---|
| 25 | # |
---|
| 26 | # This program is distributed in the hope that it will be useful, but |
---|
| 27 | # WITHOUT ANY WARRANTY; without even the implied warranty of MER- |
---|
| 28 | # CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General |
---|
| 29 | # Public License for more details. |
---|
| 30 | # |
---|
| 31 | # You should have received a copy of the GNU General Public License along |
---|
| 32 | # with this program; if not, write to the Free Software Foundation, Inc., |
---|
| 33 | # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
---|
| 34 | # |
---|
| 35 | # ----------------------------------------------------------------------- |
---|
| 36 | #endif |
---|
| 37 | |
---|
| 38 | |
---|
| 39 | from mmpython import mediainfo |
---|
| 40 | import mmpython |
---|
| 41 | import struct |
---|
| 42 | import re |
---|
| 43 | import stat |
---|
| 44 | import os |
---|
| 45 | import math |
---|
| 46 | from types import * |
---|
| 47 | from struct import * |
---|
| 48 | from string import * |
---|
| 49 | |
---|
# Module-local shorthand for mediainfo._debug; the parser below sends all of
# its diagnostic messages through this name.
_print = mediainfo._debug
---|
| 51 | |
---|
# Main IDs for the Matroska streams (values carried by the track type element)
MATROSKA_VIDEO_TRACK = 0x01
MATROSKA_AUDIO_TRACK = 0x02
MATROSKA_SUBTITLES_TRACK = 0x11

# Top-level EBML / Matroska element IDs
MATROSKA_HEADER_ID = 0x1A45DFA3
MATROSKA_TRACKS_ID = 0x1654AE6B
MATROSKA_SEGMENT_ID = 0x18538067
MATROSKA_SEGMENT_INFO_ID = 0x1549A966
MATROSKA_CLUSTER_ID = 0x1F43B675
MATROSKA_VOID_ID = 0xEC
# Both names are kept for the same CRC-32 element ID so that existing users
# of either spelling keep working.
MATROSKA_CRC_ID = 0xBF
MATROSKA_CRC32_ID = 0xBF

# Segment information element IDs.
# MATROSKA_TIMECODESCALE_ID used to be bound a second time to 0x4489, which
# is the Duration ID; that rebinding silently replaced the real value and
# has been removed.
MATROSKA_TIMECODESCALE_ID = 0x2AD7B1
MATROSKA_DURATION_ID = 0x4489
MATROSKA_MUXING_APP_ID = 0x4D80
MATROSKA_WRITING_APP_ID = 0x5741

# Track level element IDs
MATROSKA_TRACK_TYPE_ID = 0x83
MATROSKA_TRACK_LANGUAGE_ID = 0x22B59C
MATROSKA_CODEC_ID = 0x86
MATROSKA_CODEC_NAME_ID = 0x258688
MATROSKA_FRAME_DURATION_ID = 0x23E383
MATROSKA_VIDEO_SETTINGS_ID = 0xE0
MATROSKA_VID_WIDTH_ID = 0xB0
MATROSKA_VID_HEIGHT_ID = 0xBA
MATROSKA_AUDIO_SETTINGS_ID = 0xE1
MATROSKA_AUDIO_SAMPLERATE_ID = 0xB5
MATROSKA_AUDIO_CHANNELS_ID = 0x9F
MATROSKA_TRACK_UID_ID = 0x73C5
MATROSKA_TRACK_NUMBER_ID = 0xD7
---|
| 83 | |
---|
| 84 | # This class is responsible for handling one EBML entity as described in the Matroska/EBML spec |
---|
class EbmlEntity:
    """One EBML element (ID, size and payload) decoded from the start of a buffer.

    The buffer may be a Python 2 ``str`` or a ``bytes`` object.  After
    construction the accessors expose:

      get_id()         numeric element ID, marker bits included (0 if invalid)
      get_str_id()     the raw ID bytes
      get_len()        number of payload bytes actually present in the buffer
      get_data()       the payload bytes
      get_value()      payload read as a big-endian unsigned integer when it
                       is 1 to 8 bytes long, otherwise 0
      get_total_len()  ID bytes + size bytes + payload bytes
      get_crc_len()    bytes used by CRC-32 elements skipped ahead of this one

    ``valid`` is 1 when an element header could be decoded and 0 otherwise.
    An invalid entity reports the whole unparsed remainder as its payload so
    that a caller advancing by get_total_len() always reaches the end of its
    buffer instead of looping or indexing past it.
    """

    def __init__(self, inbuf):
        # Set the CRC len to zero
        self.crc_len = 0
        self.build_entity(inbuf)
        # Now loop until we find an entity that is not a CRC-32 element.
        # Stopping on an invalid entity keeps the loop from spinning on a
        # buffer that ends right after a CRC element.
        while self.valid and self.get_id() == MATROSKA_CRC32_ID:
            self.crc_len += self.get_total_len()
            inbuf = inbuf[self.get_total_len():]
            self.build_entity(inbuf)

    def _be_uint(self, data):
        """Return `data` interpreted as a big-endian unsigned integer."""
        number = 0
        for pos in range(len(data)):
            # A one-byte slice is accepted by ord() for both str and bytes,
            # whereas indexing bytes yields an int on Python 3.
            number = (number << 8) | ord(data[pos:pos + 1])
        return number

    def build_entity(self, inbuf):
        """Decode the element found at the start of `inbuf` into this object.

        Every field is reset first so nothing from a previous call (see the
        CRC skipping loop in __init__) can leak into the new state.
        """
        empty = inbuf[0:0]
        self.valid = 0
        self.entity_id = 0
        self.entity_str = empty
        self.id_len = 0
        self.len_size = 0
        self.entity_len = 0
        self.entity_data = empty
        self.value = 0

        self.compute_id(inbuf)
        size = None
        if self.id_len:
            size = self.compute_len(inbuf[self.id_len:])
        if size is None:
            # No usable ID or size field: expose the remainder as an opaque,
            # invalid blob (see the class docstring for why).
            self.entity_id = 0
            self.entity_str = empty
            self.id_len = 0
            self.len_size = 0
            self.entity_data = inbuf
            self.entity_len = len(inbuf)
            _print("EBML entity not found, bad file format")
            return

        self.valid = 1
        start = self.id_len + self.len_size
        if size == -1:
            # Unknown size: the element runs to the end of what was read
            self.entity_data = inbuf[start:]
        else:
            self.entity_data = inbuf[start:start + size]
        # Obviously, an element can be very long (ie the whole file), so its
        # length is truncated to what the read buffer really holds.
        self.entity_len = len(self.entity_data)
        # A payload of 1 to 8 bytes could be an unsigned integer, so decode it
        if 1 <= self.entity_len <= 8:
            self.value = self._be_uint(self.entity_data)

    def compute_id(self, inbuf):
        """Decode the element ID (1 to 4 bytes) at the start of `inbuf`.

        Sets id_len, entity_id and entity_str.  id_len stays 0 when the
        first byte carries no valid length marker or the buffer is too short.
        """
        self.id_len = 0
        self.entity_id = 0
        self.entity_str = inbuf[0:0]
        if len(inbuf) == 0:
            return
        first = ord(inbuf[0:1])
        if first & 0x80:
            width = 1
        elif first & 0x40:
            width = 2
        elif first & 0x20:
            width = 3
        elif first & 0x10:
            width = 4
        else:
            return
        if len(inbuf) < width:
            return
        self.id_len = width
        self.entity_str = inbuf[0:width]
        # The ID keeps its marker bits, which is how the constants are written
        self.entity_id = self._be_uint(self.entity_str)
        return

    def compute_len(self, inbuf):
        """Decode the size field (1 to 8 bytes) at the start of `inbuf`.

        Sets len_size and returns the payload size with the length marker
        removed, -1 when the field holds the reserved "unknown size" value
        (all data bits set), or None when the field is malformed or
        truncated (len_size is then 0).
        """
        self.len_size = 0
        if len(inbuf) == 0:
            return None
        first = ord(inbuf[0:1])
        # The position of the first set bit gives the width of the field
        width = 1
        marker = 0x80
        while marker and not (first & marker):
            marker >>= 1
            width += 1
        if marker == 0 or len(inbuf) < width:
            return None
        self.len_size = width
        size = first & (marker - 1)
        size = (size << (8 * (width - 1))) | self._be_uint(inbuf[1:width])
        if size == (1 << (7 * width)) - 1:
            return -1
        return size

    def get_crc_len(self):
        return self.crc_len

    def get_value(self):
        return self.value

    def get_data(self):
        return self.entity_data

    def get_id(self):
        return self.entity_id

    def get_str_id(self):
        return self.entity_str

    def get_len(self):
        return self.entity_len

    def get_total_len(self):
        return self.entity_len + self.id_len + self.len_size
---|
| 195 | |
---|
| 196 | |
---|
| 197 | # This is the main Matroska object |
---|
class MkvInfo(mediainfo.AVInfo):
    """Metadata reader for Matroska files.

    Only the first 80000 bytes of the file are read.  From them the parser
    checks the EBML header, then looks inside the segment for the segment
    information (duration) and the track list (video, audio and subtitle
    tracks).  ``valid`` is set to 1 once the EBML header is recognised and
    to 0 when it is not.
    """

    # Errors that decoding a damaged or truncated EBML element can raise.
    _PARSE_ERRORS = (IndexError, KeyError, AttributeError, TypeError,
                     ValueError, struct.error)

    def __init__(self, file):
        """Parse `file`, an object whose read() returns the raw file bytes."""
        mediainfo.AVInfo.__init__(self)
        self.samplerate = 1

        buffer = file.read(80000)
        if len(buffer) == 0:
            # Regular File end
            return None

        # Check the Matroska header
        header = self._parse_entity(buffer)
        if header is None or header.get_id() != MATROSKA_HEADER_ID:
            self.valid = 0
            return None

        _print("HEADER ID found %08X" % header.get_id() )
        self.valid = 1
        self.mime = 'application/mkv'
        self.type = 'Matroska'

        # Now get the segment
        segment = self._parse_entity(buffer[header.get_total_len():])
        if segment is None:
            _print("SEGMENT ID not found")
            return None
        if segment.get_id() != MATROSKA_SEGMENT_ID:
            _print("SEGMENT ID not found %08X" % segment.get_id() )
            return None
        _print("SEGMENT ID found %08X" % segment.get_id() )

        segtab = self.process_one_level(segment)

        # A missing segment information element only means no duration
        seginfo = segtab.get(MATROSKA_SEGMENT_INFO_ID)
        if seginfo is not None:
            self._process_segment_info(seginfo)

        _print ("Searching for id : %X" % MATROSKA_TRACKS_ID)
        tracks = segtab.get(MATROSKA_TRACKS_ID)
        if tracks is None:
            _print("TRACKS ID not found !!" )
        else:
            self.process_tracks(tracks)

    def _parse_entity(self, buf):
        """Return the EbmlEntity at the start of `buf`, or None if unreadable."""
        try:
            entity = EbmlEntity(buf)
            # Read the fields used by the callers now, so that a half-built
            # entity is rejected here rather than failing later on.
            entity.get_id()
            entity.get_total_len()
            entity.get_crc_len()
        except self._PARSE_ERRORS:
            return None
        if not entity.valid:
            return None
        return entity

    def _children(self, item):
        """Return the list of elements directly contained in `item`."""
        buf = item.get_data()
        # Never walk past the bytes that are really in memory, whatever
        # length the element declares.
        limit = min(item.get_len(), len(buf))
        children = []
        indice = 0
        while indice < limit:
            elem = self._parse_entity(buf[indice:])
            if elem is None:
                break
            step = elem.get_total_len() + elem.get_crc_len()
            if step <= 0:
                # No progress is possible, stop instead of looping forever
                break
            children.append(elem)
            indice += step
        return children

    def _unpack_float(self, data):
        """Decode a 4 or 8 byte big-endian IEEE float, None for other sizes."""
        if len(data) == 4:
            return struct.unpack('!f', data)[0]
        if len(data) == 8:
            return struct.unpack('!d', data)[0]
        return None

    def _process_segment_info(self, seginfo):
        """Set self.length (in minutes) from the segment information."""
        seginfotab = self.process_one_level(seginfo)

        # Number of timecode ticks per second.  The scale element gives the
        # tick length in ns; 1000 matches the default of 1,000,000 ns.
        scalecode = 1000.0
        scale = seginfotab.get(MATROSKA_TIMECODESCALE_ID)
        # The scale is only usable when its ID names a different element
        # than the duration; otherwise the lookup returns the duration
        # payload itself and the default is the better choice.
        if (scale is not None
                and MATROSKA_TIMECODESCALE_ID != MATROSKA_DURATION_ID
                and scale.get_value() > 0):
            scalecode = 1e9 / scale.get_value()

        elem = seginfotab.get(MATROSKA_DURATION_ID)
        if elem is None:
            return
        duration = self._unpack_float(elem.get_data())
        if duration is None:
            return
        try:
            # Rescale the tick count to seconds
            duration = float(duration / scalecode)
            # Express the time in minutes
            self.length = int(duration/60)
        except (ValueError, OverflowError):
            # NaN or infinite duration stored in the file
            pass

    def process_tracks(self, tracks):
        """Process every element found inside the tracks element."""
        for trackelem in self._children(tracks):
            _print ("ELEMENT %X found" % trackelem.get_id())
            self.process_one_track(trackelem)

    def process_one_level(self, item):
        """Return a dict mapping element ID to the child elements of `item`.

        When an ID occurs more than once, the last occurrence is kept.
        """
        tabelem = {}
        for elem in self._children(item):
            tabelem[elem.get_id()] = elem
        return tabelem

    def process_one_track(self, track):
        """Add one track to self.video, self.audio or self.subtitles."""
        # Process all the items at the track level
        tabelem = self.process_one_level(track)
        # We have the dict of track elements, now build the MMPYTHON information
        tracktype = tabelem.get(MATROSKA_TRACK_TYPE_ID)
        if tracktype is None:
            # Not a track description (padding for instance): skip it so
            # that the remaining tracks are still processed.
            _print("Track type not found, element skipped")
            return
        mytype = tracktype.get_value()
        _print ("Track type found with UID %d" % mytype)
        if mytype == MATROSKA_VIDEO_TRACK:
            _print("VIDEO TRACK found !!" )
            self.video.append(self._video_info(tabelem))
        elif mytype == MATROSKA_AUDIO_TRACK:
            _print("AUDIO TRACK found !!" )
            self.audio.append(self._audio_info(tabelem))
        elif mytype == MATROSKA_SUBTITLES_TRACK:
            self.subtitles.append(self._subtitle_language(tabelem))

    def _video_info(self, tabelem):
        """Build a mediainfo.VideoInfo from the elements of a video track."""
        #VIDEOCORE = ['length', 'encoder', 'bitrate', 'samplerate', 'codec', 'samplebits',
        #             'width', 'height', 'fps', 'aspect']
        vi = mediainfo.VideoInfo()

        codec = tabelem.get(MATROSKA_CODEC_ID)
        if codec is None:
            vi.codec = 'Unknown'
        else:
            vi.codec = codec.get_data()

        # The frame duration is given in ns; 0 means "not available"
        vi.fps = 0
        frame = tabelem.get(MATROSKA_FRAME_DURATION_ID)
        if frame is not None and frame.get_value() > 0:
            vi.fps = 1 / (pow(10, -9) * (frame.get_value()))

        vinfo = tabelem.get(MATROSKA_VIDEO_SETTINGS_ID)
        try:
            vidtab = self.process_one_level(vinfo)
            vi.width = vidtab[MATROSKA_VID_WIDTH_ID].get_value()
            vi.height = vidtab[MATROSKA_VID_HEIGHT_ID].get_value()
        except (KeyError, AttributeError):
            # No video settings at all (vinfo is None) or a missing dimension
            _print("No other info about video track !!!")
        return vi

    def _audio_info(self, tabelem):
        """Build a mediainfo.AudioInfo from the elements of an audio track."""
        #AUDIOCORE = ['channels', 'samplerate', 'length', 'encoder', 'codec', 'samplebits',
        #             'bitrate', 'language']
        ai = mediainfo.AudioInfo()

        lang = tabelem.get(MATROSKA_TRACK_LANGUAGE_ID)
        if lang is None:
            language = 'en'
        else:
            language = lang.get_data()
        ai.language = language
        ai['language'] = language

        codec = tabelem.get(MATROSKA_CODEC_ID)
        if codec is None:
            ai.codec = "Unknown"
        else:
            ai.codec = codec.get_data()

        complete = 0
        ainfo = tabelem.get(MATROSKA_AUDIO_SETTINGS_ID)
        if ainfo is not None:
            audtab = self.process_one_level(ainfo)
            rate = None
            rate_elem = audtab.get(MATROSKA_AUDIO_SAMPLERATE_ID)
            if rate_elem is not None:
                # The sample rate is stored as a float, so decode the raw
                # payload rather than its integer reading.
                rate = self._unpack_float(rate_elem.get_data())
            if rate is not None:
                ai.samplerate = rate
            channels = audtab.get(MATROSKA_AUDIO_CHANNELS_ID)
            if channels is not None:
                ai.channels = channels.get_value()
            complete = rate is not None and channels is not None
        if not complete:
            _print("No other info about audio track !!!")
        return ai

    def _subtitle_language(self, tabelem):
        """Return the language of a subtitle track, "en" when not given."""
        elem = tabelem.get(MATROSKA_TRACK_LANGUAGE_ID)
        if elem is None:
            return "en" # By default
        language = elem.get_data()
        _print ("Subtitle language found : %s" % elem.get_data() )
        return language
---|
| 331 | |
---|
# Register the Matroska parser with mmpython.  The arguments are presumably
# the mime type, the handled file extensions, the media category and the
# parser class -- confirm against mmpython.registertype.
mmpython.registertype(
    'application/mkv',
    ('mkv', 'mka',),
    mediainfo.TYPE_AV,
    MkvInfo,
)
---|