source: trunk/src/testing/bin/fileServer/misc/mmpython/video/mkvinfo.py @ 4

Revision 4, 12.3 KB checked in by ajaworski, 13 years ago (diff)

Added modified SAGE sources

Line 
1#if 0
2# -----------------------------------------------------------------------
3# mkvinfo.py - Matroska Streaming Video Files
4# -----------------------------------------------------------------------
5# $Id: mkvinfo.py,v 1.3 2004/04/18 17:55:26 dischi Exp $
6#
7# $Log: mkvinfo.py,v $
8# Revision 1.3  2004/04/18 17:55:26  dischi
9# update, including subtitle support
10#
11# Revision 1.2  2004/03/21 08:57:31  dischi
12# major bugfix
13#
14# Revision 1.1  2004/01/31 12:24:15  dischi
15# add basic matroska info
16#
17# -----------------------------------------------------------------------
18# MMPython - Media Metadata for Python
19# Copyright (C) 2003 Thomas Schueppel, Dirk Meyer
20#
21# This program is free software; you can redistribute it and/or modify
22# it under the terms of the GNU General Public License as published by
23# the Free Software Foundation; either version 2 of the License, or
24# (at your option) any later version.
25#
26# This program is distributed in the hope that it will be useful, but
27# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
28# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
29# Public License for more details.
30#
31# You should have received a copy of the GNU General Public License along
32# with this program; if not, write to the Free Software Foundation, Inc.,
33# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
34#
35# -----------------------------------------------------------------------
36#endif
37
38
import struct
import re
import stat
import os
import math
from types import *
from struct import *
from string import *

import mmpython
from mmpython import mediainfo
49
# Module-wide debug output helper: an alias of mediainfo._debug, called with
# a single pre-formatted message string everywhere in this file.
_print = mediainfo._debug
51
52# Main IDs for the Matroska streams
# Track kinds: the values MkvInfo.process_one_track compares the TrackType
# element against.
MATROSKA_VIDEO_TRACK     = 0x01
MATROSKA_AUDIO_TRACK     = 0x02
MATROSKA_SUBTITLES_TRACK = 0x11

# EBML element IDs.  They are written with their length-marker bits included,
# which is the form EbmlEntity.compute_id() produces.
MATROSKA_HEADER_ID  = 0x1A45DFA3
MATROSKA_TRACKS_ID  = 0x1654AE6B
MATROSKA_SEGMENT_ID = 0x18538067
MATROSKA_SEGMENT_INFO_ID      = 0x1549A966
MATROSKA_CLUSTER_ID           = 0x1F43B675
MATROSKA_VOID_ID              = 0xEC
MATROSKA_CRC_ID               = 0xBF
# TimecodeScale and Duration are two different elements and must keep two
# different IDs: 0x4489 is Duration only.  Binding TIMECODESCALE to 0x4489
# makes the scale lookup read the duration payload instead.
MATROSKA_TIMECODESCALE_ID     = 0x2AD7B1
MATROSKA_DURATION_ID          = 0x4489
# Same value as MATROSKA_CRC_ID; both names are kept for existing users.
MATROSKA_CRC32_ID             = 0xBF
MATROSKA_TRACK_TYPE_ID        = 0x83
MATROSKA_TRACK_LANGUAGE_ID    = 0x22B59C
MATROSKA_MUXING_APP_ID        = 0x4D80
MATROSKA_WRITING_APP_ID       = 0x5741
MATROSKA_CODEC_ID             = 0x86
MATROSKA_CODEC_NAME_ID        = 0x258688
MATROSKA_FRAME_DURATION_ID    = 0x23E383
MATROSKA_VIDEO_SETTINGS_ID    = 0xE0
MATROSKA_VID_WIDTH_ID         = 0xB0
MATROSKA_VID_HEIGHT_ID        = 0xBA
MATROSKA_AUDIO_SETTINGS_ID    = 0xE1
MATROSKA_AUDIO_SAMPLERATE_ID  = 0xB5
MATROSKA_AUDIO_CHANNELS_ID    = 0x9F
MATROSKA_TRACK_UID_ID         = 0x73C5
MATROSKA_TRACK_NUMBER_ID      = 0xD7
83
# This class handles a single EBML entity as described in the Matroska/EBML spec
class EbmlEntity:
    """One EBML element (ID, size descriptor, payload) parsed from a buffer.

    The constructor parses the element found at the start of ``inbuf``.
    Leading CRC-32 elements are skipped transparently; the number of bytes
    skipped that way is available through get_crc_len(), so a caller walking
    a buffer advances by ``get_total_len() + get_crc_len()``.

    Attributes set by parsing:
      valid       -- 1 when an element was decoded, 0 otherwise
      entity_id   -- numeric ID, marker bits included (0 when invalid)
      entity_str  -- the raw ID bytes
      id_len      -- number of bytes used by the ID (1..4, 0 when invalid)
      len_size    -- number of bytes used by the size descriptor (1..8)
      entity_len  -- number of payload bytes actually present in the buffer
      entity_data -- the payload bytes
      value       -- payload read as a big-endian unsigned integer when it is
                     1 to 4 bytes long, 0 otherwise

    ``inbuf`` may be a Python 2 ``str`` or a ``bytes`` object: single bytes
    are always decoded with struct.unpack('B'), never with ord().
    """

    def __init__(self, inbuf):
        # Bytes consumed by CRC elements preceding the real entity
        self.crc_len = 0
        self.build_entity(inbuf)
        # Loop until we find an entity which is not a CRC
        while self.valid and self.get_id() == MATROSKA_CRC32_ID:
            skipped = self.get_total_len()
            self.crc_len += skipped
            inbuf = inbuf[skipped:]
            self.build_entity(inbuf)

    def _descriptor_width(self, first, max_width):
        """Return the 1-based position of the highest set bit of ``first``.

        EBML encodes the byte width of an ID or size in the position of the
        first set bit of its first byte.  Returns 0 when no bit is set within
        the ``max_width`` most significant bits.
        """
        mask = 0x80
        for width in range(1, max_width + 1):
            if first & mask:
                return width
            mask >>= 1
        return 0

    def build_entity(self, inbuf):
        """Parse the element at the start of ``inbuf`` into this object.

        When no element can be decoded (empty buffer, unknown ID marker,
        truncated ID or size descriptor) the entity is flagged invalid, gets
        ID 0 and swallows the whole remaining buffer as its payload.  Callers
        that advance by get_total_len() therefore always make progress on a
        non-empty buffer, and every getter stays usable.
        """
        self.compute_id(inbuf)
        declared = -1
        if self.id_len:
            declared = self.compute_len(inbuf[self.id_len:])
        if declared < 0:
            self.valid = 0
            self.id_len = 0
            self.len_size = 0
            self.entity_id = 0
            self.entity_str = inbuf[0:0]
            self.entity_data = inbuf
            self.entity_len = len(inbuf)
            self.value = 0
            _print("EBML entity not found, bad file format")
            return
        self.valid = 1
        start = self.id_len + self.len_size
        # The declared size may run past the read buffer (a segment usually
        # spans the whole file), so the payload is cut at the buffer end and
        # entity_len reports what is really available.
        self.entity_data = inbuf[start:start + declared]
        self.entity_len = len(self.entity_data)
        # A payload of 1 to 4 bytes could be a numeric value, so decode it
        self.value = 0
        if 1 <= self.entity_len <= 4:
            for byte in unpack('%dB' % self.entity_len, self.entity_data):
                self.value = (self.value << 8) | byte

    def compute_id(self, inbuf):
        """Decode the element ID (1 to 4 bytes) at the start of ``inbuf``.

        Sets id_len, entity_id and entity_str.  id_len is left at 0 (and
        entity_id at 0) when the buffer is empty, when the first byte carries
        no valid width marker, or when the buffer is shorter than the ID.
        """
        self.id_len = 0
        self.entity_id = 0
        lead = inbuf[0:1]
        if lead:
            width = self._descriptor_width(unpack('B', lead)[0], 4)
            if width and len(inbuf) >= width:
                self.id_len = width
                # The marker bits are kept: they are part of the ID
                for byte in unpack('%dB' % width, inbuf[:width]):
                    self.entity_id = (self.entity_id << 8) | byte
        self.entity_str = inbuf[0:self.id_len]
        return

    def compute_len(self, inbuf):
        """Decode the size descriptor (1 to 8 bytes) at the start of ``inbuf``.

        Sets len_size to the width of the descriptor and returns the declared
        payload size as an integer, marker bit removed.  Returns -1 and sets
        len_size to 0 when the descriptor is missing, invalid (first byte 0)
        or truncated.
        """
        self.len_size = 0
        lead = inbuf[0:1]
        if not lead:
            return -1
        width = self._descriptor_width(unpack('B', lead)[0], 8)
        if width == 0 or len(inbuf) < width:
            return -1
        self.len_size = width
        raw = unpack('%dB' % width, inbuf[:width])
        # Drop the width marker from the first byte, then append the rest
        size = raw[0] & (0xFF >> width)
        for byte in raw[1:]:
            size = (size << 8) | byte
        return size

    def get_crc_len(self):
        """Return the number of bytes of CRC elements skipped before this entity."""
        return self.crc_len

    def get_value(self):
        """Return the payload as an unsigned integer (0 unless it is 1-4 bytes long)."""
        value = self.value
        return value

    def get_data(self):
        """Return the raw payload bytes."""
        return self.entity_data

    def get_id(self):
        """Return the numeric element ID (0 for an invalid entity)."""
        return self.entity_id

    def get_str_id(self):
        """Return the raw bytes of the element ID."""
        return self.entity_str

    def get_len(self):
        """Return the number of payload bytes held by this entity."""
        return self.entity_len

    def get_total_len(self):
        """Return the bytes covered by the ID, the size descriptor and the payload."""
        return self.entity_len+self.id_len+self.len_size
195
196
# This is the main Matroska object
class MkvInfo(mediainfo.AVInfo):
    """Metadata reader for Matroska files.

    Only the first 80000 bytes of ``file`` are read and examined.  When the
    buffer starts with the EBML header the object is flagged valid, ``mime``
    and ``type`` are set, and the segment found right after the header is
    scanned for:
      - the segment information, used to set ``length`` (in minutes);
      - the tracks, appended to ``video``, ``audio`` and ``subtitles``.
    When the header is not found ``valid`` is set to 0.
    """

    # Errors meaning "this part of the buffer could not be interpreted".
    # Track parsing is best effort: they are reported through _print only.
    _PARSE_ERRORS = (KeyError, IndexError, ValueError, TypeError,
                     AttributeError, ZeroDivisionError, OverflowError,
                     struct.error)

    def __init__(self, file):
        mediainfo.AVInfo.__init__(self)
        self.samplerate = 1

        head = file.read(80000)
        if not head:
            # Regular File end
            return

        # Check the Matroska header
        header = EbmlEntity(head)
        if header.get_id() != MATROSKA_HEADER_ID:
            self.valid = 0
            return
        _print("HEADER ID found %08X" % header.get_id() )
        self.valid = 1
        self.mime = 'application/mkv'
        self.type = 'Matroska'

        # Now get the segment
        segment = EbmlEntity(head[header.get_total_len():])
        if segment.get_id() != MATROSKA_SEGMENT_ID:
            _print("SEGMENT ID not found %08X" % segment.get_id() )
            return
        _print("SEGMENT ID found %08X" % segment.get_id() )
        segtab = self.process_one_level(segment)

        # A segment without segment information simply has no length
        self._read_length(segtab.get(MATROSKA_SEGMENT_INFO_ID))

        _print ("Searching for id : %X" % MATROSKA_TRACKS_ID)
        tracks = segtab.get(MATROSKA_TRACKS_ID)
        if tracks is None:
            _print("TRACKS ID not found !!" )
            return
        try:
            self.process_tracks(tracks)
        except self._PARSE_ERRORS:
            # Keep whatever tracks were decoded before the failure
            _print("Error while parsing the tracks, giving up")

    def _children(self, item):
        """Yield the EbmlEntity objects stored directly inside ``item``.

        Iteration never goes past the payload bytes actually available and
        stops if an element would not advance the read position.
        """
        buf = item.get_data()
        limit = min(item.get_len(), len(buf))
        pos = 0
        while pos < limit:
            elem = EbmlEntity(buf[pos:])
            yield elem
            step = elem.get_total_len() + elem.get_crc_len()
            if step <= 0:
                break
            pos += step

    def _decode_float(self, data):
        """Return the big-endian float stored in ``data`` (4 or 8 bytes).

        Raises ValueError for any other payload size.
        """
        size = len(data)
        if size == 4:
            return unpack('!f', data)[0]
        if size == 8:
            return unpack('!d', data)[0]
        raise ValueError("unsupported float size: %d" % size)

    def _data_or(self, tabelem, elem_id, default):
        """Return the payload of ``tabelem[elem_id]``, or ``default`` if absent."""
        elem = tabelem.get(elem_id)
        if elem is None:
            return default
        return elem.get_data()

    def _read_length(self, seginfo):
        """Set ``self.length`` (minutes) from the segment information element.

        The Duration element counts ticks of TimecodeScale nanoseconds, so
        seconds = duration * scale / 10**9.  The scale defaults to 1000000 ns
        (one millisecond per tick) when it is missing or not decodable, which
        is the same assumption the fallback path always made.  Nothing is set
        when ``seginfo`` is None or holds no usable Duration.
        """
        if seginfo is None:
            return
        seginfotab = self.process_one_level(seginfo)
        scale_ns = 0.0
        scale_elem = seginfotab.get(MATROSKA_TIMECODESCALE_ID)
        if scale_elem is not None:
            scale_ns = float(scale_elem.get_value())
        if scale_ns <= 0:
            scale_ns = 1000000.0
        try:
            ticks = self._decode_float(seginfotab[MATROSKA_DURATION_ID].get_data())
            seconds = ticks * scale_ns / (1000 * 1000 * 1000)
            # Express the time in minutes
            self.length = int(seconds / 60)
        except (KeyError, ValueError, OverflowError, struct.error):
            pass

    def process_tracks(self, tracks):
        """Run process_one_track() on every element stored inside ``tracks``."""
        for trackelem in self._children(tracks):
            _print ("ELEMENT %X found" % trackelem.get_id())
            self.process_one_track(trackelem)

    def process_one_level(self, item):
        """Return a dict mapping element ID to EbmlEntity for the children of ``item``.

        When an ID occurs several times the last occurrence wins.
        """
        tabelem = {}
        for elem in self._children(item):
            tabelem[elem.get_id()] = elem
        return tabelem

    def process_one_track(self, track):
        """Decode one child of the Tracks element and record its information.

        Video tracks are appended to ``self.video``, audio tracks to
        ``self.audio`` and the language of subtitle tracks to
        ``self.subtitles``.  Children without a TrackType element (padding
        for instance) and tracks of any other type are ignored.
        """
        # Process all the items at the track level
        tabelem = self.process_one_level(track)
        type_elem = tabelem.get(MATROSKA_TRACK_TYPE_ID)
        if type_elem is None:
            _print("No track type in this element, skipping it")
            return
        mytype = type_elem.get_value()
        _print ("Track type found with UID %d" % mytype)
        if mytype == MATROSKA_VIDEO_TRACK:
            self._add_video_track(tabelem)
        elif mytype == MATROSKA_AUDIO_TRACK:
            self._add_audio_track(tabelem)
        elif mytype == MATROSKA_SUBTITLES_TRACK:
            self._add_subtitle_track(tabelem)

    def _add_video_track(self, tabelem):
        """Build a VideoInfo from the elements of a video track and store it."""
        _print("VIDEO TRACK found !!" )
        #VIDEOCORE = ['length', 'encoder', 'bitrate', 'samplerate', 'codec', 'samplebits',
        #     'width', 'height', 'fps', 'aspect']
        vi = mediainfo.VideoInfo()
        vi.codec = self._data_or(tabelem, MATROSKA_CODEC_ID, 'Unknown')
        try:
            # The frame duration is expressed in nanoseconds
            frame_ns = tabelem[MATROSKA_FRAME_DURATION_ID].get_value()
            vi.fps = 1 / (pow(10, -9) * frame_ns)
        except (KeyError, ZeroDivisionError):
            vi.fps = 0
        try:
            vidtab = self.process_one_level(tabelem[MATROSKA_VIDEO_SETTINGS_ID])
            vi.width  = vidtab[MATROSKA_VID_WIDTH_ID].get_value()
            vi.height = vidtab[MATROSKA_VID_HEIGHT_ID].get_value()
        except KeyError:
            _print("No other info about video track !!!")
        self.video.append(vi)

    def _add_audio_track(self, tabelem):
        """Build an AudioInfo from the elements of an audio track and store it."""
        _print("AUDIO TRACK found !!" )
        #AUDIOCORE = ['channels', 'samplerate', 'length', 'encoder', 'codec', 'samplebits',
        #     'bitrate', 'language']
        ai = mediainfo.AudioInfo()
        language = self._data_or(tabelem, MATROSKA_TRACK_LANGUAGE_ID, 'en')
        ai.language = language
        ai['language'] = language
        ai.codec = self._data_or(tabelem, MATROSKA_CODEC_ID, "Unknown")
        try:
            # The audio settings must be read from the audio element itself
            audtab = self.process_one_level(tabelem[MATROSKA_AUDIO_SETTINGS_ID])
            # The sample rate is stored as a float: decode the raw payload
            ai.samplerate = self._decode_float(
                audtab[MATROSKA_AUDIO_SAMPLERATE_ID].get_data())
            ai.channels = audtab[MATROSKA_AUDIO_CHANNELS_ID].get_value()
        except (KeyError, ValueError, struct.error):
            _print("No other info about audio track !!!")
        self.audio.append(ai)

    def _add_subtitle_track(self, tabelem):
        """Store the language of a subtitle track ("en" when not specified)."""
        elem = tabelem.get(MATROSKA_TRACK_LANGUAGE_ID)
        if elem is None:
            language = "en" # By default
        else:
            language = elem.get_data()
            _print ("Subtitle language found : %s" % elem.get_data() )
        self.subtitles.append(language)
329
330        #_print("Found %d elem for this track" % len(tabelem) )
331
# Register MkvInfo with mmpython; presumably the arguments are the MIME type,
# the file extensions handled and the media category (verify against
# mmpython.registertype).
mmpython.registertype( 'application/mkv', ('mkv', 'mka',), mediainfo.TYPE_AV, MkvInfo )
Note: See TracBrowser for help on using the repository browser.