1 | #if 0 |
---|
2 | # $Id: asfinfo.py,v 1.18 2004/01/31 12:37:25 dischi Exp $ |
---|
3 | # $Log: asfinfo.py,v $ |
---|
4 | # Revision 1.18 2004/01/31 12:37:25 dischi |
---|
5 | # remove bad chars |
---|
6 | # |
---|
7 | # Revision 1.17 2003/08/30 09:36:22 dischi |
---|
8 | # turn off some debug based on DEBUG |
---|
9 | # |
---|
10 | # Revision 1.16 2003/06/30 13:17:20 the_krow |
---|
11 | # o Refactored mediainfo into factory, synchronizedobject |
---|
12 | # o Parsers now register directly at mmpython not at mmpython.mediainfo |
---|
13 | # o use mmpython.Factory() instead of mmpython.mediainfo.get_singleton() |
---|
14 | # o Bugfix in PNG parser |
---|
15 | # o Renamed disc.AudioInfo into disc.AudioDiscInfo |
---|
16 | # o Renamed disc.DataInfo into disc.DataDiscInfo |
---|
17 | # |
---|
18 | # Revision 1.15 2003/06/20 19:17:22 dischi |
---|
19 | # remove filename again and use file.name |
---|
20 | # |
---|
21 | # Revision 1.14 2003/06/12 14:43:21 the_krow |
---|
22 | # Realmedia file parsing. Title, Artist, Copyright work. Couldn't find |
---|
23 | # many technical parameters to retrieve. |
---|
24 | # Some initial QT parsing |
---|
25 | # added Real to __init__.py |
---|
26 | # |
---|
27 | # Revision 1.13 2003/06/12 10:42:47 the_krow |
---|
28 | # Added Bitrate, Extended Info |
---|
29 | # Still need to identify streams by their streamid |
---|
30 | # |
---|
31 | # Revision 1.12 2003/06/12 09:38:24 the_krow |
---|
32 | # ASF Header parser completed. I need test files or a way to generate |
---|
33 | # them. |
---|
34 | # |
---|
35 | # Revision 1.11 2003/06/12 00:36:30 the_krow |
---|
36 | # ASF Audio parsing |
---|
37 | # |
---|
38 | # Revision 1.10 2003/06/12 00:27:25 the_krow |
---|
39 | # More asf parsing: Width, Height, Video Codec |
---|
40 | # |
---|
41 | # Revision 1.9 2003/06/11 20:51:00 the_krow |
---|
42 | # Title, Artist and some other data successfully parsed from wmv, asf, wma |
---|
43 | # |
---|
44 | # Revision 1.8 2003/06/11 19:07:57 the_krow |
---|
45 | # asf,wmv,wma now get the guids right... |
---|
46 | # |
---|
47 | # Revision 1.7 2003/06/11 16:11:08 the_krow |
---|
48 | # asf parsing... asf is really an ugly format. |
---|
49 | # |
---|
50 | # Revision 1.6 2003/06/08 19:53:21 dischi |
---|
51 | # also give the filename to init for additional data tests |
---|
52 | # |
---|
53 | # Revision 1.5 2003/06/08 15:40:26 dischi |
---|
54 | # catch exception, raised for small text files |
---|
55 | # |
---|
56 | # Revision 1.4 2003/06/08 13:44:58 dischi |
---|
57 | # Changed all imports to use the complete mmpython path for mediainfo |
---|
58 | # |
---|
59 | # Revision 1.3 2003/06/08 13:11:38 dischi |
---|
60 | # removed print at the end and moved it into register |
---|
61 | # |
---|
62 | # Revision 1.2 2003/05/13 12:31:43 the_krow |
---|
63 | # + Copyright Notice |
---|
64 | # |
---|
65 | # |
---|
66 | # MMPython - Media Metadata for Python |
---|
67 | # Copyright (C) 2003 Thomas Schueppel |
---|
68 | # |
---|
69 | # This program is free software; you can redistribute it and/or modify |
---|
70 | # it under the terms of the GNU General Public License as published by |
---|
71 | # the Free Software Foundation; either version 2 of the License, or |
---|
72 | # (at your option) any later version. |
---|
73 | # |
---|
74 | # This program is distributed in the hope that it will be useful, but |
---|
75 | # WITHOUT ANY WARRANTY; without even the implied warranty of MER- |
---|
76 | # CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General |
---|
77 | # Public License for more details. |
---|
78 | # |
---|
79 | # You should have received a copy of the GNU General Public License along |
---|
80 | # with this program; if not, write to the Free Software Foundation, Inc., |
---|
81 | # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
---|
82 | # |
---|
83 | # ----------------------------------------------------------------------- |
---|
84 | #endif |
---|
85 | |
---|
86 | import re |
---|
87 | import struct |
---|
88 | import string |
---|
89 | import fourcc |
---|
90 | import mmpython |
---|
91 | |
---|
92 | from mmpython import mediainfo |
---|
93 | |
---|
94 | def _guid(input): |
---|
95 | # Remove any '-' |
---|
96 | s = string.join(string.split(input,'-'), '') |
---|
97 | r = '' |
---|
98 | if len(s) != 32: |
---|
99 | return '' |
---|
100 | x = '' |
---|
101 | for i in range(0,16): |
---|
102 | r+=chr(int(s[2*i:2*i+2],16)) |
---|
103 | guid = struct.unpack('>IHHBB6s',r) |
---|
104 | return guid |
---|
105 | |
---|
# Lookup table: symbolic ASF object / type name -> GUID in the tuple form
# returned by _guid(), so that entries can be compared directly with the
# tuples AsfInfo._parseguid() produces from file data.
# NOTE(review): the section headings below follow the GUID listings of the
# ASF specification as far as the names indicate -- confirm against the spec.
GUIDS = {
    # Top-level objects
    'ASF_Header_Object': _guid('75B22630-668E-11CF-A6D9-00AA0062CE6C'),
    'ASF_Data_Object': _guid('75B22636-668E-11CF-A6D9-00AA0062CE6C'),
    'ASF_Simple_Index_Object': _guid('33000890-E5B1-11CF-89F4-00A0C90349CB'),
    'ASF_Index_Object': _guid('D6E229D3-35DA-11D1-9034-00A0C90349BE'),
    'ASF_Media_Object_Index_Object': _guid('FEB103F8-12AD-4C64-840F-2A1D2F7AD48C'),
    'ASF_Timecode_Index_Object': _guid('3CB73FD0-0C4A-4803-953D-EDF7B6228F0C'),

    # Header objects
    'ASF_File_Properties_Object': _guid('8CABDCA1-A947-11CF-8EE4-00C00C205365'),
    'ASF_Stream_Properties_Object': _guid('B7DC0791-A9B7-11CF-8EE6-00C00C205365'),
    'ASF_Header_Extension_Object': _guid('5FBF03B5-A92E-11CF-8EE3-00C00C205365'),
    'ASF_Codec_List_Object': _guid('86D15240-311D-11D0-A3A4-00A0C90348F6'),
    'ASF_Script_Command_Object': _guid('1EFB1A30-0B62-11D0-A39B-00A0C90348F6'),
    'ASF_Marker_Object': _guid('F487CD01-A951-11CF-8EE6-00C00C205365'),
    'ASF_Bitrate_Mutual_Exclusion_Object': _guid('D6E229DC-35DA-11D1-9034-00A0C90349BE'),
    'ASF_Error_Correction_Object': _guid('75B22635-668E-11CF-A6D9-00AA0062CE6C'),
    'ASF_Content_Description_Object': _guid('75B22633-668E-11CF-A6D9-00AA0062CE6C'),
    'ASF_Extended_Content_Description_Object': _guid('D2D0A440-E307-11D2-97F0-00A0C95EA850'),
    'ASF_Content_Branding_Object': _guid('2211B3FA-BD23-11D2-B4B7-00A0C955FC6E'),
    'ASF_Stream_Bitrate_Properties_Object': _guid('7BF875CE-468D-11D1-8D82-006097C9A2B2'),
    'ASF_Content_Encryption_Object': _guid('2211B3FB-BD23-11D2-B4B7-00A0C955FC6E'),
    'ASF_Extended_Content_Encryption_Object': _guid('298AE614-2622-4C17-B935-DAE07EE9289C'),
    'ASF_Alt_Extended_Content_Encryption_Obj': _guid('FF889EF1-ADEE-40DA-9E71-98704BB928CE'),
    'ASF_Digital_Signature_Object': _guid('2211B3FC-BD23-11D2-B4B7-00A0C955FC6E'),
    'ASF_Padding_Object': _guid('1806D474-CADF-4509-A4BA-9AABCB96AAE8'),

    # Header extension objects
    'ASF_Extended_Stream_Properties_Object': _guid('14E6A5CB-C672-4332-8399-A96952065B5A'),
    'ASF_Advanced_Mutual_Exclusion_Object': _guid('A08649CF-4775-4670-8A16-6E35357566CD'),
    'ASF_Group_Mutual_Exclusion_Object': _guid('D1465A40-5A79-4338-B71B-E36B8FD6C249'),
    'ASF_Stream_Prioritization_Object': _guid('D4FED15B-88D3-454F-81F0-ED5C45999E24'),
    'ASF_Bandwidth_Sharing_Object': _guid('A69609E6-517B-11D2-B6AF-00C04FD908E9'),
    'ASF_Language_List_Object': _guid('7C4346A9-EFE0-4BFC-B229-393EDE415C85'),
    'ASF_Metadata_Object': _guid('C5F8CBEA-5BAF-4877-8467-AA8C44FA4CCA'),
    'ASF_Metadata_Library_Object': _guid('44231C94-9498-49D1-A141-1D134E457054'),
    'ASF_Index_Parameters_Object': _guid('D6E229DF-35DA-11D1-9034-00A0C90349BE'),
    'ASF_Media_Object_Index_Parameters_Obj': _guid('6B203BAD-3F11-4E84-ACA8-D7613DE2CFA7'),
    'ASF_Timecode_Index_Parameters_Object': _guid('F55E496D-9797-4B5D-8C8B-604DFE9BFB24'),

    # Stream types (compared with the stream type GUID of a stream
    # properties object)
    'ASF_Audio_Media': _guid('F8699E40-5B4D-11CF-A8FD-00805F5C442B'),
    'ASF_Video_Media': _guid('BC19EFC0-5B4D-11CF-A8FD-00805F5C442B'),
    'ASF_Command_Media': _guid('59DACFC0-59E6-11D0-A3AC-00A0C90348F6'),
    'ASF_JFIF_Media': _guid('B61BE100-5B4E-11CF-A8FD-00805F5C442B'),
    'ASF_Degradable_JPEG_Media': _guid('35907DE0-E415-11CF-A917-00805F5C442B'),
    'ASF_File_Transfer_Media': _guid('91BD222C-F21C-497A-8B6D-5AA86BFC0185'),
    'ASF_Binary_Media': _guid('3AFB65E2-47EF-40F2-AC2C-70A90D71D343'),

    # Web stream identifiers
    'ASF_Web_Stream_Media_Subtype': _guid('776257D4-C627-41CB-8F81-7AC7FF1C40CC'),
    'ASF_Web_Stream_Format': _guid('DA1E6B13-8359-4050-B398-388E965BF00C'),

    # Error correction types
    'ASF_No_Error_Correction': _guid('20FB5700-5B55-11CF-A8FD-00805F5C442B'),
    'ASF_Audio_Spread': _guid('BFC3CD50-618F-11CF-8BB2-00AA00B4E220'),
}

# Short module-local alias for the debug output function of mediainfo.
_print = mediainfo._debug
---|
160 | |
---|
class AsfInfo(mediainfo.AVInfo):
    """Media information read from the header object of an ASF file.

    The constructor reads the ASF header from an open file and walks the
    objects stored in it.  Depending on what is found it sets length,
    title, artist, copyright, caption and language, appends VideoInfo /
    AudioInfo entries to self.video / self.audio and stores descriptor
    tables through self.appendtable().
    NOTE(review): self.video, self.audio and appendtable() are presumably
    provided by mediainfo.AVInfo -- they are not defined in this class.

    self.valid is 1 only if the data starts with a well-formed ASF header
    object, otherwise 0.
    """

    def __init__(self, file):
        """Parse the ASF header found at the current position of file.

        file must offer read(size) returning a byte string.  If fewer
        than 30 bytes are available, or the data does not start with the
        ASF header object GUID and the expected reserved bytes, the
        instance is left with valid == 0 and nothing else is parsed.

        Raises struct.error if the header data ends inside an object.
        """
        mediainfo.AVInfo.__init__(self)
        self.context = 'video'
        self.valid = 0
        self.mime = 'video/asf'
        self.type = 'asf video'
        h = file.read(30)
        if len(h) < 30:
            return
        # Header object: GUID (16 bytes), object size, number of contained
        # objects and two reserved bytes.
        (guidstr, objsize, objnum, reserved1, reserved2) = \
            struct.unpack('<16sQIBB', h)
        guid = self._parseguid(guidstr)
        if guid != GUIDS['ASF_Header_Object']:
            return
        if reserved1 != 0x01 or reserved2 != 0x02:
            # Not a usable header: stop here, exactly like the GUID
            # mismatch above, instead of parsing data known to be bad.
            return
        self.valid = 1
        _print("asf header size: %d / %d objects" % (objsize, objnum))
        header = file.read(objsize - 30)
        # objnum is an untrusted 32 bit value from the file, so count down
        # instead of building range(objnum).
        remaining = objnum
        while remaining > 0:
            remaining -= 1
            h = self._getnextheader(header)
            if h[1] < 24:
                # An object cannot be smaller than its own 24 byte header;
                # such a size would never advance correctly.
                break
            header = header[h[1]:]

    def _printguid(self, guid):
        """Return a printable form of a GUID tuple (debug output only)."""
        r = "%.8X-%.4X-%.4X-%.2X%.2X-%s" % guid
        return r

    def _parseguid(self, string):
        """Unpack the first 16 bytes of string into a GUID tuple.

        The little-endian layout '<IHHBB6s' is used, which yields tuples
        that can be compared with the values stored in GUIDS.
        """
        return struct.unpack('<IHHBB6s', string[:16])

    def _parsevalue(self, descriptortype, descriptorvalue, boolformat):
        """Decode the raw value of a descriptor according to its type.

        boolformat is the struct format of a boolean value; it is the only
        thing that differs between the two descriptor record layouts.
        Strings (type 0) and byte arrays (type 1) are returned undecoded.
        Unknown types are reported through _print and yield None.
        """
        if descriptortype in (0x0000, 0x0001):
            # Unicode string / Byte Array
            return descriptorvalue
        if descriptortype == 0x0002:
            # Bool
            return struct.unpack(boolformat, descriptorvalue)[0] != 0
        if descriptortype == 0x0003:
            # DWORD
            return struct.unpack('<I', descriptorvalue)[0]
        if descriptortype == 0x0004:
            # QWORD
            return struct.unpack('<Q', descriptorvalue)[0]
        if descriptortype == 0x0005:
            # WORD
            return struct.unpack('<H', descriptorvalue)[0]
        _print("Unknown Descriptor Type %d" % descriptortype)
        return None

    def _parsekv(self, s):
        """Parse one descriptor of an extended content description object.

        Record layout: name length (2 bytes), name, value type (2 bytes),
        value length (2 bytes), value.  Booleans are stored in 4 bytes.

        Returns (bytes consumed, name, value).
        """
        pos = 0
        (descriptorlen,) = struct.unpack('<H', s[pos:pos+2])
        pos += 2
        descriptorname = s[pos:pos+descriptorlen]
        pos += descriptorlen
        descriptortype, valuelen = struct.unpack('<HH', s[pos:pos+4])
        pos += 4
        descriptorvalue = s[pos:pos+valuelen]
        pos += valuelen
        value = self._parsevalue(descriptortype, descriptorvalue, '<I')
        return (pos, descriptorname, value)

    def _parsekv2(self, s):
        """Parse one description record of a metadata object.

        Record layout: 2 skipped bytes, stream number, name length and
        value type (2 bytes each), value length (4 bytes), name, value.
        Booleans are stored in 2 bytes.

        Returns (bytes consumed, name, value, stream number).
        """
        pos = 0
        (strno, descriptorlen, descriptortype, valuelen) = \
            struct.unpack('<2xHHHI', s[pos:pos+12])
        pos += 12
        descriptorname = s[pos:pos+descriptorlen]
        pos += descriptorlen
        descriptorvalue = s[pos:pos+valuelen]
        pos += valuelen
        value = self._parsevalue(descriptortype, descriptorvalue, '<H')
        return (pos, descriptorname, value, strno)

    def _getnextheader(self, s):
        """Parse the object at the start of s and store what it describes.

        s starts with an object header: a 16 byte GUID followed by the
        object size as a 64 bit little-endian value.  Known object types
        update this instance; all others are only reported via _print.

        Returns the tuple (raw GUID string, object size); callers use the
        size to skip to the next object.

        Raises struct.error if s is too short for the fields being read.
        """
        r = struct.unpack('<16sQ', s[:24])
        (guidstr, objsize) = r
        guid = self._parseguid(guidstr)
        if guid == GUIDS['ASF_File_Properties_Object']:
            _print("File Properties Object")
            # Fields: file id, size, creation date, packet count, play
            # duration, send duration, preroll, flags, min/max packet
            # size, max bitrate.  Only the play duration (index 4) is used.
            duration = struct.unpack('<16s6Q4I', s[24:24+80])[4]
            # Per the ASF specification the duration is counted in 100 ns
            # units; store whole seconds.
            self.length = duration // 10000000
        elif guid == GUIDS['ASF_Stream_Properties_Object']:
            _print("Stream Properties Object [%d]" % objsize)
            streamtype = self._parseguid(s[24:40])
            # s[40:56] holds the error correction type GUID (unused).
            # All ASF fields are little-endian, including the flags word.
            offset, typelen, errorlen, flags = \
                struct.unpack('<QIIH4x', s[56:78])
            # The stream number occupies the lowest 7 bits of the flags.
            strno = flags & 0x7F
            if streamtype == GUIDS['ASF_Video_Media']:
                vi = mediainfo.VideoInfo()
                # s[78:89] is the video specific prefix, the bitmap info
                # header follows: size (skipped), width, height, planes
                # (skipped), bit depth, codec fourcc.
                vi.width, vi.height, depth, codec = \
                    struct.unpack('<4xII2xH4s', s[89:89+20])
                try:
                    vi.codec = fourcc.RIFFCODEC[codec]
                except KeyError:
                    # Keep the raw fourcc rather than aborting the parse.
                    _print("Unknown video codec %r" % (codec,))
                    vi.codec = codec
                vi.id = strno
                self.video.append(vi)
            elif streamtype == GUIDS['ASF_Audio_Media']:
                ai = mediainfo.AudioInfo()
                # Wave format: codec id, channels, sample rate, average
                # bytes per second, block alignment, bits per sample.
                twocc, ai.channels, ai.samplerate, bitrate, block, \
                    ai.samplebits = struct.unpack('<HHIIHH', s[78:78+16])
                # The stored value is in bytes per second.
                ai.bitrate = 8 * bitrate
                try:
                    ai.codec = fourcc.RIFFWAVE[twocc]
                except KeyError:
                    # Keep the raw codec id rather than aborting the parse.
                    _print("Unknown audio codec %r" % (twocc,))
                    ai.codec = twocc
                ai.id = strno
                self.audio.append(ai)
        elif guid == GUIDS['ASF_Header_Extension_Object']:
            _print("ASF_Header_Extension_Object %d" % objsize)
            # 18 bytes follow the object header before the length of the
            # embedded data; the data is a sequence of objects again.
            size = struct.unpack('<I', s[42:46])[0]
            data = s[46:46+size]
            while len(data):
                _print("Sub:")
                h = self._getnextheader(data)
                if h[1] < 24:
                    # A size smaller than the object header (e.g. 0)
                    # would make this loop spin forever.
                    break
                data = data[h[1]:]
        elif guid == GUIDS['ASF_Codec_List_Object']:
            _print("List Object")
        elif guid == GUIDS['ASF_Error_Correction_Object']:
            _print("Error Correction")
        elif guid == GUIDS['ASF_Content_Description_Object']:
            _print("Content Description Object")
            # Five 16 bit lengths followed by the five strings themselves.
            val = struct.unpack('<5H', s[24:24+10])
            pos = 34
            strings = []
            for i in val:
                # Dropping the NUL bytes removes the "bad chars" of the
                # stored two-byte text; this only keeps plain characters.
                strings.append(s[pos:pos+i].replace('\0', '').strip())
                pos += i
            (self.title, self.artist, self.copyright, self.caption,
             rating) = tuple(strings)
        elif guid == GUIDS['ASF_Extended_Content_Description_Object']:
            (count,) = struct.unpack('<H', s[24:26])
            pos = 26
            descriptor = {}
            for i in range(0, count):
                # Read additional content descriptors
                size, key, value = self._parsekv(s[pos:])
                pos += size
                descriptor[key] = value
            self.appendtable('ASFDESCRIPTOR', descriptor)
        elif guid == GUIDS['ASF_Metadata_Object']:
            (count,) = struct.unpack('<H', s[24:26])
            pos = 26
            # Collect the records per stream number so that every stream
            # gets its own table and an empty object adds nothing.
            streams = {}
            for i in range(0, count):
                size, key, value, strno = self._parsekv2(s[pos:])
                pos += size
                if strno not in streams:
                    streams[strno] = {}
                streams[strno][key] = value
            numbers = list(streams.keys())
            numbers.sort()
            for strno in numbers:
                # TODO: Find the stream in self.audio and self.video and
                # append it there instead of here
                self.appendtable('ASFMETADATA%d' % strno, streams[strno])
        elif guid == GUIDS['ASF_Language_List_Object']:
            count = struct.unpack('<H', s[24:26])[0]
            pos = 26
            lang = []
            for i in range(0, count):
                # Each entry is a length byte followed by the identifier.
                idlen = struct.unpack('<B', s[pos:pos+1])[0]
                idstring = s[pos+1:pos+1+idlen]
                _print("Language: %d/%d: %s" % (i+1, count, idstring))
                lang.append(idstring)
                pos += 1 + idlen
            if len(lang) == 1:
                self.language = lang[0]
            else:
                self.language = tuple(lang)
            # TODO: Find the stream in self.audio and self.video and
            # set it there instead of here
        elif guid == GUIDS['ASF_Stream_Bitrate_Properties_Object']:
            (count,) = struct.unpack('<H', s[24:26])
            pos = 26
            for i in range(0, count):
                # Each record: flags word (stream number in the lowest
                # 7 bits) and the average bitrate.
                strno, avbitrate = struct.unpack('<HI', s[pos:pos+6])
                pos += 6
                strno &= 0x7F
                _print("Stream %d Bitrate: %d" % (strno, avbitrate))
                # TODO: Find the stream in self.audio and self.video and
                # set it there instead of here
        else:
            # Just print the type:
            bfail = 1
            for name, known in GUIDS.items():
                if known == guid:
                    _print("Unparsed %s [%d]" % (name, objsize))
                    bfail = 0
            if bfail:
                _print("unknown: %s [%d]" % (self._printguid(guid), objsize))
        return r
---|
373 | |
---|
# Make the parser known to mmpython.
# NOTE(review): the arguments are presumably (mime type, file extensions,
# media type, parser class) -- confirm against mmpython.registertype.
mmpython.registertype(
    'video/asf',
    ('asf', 'wmv', 'wma'),
    mediainfo.TYPE_AV,
    AsfInfo
)
---|