# Source code for pytomo.kaa_metadata.video.riff

# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# riff.py - riff (avi) file parser
# -----------------------------------------------------------------------------
# $Id: riff.py 3652 2008-10-26 18:23:25Z dmeyer $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2006 Thomas Schueppel, Dirk Meyer
#
# First Edition: Thomas Schueppel <stain@acm.org>
# Maintainer:    Dirk Meyer <dischi@freevo.org>
#
# Please see the file AUTHORS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------

from __future__ import absolute_import

__all__ = ['Parser']

# python imports
import os
import struct
import string
import logging
import time

# import kaa_metadata.video core
from . import core

# get logging object
log = logging.getLogger('metadata')

# List of tags
# http://kibus1.narod.ru/frames_eng.htm?sof/abcavi/infotags.htm
# http://www.divx-digest.com/software/avitags_dll.html
# File Format: google for odmlff2.pdf

# Maps the four character ids of chunks found in a RIFF 'INFO' list to the
# attribute names used for them.  The table is registered on the Riff class
# as table_mapping['AVIINFO'] and is also consulted by Riff._parseLIST to
# decide whether a chunk id is worth an "Unknown Key" debug message.
AVIINFO = {
    'INAM': 'title',
    'IART': 'artist',
    'IPRD': 'product',
    'ISFT': 'software',
    'ICMT': 'comment',
    'ILNG': 'language',
    'IKEY': 'keywords',
    'IPRT': 'trackno',
    'IFRM': 'trackof',
    'IPRO': 'producer',
    'IWRI': 'writer',
    'IGNR': 'genre',
    'ICOP': 'copyright'
}

# Taken from libavcodec/mpeg4data.h (pixel_aspect struct)
# Maps the 4 bit aspect code read from an mpeg4 VOL header to a
# (numerator, denominator) pixel aspect.  Riff._parseLISTmovi treats code 15
# separately (explicit numerator/denominator follow in the bitstream) and
# falls back to (0, 0) for any code missing here.
PIXEL_ASPECT = {
    1: (1, 1),
    2: (12, 11),
    3: (10, 11),
    4: (16, 11),
    5: (40, 33)
}


class Riff(core.AVContainer):
    """
    AVI parser also parsing metadata like title, languages, etc.
    """
    table_mapping = { 'AVIINFO' : AVIINFO }

    def __init__(self,file):
        """
        Parse the RIFF container readable from the open file object `file`.

        The object must support read/seek/tell and expose a `name`
        attribute, which is used to look for subtitle files next to it.
        Raises core.ParseError when the data does not start with 'RIFF'
        or 'SDSS'.
        """
        core.AVContainer.__init__(self)
        # magic (4 bytes), overall size (4 bytes), form type (4 bytes)
        preamble = file.read(12)
        if preamble[:4] not in ("RIFF", 'SDSS'):
            raise core.ParseError()

        self.has_idx = False
        self.header = {}
        self.junkStart = None
        self.infoStart = None
        self.type = preamble[8:12]
        known_mimes = {'AVI ': 'video/avi', 'WAVE': 'audio/wav'}
        if self.type in known_mimes:
            self.mime = known_mimes[self.type]

        # consume top level chunks until the chunk parser asks to stop
        try:
            keep_going = True
            while keep_going:
                keep_going = self._parseRIFFChunk(file)
        except IOError:
            log.exception('error in file, stop parsing')

        self._find_subtitles(file.name)

        # an audio/video file without an 'idx1' chunk is flagged as corrupt
        if not self.has_idx:
            if self.media == core.MEDIA_AV:
                log.debug('WARNING: avi has no index')
                self._set('corrupt', True)


    def _find_subtitles(self, filename):
        """
        Search for subtitle files. Right now only VobSub is supported.

        A VobSub pair is recognised when '<base>.idx' exists together with
        '<base>.sub' or '<base>.rar', where <base> is `filename` without
        its extension.  For every line of the index that starts with 'id'
        a core.Subtitle is appended to self.subtitles, its language taken
        from columns 4-5 of that line.
        """
        base = os.path.splitext(filename)[0]
        index_name = base + '.idx'
        if not os.path.isfile(index_name):
            return
        if not (os.path.isfile(base+'.sub') or os.path.isfile(base+'.rar')):
            return

        index = open(index_name)
        try:
            # The marker is only accepted behind some leading text on the
            # first line (find() > 0), never at column 0.
            if index.readline().find('VobSub index file') > 0:
                for line in index.readlines():
                    if line.startswith('id'):
                        sub = core.Subtitle()
                        sub.language = line[4:6]
                        sub.trackno = index_name  # Maybe not?
                        self.subtitles.append(sub)
        finally:
            # close the handle even when reading the index fails
            index.close()


    def _parseAVIH(self,t):
        """
        Decode the payload of an 'avih' chunk.

        The first 56 bytes of `t` are read as fourteen little endian
        unsigned 32 bit integers and returned as a dict keyed by field
        name.  Raises core.ParseError when dwMicroSecPerFrame is zero.
        """
        field_names = (
            'dwMicroSecPerFrame',
            'dwMaxBytesPerSec',
            'dwPaddingGranularity',
            'dwFlags',
            'dwTotalFrames',
            'dwInitialFrames',
            'dwStreams',
            'dwSuggestedBufferSize',
            'dwWidth',
            'dwHeight',
            'dwScale',
            'dwRate',
            'dwStart',
            'dwLength',
        )
        header = dict(zip(field_names, struct.unpack('<14I', t[0:56])))
        if header['dwMicroSecPerFrame'] == 0:
            log.warning("ERROR: Corrupt AVI")
            raise core.ParseError()

        return header


    def _parseSTRH(self,t):
        """
        Decode the payload of a 'strh' (stream header) chunk.

        Returns a dict with 'fccType' (first four bytes of `t`) and the
        numeric fields unpacked from bytes 8-51.  'fccHandler' is only
        included for streams that are not 'auds'.  For 'auds' streams
        self.delay is set to dwStart / (dwRate / dwScale); a division by
        zero there is ignored and leaves self.delay untouched.
        """
        stream_type = t[0:4]
        header = {'fccType': stream_type}
        log.debug("_parseSTRH(%s) : %d bytes" % ( stream_type, len(t)))

        is_audio = stream_type == 'auds'
        if not is_audio:
            header['fccHandler'] = t[4:8]

        field_names = (
            'dwFlags',
            'wPriority',
            'wLanguage',
            'dwInitialFrames',
            'dwScale',
            'dwRate',
            'dwStart',
            'dwLength',
            'dwSuggestedBufferSize',
            'dwQuality',
            'dwSampleSize',
            'rcFrame',
        )
        # a too short buffer raises struct.error for every stream type
        header.update(zip(field_names,
                          struct.unpack('<IHHIIIIIIIII',t[8:52])))

        if is_audio:
            try:
                self.delay = float(header['dwStart']) / \
                             (float(header['dwRate']) / header['dwScale'])
            except (KeyError, IndexError, ValueError, ZeroDivisionError):
                pass

        return header


    def _parseSTRF(self,t,strh):
        """
        Decode the payload of a 'strf' (stream format) chunk.

        `strh` is the dict returned by _parseSTRH for the same stream; its
        'fccType' selects the layout.  For 'auds' a core.AudioStream is
        appended to self.audio, for 'vids' a core.VideoStream is appended
        to self.video.  Other stream types are ignored.  Returns the raw
        format fields as a dict (empty for ignored types).
        """
        fccType = strh['fccType']
        retval = {}
        if fccType == 'auds':
            # Wave format header: nSamplesPerSec and nAvgBytesPerSec are
            # 32 bit values (same layout as the 'fmt ' chunk parsed in
            # _parseRIFFChunk).  The trailing 16 bit sample size is not
            # part of the shortest variant, so it is optional here.
            if len(t) >= 16:
                v = struct.unpack('<HHIIHH',t[0:16])
            else:
                v = struct.unpack('<HHIIH',t[0:14]) + (0,)
            ( retval['wFormatTag'],
              retval['nChannels'],
              retval['nSamplesPerSec'],
              retval['nAvgBytesPerSec'],
              retval['nBlockAlign'],
              retval['nBitsPerSample'],
            ) = v
            ai = core.AudioStream()
            ai.samplerate = retval['nSamplesPerSec']
            ai.channels = retval['nChannels']
            # FIXME: Bitrate calculation is completely wrong.
            #ai.samplebits = retval['nBitsPerSample']
            #ai.bitrate = retval['nAvgBytesPerSec'] * 8

            # TODO: set code if possible
            # http://www.stats.uwa.edu.au/Internal/Specs/DXALL/FileSpec/\
            #    Languages
            # ai.language = strh['wLanguage']
            ai.codec = retval['wFormatTag']
            self.audio.append(ai)
        elif fccType == 'vids':
            # Bitmap info header.  Width and height are signed; a negative
            # height marks a top-down bitmap, so the stream gets its
            # absolute value while the raw value stays in retval.
            v = struct.unpack('<IiiHH',t[0:16])
            ( retval['biSize'],
              retval['biWidth'],
              retval['biHeight'],
              retval['biPlanes'],
              retval['biBitCount'], ) = v
            # explicit little endian: RIFF data never uses native order
            v = struct.unpack('<IIIII',t[20:40])
            ( retval['biSizeImage'],
              retval['biXPelsPerMeter'],
              retval['biYPelsPerMeter'],
              retval['biClrUsed'],
              retval['biClrImportant'], ) = v
            vi = core.VideoStream()
            # bytes 16-19 hold the compression fourcc
            vi.codec = t[16:20]
            vi.width = retval['biWidth']
            vi.height = abs(retval['biHeight'])
            # FIXME: Bitrate calculation is completely wrong.
            #vi.bitrate = strh['dwRate']
            # Leave fps/length unset instead of failing with a division by
            # zero on a broken stream header (mirrors the handling of the
            # audio delay in _parseSTRH).
            if strh['dwScale']:
                fps = float(strh['dwRate']) / strh['dwScale']
                vi.fps = fps
                if fps:
                    vi.length = strh['dwLength'] / fps
            self.video.append(vi)
        return retval


    def _parseSTRL(self,t):
        """
        Parse the beginning of a 'strl' list: a 'strh' chunk followed by a
        'strf' chunk.

        Returns (parsed, offset): `parsed` holds the decoded chunks under
        the keys 'strh' and 'strf', `offset` is the position in `t`
        reached after the chunks that were handled.
        """
        parsed = {}

        # first chunk, expected to be the stream header
        tag = t[0:4]
        length = struct.unpack('<I',t[4:8])[0]
        offset = 8
        if tag != 'strh':
            log.debug("_parseSTRL: Error")
        else:
            parsed[tag] = self._parseSTRH(t[offset:])
            offset += length

        # second chunk, expected to be the stream format
        tag = t[offset:offset+4]
        length = struct.unpack('<I',t[offset+4:offset+8])[0]
        offset += 8
        if tag == 'strf':
            parsed[tag] = self._parseSTRF(t[offset:], parsed['strh'])
            offset += length
        return ( parsed, offset )


    def _parseODML(self,t):
        """
        Skip over the 'dmlh' chunk at the start of an 'odml' list.

        Nothing is decoded; the returned dict is always empty.  The
        returned offset equals the size recorded in the chunk header (the
        eight header bytes are not added on top of it).
        """
        tag = t[0:4]
        payload_size = struct.unpack('<I',t[4:8])[0]
        if tag != 'dmlh':
            log.debug("_parseODML: Error")

        return ( {}, payload_size )


    def _parseVPRP(self,t):
        """
        Decode a 'vprp' (video properties) chunk.

        `t` has to start at the 32 bit chunk size, i.e. right behind the
        'vprp' tag, which is how _parseLIST calls this method.  The frame
        aspect ratio is stored as numerator in the upper and denominator
        in the lower 16 bits; the resulting float is written to the
        `aspect` attribute of every stream in self.video.

        Returns (fields, consumed) where `consumed` is the number of bytes
        of `t` taken up by the chunk: the size field plus the payload.
        """
        retval = {}
        v = struct.unpack('<IIIIIIIIII',t[:4*10])
        chunk_size = v[0]

        # v[-1] (fields per frame) is not stored
        ( retval['VideoFormat'],
          retval['VideoStandard'],
          retval['RefreshRate'],
          retval['HTotalIn'],
          retval['VTotalIn'],
          retval['FrameAspectRatio'],
          retval['wPixel'],
          retval['hPixel'] ) = v[1:-1]

        # VideoFormat and VideoStandard are kept as raw numbers, a sample
        # file with more information would be needed to interpret them:
        # enum {FORMAT_UNKNOWN, FORMAT_PAL_SQUARE, FORMAT_PAL_CCIR_601,
        #    FORMAT_NTSC_SQUARE, FORMAT_NTSC_CCIR_601,...} VIDEO_FORMAT;
        # enum {STANDARD_UNKNOWN, STANDARD_PAL, STANDARD_NTSC, STANDARD_SECAM}
        #    VIDEO_STANDARD;
        #
        packed = retval['FrameAspectRatio']
        denominator = packed & 0xFFFF
        if denominator:
            r = float(packed >> 16) / denominator
            retval['FrameAspectRatio'] = r
            if self.video:
                for stream in self.video:
                    stream.aspect = r
        # With a zero denominator the raw packed value is left in place
        # and no stream is touched, instead of failing with a division by
        # zero.

        # The size field itself belongs to the consumed bytes, otherwise
        # the caller resumes four bytes before the end of this chunk.
        return ( retval, chunk_size + 4 )


    def _parseLISTmovi(self, size, file):
        """
        Digs into movi list, looking for a Video Object Layer header in an
        mpeg4 stream in order to determine aspect ratio.

        `size` is the number of bytes of the list following the current
        position of `file`.  When a usable pixel aspect is found it is
        combined with width and height of the last video stream and
        stored as that stream's `aspect`.  Afterwards the file position is
        moved forward by whatever part of `size` has not been consumed.
        A file ending in the middle of the list stops the search quietly.
        """
        def bits(v, o, n):
            # Returns n bits in v, offset o bits.
            return (v & 2**n-1 << (64-n-o)) >> 64-n-o

        i = 0
        n_dc = 0
        done = False
        # If the VOL header doesn't appear within 5MB or 5 video chunks,
        # give up.  The 5MB limit is not likely to apply except in
        # pathological cases.
        while i < min(1024*1024*5, size - 8) and n_dc < 5:
            data = file.read(8)
            if data and ord(data[0]) == 0:
                # Eat leading nulls.
                data = data[1:] + file.read(1)
                i += 1

            if len(data) < 8:
                # Truncated file, no complete chunk header left.
                break

            key, sz = struct.unpack('<4sI', data)
            if key[2:] != 'dc' or sz > 1024*500:
                # This chunk is not video or is unusually big (> 500KB);
                # skip it.
                file.seek(sz, 1)
                i += 8 + sz
                continue

            n_dc += 1
            # Read video chunk into memory (may come back shorter than sz
            # at the end of a truncated file)
            data = file.read(sz)

            # Look through the picture header for VOL startcode.  The basic
            # logic for this is taken from libavcodec, h263.c
            pos = 0
            startcode = 0xff
            while pos < len(data):
                startcode = ((startcode << 8) | ord(data[pos])) & 0xffffffff
                pos += 1
                if startcode & 0xFFFFFF00 != 0x100:
                    # No startcode found yet
                    continue

                if startcode >= 0x120 and startcode <= 0x12F:
                    # We have the VOL startcode.  Pull 64 bits of it and treat
                    # as a bitstream
                    vol = data[pos : pos+8]
                    if len(vol) < 8:
                        # Header is cut off by the end of the chunk, try
                        # the next chunk instead.
                        break
                    v = struct.unpack(">Q", vol)[0]
                    offset = 10
                    if bits(v, 9, 1):
                        # is_ol_id, skip over vo_ver_id and vo_priority
                        offset += 7
                    ar_info = bits(v, offset, 4)
                    if ar_info == 15:
                        # Extended aspect
                        num = bits(v, offset + 4, 8)
                        den = bits(v, offset + 12, 8)
                    else:
                        # A standard pixel aspect
                        num, den = PIXEL_ASPECT.get(ar_info, (0, 0))

                    # num/den indicates pixel aspect; convert to video aspect,
                    # so we need frame width and height.
                    if 0 not in (num, den):
                        width, height = self.video[-1].width, self.video[-1].height
                        self.video[-1].aspect = num / float(den) * width / height

                    done = True
                    break

                startcode = 0xff

            i += 8 + len(data)

            if done:
                # We have the aspect, no need to continue parsing the movi
                # list, so break out of the loop.
                break


        if i < size:
            # Seek past whatever might be remaining of the movi list.
            file.seek(size-i,1)



    def _parseLIST(self,t):
        """
        Parse the content of a RIFF LIST (`t` starts at the list type)
        and return what was found as a dict.

        Nested lists are parsed recursively and stored under
        'LIST:<type>'; 'avih', 'strl' and 'vprp' are decoded by their
        dedicated methods; 'odml' and 'JUNK' are skipped.  Every other
        chunk is treated as text: NUL bytes and surrounding whitespace are
        removed and non-empty values are stored under the chunk id.  For
        'IDIT' and 'ICRD' the text is additionally parsed as a date and
        stored in self.timestamp when its format is recognised.
        """
        retval = {}
        i = 0
        size = len(t)

        while i < size-8:
            # skip zero
            if ord(t[i]) == 0: i += 1
            key = t[i:i+4]
            sz = 0

            if key == 'LIST':
                sz = struct.unpack('<I',t[i+4:i+8])[0]
                i+=8
                key = "LIST:"+t[i:i+4]
                # NOTE(review): there used to be a branch here merging the
                # nested dict into retval for key == 'strl'.  The key
                # always carries the 'LIST:' prefix at this point, so that
                # branch could never run and nested lists have always been
                # stored under their prefixed key.
                retval[key] = self._parseLIST(t[i:i+sz])
                i+=sz
            elif key == 'avih':
                sz = struct.unpack('<I',t[i+4:i+8])[0]
                i += 8
                value = self._parseAVIH(t[i:i+sz])
                i += sz
                retval[key] = value
            elif key == 'strl':
                i += 4
                (value, sz) = self._parseSTRL(t[i:])
                # streams are stored under their type ('vids', 'auds', ...)
                key = value['strh']['fccType']
                i += sz
                retval[key] = value
            elif key == 'odml':
                i += 4
                (value, sz) = self._parseODML(t[i:])
                i += sz
            elif key == 'vprp':
                i += 4
                (value, sz) = self._parseVPRP(t[i:])
                retval[key] = value
                i += sz
            elif key == 'JUNK':
                sz = struct.unpack('<I',t[i+4:i+8])[0]
                i += sz + 8
            else:
                sz = struct.unpack('<I',t[i+4:i+8])[0]
                i+=8
                # in most cases this is some info stuff
                if key not in AVIINFO and key != 'IDIT':
                    log.debug("Unknown Key: %s, len: %d" % (key,sz))
                value = t[i:i+sz]
                if key == 'ISFT':
                    # product information
                    if value.find('\0') > 0:
                        # works for Casio S500 camera videos
                        value = value[:value.find('\0')]
                value = value.replace('\0', '').lstrip().rstrip()
                if value:
                    retval[key] = value
                    if key in ('IDIT', 'ICRD'):
                        # Timestamp the video was created.  The result
                        # must not be stored in `t`: that name is the
                        # buffer the enclosing loop is still reading.
                        created = None
                        try:
                            # The doc says it should be a format like
                            # "Wed Jan 02 02:03:55 1990"
                            created = time.strptime(value,
                                                    "%a %b %d %H:%M:%S %Y")
                        except ValueError:
                            try:
                                # The Casio S500 uses "2005/12/24/ 14:11"
                                created = time.strptime(value,
                                                        "%Y/%m/%d/ %H:%M")
                            except ValueError:
                                # FIXME: something different
                                log.debug('no support for time format %s', value)
                        if created:
                            # save timestamp as int
                            self.timestamp = int(time.mktime(created))
                i+=sz
        return retval


    def _parseRIFFChunk(self,file):
        """
        Read and handle one top level chunk at the current position of
        `file`.

        Returns True when the caller should go on with the next chunk and
        False when parsing should stop: no complete chunk header is left,
        a second 'RIFF' chunk was reached, or the chunk name does not look
        like a chunk name at all.
        """
        h = file.read(8)
        if len(h) < 8:
            # name and size are both needed; a shorter tail is the end
            return False
        name = h[:4]
        size = struct.unpack('<I',h[4:8])[0]

        if name == 'LIST':
            pos = file.tell() - 8
            key = file.read(4)
            if key == 'movi' and self.video and not self.video[-1].aspect and \
               self.video[-1].width and self.video[-1].height and \
               self.video[-1].format in ('DIVX', 'XVID', 'FMP4'): # any others?
                # If we don't have the aspect (i.e. it isn't in odml vprp
                # header), but we do know the video's dimensions, and
                # we're dealing with an mpeg4 stream, try to get the aspect
                # from the VOL header in the mpeg4 stream.
                self._parseLISTmovi(size-4, file)
                return True
            elif size > 80000:
                log.debug('RIFF LIST "%s" too long to parse: %s bytes' % (key, size))
                file.seek(size-4,1)
                return True
            elif size < 5:
                log.debug('RIFF LIST "%s" too short: %s bytes' % (key, size))
                return True

            t = file.read(size-4)
            log.debug('parse RIFF LIST "%s": %d bytes' % (key, size))
            value = self._parseLIST(t)
            self.header[key] = value
            if key == 'INFO':
                self.infoStart = pos
                self._appendtable( 'AVIINFO', value )
            elif key == 'MID ':
                self._appendtable( 'AVIMID', value )
            elif key in ('hdrl', ):
                # no need to add this info to a table
                pass
            else:
                log.debug('Skipping table info %s' % key)

        elif name == 'JUNK':
            # remember where the padding chunk sits, then skip it
            self.junkStart = file.tell() - 8
            self.junkSize  = size
            file.seek(size, 1)
        elif name == 'idx1':
            self.has_idx = True
            log.debug('idx1: %s bytes' % size)
            # no need to parse this
            file.seek(size,1)
        elif name == 'RIFF':
            log.debug("New RIFF chunk, extended avi [%i]" % size)
            form_type = file.read(4)
            if form_type != 'AVIX':
                log.debug("Second RIFF chunk is %s, not AVIX, skipping", form_type)
                file.seek(size-4, 1)
            # that's it, no new informations should be in AVIX
            return False
        elif name == 'fmt ' and size <= 50:
            # This is a wav file.
            self.media = core.MEDIA_AUDIO
            data = file.read(size)
            fmt = struct.unpack("<HHLLHH", data[:16])
            self._set('codec', hex(fmt[0]))
            self._set('samplerate', fmt[2])
            # fmt[3] is average bytes per second, so we must divide it
            # by 125 to get kbits per second
            self._set('bitrate', fmt[3] / 125)
            # ugly hack: remember original rate in bytes per second
            # so that the length can be calculated in next elif block
            self._set('byterate', fmt[3])
            # Set a dummy fourcc so codec will be resolved in finalize.
            self._set('fourcc', 'dummy')
        elif name == 'data':
            # XXX: this is naive and may not be right.  For example if the
            # stream is something that supports VBR like mp3, the value
            # will be off.  The only way to properly deal with this issue
            # is to decode part of the stream based on its codec, but
            # kaa_metadata doesn't have this capability (yet?)
            # ugly hack: use original rate in bytes per second
            # The rate is only known after a 'fmt ' chunk was handled
            # above and may be zero in a broken file; without a usable
            # rate the length is simply left unset.
            byterate = getattr(self, 'byterate', None)
            if byterate:
                self._set('length', size / float(byterate))
            file.seek(size, 1)
        elif not name.strip(string.printable + string.whitespace):
            # check if name is something usefull at all, maybe it is no
            # avi or broken
            file.seek(size,1)
            log.debug("Skipping %s [%i]" % (name,size))
        else:
            # bad avi
            log.debug("Bad or broken avi")
            return False
        return True


# Public name of this module's parser class; the only entry in __all__.
Parser = Riff