# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# jpg.py - jpg file parsing
# -----------------------------------------------------------------------------
# $Id: jpg.py 3654 2008-10-26 20:05:40Z dmeyer $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2006 Thomas Schueppel, Dirk Meyer
#
# First Edition: Thomas Schueppel <stain@acm.org>
# Maintainer: Dirk Meyer <dischi@freevo.org>
#
# Please see the file AUTHORS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------
from __future__ import absolute_import
__all__ = ['Parser']
# python imports
import struct
import time
import logging
import cStringIO
# import kaa_metadata.image core
from . import core
from . import EXIF
from . import IPTC
# get logging object
log = logging.getLogger('metadata')
# interesting file format info:
# http://www.dcs.ed.ac.uk/home/mxr/gfx/2d-hi.html
# http://www.funducode.com/freec/Fileformats/format3/format3b.htm
SOF = { 0xC0 : "Baseline",
0xC1 : "Extended sequential",
0xC2 : "Progressive",
0xC3 : "Lossless",
0xC5 : "Differential sequential",
0xC6 : "Differential progressive",
0xC7 : "Differential lossless",
0xC9 : "Extended sequential, arithmetic coding",
0xCA : "Progressive, arithmetic coding",
0xCB : "Lossless, arithmetic coding",
0xCD : "Differential sequential, arithmetic coding",
0xCE : "Differential progressive, arithmetic coding",
0xCF : "Differential lossless, arithmetic coding",
}
EXIFMap = {
'Image Artist': 'artist',
'Image Model': 'hardware',
'Image Software': 'software',
}
class JPG(core.Image):
"""
JPEG parser supporting EXIf and IPTC tables. The important
information is mapped to match the kaa_metadata key naming, the
complete table can be accessed with self.tables.
"""
table_mapping = { 'EXIF': EXIFMap, 'IPTC': IPTC.mapping }
def __init__(self,file):
core.Image.__init__(self)
self.mime = 'image/jpeg'
self.type = 'jpeg image'
if file.read(2) != '\xff\xd8':
raise core.ParseError()
file.seek(-2,2)
if file.read(2) != '\xff\xd9':
# Normally an JPEG should end in ffd9. This does not however
# we assume it's an jpeg for now
log.info("Wrong encode found for jpeg")
file.seek(2)
app = file.read(4)
self.meta = {}
while (len(app) == 4):
(ff,segtype,seglen) = struct.unpack(">BBH", app)
if ff != 0xff: break
if segtype == 0xd9:
break
elif SOF.has_key(segtype):
data = file.read(seglen-2)
(precision,self.height,self.width,\
num_comp) = struct.unpack('>BHHB', data[:6])
elif segtype == 0xe1:
data = file.read(seglen-2)
type = data[:data.find('\0')]
if type == 'Exif':
# create a fake file from the data we have to
# pass it to the EXIF parser
fakefile = cStringIO.StringIO()
fakefile.write('\xFF\xD8')
fakefile.write(app)
fakefile.write(data)
fakefile.seek(0)
exif = EXIF.process_file(fakefile)
fakefile.close()
if exif:
self.thumbnail = exif.get('JPEGThumbnail', None)
if self.thumbnail:
self.thumbnail = str(self.thumbnail)
self._appendtable('EXIF', exif)
if 'Image Orientation' in exif:
orientation = str(exif['Image Orientation'])
if orientation.find('90 CW') > 0:
self.rotation = 90
elif orientation.find('90') > 0:
self.rotation = 270
elif orientation.find('180') > 0:
self.rotation = 180
t = exif.get('Image DateTimeOriginal')
if not t:
# sometimes it is called this way
t = exif.get('EXIF DateTimeOriginal')
if not t:
t = exif.get('Image DateTime')
if t:
try:
t = time.strptime(str(t), '%Y:%m:%d %H:%M:%S')
self.timestamp = int(time.mktime(t))
except ValueError:
# Malformed time string.
pass
elif type == 'http://ns.adobe.com/xap/1.0/':
# FIXME: parse XMP data (xml)
doc = data[data.find('\0')+1:]
else:
pass
elif segtype == 0xed:
iptc = IPTC.parseiptc(file.read(seglen-2))
if iptc:
self._appendtable('IPTC', iptc)
elif segtype == 0xe7:
# information created by libs like epeg
data = file.read(seglen-2)
if data.count('\n') == 1:
key, value = data.split('\n')
self.meta[key] = value
elif segtype == 0xfe:
self.comment = file.read(seglen-2)
if self.comment.startswith('<?xml'):
# This could be a comment based on
# http://www.w3.org/TR/photo-rdf/
log.error('xml comment parser not integrated')
self.comment = ''
else:
# Huffman table marker (FFC4)
# Start of Scan marker (FFDA)
# Quantization table marker (FFDB)
# Restart Interval (FFDD) ???
if not segtype in (0xc4, 0xda, 0xdb, 0xdd):
log.info("SEGMENT: 0x%x%x, len=%d" % (ff,segtype,seglen))
file.seek(seglen-2,1)
app = file.read(4)
if len(self.meta.keys()):
self._appendtable( 'JPGMETA', self.meta )
for key, value in self.meta.items():
if key.startswith('Thumb:') or key == 'Software':
self._set(key, value)
Parser = JPG