Source code for pytomo.lib_database

#!/usr/bin/env python
''' Module for the sqlite interface to the pytomo database
    Usage (to be run interactively above the pytomo directory):

       import pytomo.start_pytomo as start_pytomo
       start_pytomo.configure_log_file('doc_test')
       import pytomo.lib_database as lib_database
       import time
       import datetime
       timestamp = time.strftime("%Y-%m-%d.%H_%M_%S")
       # to make sure a new file is created for every run.
       db_name = 'doc_test' + str(timestamp) + '.db'
       doc_db = lib_database.PytomoDatabase(db_name)
       doc_db.create_pytomo_table('doc_test_table')
       doc_db.describe_tables()
       row = (datetime.datetime(2011, 5, 6, 15, 30, 50, 103775),
            'Youtube', 'http://www.youtube.com/watch?v=RcmKbTR--iA',
            'http://v15.lscache3.c.youtube.com',
            '173.194.20.56','default_10.193.225.12', None, None, None,
            8.9944229125976562, 'mp4', 225, 115012833.0, 511168.14666666667,
            9575411, 0, 1024, 100, 0.99954795837402344, 7.9875903129577637,
            40, 11.722306421319782, 1192528.8804511931,
            'http://www.youtube.com/fake_redirect')
       doc_db.insert_record(row)
       doc_db.fetch_all()
       doc_db.fetch_all_parameters(['DownloadTime', 'PingMin', 'PingMax'])

       >>> import time
       >>> timestamp = time.strftime("%Y-%m-%d.%H_%M_%S")
       >>> # to make sure a new file is created for every run we use
       >>> # timestamp.
       >>> db_name = 'doc_test_lib_db' + str(timestamp) + '.db'
       >>> # import pytomo.lib_database as lib_database
       >>> doc_db = PytomoDatabase(db_name)
       >>> doc_db.create_pytomo_table('doc_test_table')
       >>> doc_db.describe_tables() #doctest: +NORMALIZE_WHITESPACE
       (u'CREATE TABLE doc_test_table(ID TIMESTAMP,\\n
           Service text,\\n                       Url text,\\n
           CacheUrl text,\\n                       IP text,\\n
           Resolver text,\\n                       PingMin real,\\n
           PingAvg real,\\n                       PingMax real,\\n
           DownloadTime real,\\n                       VideoType text,\\n
           VideoDuration real,\\n                       VideoLength real,\\n
           EncodingRate real,\\n                       DownloadBytes int,\\n
           DownloadInterruptions int,\\n                       InitialData
           real,\\n                       InitialRate real,\\n
           InitialPlaybacKBuffer real,\\n
           BufferingDuration real,\\n                       PlaybackDuration
           real,\\n                       BufferDurationAtEnd real,\\n
           MaxInstantThp real,\\n                       RedirectUrl text\\n
           )',)
       >>> import datetime
       >>> record = (datetime.datetime(2011, 5, 6, 15, 30, 50, 103775),
       ... 'Youtube', 'http://www.youtube.com/watch?v=RcmKbTR--iA',
       ... 'http://v15.lscache3.c.youtube.com',
       ... '173.194.20.56','default_10.193.225.12', None, None, None,
       ... 8.9944229125976562, 'mp4', 225, 115012833.0, 511168.14666666667,
       ... 9575411, 0, 1024 ,100,  0.99954795837402344, 7.9875903129577637,
       ... 35, 11.722306421319782, 1192528.8804511931, None)
       >>> doc_db.insert_record(record)
       >>> record = (datetime.datetime(2011, 5, 6, 15, 40, 50, 103775),
       ... 'Youtube', 'http://www.youtube.com/watch?v=RcmKbTR--iA',
       ... 'http://v15.lscache3.c.youtube.com',
       ... '173.194.20.56','default_10.193.225.12', None, None, None,
       ... 8.9944229125976562, 'mp4', 225, 115012833.0, 511168.14666666667,
       ... 9575411, 0, 1024, 100, 0.99954795837402344, 7.9875903129577637,
       ... 40, 11.722306421319782, 1192528.8804511931,
       ... 'http://www.youtube.com/fake_redirect')
       >>> doc_db.insert_record(record)
       >>> doc_db.fetch_all() #doctest: +NORMALIZE_WHITESPACE
       (u'2011-05-06 15:30:50.103775',
        u'Youtube',
        u'http://www.youtube.com/watch?v=RcmKbTR--iA',
        u'http://v15.lscache3.c.youtube.com',
        u'173.194.20.56',
        u'default_10.193.225.12',
        None,
        None,
        None,
        8.9944229125976562,
        u'mp4',
        225.0,
        115012833.0,
        511168.14666666667,
        9575411,
        0,
        1024.0,
        100.0,
        0.99954795837402344,
        7.9875903129577637,
        35.0,
        11.722306421319782,
        1192528.8804511931,
        None)
       (u'2011-05-06 15:40:50.103775',
        u'Youtube',
        u'http://www.youtube.com/watch?v=RcmKbTR--iA',
        u'http://v15.lscache3.c.youtube.com',
        u'173.194.20.56',
        u'default_10.193.225.12',
        None,
        None,
        None,
        8.9944229125976562,
        u'mp4',
        225.0,
        115012833.0,
        511168.14666666667,
        9575411,
        0,
        1024.0,
        100.0,
        0.99954795837402344,
        7.9875903129577637,
        40.0,
        11.722306421319782,
        1192528.8804511931,
        u'http://www.youtube.com/fake_redirect')
       >>> doc_db.fetch_single_parameter('DownloadTime')
       ... #doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        [(u'2011-05-06 15:30:50.103775', 8.9944229125976562),
        (u'2011-05-06 15:40:50.103775', 8.9944229125976562)]
        >>> doc_db.fetch_all_parameters(['DownloadTime', 'PingMin', 'PingMax'])
        ... #doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        [(8.9944229125976562, None, None, u'2011-05-06 15:30:50.103775'),
        (8.9944229125976562, None, None, u'2011-05-06 15:40:50.103775')]
        >>> doc_db.fetch_start_time()
        1304688650
        >>> from os import unlink
        >>> unlink(db_name)
'''

from __future__ import with_statement, absolute_import, print_function
import sqlite3
from pprint import pprint
import operator

# only for logging
import logging
import sys
import time
import os

# config file
try:
    from . import config_pytomo
except ValueError:
    import config_pytomo

class PytomoDatabase:
    ''' Pytomo database class

    Thin wrapper around one sqlite3 connection/cursor holding a single
    pytomo result table.

    The columns of the pytomo table are as follows:
        ID - A timestamp generated for each record entered
        Service - The website on which the analysis is performed
                  Example: Youtube, Dailymotion
        Url - The url of the webpage
        CacheUrl - The Url of the cache server hosting the video
        CacheServerDelay - The delay to obtain the cache server url (from
                  the initial web page)
        IP - The IP address of the cache server from which the video is
                  downloaded
        Resolver - The DNS resolver used to obtain the IP address of the
                  cache server, prefixed with the ISP given (if any)
                  Example: Google DNS, Local DNS
        ResolveTime - The time to get an answer from DNS
        ASNumber - The AS as resolved by RIPE
        PingMin - The minimum recorded ping time to the resolved IP address
                  of the cache server
        PingAvg - The average recorded ping time to the resolved IP address
                  of the cache server
        PingMax - The maximum recorded ping time to the resolved IP address
                  of the cache server
        DownloadTime - The time taken to download the video sample (we do
                  not download the entire video but only for a limited
                  download time)
        VideoType - The type of the video (text, e.g. 'mp4')
        VideoDuration - The actual duration of the complete video
        VideoLength - The length (in bytes) of the complete video
        EncodingRate - The encoding rate of the video:
                  VideoLength/VideoDuration
        DownloadBytes - The length of the video sample (in bytes)
        DownloadInterruptions - Nb of interruptions experienced during the
                  download
        InitialData - Number of bytes downloaded in the initial buffering
                  period
        InitialRate - The mean data rate (in kbps) during the initial
                  buffering period
        InitialPlaybacKBuffer - (real) not described by the original
                  documentation
        BufferingDuration - Accumulated time spent in buffering state
        PlaybackDuration - Accumulated time spent in playing state
        BufferDurationAtEnd - The buffer length at the end of download
        TimeTogetFirstByte - Time to get first byte
        MaxInstantThp - The max instantaneous throughput of the download
        RedirectUrl - The redirection Url in case of an HTTP redirect
        StatusCode - HTTP return code
    '''
    # Name of the table the instance works on; filled in by
    # create_pytomo_table() or discovered lazily from sqlite_master.
    _table_name = None
    # True once the connection has been opened, False if opening failed.
    created = None

    def __init__(self, database_file=config_pytomo.DATABASE_TIMESTAMP):
        '''Open a connection to the sqlite file *database_file*.

        If the connection cannot be opened the error is logged and
        ``self.created`` is set to False; the methods that touch the
        database check that flag and abort with a warning.
        '''
        # Initialize the logger for standalone testing
        if not config_pytomo.LOG:
            self.logger_db()
        try:
            # isolation_level=None in order to auto-commit
            self.py_conn = sqlite3.connect(database_file,
                                           isolation_level=None)
        except sqlite3.Error as mes:
            config_pytomo.LOG.exception(''.join((
                'Unable to connect to the database: ', database_file,
                '\nError message: ', str(mes))))
            self.created = False
            return
        config_pytomo.LOG.info(' '.join(("Created connection to data base",
                                         "Database:",
                                         os.path.basename(database_file))))
        self.created = True
        self.py_cursor = self.py_conn.cursor()

    def _is_usable(self, action):
        '''Return True if the connection exists; otherwise log that *action*
        is aborted and return False.'''
        if self.created:
            return True
        config_pytomo.LOG.warning('Database could not be created\n'
                                  '%s aborted' % action)
        return False

    def _lookup_table_name(self):
        '''Return the working table name, or None if it cannot be determined.

        When no name is known yet, sqlite_master is queried and the name is
        adopted only if the database holds exactly one entry.
        '''
        if not self._table_name:
            self.py_cursor.execute("SELECT name FROM sqlite_master")
            entries = self.py_cursor.fetchall()
            if len(entries) == 1:
                self._table_name = entries[0][0]
        return self._table_name

    def create_pytomo_table(self, table=config_pytomo.TABLE_TIMESTAMP):
        ''' Function to create a table

        *table* becomes the working table of this instance even if the
        CREATE statement fails (e.g. because the table already exists).
        '''
        if not self._is_usable('Table creation'):
            return
        self._table_name = table
        # SQL identifiers cannot be bound as '?' parameters, so the table
        # name is concatenated into the statement: it must come from a
        # trusted source (configuration), never from external input.
        cmd = ''.join((
            "CREATE TABLE ", table,
            """(ID TIMESTAMP,
                       Service text,
                       Url text,
                       CacheUrl text,
                       CacheServerDelay real,
                       IP text,
                       Resolver text,
                       ResolveTime real,
                       ASNumber int,
                       PingMin real,
                       PingAvg real,
                       PingMax real,
                       DownloadTime real,
                       VideoType text,
                       VideoDuration real,
                       VideoLength real,
                       EncodingRate real,
                       DownloadBytes int,
                       DownloadInterruptions int,
                       InitialData real,
                       InitialRate real,
                       InitialPlaybacKBuffer real,
                       BufferingDuration real,
                       PlaybackDuration real,
                       BufferDurationAtEnd real,
                       TimeTogetFirstByte real,
                       MaxInstantThp real,
                       RedirectUrl text,
                       StatusCode int
                       )"""))
        try:
            self.py_cursor.execute(cmd)
        except sqlite3.Error as mes:
            # Any sqlite error ends up here; the usual cause is that the
            # table is already present.
            config_pytomo.LOG.info("Table %s already exists: %s"
                                   % (table, mes))
        else:
            config_pytomo.LOG.info("Creating table : %s" % table)

    def insert_record(self, row):
        ''' Function to insert a record

        *row* is a sequence of values, one per column.  The statement is
        built with ``config_pytomo.NB_FIELDS + 1`` placeholders (NB_FIELDS
        is presumably the column count minus one -- confirm in
        config_pytomo).  sqlite errors are logged, not raised.
        '''
        if not self._is_usable('Insertion'):
            return
        table = self._lookup_table_name()
        if not table:
            # Without this guard the join below fails with a TypeError.
            config_pytomo.LOG.error('No tables found in database')
            return
        cmd = ''.join(("INSERT INTO ", table, " VALUES(?",
                       ',?' * config_pytomo.NB_FIELDS, ')'))
        try:
            self.py_cursor.execute(cmd, row)
        except sqlite3.Error as mes:
            config_pytomo.LOG.error('unable to add row: %s with error: %s'
                                    % (row, mes))
        else:
            config_pytomo.LOG.debug('row added to table')

    def fetch_all(self):
        ''' Function to print all the records of the table'''
        if not self._is_usable('Fetch'):
            return
        table = self._lookup_table_name()
        if not table:
            print("No tables found in database")
            # Nothing to select from: stop instead of building bad SQL.
            return
        self.py_cursor.execute(' '.join(("SELECT * FROM", table)))
        for record in self.py_cursor:
            pprint(record)

    def describe_tables(self):
        '''Function to show the create command of every table'''
        if not self._is_usable('Fetch'):
            return
        if not self._lookup_table_name():
            print("No tables found in database")
        # The listing below does not depend on the working table name.
        cmd = "SELECT sql FROM sqlite_master WHERE type = 'table'"
        self.py_cursor.execute(cmd)
        for record in self.py_cursor:
            pprint(record)

    def count_rows(self):
        '''Function to return the number of rows in a table.
        If there are problems related to database integrity, -1 is returned.
        '''
        if not self._is_usable('Fetch'):
            return -1
        table = self._lookup_table_name()
        if not table:
            print("No tables found in database")
            return -1
        self.py_cursor.execute(' '.join(("SELECT COUNT(*) FROM", table)))
        return self.py_cursor.fetchall()[0][0]

    def fetch_single_parameter_with_stats(self, parameter):
        '''Function to return (timestamp, parameter) as a list of tuples
        sorted by timestamp, only for records with stats
        (DownloadTime > 0).

        *parameter* is a column name interpolated into the SQL text; it must
        not come from untrusted input.  Returns None if the database or the
        table is unavailable.
        '''
        if not self._is_usable('Fetch'):
            return
        table = self._lookup_table_name()
        if not table:
            print("No tables found in database")
            return
        cmd = ' '.join(('SELECT ID, %s FROM' % parameter, table,
                        'WHERE DownloadTime > 0'))
        self.py_cursor.execute(cmd)
        return sorted(self.py_cursor.fetchall(), key=operator.itemgetter(0))

    def fetch_single_parameter(self, parameter):
        '''Function to return (timestamp, parameter) as a list of tuples
        sorted by timestamp.

        *parameter* is a column name interpolated into the SQL text; it must
        not come from untrusted input.  Returns None if the database or the
        table is unavailable.
        '''
        if not self._is_usable('Fetch'):
            return
        table = self._lookup_table_name()
        if not table:
            print("No tables found in database")
            return
        cmd = ' '.join(("SELECT ID, %s FROM" % parameter, table))
        self.py_cursor.execute(cmd)
        return sorted(self.py_cursor.fetchall(), key=operator.itemgetter(0))

    def fetch_all_parameters(self, parameters):
        '''Function to return (parameter_1, ..., parameter_n, timestamp) as
        a list of tuples sorted by timestamp.

        *parameters* is a sequence of column names interpolated into the SQL
        text; they must not come from untrusted input.  Returns None if the
        database or the table is unavailable.
        '''
        if not self._is_usable('Fetch'):
            return
        table = self._lookup_table_name()
        if not table:
            print("No tables found in database")
            return
        # create the command to extract all the specified parameters
        # timestamp is the last element of each tuple
        join_parameters = ', '.join(parameters)
        cmd = ' '.join(("SELECT %s, ID FROM" % join_parameters, table))
        config_pytomo.LOG.debug('Select command: %s' % cmd)
        self.py_cursor.execute(cmd)
        all_parameters = self.py_cursor.fetchall()
        return sorted(all_parameters, key=operator.itemgetter(-1))

    def fetch_start_time(self):
        '''Function to return the first timestamp in the database in linux
        format (integer seconds, see time_to_epoch).

        Returns None if the database or table is unavailable, if the table
        is empty, or if the smallest ID cannot be parsed.
        '''
        if not self._is_usable('Fetch'):
            return
        table = self._lookup_table_name()
        if not table:
            config_pytomo.LOG.warning('No tables found in database')
            return
        self.py_cursor.execute(' '.join(("SELECT ID FROM", table)))
        try:
            # min() raises ValueError on an empty table, as does
            # time_to_epoch on an unparsable timestamp.
            timestamp = time_to_epoch(min(self.py_cursor.fetchall())[0])
        except ValueError:
            timestamp = None
        return timestamp

    def close_handle(self):
        "Closes the connection to the database"
        if not self._is_usable('Close'):
            return
        self.py_conn.close()

    @staticmethod
    def logger_db():
        ''' Initialize the logger

        Installs a 'pytomo_db' logger as ``config_pytomo.LOG``.  Output goes
        to stdout when ``config_pytomo.LOG_FILE`` is '-', otherwise to a
        timestamp-prefixed file inside ``config_pytomo.LOG_DIR``.

        Raises IOError if the log file cannot be opened for appending.
        '''
        config_pytomo.LOG = logging.getLogger('pytomo_db')
        # to not have console output
        config_pytomo.LOG.propagate = False
        config_pytomo.LOG.setLevel(config_pytomo.LOG_LEVEL)
        timestamp = time.strftime("%Y-%m-%d.%H_%M_%S")
        if config_pytomo.LOG_FILE == '-':
            handler = logging.StreamHandler(sys.stdout)
        else:
            log_file = os.path.sep.join((
                config_pytomo.LOG_DIR,
                '.'.join((timestamp, config_pytomo.LOG_FILE))))
            # Probe the file first so that an unwritable location is
            # reported with an explicit message.
            try:
                with open(log_file, 'a') as _:
                    pass
            except IOError:
                raise IOError('Logfile %s could not be open for writing'
                              % log_file)
            handler = logging.FileHandler(filename=log_file)
        log_formatter = logging.Formatter("%(asctime)s - %(name)s - "
                                          "%(levelname)s - %(message)s")
        handler.setFormatter(log_formatter)
        config_pytomo.LOG.addHandler(handler)

# TODO
def time_to_epoch(timestamp):
    ''' Function to transform to seconds from epoch a time represented by a
    string of the form '%Y-%m-%d %H:%M:%S.%f' or '%Y-%m-%d %H:%M:%S'.

    The string is interpreted as local time (time.mktime), so the returned
    integer depends on the timezone of the machine: for instance
    '2012-06-25 14:54:57.422007' gives 1340628897 when the local timezone
    is UTC+2.  The fractional part of the seconds is discarded.

    Raises ValueError if the string matches neither format and TypeError
    if *timestamp* is not a string.

    >>> isinstance(time_to_epoch('2012-06-25 14:54:57.422007'), int)
    True
    >>> (time_to_epoch('2012-06-25 14:54:57.422007')
    ...  == time_to_epoch('2012-06-25 14:54:57'))
    True
    >>> (time_to_epoch('2012-06-25 14:54:58')
    ...  - time_to_epoch('2012-06-25 14:54:57'))
    1
    >>> time_to_epoch(None) #doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
        ...
    TypeError: expected string or buffer
    >>> time_to_epoch('25/06/2012') #doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
        ...
    ValueError: time data '25/06/2012' does not match format
    '''
    try:
        parsed = time.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f")
    except ValueError:
        # No fractional seconds in the string: retry with the short format
        # and let its ValueError propagate if that fails too.
        parsed = time.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
    return int(time.mktime(parsed))
# Executed as a script: run every doctest embedded in this module.
if __name__ == '__main__':
    from doctest import testmod
    testmod()