#!/usr/bin/env python
'''Module for I/O operations on filesystem.
Used to write the index.html to display the graphical interface.
Version: 0.1
Author: Ana Oprea
Date: 20.07.2012
Usage:
'''
from __future__ import with_statement, absolute_import, print_function
import time
from optparse import OptionParser
import re
# only for logging
import logging
import sys
import os
from itertools import chain
from datetime import datetime
from operator import itemgetter
# config file
try:
from . import config_pytomo
except ValueError:
import config_pytomo
# database data
try:
from . import lib_database
except ValueError:
import lib_database
from sqlite3 import Error
# parameters to extract from db and plot
try:
from .lib_plot import UNITS
except ValueError:
from lib_plot import UNITS
# to check if files/dirs exist and set name
try:
from .start_pytomo import check_out_files
except ValueError:
from start_pytomo import check_out_files
from .fpdf import FPDF, HTMLMixin
# FPDF can buffer output or write to file
WRITE_TO_FILE = 'F'
# html related
# TODO: relative path?
# template that contains style and header
START_TEMPLATE_NAME = 'start_template.html'
# template that contains footer
END_TEMPLATE_NAME = 'end_template.html'
# names and headers
MIDDLE_COL_TABLE_HEADER = ['Time', 'Video', 'Cache server']
MID_COL_NAME = 'col1'
MID_COL_HEADER = 'Graphs'
LEFT_COL_NAME = 'col2'
LEFT_COL_HEADER = 'Links to graphs'
RIGHT_COL_NAME = 'col3'
RIGHT_COL_HEADER = 'Average parameters'
RIGHT_COL_TABLE_HEADER = ['Parameter', 'Value']
# html tags
BR_TAG = '<BR>'
IMG_TAG = '<IMG src="%s" alt=%s>'
LI_START_TAG = '<li>'
LI_END_TAG = '</li>'
UL_START_TAG = '<ul>'
UL_NOSTYLE_START_TAG = '<ul style="list-style: none;">'
UL_END_TAG = '</ul>'
DIV_START_TAG = '<div class=%s>'
DIV_END_TAG = '</div>'
HEADER_TAG = '<h2>%s</h2>'
TABLE_START_TAG = '<table border="1"; align="center">'
TABLE_END_TAG = '</table>'
TR_START_TAG = '<tr>'
TR_END_TAG = '</tr>'
TH_TAG = '<th>%s</th>'
TH_TAG_WIDTH = '<th width="50%">%s</th>'
TH_TAG_ALIGNED = '<th align="left">%s</th>'
TD_START_TAG = '<td>'
TD_START_TAG_ALIGNED = '<td align="left">'
TD_END_TAG = '</td>'
A_TAG = '<a href="%s">%s</a>'
P_TAG = '<p>%s</p> '
# nr. of spaces before the DIV tags in the main page
NR_DIV_SPACES = 13
# nr. of spaces before the h2/IMG tags in the main page
NR_INNER_SPACES = 17
# end of line
END_LINE = '\n'
# separator for directories when accessing through http
# for example, you for an image, it will always be ..\images\xx.png
PATH_SEPARATOR = '/'
# text to display for Service link
TEXT_YT = ' video'
# text to display for cache server
TEXT_CACHE = 'cache server'
# names will have parameter and timestamp connected by '_'
NAME_CONNECTOR = '_'
# dictionaries/lists with parameters to plot
# key that contains all the parameters to plot
ALL_KEY = 'All_Parameters'
# key that contains all the main parameters to plot
MAIN_KEY = ' Main_Parameters'
# key that is used to display 'Service' videos downloaded instead of plots
LINKS_KEY = 'Links_to_played_videos'
# key that is used to display the existent databases in the database directory
DB_KEY = 'Database_archive'
# key that is used to link to the description
DOC_KEY = 'Project_documentation'
# database extension
DB_EXTENSION = '(pytomo_database\.db)$'
# database input parameter
DB_INPUT = '?db='
# units for graphs that do not have records in the database
NO_DB_RECORDS_UNITS = {
'AvgThroughput': 'kbps',
'ReceptionRatio': '',
'VideosWithInterruptions': ''
}
# dictionary with mappings for what to plot
ALL_PLOTS = {
ALL_KEY: sorted(UNITS.keys() + NO_DB_RECORDS_UNITS.keys()),
MAIN_KEY: ['VideosWithInterruptions', 'AvgThroughput',
'BufferingDuration', 'PingAvg'],
'QoE': ['VideosWithInterruptions','DownloadInterruptions',
'ReceptionRatio', 'BufferingDuration', 'BufferDurationAtEnd'],
'QoS': ['AvgThroughput', 'MaxInstantThp', 'InitialRate',
'InitialData', 'DownloadBytes', 'DownloadTime',
'PlaybackDuration', 'PingMin', 'PingAvg', 'PingMax'],
'Video_characteristics': ['VideoLength', 'VideoDuration',
'EncodingRate'],
}
# dictionary with explanations for what is plotted
MAN_PLOTS = {
'PingMin': 'The minimum recorded ping time to the resolved IP address'
' of the cache server',
'PingAvg': 'The average recorded ping time to the resolved IP address'
' of the cache server',
'PingMax': 'The maximum recorded ping time to the resolved IP address'
' of the cache server',
'DownloadTime': 'The time taken to download the video sample (we do not'
' download the entire video, but only its beginning)',
'VideoDuration': 'The actual duration of the complete video',
'VideoLength': 'The length of the complete video',
'EncodingRate': 'The encoding rate of the video:'
' VideoLength / VideoDuration',
'DownloadBytes': 'The length of the video sample',
'DownloadInterruptions': 'Number of interruptions experienced during'
' the download',
'InitialData': 'Data downloaded in the initial buffering period',
'InitialRate': 'The mean data rate during the initial'
' buffering period',
'BufferingDuration': 'Accumulated time spent in buffering state',
'PlaybackDuration': 'Accumulated time spent in playing state',
'BufferDurationAtEnd': 'The buffer length at the end of the download',
'MaxInstantThp': 'The maximum instantaneous throughput of the '
'download',
'AvgThroughput': 'The average throughput: DownloadBytes / DownloadTime',
'ReceptionRatio': 'Quality of Experience parameter: '
'AvgThroughput / EncodingRate',
'VideosWithInterruptions': 'Quality of Experience parameter: signals'
' interruptions during video download <BR>'
'(1 - there are interuptions, 0 - no interuptions)'
}
# average parameters
AVERAGE_PARAM_DESCRIPTION = ['Service crawled', 'Total crawl time',
'Start crawl time', 'End crawl time', 'Number of videos played',
'Download Time average (sec)', 'Download Interruptions average',
'Ping average (msec)']
AVERAGE_PARAM = ['DownloadTime', 'DownloadInterruptions', 'PingAvg']
# parameters regarding urls - must change Service position when needed!!!
URL_PARAM = ['Url', 'CacheUrl', 'Service']
SERVICE_POS_IN_URL = 2
# indexes of parameters (for example {'Url': 0})
#INDEX_DICT = dict((v,k) for (k,v) in enumerate(DATA_PARAM))
# timestamp position in the data extracted from the database
TIMESTAMP_POSITION = -1
# max timestamp length (not to display fractions)
MAX_TIMESTAMP_LENGTH = 19
# precision of average parameters
AVERAGE_PRECISION = 4
# message to display in case there are not enough records in db to create plots
DB_NO_RECORDS_MSG = ('There are not enough records in the selected database to'
' create the graphs. Please wait for the database to be '
'populated or select another database.')
# only make page autorefresh if database has changed recently
REFRESH_TIMEOUT = 120000 # 120s
# http://ckon.wordpress.com/2008/07/25/stop-using-windowonload-in-javascript/
ONLOAD_REFRESH_SCRIPT = ('<script type="text/JavaScript">'
'window.addEventListener ?\n'
'window.addEventListener("load",'
'AutoRefresh(%i),false):\n'
'window.attachEvent && window.attachEvent("onload",'
'AutoRefresh(%i));\n'
'</script>\n'% (REFRESH_TIMEOUT, REFRESH_TIMEOUT))
# Service being crawled taken from the database info, default config_pytomo
SERVICE = config_pytomo.CRAWL_SERVICE
# message to display PDF path
PDF_MSG = 'The PDF report of this page is can be found at:<BR>'
PDF_HEADER = '<H1 align="center">Pytomo Statistics Report</H1>'
# how many BR to insert in order to not write man under image
PDF_SPACE_FOR_IMG = 12
# how many BR to insert between images
PDF_SPACE_BETWEEN = 5
# connector for http links (needed for rel path of pdf on win)
HTTP_CONNECTOR = '/'
[docs]class PytomoFPDF(FPDF, HTMLMixin):
''' Class to create pdf from html
'''
def __init__(self, pdf_name):
''' Set the pdf name
'''
self.pdf_name = pdf_name
super(PytomoFPDF, self).__init__()
[docs] def close_pdf(self):
self.output(self.pdf_name, WRITE_TO_FILE)
[docs]def create_pdf(pdf_name, timestamp, average_values, *parameters):
config_pytomo.LOG.debug('pdf_name = %s; timestamp = %s; parameters = %s' %
(pdf_name, timestamp, str(parameters)))
pdf = PytomoFPDF(pdf_name)
pdf.add_page()
#config_pytomo.LOG.debug('Added 1 page')
html_text = html_graphs_for_pdf(timestamp, average_values, *parameters)
pdf.write_html(html_text)
#config_pytomo.LOG.debug('Wrote html')
pdf.close_pdf()
#config_pytomo.LOG.debug('Closed pdf')
# code adapted from write_middle/right_column, done in a hurry
[docs]def html_graphs_for_pdf(timestamp, average_values, *parameters):
''' Function to return the html containing graphs and their explanation
for the *parameters
'''
html_text = PDF_HEADER
# average values
if not average_values:
config_pytomo.LOG.debug('No data has been received to create the list')
return html_text
# start list of average values
html_text += (' ' * NR_INNER_SPACES + UL_START_TAG + END_LINE)
for parameter, value in zip(AVERAGE_PARAM_DESCRIPTION, average_values):
html_text += ((' ' * NR_INNER_SPACES + LI_START_TAG + '%s: %s' +
LI_END_TAG + END_LINE) % (parameter, value))
# end list of average values
html_text += (' ' * NR_INNER_SPACES + UL_END_TAG + END_LINE)
html_text += (BR_TAG)
# graphs
# if there are not enough records in the db to display plots
if not timestamp and parameters[0] != DB_KEY:
html_text += ((P_TAG + END_LINE) % DB_NO_RECORDS_MSG)
return
# check which graphs to display
if parameters[0] not in [LINKS_KEY, DB_KEY]:
param_to_display = parameters
else:
param_to_display = ALL_PLOTS[MAIN_KEY]
# the middle column consists of plots for the parameters
# the graphs
for parameter in param_to_display:
for index in xrange(PDF_SPACE_BETWEEN):
html_text += (' ' * NR_INNER_SPACES + BR_TAG + END_LINE)
# function is called from pytomo directory, the image dir is
# a subfolder of parent directory
html_text += ((' ' * NR_INNER_SPACES + IMG_TAG + END_LINE) %
(plot_path_to_write_in_pdf(parameter, timestamp), parameter))
for index in xrange(PDF_SPACE_FOR_IMG):
html_text += (BR_TAG)
html_text += ((' ' * NR_INNER_SPACES + P_TAG + END_LINE) %
MAN_PLOTS[parameter])
return html_text
# TODO: check on windows
[docs]def plot_path_to_write_in_pdf(param, timestamp):
''' Return the path to the plot relative to the TEMPLATES_DIR
Will have the pattern:
<RRD_PLOT_DIR>/<plot_name>
'''
return PATH_SEPARATOR.join((config_pytomo.RRD_PLOT_DIR,
os.path.basename(plot_filename(param, timestamp))))
[docs]def pdf_filename(param, timestamp):
''' Return the file name of the pdf (try to create it if it does not
exist). Will have the pattern:
<PDF_DIR>/<hostname>.<timestamp>.<param_PDF_FILE>
'''
return check_out_files(NAME_CONNECTOR.join((param,
config_pytomo.PDF_FILE)),
config_pytomo.PDF_DIR, str(timestamp))
[docs]def rrd_filename(timestamp):
''' Return the file name of the rrd (try to create it if it does not
exist). Will have the pattern:
<RRD_DIR>/<hostname>.<timestamp>.<RRD_FILE>
'''
return check_out_files(config_pytomo.RRD_FILE, config_pytomo.RRD_DIR,
str(timestamp))
[docs]def plot_filename(param, timestamp):
''' Return the file name of the plot (try to create it if it does not
exist). Will have the pattern:
<RRD_PLOT_DIR>/<hostname>.<timestamp>.<param_IMAGE_FILE>
'''
return check_out_files(NAME_CONNECTOR.join((param,
config_pytomo.IMAGE_FILE)),
config_pytomo.RRD_PLOT_DIR, str(timestamp))
[docs]def index_filename(param, timestamp):
''' Return the file name of the index (try to create it if it does not
exist). Will have the pattern:
<TEMPLATES_DIR>/<hostname>.<timestamp>.<param_TEMPLATE_FILE>
'''
return check_out_files(NAME_CONNECTOR.join((param,
config_pytomo.TEMPLATE_FILE)),
config_pytomo.TEMPLATES_DIR, str(timestamp))
[docs]def plot_path_to_write_in_html(param, timestamp):
''' Return the path to the plot relative to the TEMPLATES_DIR
Will have the pattern:
../<RRD_PLOT_DIR>/<plot_name>
'''
return PATH_SEPARATOR.join((os.pardir, config_pytomo.RRD_PLOT_DIR,
os.path.basename(plot_filename(param, timestamp))))
[docs]def check_templates_exist(timestamp):
''' Verify that all html templates and their plots have been created.
'''
for parameter in ALL_PLOTS[ALL_KEY]:
if (not (os.path.getsize(index_filename(parameter, timestamp)))
or not (os.path.getsize(plot_filename(parameter, timestamp)))):
return False
for parameter in chain(ALL_PLOTS.keys(), [LINKS_KEY, DB_KEY]):
if not os.path.getsize(index_filename(parameter, timestamp)):
return False
return True
[docs]def get_latest_file(path):
''' Function to return the newest file in a path
>>> import os.path
>>> from tempfile import NamedTemporaryFile
>>> f = NamedTemporaryFile(delete=False)
>>> f.name == get_latest_file(os.path.dirname(f.name))
True
>>> f.close()
>>> os.unlink(f.name)
'''
for root, dirs, files in os.walk(path):
try:
return max([os.path.join(root, name) for name in files],
key=os.path.getmtime)
except ValueError:
config_pytomo.LOG.error('There is no file in %s' % path)
[docs]def get_latest_specific_file(path, include):
''' Function to return the newest file in a path
>>> import os.path
>>> from tempfile import NamedTemporaryFile
>>> INCLUDE = 'test'
>>> f = NamedTemporaryFile(suffix=INCLUDE, delete=False)
>>> f.name == get_latest_specific_file(os.path.dirname(f.name), INCLUDE)
True
>>> f.close()
>>> os.unlink(f.name)
'''
try:
return max(get_specific_files(path, include), key=os.path.getmtime)
except TypeError:
#config_pytomo.LOG.warning('There is no file in %s that includes %s!' %
# (path, include))
return None
[docs]def get_specific_files(path, include):
''' Function to return all the files in path that contain include string in
their name
>>> import os.path
>>> from tempfile import NamedTemporaryFile
>>> INCLUDE = 'test'
>>> f1 = NamedTemporaryFile(suffix=INCLUDE, delete=False)
>>> f2 = NamedTemporaryFile(suffix=INCLUDE, delete=False)
>>> f3 = NamedTemporaryFile(delete=False)
>>> set([f1.name, f2.name]) == set(
... get_specific_files(os.path.dirname(f1.name), INCLUDE))
True
>>> set([f1.name, f2.name, f3.name]) == set(
... get_specific_files(os.path.dirname(f1.name), INCLUDE))
False
>>> f1.close()
>>> f2.close()
>>> f3.close()
>>> os.unlink(f1.name)
>>> os.unlink(f2.name)
>>> os.unlink(f3.name)
'''
p = re.compile(include)
for root, dirs, files in os.walk(path):
# both Linux&Win must have / as separator (http acces)
return sorted([(path + PATH_SEPARATOR + name)
for name in files if p.search(name)], key=os.path.getmtime,
reverse=True)
# AO 20121015 not used anymore
[docs]def get_file_by_param_timestamp(path, parameter, timestamp):
''' Function to return from the path directory the files for a specific
parameter timestamped or None.
The filenames are relative to the parent directory.
>>> import os.path
>>> from tempfile import NamedTemporaryFile
>>> from time import time
>>> PARAM = 'DownloadTime'
>>> TIMESTAMP = str(int(time()))
>>> RRD_PLOT_DIR = 'images'
>>> f1 = NamedTemporaryFile(suffix=PARAM, dir=RRD_PLOT_DIR, delete=False)
>>> f2 = NamedTemporaryFile(suffix=TIMESTAMP, dir=RRD_PLOT_DIR,
... delete=False)
>>> f3 = NamedTemporaryFile(suffix=(PARAM + '_' + TIMESTAMP),
... dir=RRD_PLOT_DIR, delete=False)
>>> os.path.basename(f3.name) == os.path.basename(
... get_file_by_param_timestamp(RRD_PLOT_DIR, PARAM, TIMESTAMP))
True
>>> os.path.basename(f2.name) == os.path.basename(
... get_file_by_param_timestamp(RRD_PLOT_DIR, PARAM, TIMESTAMP))
False
>>> os.path.basename(f1.name) == os.path.basename(
... get_file_by_param_timestamp(RRD_PLOT_DIR, PARAM, TIMESTAMP))
False
>>> f1.close()
>>> f2.close()
>>> f3.close()
>>> os.unlink(f1.name)
>>> os.unlink(f2.name)
>>> os.unlink(f3.name)
'''
try:
file_name = str(os.path.pardir + PATH_SEPARATOR +
get_specific_files(path, parameter + NAME_CONNECTOR +
str(timestamp))[0])
except IndexError:
file_name = None
return file_name
# AO 20121015 not used anymore
[docs]def check_templates_exist_obsolete(timestamp):
''' Verify that all html templates and their plots have been created.
'''
for parameter in ALL_PLOTS[ALL_KEY]:
if ((get_file_by_param_timestamp(config_pytomo.TEMPLATES_DIR, parameter,
timestamp) is None)
or (get_file_by_param_timestamp(config_pytomo.RRD_PLOT_DIR,
parameter, timestamp) is None)):
return False
for parameter in chain(ALL_PLOTS.keys(), [LINKS_KEY, DB_KEY]):
if (get_file_by_param_timestamp(config_pytomo.TEMPLATES_DIR, parameter,
timestamp) is None):
return False
return True
[docs]def write_database_archive(f_index, db_dir):
''' Write the list of databases from db_dir in the html template.
'''
# if a history of plots needs to be kept, the existent databases are
# displayed with links that represent parameters;
# in web.py the parameters can be retrieved as mentioned in:
# http://webpy.org/cookbook/input
f_index.write(' ' * NR_INNER_SPACES + BR_TAG + END_LINE)
# the column header
f_index.write((' ' * NR_INNER_SPACES + HEADER_TAG + END_LINE) % DB_KEY)
# the list of databases
# the start of the list of links
f_index.write(' ' * NR_INNER_SPACES + UL_NOSTYLE_START_TAG + END_LINE)
# the links
for database in get_specific_files(db_dir, DB_EXTENSION):
# function is called from pytomo directory, the image dir is
# subfolder of parent directory
f_index.write((' ' * NR_INNER_SPACES + LI_START_TAG + A_TAG +
END_LINE) % (DB_INPUT + database,
os.path.basename(database)))
f_index.write(' ' * NR_INNER_SPACES + LI_END_TAG + END_LINE)
# the end of the list of databases
f_index.write(' ' * NR_INNER_SPACES + UL_END_TAG + END_LINE)
[docs]def write_middle_column(f_index, timestamp, links, db_dir, *parameters):
''' Function to write the header and contents of the middle column - plots
for the *parameters or the table with the links to the videos downloaded
'''
# TODO: temporary, write the column div outside to also put the pdf path
# the column div
#f_index.write((' ' * NR_DIV_SPACES + DIV_START_TAG + END_LINE) %
# MID_COL_NAME)
# if only the database archive is displayed
if parameters[0] == DB_KEY:
write_database_archive(f_index, db_dir)
# the column div
f_index.write(' ' * NR_DIV_SPACES + DIV_END_TAG + END_LINE)
return
# if there are not enough records in the db to display plots
if not timestamp and parameters[0] != DB_KEY:
# the column header
f_index.write((' ' * NR_INNER_SPACES + HEADER_TAG + END_LINE) %
MID_COL_HEADER)
f_index.write((' ' * NR_INNER_SPACES + P_TAG + END_LINE) %
DB_NO_RECORDS_MSG)
# the column div
f_index.write(' ' * NR_DIV_SPACES + DIV_END_TAG + END_LINE)
return
# the column header
f_index.write((' ' * NR_INNER_SPACES + HEADER_TAG + END_LINE) %
MID_COL_HEADER)
# check which graphs to display
if parameters[0] not in [LINKS_KEY, DB_KEY]:
param_to_display = parameters
else:
param_to_display = ALL_PLOTS[MAIN_KEY]
# the middle column consists of plots for the parameters
# the graphs
for parameter in param_to_display:
f_index.write(' ' * NR_INNER_SPACES + BR_TAG + END_LINE)
# function is called from pytomo directory, the image dir is
# a subfolder of parent directory
f_index.write((' ' * NR_INNER_SPACES + IMG_TAG + END_LINE) %
(plot_path_to_write_in_html(parameter, timestamp), parameter))
f_index.write((' ' * NR_INNER_SPACES + P_TAG + END_LINE) %
MAN_PLOTS[parameter])
f_index.write(' ' * NR_INNER_SPACES + BR_TAG + END_LINE)
if parameters[0] == LINKS_KEY:
# the middle column consists of the main plots and a table with links to
# the crawled videos
# table div
f_index.write(' ' * NR_INNER_SPACES + TABLE_START_TAG + END_LINE)
# tr
f_index.write(' ' * NR_INNER_SPACES + TR_START_TAG + END_LINE)
# table header
for parameter in MIDDLE_COL_TABLE_HEADER:
f_index.write((' ' * NR_INNER_SPACES + TH_TAG_ALIGNED + END_LINE) %
parameter)
# tr
f_index.write(' ' * NR_INNER_SPACES + TR_END_TAG + END_LINE)
# table contents
for (url, url_cache, service, timestamp) in links:
# tr
f_index.write(' ' * NR_INNER_SPACES + TR_START_TAG + END_LINE)
# td for each column in a row
f_index.write(' ' * NR_INNER_SPACES + TD_START_TAG_ALIGNED +
timestamp[:MAX_TIMESTAMP_LENGTH] + TD_END_TAG + END_LINE)
f_index.write((' ' * NR_INNER_SPACES + TD_START_TAG_ALIGNED + A_TAG
+ TD_END_TAG + END_LINE) % (url, url))
f_index.write((' ' * NR_INNER_SPACES + TD_START_TAG_ALIGNED + A_TAG
+ TD_END_TAG + END_LINE) % (url_cache, url_cache))
# tr
f_index.write(' ' * NR_INNER_SPACES + TR_END_TAG + END_LINE)
# table div
f_index.write(' ' * NR_INNER_SPACES + TABLE_END_TAG + END_LINE)
# the column div
f_index.write(' ' * NR_DIV_SPACES + DIV_END_TAG + END_LINE)
[docs]def write_left_column(f_index, database):
''' Function to write the header and contents of the left column - links
'''
# the column div
f_index.write((' ' * NR_DIV_SPACES + DIV_START_TAG + END_LINE) %
LEFT_COL_NAME)
# the column header
f_index.write((' ' * NR_INNER_SPACES + HEADER_TAG + END_LINE) %
LEFT_COL_HEADER)
# the start of the list of links
f_index.write(' ' * NR_INNER_SPACES + UL_START_TAG + END_LINE)
# the links
for parameter in sorted(ALL_PLOTS.keys()):
# function is called from pytomo directory, the image dir is subfolder
# of parent directory
f_index.write((' ' * NR_INNER_SPACES + LI_START_TAG + A_TAG +
UL_START_TAG + END_LINE) % (parameter + DB_INPUT +
database, parameter))
# all the parameters are already present in the other keys
if parameter != ALL_KEY and parameter != MAIN_KEY:
for mapping in ALL_PLOTS[parameter]:
f_index.write((' ' * NR_INNER_SPACES + LI_START_TAG + A_TAG +
LI_END_TAG + END_LINE) % (mapping + DB_INPUT + database,
mapping))
f_index.write(' ' * NR_INNER_SPACES + UL_END_TAG + LI_END_TAG +
END_LINE)
# the link to the table that displays the links to the crawled videos
f_index.write(' ' * NR_INNER_SPACES + BR_TAG + END_LINE)
f_index.write((' ' * NR_INNER_SPACES + A_TAG + END_LINE) %
(LINKS_KEY + DB_INPUT + database, LINKS_KEY))
# the link to the list that displays the existent database archive
f_index.write(' ' * NR_INNER_SPACES + BR_TAG + END_LINE)
f_index.write(' ' * NR_INNER_SPACES + BR_TAG + END_LINE)
f_index.write((' ' * NR_INNER_SPACES + A_TAG + END_LINE) %
(DB_KEY + DB_INPUT + database, DB_KEY))
# the link to the project documentation index
f_index.write(' ' * NR_INNER_SPACES + BR_TAG + END_LINE)
f_index.write(' ' * NR_INNER_SPACES + BR_TAG + END_LINE)
f_index.write((' ' * NR_INNER_SPACES + A_TAG + END_LINE) %
(config_pytomo.DOC_DIR + config_pytomo.TEMPLATE_FILE, DOC_KEY))
# the link to the pdf gen
f_index.write(' ' * NR_INNER_SPACES + BR_TAG + END_LINE)
f_index.write(' ' * NR_INNER_SPACES + BR_TAG + END_LINE)
# the end of the list of links
f_index.write(' ' * NR_INNER_SPACES + UL_END_TAG + END_LINE)
# the column div
f_index.write(' ' * NR_DIV_SPACES + DIV_END_TAG + END_LINE)
[docs]def write_right_column(f_index, average_values):
''' Function to write the header and contents of the right column - tables
containing the average values determined by the crawl and the list of
existent databases.
'''
if not average_values:
config_pytomo.LOG.debug('No data has been received to create the table')
return
# the column div
f_index.write((' ' * NR_DIV_SPACES + DIV_START_TAG + END_LINE) %
RIGHT_COL_NAME)
# the column header
f_index.write((' ' * NR_INNER_SPACES + HEADER_TAG + END_LINE) %
RIGHT_COL_HEADER)
# the table including the average parameters
# table div
f_index.write(' ' * NR_INNER_SPACES + TABLE_START_TAG + END_LINE)
# tr
f_index.write(' ' * NR_INNER_SPACES + TR_START_TAG + END_LINE)
# table header
for header in RIGHT_COL_TABLE_HEADER:
f_index.write((' ' * NR_INNER_SPACES + TH_TAG + END_LINE) % header)
# tr
f_index.write(' ' * NR_INNER_SPACES + TR_END_TAG + END_LINE)
# table contents
for parameter, value in zip(AVERAGE_PARAM_DESCRIPTION, average_values):
# tr
f_index.write(' ' * NR_INNER_SPACES + TR_START_TAG + END_LINE)
# td
f_index.write(' ' * NR_INNER_SPACES + TD_START_TAG + parameter +
TD_END_TAG + END_LINE)
f_index.write(' ' * NR_INNER_SPACES + TD_START_TAG + str(value) +
TD_END_TAG + END_LINE)
# tr
f_index.write(' ' * NR_INNER_SPACES + TR_END_TAG + END_LINE)
# table div
f_index.write(' ' * NR_INNER_SPACES + TABLE_END_TAG + END_LINE)
# the column div
f_index.write(' ' * NR_DIV_SPACES + DIV_END_TAG + END_LINE)
[docs]def write_end_div_refresh(f_index, database):
# the column div colleft
f_index.write(' ' * NR_DIV_SPACES + DIV_END_TAG + END_LINE)
# the column div colmid
f_index.write(' ' * NR_DIV_SPACES + DIV_END_TAG + END_LINE)
# the column div colmask threecol
f_index.write(' ' * NR_DIV_SPACES + DIV_END_TAG + END_LINE)
# if the database was modified recently, should refresh page
if (time.time() - os.path.getmtime(database)) < REFRESH_TIMEOUT:
f_index.write(ONLOAD_REFRESH_SCRIPT)
[docs]def write_index(timestamp, database, db_dir=config_pytomo.DATABASE_DIR):
''' Function to create the parameter_timestamp_index.html from template
files and include the images that also contain a specific timestamp.
'''
# if database is not empty
if timestamp:
# get the data from the database
# retrieve data on which average parameters and video links depend on
try:
avg_data = lib_database.PytomoDatabase(database).\
fetch_all_parameters(AVERAGE_PARAM)
except Error, mes:
config_pytomo.LOG.error('Unable to extract data %s with error:'
'%s' % (str(AVERAGE_PARAM), mes))
try:
links_data = lib_database.PytomoDatabase(database).\
fetch_all_parameters(URL_PARAM)
# assumes all crawled links are from the same service (hardcoded)
SERVICE = links_data[0][SERVICE_POS_IN_URL]
except Error, mes:
config_pytomo.LOG.error('Unable to extract data %s with error:'
'%s' % (str(URL_PARAM), mes))
else:
links_data = None
avg_data = None
# open all the file descriptors
try:
f_s_template = open(os.path.join(config_pytomo.TEMPLATES_DIR,
START_TEMPLATE_NAME), 'r')
except IOError:
config_pytomo.LOG.error('Problem opening file %s' %
os.path.join(config_pytomo.TEMPLATES_DIR,
START_TEMPLATE_NAME))
return
try:
f_e_template = open(os.path.join(config_pytomo.TEMPLATES_DIR,
END_TEMPLATE_NAME), 'r')
except IOError:
config_pytomo.LOG.error('Problem opening file %s' %
os.path.join(config_pytomo.TEMPLATES_DIR,
END_TEMPLATE_NAME))
return
# create a different index.html for each parameter, with specific timestamp
# first the elements in ALL_PLOTS.keys()
# then the elements in UNITS.keys()
# then LINKS_KEY, DB_KEY
f_param = []
for param in chain(ALL_PLOTS.keys(), ALL_PLOTS[ALL_KEY],
[LINKS_KEY, DB_KEY]):
try:
f_name = index_filename(param, timestamp)
except IOError:
config_pytomo.LOG.error('Problem opening index file for parameter'
' %s' % param)
return
f_index = open(f_name, 'w')
f_param.append(f_index)
# retrieve the header lines
header_lines = f_s_template.readlines()
# retrieve the footer lines
footer_lines = f_e_template.readlines()
# add the style and header of the page to each index
for f_index in f_param:
for line in header_lines:
f_index.write(line)
if timestamp:
avg_values = tuple(chain(*((SERVICE,),
compute_average_values(avg_data))))
# add the body of the page
# middle column - plots or table with the links to crawled videos or
# database archive
# parameter represents either a list (a key in the dictionary ALL_PLOTS),
# a single parameter that can be plotted or the LINKS_KEY / DB_KEY
# if anything else is given, the main graphs are plotted
for parameter, f_index in \
zip(ALL_PLOTS.keys(), f_param[:len(ALL_PLOTS.keys())]):
# TODO: must be moved back to middle column
# the middle column div
f_index.write((' ' * NR_DIV_SPACES + DIV_START_TAG + END_LINE) %
MID_COL_NAME)
if timestamp:
# TODO: function is not properly checked, this must be removed
try:
pdf_name = pdf_filename(parameter, timestamp)
rel_pdf_name = HTTP_CONNECTOR.join((config_pytomo.PDF_DIR,
os.path.basename(pdf_name)))
create_pdf(pdf_name, timestamp, avg_values,
*ALL_PLOTS[parameter])
f_index.write((' ' * NR_INNER_SPACES + P_TAG + END_LINE) %
PDF_MSG)
f_index.write((' ' * NR_INNER_SPACES + A_TAG + END_LINE) %
(rel_pdf_name, rel_pdf_name))
except Exception, mes:
config_pytomo.LOG.error('Could not create PDF, error: %s' % mes)
write_middle_column(f_index, timestamp, links_data, db_dir,
*ALL_PLOTS[parameter])
for parameter, f_index in zip(chain(ALL_PLOTS[ALL_KEY],
[LINKS_KEY, DB_KEY]), f_param[len(ALL_PLOTS.keys()):]):
# TODO: must be moved back to middle column
# the middle column div
f_index.write((' ' * NR_DIV_SPACES + DIV_START_TAG + END_LINE) %
MID_COL_NAME)
if timestamp:
# TODO: function is not properly checked, this must be removed
try:
pdf_name = pdf_filename(parameter, timestamp)
rel_pdf_name = HTTP_CONNECTOR.join((config_pytomo.PDF_DIR,
os.path.basename(pdf_name)))
create_pdf(pdf_name, timestamp, avg_values, parameter)
f_index.write((' ' * NR_INNER_SPACES + P_TAG + END_LINE) %
PDF_MSG)
f_index.write((' ' * NR_INNER_SPACES + A_TAG + END_LINE) %
(rel_pdf_name, rel_pdf_name))
except Exception, mes:
config_pytomo.LOG.error('Could not create PDF, error: %s' % mes)
write_middle_column(f_index, timestamp, links_data, db_dir, parameter)
# add the left, right columns and the footer to each index
for f_index in f_param:
# left column - links to plots
write_left_column(f_index, database)
# right column - average values
if timestamp:
write_right_column(f_index, avg_values)
# check if page should be automatically refreshed
write_end_div_refresh(f_index, database)
# add the footer of the page
for line in footer_lines:
f_index.write(line)
# close the file object
f_index.close()
f_s_template.close()
f_e_template.close()
[docs]def compute_average_values(data):
'''Function to return a tuple (start_crawl_time, end_crawl_time,
nr_videos, average_ping, average_download_time,
average_download_interruptions)
'''
# data is retrieved sorted from the database
start_crawl_time = data[0][TIMESTAMP_POSITION]\
[:MAX_TIMESTAMP_LENGTH]
end_crawl_time = data[-1][TIMESTAMP_POSITION]\
[:MAX_TIMESTAMP_LENGTH]
# total crawl time (sync time is added at the beginning and end for
# the plots)
total_time = (datetime.fromtimestamp(lib_database.time_to_epoch(
data[-1][TIMESTAMP_POSITION])) -
datetime.fromtimestamp(lib_database.time_to_epoch(
data[0][TIMESTAMP_POSITION])))
# each row in the dataset represents a video
nr_videos = len(data)
# filter values that are not None (can be zero)
not_none = lambda x: x is not None
# for some videos data cannot be retrieved
# average download time represents the average of DownloadTime
average_list = filter(not_none,
map(itemgetter(AVERAGE_PARAM.index('DownloadTime')), data))
average_download_time = average(average_list, len(average_list))
# average download interruptions represents the average of
# DownloadInterruptions
average_list = filter(not_none, map(itemgetter(
AVERAGE_PARAM.index('DownloadInterruptions')), data))
average_download_interruptions = average(average_list, len(average_list))
# average ping represents the average of PingAvg
average_list = filter(not_none, map(itemgetter(
AVERAGE_PARAM.index('PingAvg')), data))
average_ping = average(average_list, len(average_list))
return (total_time, start_crawl_time,
end_crawl_time, nr_videos, average_download_time,
average_download_interruptions, average_ping)
[docs]def average(values, known_values):
''' Computes the arithmetic mean of a list of numbers.
>>> average([20, 30, 70], 3)
40.0
>>> average([], 0)
nan
'''
return round((float(sum(values)) / known_values
if known_values else float('nan')), AVERAGE_PRECISION)
[docs]def logger_io():
'''Initialze the logger'''
config_pytomo.LOG = logging.getLogger('pytomo_io')
# to not have console output
#config_pytomo.LOG.propagate = False
config_pytomo.LOG.setLevel(config_pytomo.LOG_LEVEL)
timestamp = time.strftime("%Y-%m-%d.%H_%M_%S")
if config_pytomo.LOG_FILE == '-':
handler = logging.StreamHandler(sys.stdout)
else:
log_file = os.path.sep.join((config_pytomo. LOG_DIR,
'.'.join((timestamp, config_pytomo.LOG_FILE))))
try:
with open(log_file, 'a') as _:
pass
except IOError:
return 1
handler = logging.FileHandler(filename=log_file)
log_formatter = logging.Formatter("%(asctime)s - %(name)s - "
"%(levelname)s - %(message)s")
handler.setFormatter(log_formatter)
config_pytomo.LOG.addHandler(handler)
[docs]def main(argv=None):
"Program wrapper"
if argv is None:
argv = sys.argv[1:]
usage = ("%prog [-v] database")
parser = OptionParser(usage=usage)
# to run the doctest
parser.add_option('-v', '--verbose', dest = 'verbose',
action = 'store_true', default = False, help = 'verbose')
(options, args) = parser.parse_args(argv)
# Intialize the logger for standalone testing Logging
if not config_pytomo.LOG:
logger_io()
if options.verbose:
config_pytomo.LOG.setLevel(logging.DEBUG)
config_pytomo.LOG_FILE = '-'
return 0
if __name__ == '__main__':
import doctest
doctest.testmod()
sys.exit(main())