Package nflvid
[frames | no frames]

Source Code for Package nflvid

  1  """ 
  2  Introduction 
  3  ============ 
  4  A simple library to download, slice and search NFL game footage on a 
  5  play-by-play basis. 
  6   
  7  This library comes with preloaded play-by-play meta data, which describes the 
  8  start time of each play in the game footage. However, the actual footage does 
  9  not come with this library and is not released by me. This package therefore 
 10  provides utilities to batch download NFL Game Footage from the original source. 
 11   
 12  Once game footage is downloaded, you can use this library to search plays and 
 13  construct a playlist to play in any video player. 
 14  """ 
 15   
 16  import gzip 
 17  import os 
 18  import os.path as path 
 19  import socket 
 20  import sys 
 21  import urllib2 
 22   
 23  import bs4 
 24   
 25  import eventlet 
 26  httplib2 = eventlet.import_patched('httplib2') 
 27  import eventlet.green.subprocess as subprocess 
 28   
 29  from nflgame import OrderedDict 
 30   
 31  _xmlf = path.join(path.split(__file__)[0], 'pbp-xml', '%s-%s.xml.gz') 
 32  _xml_base_url = 'http://e2.cdnl3.neulion.com/nfl/edl/nflgr/%d/%s.xml' 
 33   
 34  _footage_url = 'http://nlds82.cdnl3nl.neulion.com/nlds_vod/nfl/vod/' \ 
 35                 '%s/%s/%s/%s/2_%s_%s_%s_%s_h_whole_1_%s.mp4.m3u8' 
 36   
 37  __play_cache = {}  # game eid -> play id -> Play 
 38   
 39   
def footage_url(gobj, quality='1600'):
    """
    Builds the URL of the full-game footage playlist for the given game
    at the given quality.

    The month and day path components are sliced out of the game's eid
    (characters 4-5 and 6-7 respectively).
    """
    eid = gobj.eid
    fields = (
        gobj.season(),
        eid[4:6],  # month
        eid[6:8],  # day
        gobj.gamekey,
        gobj.gamekey,
        gobj.away.lower(),
        gobj.home.lower(),
        gobj.season(),
        quality,
    )
    return _footage_url % fields
45 46
def footage_full(footage_dir, gobj):
    """
    Locates the full video of a game inside an nflvid footage directory.

    Returns the path to the video when it is readable, and None when it
    is not.
    """
    full = _full_path(footage_dir, gobj)
    return full if os.access(full, os.R_OK) else None
58 59
def _full_path(footage_dir, g):
    """
    Returns the path at which the full video of game `g` is stored inside
    `footage_dir`. The file is not required to exist.
    """
    game_dir = _game_path(footage_dir, g)
    return path.join(game_dir, 'full.mp4')
62 63
def _game_path(footage_dir, g):
    """
    Returns the directory holding all footage of game `g` inside
    `footage_dir`. The directory is named '{eid}-{gamekey}'.
    """
    game_dir = '%s-%s' % (g.eid, g.gamekey)
    return path.join(footage_dir, game_dir)
66 67
def _nice_game(gobj):
    """
    Returns a short human readable description of a game (season, week
    and the game's own string form) for use in log messages.
    """
    sched = gobj.schedule
    return '(Season: %s, Week: %s, %s)' % (sched['year'], sched['week'], gobj)
71 72
def footage_plays(footage_dir, gobj):
    """
    Returns a list of all footage broken down by play inside an nflvid
    footage directory. The list is sorted numerically by play id.

    Only files whose name (without its extension) is a play id, i.e. all
    digits, are returned. The full game video and any other stray file in
    the directory are ignored.

    If no footage breakdown exists for the game provided, then an empty list
    is returned.
    """
    game_dir = _game_path(footage_dir, gobj)
    if not os.access(game_dir, os.R_OK):
        return []

    def play_id(fname):
        # File name minus its extension, e.g. '0123.mp4' -> '0123'.
        return path.splitext(fname)[0]

    # Filtering on isdigit() keeps the numeric sort below from raising
    # ValueError on files that are not play clips (hidden files, partial
    # downloads, ...).
    clips = [f for f in os.listdir(game_dir)
             if f != 'full.mp4' and play_id(f).isdigit()]
    return sorted(clips, key=lambda f: int(play_id(f)))
86 87
def download(footage_dir, gobj, quality='1600', dry_run=False):
    """
    Starts an ffmpeg process to download the full footage of the given
    game with the quality provided. The qualities available are:
    400, 800, 1200, 1600, 2400, 3000, 4500 with 4500 being the best.

    The footage will be saved to the following path::

        footage_dir/{eid}-{gamekey}/full.mp4

    If footage is already at that path, then a LookupError is raised.

    If dry_run is true, only the first 30 seconds are downloaded.

    Failures (bad URL, ffmpeg missing, ffmpeg exiting abnormally) are
    reported on stderr; nothing is raised for them and None is returned.

    A full game's worth of footage at a quality of 1600 is about 2GB.
    """
    fp = _full_path(footage_dir, gobj)
    if os.access(fp, os.R_OK):
        raise LookupError('Footage path "%s" already exists.' % fp)

    url = footage_url(gobj, quality)
    game = _nice_game(gobj)

    # Let's check to see if the URL exists. We could let ffmpeg catch
    # the error, but since this is a common error, let's show something
    # nicer than a bunch of ffmpeg vomit.
    resp, _ = httplib2.Http().request(url, 'HEAD')
    if resp['status'] != '200':
        sys.stderr.write('BAD URL (http status %s) for game %s: %s\n'
                         % (resp['status'], game, url))
        sys.stderr.write('FAILED to download game %s\n' % game)
        return

    # The game directory may already exist (e.g. left behind by an earlier
    # failed download); os.makedirs would raise OSError in that case.
    game_dir = _game_path(footage_dir, gobj)
    if not path.isdir(game_dir):
        os.makedirs(game_dir)

    cmd = ['ffmpeg', '-i', url]
    if dry_run:
        cmd += ['-t', '30']
    cmd += ['-strict', '-2', fp]

    sys.stderr.write('Downloading game %s %s\n' % (gobj.eid, game))
    try:
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        output = p.communicate()[0].strip()
    except OSError as e:
        # ffmpeg could not be started at all (e.g. it is not installed).
        sys.stderr.write("Could not run '%s' (errno: %d): %s\n"
                         % (' '.join(cmd), e.errno, e.strerror))
        sys.stderr.write('FAILED to download game %s\n' % game)
        return

    # Any non-zero code is a failure. Negative codes mean ffmpeg was
    # terminated by a signal and must not be reported as success.
    if p.returncode != 0:
        indented = '\n'.join([' %s' % line for line in output.split('\n')])
        sys.stderr.write("Could not run '%s' (exit code %d):\n%s\n"
                         % (' '.join(cmd), p.returncode, indented))
        sys.stderr.write('FAILED to download game %s\n' % game)
        return

    sys.stderr.write('DONE with game %s\n' % game)
146 147
def plays(gobj):
    """
    Returns an ordered dictionary of all plays for a particular game.

    The game must be a nflgame.game.Game object.

    If there is a problem retrieving the data, None is returned.

    If the game is over, then the XML data is saved to disk and the parsed
    plays are served from an in-memory cache on subsequent calls.
    """
    if gobj.game_over() and gobj.eid in __play_cache:
        return __play_cache[gobj.eid]

    rawxml = _get_xml_data((gobj.eid, gobj.gamekey))
    ps = _xml_play_data(rawxml)
    if ps is None:
        return None
    __play_cache[gobj.eid] = ps

    # Save the XML data to disk if the game is over.
    if gobj.game_over():
        fp = _xmlf % (gobj.eid, gobj.gamekey)
        try:
            f = gzip.open(fp, 'wb')
            try:
                f.write(rawxml)
            finally:
                # Close explicitly so the compressed data is flushed to
                # disk now instead of whenever the handle is collected.
                f.close()
        except IOError:
            sys.stderr.write('Could not cache XML data. Please make '
                             '"%s" writable.\n' % path.dirname(fp))
    return ps
176 177
def play(gobj, playid):
    """
    Returns a Play object given a game and a play id. The game must be
    a nflgame.game.Game object.

    If a play with the given id does not exist, or the play data for the
    game could not be retrieved, None is returned.
    """
    ps = plays(gobj)
    if ps is None:
        # plays() returns None when the data could not be retrieved.
        return None
    return ps.get(playid, None)
186 187
class Play (object):
    """
    A single play together with the meta data that locates it inside the
    game footage.

    `start` corresponds to the 'ArchiveTCIN', which is when the play
    starts. There is no record of when a play stops, so `duration` is the
    difference between this play's start and the start of the next play;
    it is None for the last play recorded.

    `playid` is the foreign key that maps to play data stored in nflgame.
    """

    def __init__(self, start, duration, playid):
        self.start = start
        self.duration = duration
        self.playid = playid

    def __str__(self):
        fields = (self.playid, self.start, self.duration)
        return '(%s, %s, %s)' % fields
204 205
class PlayTime (object):
    """
    Represents a footage time point, in the format HH:MM:SS:MM

    The last field is treated as hundredths of a second and is stored in
    `milli` as milliseconds.
    """

    def __init__(self, point):
        """
        Parses `point`, a string of four colon separated integers.

        Raises AssertionError if a field is not an integer or if there are
        not exactly four fields.
        """
        self.point = point

        # AssertionError is raised explicitly (rather than with `assert`)
        # so that validation still happens when Python runs with -O.
        try:
            parts = [int(field) for field in self.point.split(':')]
        except ValueError:
            raise AssertionError('Bad play time format: %s' % self.point)

        if len(parts) != 4:
            raise AssertionError('Expected 4 parts but got %d in: %s'
                                 % (len(parts), self.point))

        self.hh, self.mm, self.ss, self.milli = parts

        # I believe milliseconds is given in tens of milliseconds.
        self.milli *= 10

    def seconds(self):
        """
        Returns this time point rounded to the nearest second.
        """
        secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
        # self.milli is in milliseconds at this point, so half a second
        # is 500 (not 50).
        if self.milli >= 500:
            secs += 1
        return secs

    def fractional(self):
        """
        Returns this time point as fractional seconds based on milliseconds.
        """
        secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
        secs = (1000 * secs) + self.milli
        return float(secs) / 1000.0

    def __cmp__(self, other):
        # Orders time points chronologically. Same result as the builtin
        # cmp() on the two fractional values.
        mine, theirs = self.fractional(), other.fractional()
        return (mine > theirs) - (mine < theirs)

    def __sub__(self, other):
        """
        Returns the difference rounded to nearest second between
        two time points. The 'other' time point must take place before the
        current time point; otherwise AssertionError is raised.
        """
        mine, theirs = self.fractional(), other.fractional()
        if theirs > mine:
            raise AssertionError('%s is not <= than %s' % (other, self))
        return int(round(mine - theirs))

    def __str__(self):
        return self.point
258 259
def _xml_play_data(data):
    """
    Parses the XML raw data given into an ordered dictionary of Play
    objects. The dictionary is keyed by play id.

    Rows without an 'id', 'CATIN' or 'ArchiveTCIN' element are skipped, as
    are rows whose start time is earlier than the previously kept row.

    If data is None, None is returned.
    """
    if data is None:
        return None

    # Load everything into a list first, since we need to look ahead to see
    # the next play's start time to compute the current play's duration.
    rows = []
    for row in bs4.BeautifulSoup(data).find_all('row'):
        playid = row.find('id')
        if playid is None or row.find('CATIN') is None:
            continue
        # `text` is a property in bs4, not a method; get_text() is the
        # callable form.
        playid = playid.get_text().strip()

        start = row.find('ArchiveTCIN')
        if start is None:
            continue
        start = PlayTime(start.get_text().strip())

        # If this start comes before the last start time, skip it.
        if len(rows) > 0 and start < rows[-1][1]:
            continue
        rows.append((playid, start))

    d = OrderedDict()
    last = len(rows) - 1
    for i, (playid, start) in enumerate(rows):
        # The last play has no successor, so its duration is unknown.
        duration = rows[i + 1][1] - start if i < last else None
        d[playid] = Play(start, duration, playid)
    return d
294 295
def _get_xml_data(game=None, fpath=None):
    """
    Returns the XML play data corresponding to the game given. A game must
    be specified as a tuple: the first element should be an eid and the second
    element should be a game key. For example, ('2012102108', '55604').

    If the XML data is already on disk, it is read, decompressed and returned.

    Otherwise, the XML data is downloaded from the NFL web site. If the data
    doesn't exist yet or there was an error, _get_xml_data returns None.

    If fpath is given, then the XML data is read from the file at fpath
    and game is ignored.
    """
    assert game is not None or fpath is not None

    def read_gz(fp):
        # Read and decompress a whole gzip file, always closing the handle.
        f = gzip.open(fp)
        try:
            return f.read()
        finally:
            f.close()

    if fpath is not None:
        return read_gz(fpath)

    fpath = _xmlf % (game[0], game[1])
    if os.access(fpath, os.R_OK):
        return read_gz(fpath)

    # The eid starts with YYYYMM. Games played in January through March
    # are filed under the previous year.
    year = int(game[0][0:4])
    month = int(game[0][4:6])
    if month <= 3:
        year -= 1
    u = _xml_base_url % (year, game[1])  # The year and the game key.

    try:
        resp = urllib2.urlopen(u, timeout=10)
        try:
            return resp.read()
        finally:
            resp.close()
    except (urllib2.URLError, socket.timeout) as e:
        # URLError also covers HTTPError, as well as connection level
        # failures such as DNS errors or refused connections.
        sys.stderr.write('%s\n' % e)
    return None
329