Package nflvid
[frames | no frames]

Source Code for Package nflvid

  1  """ 
  2  Introduction 
  3  ============ 
  4  A simple library to download, slice and search NFL game footage on a 
  5  play-by-play basis. 
  6   
  7  This library comes with preloaded play-by-play meta data, which describes the 
  8  start time of each play in the game footage. However, the actual footage does 
  9  not come with this library and is not released by me. This package therefore 
 10  provides utilities to batch download NFL Game Footage from the original source. 
 11   
 12  Once game footage is downloaded, you can use this library to search plays and 
 13  construct a playlist to play in any video player. 
 14  """ 
 15   
 16  import gzip 
 17  import os 
 18  import os.path as path 
 19  import socket 
 20  import sys 
 21  import urllib2 
 22   
 23  import bs4 
 24   
 25  import eventlet 
 26  httplib2 = eventlet.import_patched('httplib2') 
 27  import eventlet.green.subprocess as subprocess 
 28   
 29  from nflgame import OrderedDict 
 30   
 31  _xmlf = path.join(path.split(__file__)[0], 'pbp-xml', '%s-%s.xml.gz') 
 32  _xml_base_url = 'http://e2.cdnl3.neulion.com/nfl/edl/nflgr/%d/%s.xml' 
 33   
 34  _footage_url = 'http://nlds82.cdnl3nl.neulion.com/nlds_vod/nfl/vod/' \ 
 35                 '%s/%s/%s/%s/2_%s_%s_%s_%s_h_whole_1_%s.mp4.m3u8' 
 36   
 37  __play_cache = {}  # game eid -> play id -> Play 
 38   
 39   
def footage_url(gobj, quality='1600'):
    """
    Builds the URL of the full game footage for gobj at the given quality.

    The month and day in the URL are taken from characters 4-5 and 6-7 of
    the game's eid respectively.
    """
    eid = gobj.eid
    fields = (
        gobj.season(), eid[4:6], eid[6:8], gobj.gamekey,
        gobj.gamekey, gobj.away.lower(), gobj.home.lower(), gobj.season(),
        quality,
    )
    return _footage_url % fields
45 46
def footage_full(footage_dir, gobj):
    """
    Locates the full video of a game inside an nflvid footage directory.

    Returns the path to the video, or None when no readable file exists
    at that path.
    """
    video = _full_path(footage_dir, gobj)
    return video if os.access(video, os.R_OK) else None
58 59
def footage_plays(footage_play_dir, gobj):
    """
    Returns a list of all footage broken down by play inside an nflvid
    footage directory. The list is sorted numerically by play id.

    The play id of an entry is its file name without the last four
    characters (the '.mp4' extension). The entry 'full.mp4' and any entry
    whose play id is not an integer are left out of the result.

    If no footage breakdown exists for the game provided, then an empty list
    is returned.
    """
    game_dir = _play_path(footage_play_dir, gobj)
    if not os.access(game_dir, os.R_OK):
        return []

    numbered = []
    for name in os.listdir(game_dir):
        if name == 'full.mp4':
            continue
        try:
            numbered.append((int(name[0:-4]), name))
        except ValueError:
            # Stray file that isn't named after a play id; without this
            # guard a single such file would abort the whole listing.
            continue

    # Sort on the numeric id only so that ties keep their listing order.
    numbered.sort(key=lambda pair: pair[0])
    return [name for _, name in numbered]
73 74
def _full_path(footage_dir, g):
    """Returns the path of the full game video for g inside footage_dir."""
    filename = '%s-%s.mp4' % (g.eid, g.gamekey)
    return path.join(footage_dir, filename)
77 78
def _play_path(footage_play_dir, g):
    """
    Returns the directory holding the play-by-play pieces of game g inside
    footage_play_dir.
    """
    dirname = '%s-%s' % (g.eid, g.gamekey)
    return path.join(footage_play_dir, dirname)
81 82
def _nice_game(gobj):
    """
    Returns a short description of gobj (season, week and the game's own
    string form) for use in status messages.
    """
    sched = gobj.schedule
    return '(Season: %s, Week: %s, %s)' % (sched['year'], sched['week'], gobj)
86 87
def unsliced_plays(footage_play_dir, gobj, dry_run=False):
    """
    Looks through the game directory inside footage_play_dir and collects
    the plays that have not been sliced yet. A play with id {playid} counts
    as sliced only when this file is readable::

        {footage_play_dir}/{eid}-{gamekey}/{playid}.mp4

    Every other play of the game is included in the returned list, so an
    empty list means that all plays have been sliced. None is returned
    when the play-by-play meta data could not be retrieved.

    If dry_run is true, then only the first 10 plays of the game are
    examined.
    """
    ps = plays(gobj)
    outdir = _play_path(footage_play_dir, gobj)
    if ps is None:
        return None

    candidates = ps.values()
    if dry_run:
        candidates = list(candidates)[:10]

    def is_sliced(p):
        return os.access(path.join(outdir, '%s.mp4' % p.idstr()), os.R_OK)

    return [p for p in candidates if not is_sliced(p)]
120 121
def slice(footage_play_dir, full_footage_file, gobj, threads=4, dry_run=False):
    """
    Cuts the given footage file into play-by-play pieces with ffmpeg.
    full_footage_file should be a full game downloaded with nflvid-footage
    and gobj the matching nflgame.game.Game object.

    The pieces are written below footage_play_dir::

        {footage_play_dir}/{eid}-{gamekey}/{playid}.mp4

    Work is never duplicated: plays that already have a video file are
    left alone.

    Several ffmpeg instances run simultaneously through an eventlet green
    pool of at most `threads` workers, and this function only returns once
    all of them have finished.

    If dry_run is true, then only the first 10 plays of the game are
    sliced.
    """
    outdir = _play_path(footage_play_dir, gobj)
    if not os.access(outdir, os.R_OK):
        os.makedirs(outdir)

    pool = eventlet.greenpool.GreenPool(threads)
    todo = unsliced_plays(footage_play_dir, gobj, dry_run)
    if todo:
        # todo is None when the play meta data could not be retrieved.
        for p in todo:
            pool.spawn_n(
                slice_play, footage_play_dir, full_footage_file, gobj, p)
    pool.waitall()
152 153
def slice_play(footage_play_dir, full_footage_file, gobj, play):
    """
    This is just like slice, but it only slices the play provided.
    In typical cases, slice should be used since it makes sure not
    to duplicate work.

    The piece is written to::

        {footage_play_dir}/{eid}-{gamekey}/{playid}.mp4

    and is cut for play.duration seconds; when the duration is None no
    length limit is passed to ffmpeg.

    This function will not check if the play-by-play directory for
    gobj has been created.
    """
    outdir = _play_path(footage_play_dir, gobj)
    st = play.start

    # st.milli is a number of milliseconds, while ffmpeg reads the digits
    # after the '.' as a decimal fraction of a second. It therefore has to
    # be zero padded to three digits: 50ms must be '.050', not '.50'.
    start_time = '%02d:%02d:%02d.%03d' % (st.hh, st.mm, st.ss, st.milli)
    outpath = path.join(outdir, '%s.mp4' % play.idstr())

    cmd = ['ffmpeg',
           '-ss', start_time,
           '-i', full_footage_file]
    if play.duration is not None:
        cmd += ['-t', '%d' % play.duration]
    cmd += ['-map', '0',
            '-strict', '-2',
            outpath]
    _run_command(cmd)
177 178
def download(footage_dir, gobj, quality='1600', dry_run=False):
    """
    Runs ffmpeg to download the full footage of the given game at the
    quality provided. The qualities available are:
    400, 800, 1200, 1600, 2400, 3000, 4500 with 4500 being the best.

    The footage is saved to::

        footage_dir/{eid}-{gamekey}.mp4

    A LookupError is raised if footage is already at that path.

    If dry_run is true, ffmpeg is limited to 30 seconds ('-t 30').

    A full game's worth of footage at a quality of 1600 is about 2GB.
    """
    fp = _full_path(footage_dir, gobj)
    if os.access(fp, os.R_OK):
        raise LookupError('Footage path "%s" already exists.' % fp)

    url = footage_url(gobj, quality)

    # A missing URL is a common failure, so probe it with a HEAD request
    # first and report it briefly instead of leaving it to ffmpeg.
    resp, _ = httplib2.Http().request(url, 'HEAD')
    status = resp['status']
    if status != '200':
        sys.stderr.write('BAD URL (http status %s) for game %s: %s'
                         % (status, _nice_game(gobj), url) + '\n')
        sys.stderr.write(
            'FAILED to download game %s' % _nice_game(gobj) + '\n')
        return

    limit = ['-t', '30'] if dry_run else []
    cmd = ['ffmpeg', '-i', url] + limit + ['-strict', '-2', fp]

    sys.stderr.write('Downloading game %s %s'
                     % (gobj.eid, _nice_game(gobj)) + '\n')
    if _run_command(cmd):
        sys.stderr.write('DONE with game %s' % _nice_game(gobj) + '\n')
    else:
        sys.stderr.write(
            'FAILED to download game %s' % _nice_game(gobj) + '\n')
220 221
def _run_command(cmd):
    """
    Runs cmd (a list of program arguments) and waits for it to finish,
    capturing its stdout and stderr together.

    Returns True if the command exited with status 0. Otherwise a message
    is written to stderr and False is returned. This covers a command that
    could not be started, one that exited with a non-zero status, and one
    that was terminated by a signal.
    """
    try:
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        output = p.communicate()[0].strip()
    except OSError as e:
        sys.stderr.write("Could not run '%s' (errno: %d): %s"
                         % (' '.join(cmd), e.errno, e.strerror) + '\n')
        return False

    # A child killed by a signal reports a negative return code, so the
    # check must be "!= 0"; "> 0" would report such a run as a success.
    if p.returncode != 0:
        if not isinstance(output, str):
            # The pipe may hand back bytes; make them printable text.
            output = output.decode('utf-8', 'replace')
        indented = '\n'.join(' %s' % line for line in output.split('\n'))
        sys.stderr.write("Could not run '%s' (exit code %d):\n%s"
                         % (' '.join(cmd), p.returncode, indented) + '\n')
        return False
    return True
243 244
def plays(gobj):
    """
    Returns an ordered dictionary of all plays for a particular game.

    The game must be a nflgame.game.Game object.

    If there is a problem retrieving the data, None is returned.

    Results are remembered in memory per game eid, and are reused only
    once the game is over. If the game is over, then the XML data is also
    saved to disk; a failure to write it is reported on stderr but does
    not affect the return value.
    """
    if gobj.game_over() and gobj.eid in __play_cache:
        return __play_cache[gobj.eid]

    rawxml = _get_xml_data((gobj.eid, gobj.gamekey))
    ps = _xml_play_data(rawxml)
    if ps is None:
        return None
    if len(ps) == 0:
        sys.stderr.write('Could not find ArchiveTCIN field in XML data. '
                         'This field provides the start time of each play.'
                         + '\n')
        return None
    __play_cache[gobj.eid] = ps

    # Save the XML data to disk if the game is over.
    if gobj.game_over():
        fp = _xmlf % (gobj.eid, gobj.gamekey)
        try:
            out = gzip.open(fp, 'w+')
            try:
                out.write(rawxml)
            finally:
                # Close explicitly so the gzip stream is completed and the
                # file handle is released right away.
                out.close()
        except IOError:
            sys.stderr.write('Could not cache XML data. Please make '
                             '"%s" writable.' % path.dirname(fp) + '\n')
    return ps
277 278
def play(gobj, playid):
    """
    Returns a Play object given a game and a play id. The game must be
    a nflgame.game.Game object.

    If a play with the given id does not exist, or if the play data for
    the game could not be retrieved, None is returned.
    """
    ps = plays(gobj)
    if ps is None:
        # plays() signals a retrieval problem with None; there is nothing
        # to look the play id up in.
        return None
    return ps.get(playid, None)
287 288
class Play (object):
    """
    A single play together with the meta data that ties it to game footage.

    `start` is when the play begins in the footage (the 'ArchiveTCIN'
    value). There is no record of when a play stops, so `duration` is the
    difference between this play's start and the start of the next play,
    or None for the last play recorded.

    `playid` is the foreign key that maps to play data stored in nflgame.
    """
    def __init__(self, start, duration, playid):
        self.start = start
        self.duration = duration
        self.playid = playid

    def idstr(self):
        """Returns the play id as a string, zero padded to four digits."""
        return str(int(self.playid)).zfill(4)

    def __str__(self):
        fields = (self.playid, self.start, self.duration)
        return '(%s, %s, %s)' % fields
309 310
class PlayTime (object):
    """
    Represents a footage time point, in the format HH:MM:SS:MM

    The last field is multiplied by ten on construction, so the `milli`
    attribute holds milliseconds. The original text is kept in `point`.
    """
    def __init__(self, point):
        """
        Parses point, a string with four integer fields separated by ':'.

        An AssertionError is raised when a field is not an integer or when
        there are not exactly four fields.
        """
        self.point = point

        # Raise explicitly instead of using 'assert False': assert
        # statements are removed under 'python -O', which would let bad
        # input through to the code below.
        try:
            parts = [int(piece) for piece in self.point.split(':')]
        except ValueError:
            raise AssertionError('Bad play time format: %s' % self.point)

        if len(parts) != 4:
            raise AssertionError('Expected 4 parts but got %d in: %s'
                                 % (len(parts), self.point))

        self.hh, self.mm, self.ss, self.milli = parts

        # I believe milliseconds is given in tens of milliseconds.
        self.milli *= 10

    def seconds(self):
        """
        Returns this time point rounded to the nearest second.
        """
        secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
        # milli is in milliseconds, so half a second is 500 (not 50).
        if self.milli >= 500:
            secs += 1
        return secs

    def fractional(self):
        """
        Returns this time point as fractional seconds based on milliseconds.
        """
        secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
        secs = (1000 * secs) + self.milli
        return float(secs) / 1000.0

    def __cmp__(self, other):
        """Orders time points by their fractional seconds."""
        return cmp(self.fractional(), other.fractional())

    def __sub__(self, other):
        """
        Returns the difference rounded to nearest second between
        two time points. The 'other' time point must take place before the
        current time point.
        """
        # Compare the fractional values directly; this is the same ordering
        # that __cmp__ defines.
        assert other.fractional() <= self.fractional(), \
            '%s is not <= than %s' % (other, self)
        return int(round(self.fractional() - other.fractional()))

    def __str__(self):
        return self.point
363 364
def _xml_play_data(data):
    """
    Turns the raw XML given into an ordered dictionary of Play objects
    keyed by play id. None is returned when data is None.
    """
    if data is None:
        return None

    # Collect (play id, start time) pairs first: the duration of a play is
    # derived from the start time of the play that follows it.
    rows = []
    for row in bs4.BeautifulSoup(data).find_all('row'):
        id_tag = row.find('id')
        if not id_tag or not row.find('catin'):
            continue
        start_tag = row.find('archivetcin')
        if not start_tag:
            continue

        playid = id_tag.get_text().strip()
        start = PlayTime(start_tag.get_text().strip())

        # Skip a row whose start time is earlier than the last one kept.
        if rows and start < rows[-1][1]:
            continue
        rows.append((playid, start))

    # Pair every row with the start of its successor (None for the last).
    following = [nxt for _, nxt in rows[1:]] + [None]
    d = OrderedDict()
    for (playid, start), nxt in zip(rows, following):
        duration = None if nxt is None else nxt - start
        d[playid] = Play(start, duration, playid)
    return d
399 400
def _get_xml_data(game=None, fpath=None):
    """
    Returns the XML play data corresponding to the game given. A game must
    be specified as a tuple: the first element should be an eid and the second
    element should be a game key. For example, ('2012102108', '55604').

    If the XML data is already on disk, it is read, decompressed and returned.

    Otherwise, the XML data is downloaded from the NFL web site. If the data
    doesn't exist yet or there was an error (an HTTP error status, a URL
    that cannot be opened, or a timeout), the error is written to stderr
    and _get_xml_data returns None.

    If fpath is given, then the XML data is read from the file at fpath
    and game is ignored.
    """
    assert game is not None or fpath is not None

    if fpath is None:
        cached = _xmlf % (game[0], game[1])
        if os.access(cached, os.R_OK):
            fpath = cached

    if fpath is not None:
        f = gzip.open(fpath)
        try:
            return f.read()
        finally:
            # Don't leave the file handle open.
            f.close()

    # The URL uses the year from the eid, minus one when the game is
    # played in January through March.
    year = int(game[0][0:4])
    month = int(game[0][4:6])
    if month <= 3:
        year -= 1
    u = _xml_base_url % (year, game[1])  # The year and the game key.
    try:
        resp = urllib2.urlopen(u, timeout=10)
        try:
            return resp.read()
        finally:
            resp.close()
    except urllib2.URLError as e:
        # URLError is the base class of HTTPError, so this also covers
        # failures to reach the server at all.
        sys.stderr.write('%s\n' % e)
    except socket.timeout as e:
        sys.stderr.write('%s\n' % e)
    return None
434