Package nflvid

Source Code for Package nflvid

  1  """ 
  2  Introduction 
  3  ============ 
  4  A simple library to download, slice and search NFL game footage on a 
  5  play-by-play basis. 
  6   
  7  This library comes with preloaded play-by-play meta data, which describes the 
  8  start time of each play in the game footage. However, the actual footage does 
  9  not come with this library and is not released by me. This package therefore 
 10  provides utilities to batch download NFL Game Footage from the original source. 
 11   
 12  Once game footage is downloaded, you can use this library to search plays and 
 13  construct a playlist to play in any video player. 
 14  """ 
import gzip
import os
import os.path as path
import socket
import sys
import urllib2

import bs4

import eventlet
httplib2 = eventlet.import_patched('httplib2')
import eventlet.green.subprocess as subprocess

from nflgame import OrderedDict

_xmlf = path.join(path.split(__file__)[0], 'pbp-xml', '%s-%s.xml.gz')
_xml_base_url = 'http://e2.cdnl3.neulion.com/nfl/edl/nflgr/%d/%s.xml'

_footage_url = 'http://nlds82.cdnl3nl.neulion.com/nlds_vod/nfl/vod/' \
               '%s/%s/%s/%s/2_%s_%s_%s_%s_h_whole_1_%s.mp4.m3u8'

__play_cache = {}  # game eid -> play id -> Play


def footage_url(gobj, quality='1600'):
    month, day = gobj.eid[4:6], gobj.eid[6:8]
    return _footage_url \
        % (gobj.season(), month, day, gobj.gamekey, gobj.gamekey,
           gobj.away.lower(), gobj.home.lower(), gobj.season(), quality)


def footage_full(footage_dir, gobj):
    """
    Returns the path to the full video for a given game inside an nflvid
    footage directory.

    If the full footage doesn't exist, then None is returned.
    """
    fp = _full_path(footage_dir, gobj)
    if not os.access(fp, os.R_OK):
        return None
    return fp


def footage_plays(footage_play_dir, gobj):
    """
    Returns a list of all footage broken down by play inside an nflvid
    footage directory. The list is sorted numerically by play id.

    If no footage breakdown exists for the game provided, then an empty list
    is returned.
    """
    fp = _play_path(footage_play_dir, gobj)
    if not os.access(fp, os.R_OK):
        return []
    entries = filter(lambda f: f != 'full.mp4', os.listdir(fp))
    return sorted(entries, key=lambda s: int(s[0:-4]))
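# A small illustrative sketch (not part of the library) of turning the result
# of footage_plays into an m3u playlist; the directories and the `game`
# variable are hypothetical.
#
#     plays_dir = '/tmp/footage-plays'
#     game_dir = _play_path(plays_dir, game)
#     with open('playlist.m3u', 'w+') as m3u:
#         for fname in footage_plays(plays_dir, game):  # e.g. '0042.mp4'
#             print >> m3u, path.join(game_dir, fname)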
def _full_path(footage_dir, g):
    return path.join(footage_dir, '%s-%s.mp4' % (g.eid, g.gamekey))


def _play_path(footage_play_dir, g):
    return path.join(footage_play_dir, '%s-%s' % (g.eid, g.gamekey))


def _nice_game(gobj):
    return '(Season: %s, Week: %s, %s)' \
           % (gobj.schedule['year'], gobj.schedule['week'], gobj)


def unsliced_plays(footage_play_dir, gobj, dry_run=False):
    """
    Scans the game directory inside footage_play_dir and returns a list
    of plays that haven't been sliced yet. In particular, a play is only
    considered sliced if the following file is readable, assuming {playid}
    is its play id::

        {footage_play_dir}/{eid}-{gamekey}/{playid}.mp4

    All plays for the given game that don't meet this criterion are
    returned in the list.

    If the list is empty, then all plays for the game have been sliced.

    If dry_run is true, then only the first 10 plays of the game are
    considered.
    """
    ps = plays(gobj)
    outdir = _play_path(footage_play_dir, gobj)

    unsliced = []
    for i, p in enumerate(ps.values()):
        if dry_run and i >= 10:
            break
        pid = p.idstr()
        if not os.access(path.join(outdir, '%s.mp4' % pid), os.R_OK):
            unsliced.append(p)
    return unsliced


def slice(footage_play_dir, full_footage_file, gobj, threads=4, dry_run=False):
    """
    Uses ffmpeg to slice the given footage file into play-by-play pieces.
    The full_footage_file should point to a full game downloaded with
    nflvid-footage and gobj should be the corresponding nflgame.game.Game
    object.

    The footage_play_dir is where the pieces will be saved::

        {footage_play_dir}/{eid}-{gamekey}/{playid}.mp4

    This function will not duplicate work. If a video file exists for
    a particular play, then slice will not regenerate it.

    Note that this function uses an eventlet green pool to run multiple
    ffmpeg instances simultaneously. The maximum number of threads to
    use is specified by threads. This function only terminates when all
    threads have finished processing.

    If dry_run is true, then only the first 10 plays of the game are
    sliced.
    """
    outdir = _play_path(footage_play_dir, gobj)
    if not os.access(outdir, os.R_OK):
        os.makedirs(outdir)

    pool = eventlet.greenpool.GreenPool(threads)
    for p in unsliced_plays(footage_play_dir, gobj, dry_run):
        pool.spawn_n(slice_play, footage_play_dir, full_footage_file, gobj, p)
    pool.waitall()
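# Hypothetical usage sketch: slicing a game that was previously fetched with
# download(). The directories and the `game` variable are illustrative only.
#
#     full = footage_full('/tmp/footage', game)
#     if full is not None:
#         slice('/tmp/footage-plays', full, game, threads=4)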
def slice_play(footage_play_dir, full_footage_file, gobj, play):
    """
    This is just like slice, but it only slices the play provided.
    In typical cases, slice should be used since it makes sure not
    to duplicate work.

    This function will not check if the play-by-play directory for
    gobj has been created.
    """
    outdir = _play_path(footage_play_dir, gobj)
    st = play.start
    start_time = '%02d:%02d:%02d.%d' % (st.hh, st.mm, st.ss, st.milli)
    outpath = path.join(outdir, '%s.mp4' % play.idstr())

    cmd = ['ffmpeg',
           '-ss', start_time,
           '-i', full_footage_file]
    if play.duration is not None:
        cmd += ['-t', '%d' % play.duration]
    cmd += ['-map', '0',
            '-strict', '-2',
            outpath]
    _run_command(cmd)
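# For orientation, a play with an ArchiveTCIN of 01:02:03:45, a computed
# duration of 8 seconds and play id 97 would yield roughly this command
# (the output directory shown is hypothetical):
#
#     ffmpeg -ss 01:02:03.450 -i full.mp4 -t 8 -map 0 -strict -2 \
#         /tmp/footage-plays/{eid}-{gamekey}/0097.mp4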
def download(footage_dir, gobj, quality='1600', dry_run=False):
    """
    Starts an ffmpeg process to download the full footage of the given
    game with the quality provided. The qualities available are:
    400, 800, 1200, 1600, 2400, 3000, 4500 with 4500 being the best.

    The footage will be saved to the following path::

        footage_dir/{eid}-{gamekey}.mp4

    If footage is already at that path, then a LookupError is raised.

    A full game's worth of footage at a quality of 1600 is about 2GB.
    """
    fp = _full_path(footage_dir, gobj)
    if os.access(fp, os.R_OK):
        raise LookupError('Footage path "%s" already exists.' % fp)

    url = footage_url(gobj, quality)

    # Let's check to see if the URL exists. We could let ffmpeg catch
    # the error, but since this is a common error, let's show something
    # nicer than a bunch of ffmpeg vomit.
    resp, _ = httplib2.Http().request(url, 'HEAD')
    if resp['status'] != '200':
        print >> sys.stderr, 'BAD URL (http status %s) for game %s: %s' \
            % (resp['status'], _nice_game(gobj), url)
        print >> sys.stderr, 'FAILED to download game %s' % _nice_game(gobj)
        return

    cmd = ['ffmpeg', '-i', url]
    if dry_run:
        cmd += ['-t', '30']
    cmd += ['-strict', '-2', fp]

    print >> sys.stderr, 'Downloading game %s %s' \
        % (gobj.eid, _nice_game(gobj))
    if not _run_command(cmd):
        print >> sys.stderr, 'FAILED to download game %s' % _nice_game(gobj)
    else:
        print >> sys.stderr, 'DONE with game %s' % _nice_game(gobj)
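# A quick sketch of a dry-run download (the directory and the `game` variable
# are hypothetical): with dry_run=True only the first 30 seconds are fetched,
# which is a cheap way to check that the footage URL works.
#
#     try:
#         download('/tmp/footage', game, quality='800', dry_run=True)
#     except LookupError, e:
#         print >> sys.stderr, e  # footage already exists at that path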
def _run_command(cmd):
    try:
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        output = p.communicate()[0].strip()

        if p.returncode > 0:
            err = subprocess.CalledProcessError(p.returncode, cmd)
            err.output = output
            raise err
    except subprocess.CalledProcessError, e:
        indent = lambda s: '\n'.join(map(lambda l: '    %s' % l, s.split('\n')))
        print >> sys.stderr, "Could not run '%s' (exit code %d):\n%s" \
            % (' '.join(cmd), e.returncode, indent(e.output))
        return False
    except OSError, e:
        print >> sys.stderr, "Could not run '%s' (errno: %d): %s" \
            % (' '.join(cmd), e.errno, e.strerror)
        return False
    return True


def plays(gobj):
    """
    Returns an ordered dictionary of all plays for a particular game.

    The game must be a nflgame.game.Game object.

    If there is a problem retrieving the data, None is returned.

    If the game is over, then the XML data is saved to disk.
    """
    if gobj.game_over() and gobj.eid in __play_cache:
        return __play_cache[gobj.eid]

    rawxml = _get_xml_data((gobj.eid, gobj.gamekey))
    ps = _xml_play_data(rawxml)
    if ps is None:
        return None
    __play_cache[gobj.eid] = ps

    # Save the XML data to disk if the game is over.
    if gobj.game_over():
        fp = _xmlf % (gobj.eid, gobj.gamekey)
        try:
            print >> gzip.open(fp, 'w+'), rawxml,
        except IOError:
            print >> sys.stderr, 'Could not cache XML data. Please make ' \
                                 '"%s" writable.' % path.dirname(fp)
    return ps
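# Illustrative sketch (the `game` variable is hypothetical): plays() returns
# an OrderedDict keyed by play id, in the order the plays were recorded.
#
#     for playid, p in plays(game).iteritems():
#         print playid, p.start, p.duration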
def play(gobj, playid):
    """
    Returns a Play object given a game and a play id. The game must be
    a nflgame.game.Game object.

    If a play with the given id does not exist, None is returned.
    """
    return plays(gobj).get(playid, None)


class Play (object):
    """
    Represents a single play with meta data that ties it to game footage.
    The start time corresponds to the 'ArchiveTCIN', which is when
    the play starts. Since there is no record of when a play stops, the
    duration is computed by subtracting the start time from the start
    time of the next play. If it's the last play recorded, then the
    duration is None.

    The play id is the foreign key that maps to play data stored in nflgame.
    """
    def __init__(self, start, duration, playid):
        self.start, self.duration, self.playid = start, duration, playid

    def idstr(self):
        """Returns a string play id padded with zeroes."""
        return '%04d' % int(self.playid)

    def __str__(self):
        return '(%s, %s, %s)' % (self.playid, self.start, self.duration)
class PlayTime (object):
    """
    Represents a footage time point, in the format HH:MM:SS:MM, where the
    last component appears to be given in tens of milliseconds.
    """
    def __init__(self, point):
        self.point = point

        try:
            parts = map(int, self.point.split(':'))
        except ValueError:
            assert False, 'Bad play time format: %s' % self.point

        if len(parts) != 4:
            assert False, 'Expected 4 parts but got %d in: %s' \
                % (len(parts), self.point)

        self.hh, self.mm, self.ss, self.milli = parts

        # I believe milliseconds is given in tens of milliseconds.
        self.milli *= 10

    def seconds(self):
        """
        Returns this time point rounded to the nearest second.
        """
        secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
        if self.milli >= 500:  # half a second or more rounds up
            secs += 1
        return secs

    def fractional(self):
        """
        Returns this time point as fractional seconds based on milliseconds.
        """
        secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
        secs = (1000 * secs) + self.milli
        return float(secs) / 1000.0

    def __cmp__(self, other):
        return cmp(self.fractional(), other.fractional())

    def __sub__(self, other):
        """
        Returns the difference, rounded to the nearest second, between
        two time points. The 'other' time point must take place before the
        current time point.
        """
        assert other <= self, '%s is not <= %s' % (other, self)
        return int(round(self.fractional() - other.fractional()))

    def __str__(self):
        return self.point
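# A worked example of the arithmetic above (illustrative only):
#
#     t1 = PlayTime('01:02:03:45')   # milli becomes 450
#     t2 = PlayTime('01:02:11:00')
#     t1.fractional()                # 3723.45
#     t2.fractional()                # 3731.0
#     t2 - t1                        # 8 (rounded to the nearest second)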
def _xml_play_data(data):
    """
    Parses the XML raw data given into an ordered dictionary of Play
    objects. The dictionary is keyed by play id.
    """
    if data is None:
        return None

    # Load everything into a list first, since we need to look ahead to see
    # the next play's start time to compute the current play's duration.
    rows = []
    for row in bs4.BeautifulSoup(data).find_all('row'):
        playid = row.find('id')
        if not playid or not row.find('catin'):
            continue
        playid = playid.get_text().strip()

        start = row.find('archivetcin')
        if not start:
            continue
        start = PlayTime(start.get_text().strip())

        # If this start precedes the last start time, skip it.
        if len(rows) > 0 and start < rows[-1][1]:
            continue
        rows.append((playid, start))

    d = OrderedDict()
    for i, (playid, start) in enumerate(rows):
        duration = None
        if i < len(rows) - 1:
            duration = rows[i + 1][1] - start
        d[playid] = Play(start, duration, playid)
    return d
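# Based on the tags looked up above, each row in the source XML is expected to
# look roughly like the following (an illustrative guess, not a documented
# schema):
#
#     <row>
#       <id>97</id>
#       <CATIN>...</CATIN>
#       <ArchiveTCIN>01:02:03:45</ArchiveTCIN>
#     </row>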
def _get_xml_data(game=None, fpath=None):
    """
    Returns the XML play data corresponding to the game given. A game must
    be specified as a tuple: the first element should be an eid and the second
    element should be a game key. For example, ('2012102108', '55604').

    If the XML data is already on disk, it is read, decompressed and returned.

    Otherwise, the XML data is downloaded from the NFL web site. If the data
    doesn't exist yet or there was an error, _get_xml_data returns None.

    If game is None, then the XML data is read from the file at fpath.
    """
    assert game is not None or fpath is not None

    if fpath is not None:
        return gzip.open(fpath).read()

    fpath = _xmlf % (game[0], game[1])
    if os.access(fpath, os.R_OK):
        return gzip.open(fpath).read()
    try:
        year = int(game[0][0:4])
        month = int(game[0][4:6])
        if month <= 3:
            year -= 1
        u = _xml_base_url % (year, game[1])  # The year and the game key.
        return urllib2.urlopen(u, timeout=10).read()
    except urllib2.HTTPError, e:
        print >> sys.stderr, e
    except socket.timeout, e:
        print >> sys.stderr, e
    return None