1 """
2 Introduction
3 ============
4 A simple library to download, slice and search NFL game footage on a
5 play-by-play basis.
6
7 This library comes with preloaded play-by-play meta data, which describes the
8 start time of each play in the game footage. However, the actual footage does
9 not come with this library and is not released by me. This package therefore
10 provides utilities to batch download NFL Game Footage from the original source.
11
12 Once game footage is downloaded, you can use this library to search plays and
13 construct a playlist to play in any video player.
14 """
15
16 import gzip
17 import math
18 import os
19 import os.path as path
20 import socket
21 import sys
22 import urllib2
23
24 import bs4
25
26 import eventlet
27 httplib2 = eventlet.import_patched('httplib2')
28 import eventlet.green.subprocess as subprocess
29
30 from nflgame import OrderedDict
31
# Template for the gzipped play-by-play XML shipped next to this module;
# it is filled in with (eid, gamekey).
_xmlf = path.join(path.dirname(__file__), 'pbp-xml', '%s-%s.xml.gz')

# Remote location of the play-by-play XML; filled in with (year, gamekey).
_xml_base_url = 'http://e2.cdnl3.neulion.com/nfl/edl/nflgr/%d/%s.xml'

# Coach footage location as a (server, app name, playpath) triple. The
# playpath is a template filled in with (season, gamekey).
_coach_url = (
    'rtmp://neulionms.fcod.llnwd.net',
    'a5306/e1',
    'mp4:u/nfl/nfl/coachtapes/%s/%s_all_1600',
)

# HTTP Live Stream playlist template for broadcast footage.
_broadcast_url = 'http://nlds82.cdnl3nl.neulion.com/nlds_vod/nfl/vod/' \
                 '%s/%s/%s/%s/2_%s_%s_%s_%s_h_whole_1_%s.mp4.m3u8'

# Parsed play timings keyed by game eid, one cache per footage type.
__broadcast_cache = {}
__coach_cache = {}
46
47
49 print >> sys.stderr, s
50
51
53 """
54 Returns the HTTP Live Stream URL (an m3u8 file) for the given game
55 and quality.
56
57 Note that this does not work with every game (yet). In particular,
58 URLs vary unpredictably (to me) from game to game.
59 """
60 month, day = gobj.eid[4:6], gobj.eid[6:8]
61 return _broadcast_url \
62 % (gobj.season(), month, day, gobj.gamekey, gobj.gamekey,
63 gobj.away.lower(), gobj.home.lower(), gobj.season(), quality)
64
65
67 """
68 Returns the rtmp URL as a triple for the coach footage
69 of the given game. The elemtns of the triple are::
70
71 (rtmp server, rtmp app name, rtmp playpath)
72
73 Coach video only comes in 1600 quality.
74 """
75 return (
76 _coach_url[0],
77 _coach_url[1],
78 _coach_url[2] % (gobj.season(), gobj.gamekey),
79 )
80
81
93
94
107
108
121
122
124 return path.join(footage_dir, '%s-%s.mp4' % (g.eid, g.gamekey))
125
126
128 return path.join(footage_play_dir, '%s-%s' % (g.eid, g.gamekey))
129
130
132 return '(Season: %s, Week: %s, %s)' \
133 % (gobj.schedule['year'], gobj.schedule['week'], gobj)
134
135
def unsliced_plays(footage_play_dir, gobj, coach=True, dry_run=False):
    """
    Returns the plays of the given game that still need to be sliced.

    A play counts as sliced only when the following file is readable,
    where {playid} is the play's zero padded id::

        {footage_play_dir}/{eid}-{gamekey}/{playid}.mp4

    Every other play of the game is included in the returned list, so
    an empty list means that the whole game has been sliced. None is
    returned when the play-by-play meta data could not be retrieved.

    If coach is False, then play timings for broadcast footage will be
    used instead of coach timings.

    If dry_run is True, then only the first 10 plays of the game are
    examined.
    """
    game_plays = plays(gobj, coach)
    game_dir = _play_path(footage_play_dir, gobj)
    if game_plays is None:
        return None

    candidates = list(game_plays.values())
    if dry_run:
        candidates = candidates[:10]

    def needs_slicing(p):
        piece = path.join(game_dir, '%s.mp4' % p.idstr())
        return not os.access(piece, os.R_OK)

    return [p for p in candidates if needs_slicing(p)]
171
172
def slice(footage_play_dir, full_footage_file, gobj, coach=True,
          threads=4, dry_run=False):
    """
    Cuts a full game footage file into one video file per play.

    full_footage_file is the path to the footage of a whole game and
    gobj is the matching nflgame.game.Game object. Each play is cut by
    slice_play and written to::

        {footage_play_dir}/{eid}-{gamekey}/{playid}.mp4

    The game directory is created when it is not accessible yet. Plays
    whose video file is already readable are left alone, so calling
    this function again only processes what is still missing.

    The plays are handed to an eventlet green pool that runs at most
    `threads` of them at the same time. The function returns only after
    every spawned play has been processed.

    If coach is False, then play timings for broadcast footage will be
    used instead of coach timings.

    If dry_run is true, then only the first 10 plays of the game are
    sliced.
    """
    game_dir = _play_path(footage_play_dir, gobj)
    if not os.access(game_dir, os.R_OK):
        os.makedirs(game_dir)

    pending = unsliced_plays(footage_play_dir, gobj, coach, dry_run)
    if not pending:
        # Covers both "nothing left to do" (empty list) and "no meta
        # data available" (None).
        message = (
            'There are no unsliced plays remaining for game %s %s.\n'
            'If they have not been sliced yet, then the XML play-by-play '
            'meta data may not be available or is corrupt.'
            % (gobj, _nice_game(gobj)))
        _eprint(message)
        return

    workers = eventlet.greenpool.GreenPool(threads)
    for pending_play in pending:
        workers.spawn_n(slice_play, footage_play_dir, full_footage_file,
                        gobj, pending_play, 0, True)
    workers.waitall()

    _eprint('DONE slicing game %s' % _nice_game(gobj))
219
220
def slice_play(footage_play_dir, full_footage_file, gobj, play,
               max_duration=0, cut_scoreboard=True):
    """
    This is just like slice, but it only slices the play provided.
    In typical cases, slice should be used since it makes sure not
    to duplicate work.

    This function will not check if the play-by-play directory for
    gobj has been created.

    max_duration is used to cap the length of a play. This drastically
    cuts down on the time required to slice a game and the storage
    requirements of a game at the cost of potentially missing bigger
    plays. This is particularly useful if you are slicing broadcast
    footage, where imposing a cap at about 15 seconds can decrease
    storage and CPU requirements by more than half without missing much.

    When cut_scoreboard is True, the first 3.0 seconds of
    the play will be clipped to remove the scoreboard view. The clip
    is skipped when it would leave nothing of the play.
    """
    outdir = _play_path(footage_play_dir, gobj)
    st = play.start
    outpath = path.join(outdir, '%s.mp4' % play.idstr())

    et = play.end
    if et is None:
        # The last recorded play has no successor to mark its end, so
        # fall back to a fixed 40 second window.
        et = st.add_seconds(40)
    if max_duration > 0 and (et.seconds() - st.seconds()) > max_duration:
        et = st.add_seconds(max_duration)

    if cut_scoreboard:
        trimmed = st.add_seconds(3.0)
        # Only trim when footage remains afterwards; otherwise the
        # duration handed to ffmpeg would be zero or negative.
        if trimmed.fractional() < et.fractional():
            st = trimmed

    dr = PlayTime(seconds=et.fractional() - st.fractional())

    # The part after the dot is a decimal fraction of a second, so the
    # milliseconds must be zero padded: 50ms is ".050", not ".50".
    start_time = '%02d:%02d:%02d.%03d' % (st.hh, st.mm, st.ss, st.milli)
    duration = '%02d:%02d:%02d.%03d' % (dr.hh, dr.mm, dr.ss, dr.milli)
    cmd = ['ffmpeg',
           '-ss', start_time,
           '-t', duration,
           '-i', full_footage_file,
           '-acodec', 'copy',
           '-vcodec', 'copy',
           outpath,
           ]
    _run_command(cmd)
267
268
270 """
271 Starts an ffmpeg process to download the full broadcast of the given
272 game with the quality provided. The qualities available are:
273 400, 800, 1200, 1600, 2400, 3000, 4500 with 4500 being the best.
274
275 The footage will be saved to the following path::
276
277 footage_dir/{eid}-{gamekey}.mp4
278
279 If footage is already at that path, then a LookupError is raised.
280
281 A full game's worth of footage at a quality of 1600 is about 2GB.
282 """
283 fp = _full_path(footage_dir, gobj)
284 if os.access(fp, os.R_OK):
285 raise LookupError('Footage path "%s" already exists.' % fp)
286
287 url = broadcast_url(gobj, quality)
288
289
290
291
292 resp, _ = httplib2.Http().request(url, 'HEAD')
293 if resp['status'] != '200':
294 _eprint('BAD URL (http status %s) for game %s: %s'
295 % (resp['status'], _nice_game(gobj), url))
296 _eprint('FAILED to download game %s' % _nice_game(gobj))
297 return
298
299 cmd = ['ffmpeg',
300 '-timeout', '60',
301 '-i', url]
302 if dry_run:
303 cmd += ['-t', '30']
304 cmd += ['-absf', 'aac_adtstoasc',
305 '-acodec', 'copy',
306 '-vcodec', 'copy',
307 fp,
308 ]
309
310 _eprint('Downloading game %s %s' % (gobj.eid, _nice_game(gobj)))
311 if not _run_command(cmd):
312 _eprint('FAILED to download game %s' % _nice_game(gobj))
313 else:
314 _eprint('DONE with game %s' % _nice_game(gobj))
315
316
318 """
319 Starts an rtmpdump process to download the full coach footage of the
320 given game. Currently, the only quality available is 1600.
321
322 The footage will be saved to the following path::
323
324 footage_dir/{eid}-{gamekey}.mp4
325
326 If footage is already at that path, then a LookupError is raised.
327
328 A full game's worth of footage at a quality of 1600 is about 1GB.
329 """
330 fp = _full_path(footage_dir, gobj)
331 if os.access(fp, os.R_OK):
332 raise LookupError('Footage path "%s" already exists.' % fp)
333
334 server, app, path = coach_url(gobj)
335
336 cmd = ['rtmpdump',
337 '--rtmp', server,
338 '--app', app,
339 '--playpath', path,
340 '--timeout', '60',
341 ]
342 if dry_run:
343 cmd += ['--stop', '30']
344 cmd += ['-o', fp]
345
346 _eprint('Downloading game %s %s' % (gobj.eid, _nice_game(gobj)))
347 status = _run_command(cmd)
348 if status is None:
349 _eprint('DONE (incomplete) with game %s' % _nice_game(gobj))
350 elif not status:
351 _eprint('FAILED to download game %s' % _nice_game(gobj))
352 else:
353 _eprint('DONE with game %s' % _nice_game(gobj))
354
355
357 try:
358 p = subprocess.Popen(cmd,
359 stdout=subprocess.PIPE,
360 stderr=subprocess.STDOUT)
361 output = p.communicate()[0].strip()
362
363 if p.returncode > 0:
364 err = subprocess.CalledProcessError(p.returncode, cmd)
365 err.output = output
366 raise err
367 except subprocess.CalledProcessError, e:
368
369 if e.returncode == 2 and cmd[0] == 'rtmpdump':
370 return None
371 indent = lambda s: '\n'.join(map(lambda l: ' %s' % l, s.split('\n')))
372 _eprint("Could not run '%s' (exit code %d):\n%s"
373 % (' '.join(cmd), e.returncode, indent(e.output)))
374 return False
375 except OSError, e:
376 _eprint("Could not run '%s' (errno: %d): %s"
377 % (' '.join(cmd), e.errno, e.strerror))
378 return False
379 return True
380
381
def plays(gobj, coach=True):
    """
    Returns an ordered dictionary of all plays for a particular game
    with timings for the coach footage. If coach is False, then the
    timings will be for the broadcast footage.

    The game must be a nflgame.game.Game object.

    If there is a problem retrieving the data, None is returned.

    Parsed plays are kept in a per footage type in-memory cache, which
    is only consulted once the game is over. If the game is over, then
    the XML data is also saved to disk.
    """
    if coach:
        cache = __coach_cache
    else:
        cache = __broadcast_cache

    game_over = gobj.game_over()
    if game_over and gobj.eid in cache:
        return cache[gobj.eid]

    rawxml = _get_xml_data(gobj.eid, gobj.gamekey)
    ps = _xml_plays(rawxml, coach)
    if ps is None:
        return None
    if len(ps) == 0:
        _eprint('Could not find timing nodes in XML data, '
                'which provide the start time of each play.')
        return None

    # Store into the cache matching the requested footage type, so that
    # coach and broadcast timings are never mixed up.
    cache[gobj.eid] = ps

    fp = _xmlf % (gobj.eid, gobj.gamekey)
    if game_over and not os.access(fp, os.R_OK):
        try:
            xml_file = gzip.open(fp, 'wb')
            try:
                xml_file.write(rawxml)
            finally:
                # Closing flushes the gzip trailer; without it the file
                # on disk may be truncated.
                xml_file.close()
        except IOError:
            _eprint('Could not cache XML data. Please make '
                    '"%s" writable.' % path.dirname(fp))
    return ps
421
422
def play(gobj, playid, coach=True):
    """
    Returns a Play object given a game and a play id with timings for
    the coach footage. If coach is False, then the timings will be for
    the broadcast footage.

    The game must be a nflgame.game.Game object.

    If a play with the given id does not exist, or if the play-by-play
    data for the game could not be retrieved, None is returned.
    """
    game_plays = plays(gobj, coach)
    if game_plays is None:
        return None
    return game_plays.get(playid, None)
434
435
436 -class Play (object):
437 """
438 Represents a single play with meta data that ties it to game footage.
439 The footage_start corresponds to the 'ArchiveTCIN' or 'CATIN', which
440 is when the play starts. Since there is no record of when a play
441 stops, the end is computed by using the start time of the next play.
442 If it's the last play recorded, then the end time is None.
443
444 The play id is the foreign key that maps to play data stored in nflgame.
445 """
446 - def __init__(self, start, end, playid):
447 self.start, self.end, self.playid = start, end, playid
448
450 """Returns a string play id padded with zeroes."""
451 return '%04d' % int(self.playid)
452
454 return '(%s, %s, %s)' % (self.playid, self.start, self.end)
455
456
458 """
459 Represents a footage time point, in the format HH:MM:SS:MMM where
460 MMM can be either 2 or 3 digits.
461 """
462 - def __init__(self, point=None, seconds=None):
463 """
464 Construct a PlayTime object given a point in time in the format
465 HH:MM:SS:MMM where MMM can be either 2 or 3 digits.
466
467 Alternatively, seconds can be provided (which may be a float).
468 """
469 if seconds is not None:
470 milli = int(1000 * (seconds - math.floor(seconds)))
471
472 seconds = int(math.floor(seconds))
473 hh = seconds / 3600
474
475 seconds -= hh * 3600
476 mm = seconds / 60
477
478 seconds -= mm * 60
479 ss = seconds
480
481 self.hh, self.mm, self.ss, self.milli = hh, mm, ss, milli
482 self.__point = '%02d:%02d:%02d:%03d' % (hh, mm, ss, milli)
483 return
484
485 self.__point = point
486 self.__coach = False
487
488 try:
489 parts = self.__point.split(':')
490 if len(parts[3]) == 3:
491 self.__coach = True
492 parts = map(int, parts)
493 except ValueError:
494 assert False, 'Bad play time format: %s' % self.__point
495
496 if len(parts) != 4:
497 assert False, 'Expected 4 parts but got %d in: %s' \
498 % (len(parts), self.__point)
499
500 self.hh, self.mm, self.ss, self.milli = parts
501
502
503
504
505 if not self.__coach:
506 self.milli *= 10
507
513
515 """
516 Returns this time point rounded to the nearest second.
517 """
518 secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
519 if self.milli >= 50:
520 secs += 1
521 return secs
522
524 """
525 Returns this time point as fractional seconds based on milliseconds.
526 """
527 secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
528 secs = (1000 * secs) + self.milli
529 return float(secs) / 1000.0
530
533
535 """
536 Returns the difference rounded to nearest second between
537 two time points. The 'other' time point must take place before the
538 current time point.
539 """
540 assert other <= self, '%s is not <= than %s' % (other, self)
541 return int(round(self.fractional() - other.fractional()))
542
545
546
548 """
549 Parses the XML raw data given into an ordered dictionary of Play
550 objects corresponding to coach play timings. If coach is set to
551 False, then play timings for the broadcast are retrieved.
552
553 The dictionary is keyed by play id.
554 """
555 if data is None:
556 return None
557
558
559
560 rows = []
561 for row in bs4.BeautifulSoup(data).find_all('row'):
562 playid = row.find('id')
563 if not playid:
564 playid = row.get('playid', None)
565 if not playid:
566 continue
567 playid = playid.strip()
568 else:
569 playid = playid.get_text().strip()
570
571 if coach:
572 start = row.find('catin')
573 else:
574 start = row.find('archivetcin')
575 if not start:
576 continue
577 start = PlayTime(start.get_text().strip())
578
579
580 if len(rows) > 0 and start < rows[-1][1]:
581 continue
582 rows.append((playid, start, row))
583
584
585
586
587 def ignore(row):
588 if 'playdescription' in row.attrs:
589 if row['playdescription'].lower().startswith('timeout'):
590 return True
591 if row['playdescription'].lower().startswith('two-minute'):
592 return True
593
594
595 if 'preplaybyplay' in row.attrs:
596 if row['preplaybyplay'].lower().startswith('timeout'):
597 return True
598 return False
599
600 d = OrderedDict()
601 for i, (playid, start, row) in enumerate(rows):
602 if ignore(row):
603 continue
604 end = None
605 if i < len(rows) - 1:
606 end = rows[i+1][1]
607 d[playid] = Play(start, end, playid)
608 return d
609
610
612 """
613 Returns the XML play data corresponding to the game given. A game must
614 be specified in one of two ways: by providing the eid and gamekey or
615 by providing the file path to a gzipped XML file.
616
617 If the XML data is already on disk, it is read, decompressed and returned.
618
619 Otherwise, the XML data is downloaded from the NFL web site. If the data
620 doesn't exist yet or there was an error, _get_xml_data returns None.
621 """
622 assert (eid is not None and gamekey is not None) or fpath is not None
623
624 if fpath is not None:
625 return gzip.open(fpath).read()
626
627 fpath = _xmlf % (eid, gamekey)
628 if os.access(fpath, os.R_OK):
629 return gzip.open(fpath).read()
630 try:
631 year = int(eid[0:4])
632 month = int(eid[4:6])
633 if month <= 3:
634 year -= 1
635 u = _xml_base_url % (year, gamekey)
636 return urllib2.urlopen(u, timeout=10).read()
637 except urllib2.HTTPError, e:
638 _eprint(e)
639 except socket.timeout, e:
640 _eprint(e)
641 return None
642