1 """
2 Introduction
3 ============
4 A simple library to download, slice and search NFL game footage on a
5 play-by-play basis.
6
7 This library comes with preloaded play-by-play meta data, which describes the
8 start time of each play in the game footage. However, the actual footage does
9 not come with this library and is not released by me. This package therefore
10 provides utilities to batch download NFL Game Footage from the original source.
11
12 Once game footage is downloaded, you can use this library to search plays and
13 construct a playlist to play in any video player.
14 """
15
16 import gzip
17 import os
18 import os.path as path
19 import socket
20 import sys
21 import urllib2
22
23 import bs4
24
25 import eventlet
26 httplib2 = eventlet.import_patched('httplib2')
27 import eventlet.green.subprocess as subprocess
28
29 from nflgame import OrderedDict
30
31 _xmlf = path.join(path.split(__file__)[0], 'pbp-xml', '%s-%s.xml.gz')
32 _xml_base_url = 'http://e2.cdnl3.neulion.com/nfl/edl/nflgr/%d/%s.xml'
33
34 _footage_url = 'http://nlds82.cdnl3nl.neulion.com/nlds_vod/nfl/vod/' \
35 '%s/%s/%s/%s/2_%s_%s_%s_%s_h_whole_1_%s.mp4.m3u8'
36
37 __play_cache = {}
38
39
45
46
58
59
73
74
76 return path.join(footage_dir, '%s-%s.mp4' % (g.eid, g.gamekey))
77
78
80 return path.join(footage_play_dir, '%s-%s' % (g.eid, g.gamekey))
81
82
84 return '(Season: %s, Week: %s, %s)' \
85 % (gobj.schedule['year'], gobj.schedule['week'], gobj)
86
87
89 """
90 Scans the game directory inside footage_play_dir and returns a list
91 of plays that haven't been sliced yet. In particular, a play is only
92 considered sliced if the following file is readable, assuming {playid}
93 is its play id::
94
95 {footage_play_dir}/{eid}-{gamekey}/{playid}.mp4
96
97 All plays for the game given that don't fit this criteria will be
98 returned in the list.
99
100 If the list is empty, then all plays for the game have been sliced.
101
102 If dry_run is true, then only the first 10 plays of the game are
103 sliced.
104 """
105 ps = plays(gobj)
106 outdir = _play_path(footage_play_dir, gobj)
107
108 unsliced = []
109 for i, p in enumerate(ps.values()):
110 if dry_run and i >= 10:
111 break
112 pid = p.idstr()
113 if not os.access(path.join(outdir, '%s.mp4' % pid), os.R_OK):
114 unsliced.append(p)
115 return unsliced
116
117
def slice(footage_play_dir, full_footage_file, gobj, threads=4, dry_run=False):
    """
    Cuts a full game video into one clip per play by running ffmpeg.

    full_footage_file is the path of the full game footage (as fetched
    with nflvid-footage) and gobj is the nflgame.game.Game object that
    the footage belongs to.

    Each clip is written below footage_play_dir as::

        {footage_play_dir}/{eid}-{gamekey}/{playid}.mp4

    The game directory is created when it is not readable yet. Only the
    plays reported by unsliced_plays are processed, so clips that already
    exist are not generated a second time.

    The work is spread over an eventlet green pool holding at most
    `threads` concurrent slice_play calls. The function returns only
    after every spawned call has finished.

    dry_run is handed to unsliced_plays, which then limits the work to
    the first 10 plays of the game.
    """
    game_dir = _play_path(footage_play_dir, gobj)
    if not os.access(game_dir, os.R_OK):
        os.makedirs(game_dir)

    workers = eventlet.greenpool.GreenPool(threads)
    pending = unsliced_plays(footage_play_dir, gobj, dry_run)
    for todo in pending:
        workers.spawn_n(slice_play,
                        footage_play_dir, full_footage_file, gobj, todo)
    # Block until every green thread has returned.
    workers.waitall()
148
149
def slice_play(footage_play_dir, full_footage_file, gobj, play):
    """
    This is just like slice, but it only slices the play provided.
    In typical cases, slice should be used since it makes sure not
    to duplicate work.

    The clip is cut out of full_footage_file starting at play.start and
    is written to::

        {footage_play_dir}/{eid}-{gamekey}/{playid}.mp4

    When play.duration is None no '-t' limit is passed to ffmpeg, so the
    clip is not bounded by a duration.

    This function will not check if the play-by-play directory for
    gobj has been created.
    """
    outdir = _play_path(footage_play_dir, gobj)
    st = play.start
    # ffmpeg reads the digits after the '.' as a decimal fraction of a
    # second, so the millisecond count has to be zero-padded to three
    # digits: 50ms must be written '.050' ('.50' would mean half a second).
    start_time = '%02d:%02d:%02d.%03d' % (st.hh, st.mm, st.ss, st.milli)
    outpath = path.join(outdir, '%s.mp4' % play.idstr())

    cmd = ['ffmpeg',
           '-ss', start_time,
           '-i', full_footage_file]
    if play.duration is not None:
        cmd += ['-t', '%d' % play.duration]
    cmd += ['-map', '0',
            '-strict', '-2',
            outpath]
    _run_command(cmd)
173
174
def download(footage_dir, gobj, quality='1600', dry_run=False):
    """
    Fetches the full footage of a game by running ffmpeg against the
    footage URL for the requested quality. The qualities available are:
    400, 800, 1200, 1600, 2400, 3000, 4500 with 4500 being the best.

    The video is stored at::

        footage_dir/{eid}-{gamekey}.mp4

    A LookupError is raised when a readable file already exists at that
    path.

    Before ffmpeg is started the URL is probed with an HTTP HEAD request.
    Any status other than 200 is reported on stderr and the function
    returns without downloading. Progress and the final outcome are
    reported on stderr as well; nothing is returned.

    If dry_run is true, ffmpeg is limited to 30 seconds of footage.

    A full game's worth of footage at a quality of 1600 is about 2GB.
    """
    target = _full_path(footage_dir, gobj)
    if os.access(target, os.R_OK):
        raise LookupError('Footage path "%s" already exists.' % target)

    url = footage_url(gobj, quality)

    # Probe the URL first so a missing video is reported up front rather
    # than through an ffmpeg failure.
    response, _ = httplib2.Http().request(url, 'HEAD')
    status = response['status']
    if status != '200':
        print >> sys.stderr, 'BAD URL (http status %s) for game %s: %s' \
            % (status, _nice_game(gobj), url)
        print >> sys.stderr, 'FAILED to download game %s' % _nice_game(gobj)
        return

    time_limit = ['-t', '30'] if dry_run else []
    command = ['ffmpeg', '-i', url] + time_limit + ['-strict', '-2', target]

    print >> sys.stderr, 'Downloading game %s %s' \
        % (gobj.eid, _nice_game(gobj))
    if _run_command(command):
        print >> sys.stderr, 'DONE with game %s' % _nice_game(gobj)
    else:
        print >> sys.stderr, 'FAILED to download game %s' % _nice_game(gobj)
216
217
219 try:
220 p = subprocess.Popen(cmd,
221 stdout=subprocess.PIPE,
222 stderr=subprocess.STDOUT)
223 output = p.communicate()[0].strip()
224
225 if p.returncode > 0:
226 err = subprocess.CalledProcessError(p.returncode, cmd)
227 err.output = output
228 raise err
229 except subprocess.CalledProcessError, e:
230 indent = lambda s: '\n'.join(map(lambda l: ' %s' % l, s.split('\n')))
231 print >> sys.stderr, "Could not run '%s' (exit code %d):\n%s" \
232 % (' '.join(cmd), e.returncode, indent(e.output))
233 return False
234 except OSError, e:
235 print >> sys.stderr, "Could not run '%s' (errno: %d): %s" \
236 % (' '.join(cmd), e.errno, e.strerror)
237 return False
238 return True
239
240
242 """
243 Returns an ordered dictionary of all plays for a particular game.
244
245 The game must be a nflgame.game.Game object.
246
247 If there is a problem retrieving the data, None is returned.
248
249 If the game is over, then the XML data is saved to disk.
250 """
251 if gobj.game_over() and gobj.eid in __play_cache:
252 return __play_cache[gobj.eid]
253
254 rawxml = _get_xml_data((gobj.eid, gobj.gamekey))
255 ps = _xml_play_data(rawxml)
256 if ps is None:
257 return None
258 __play_cache[gobj.eid] = ps
259
260
261 if gobj.game_over():
262 fp = _xmlf % (gobj.eid, gobj.gamekey)
263 try:
264 print >> gzip.open(fp, 'w+'), rawxml,
265 except IOError:
266 print >> sys.stderr, 'Could not cache XML data. Please make ' \
267 '"%s" writable.' % path.dirname(fp)
268 return ps
269
270
def play(gobj, playid):
    """
    Returns a Play object given a game and a play id. The game must be
    a nflgame.game.Game object.

    If a play with the given id does not exist, None is returned. None
    is also returned when the play data for the game could not be
    retrieved at all (i.e. plays(gobj) itself returned None).
    """
    ps = plays(gobj)
    # plays() signals a retrieval problem with None; guard against it so
    # callers get None instead of an AttributeError.
    if ps is None:
        return None
    return ps.get(playid)
279
280
281 -class Play (object):
282 """
283 Represents a single play with meta data that ties it to game footage.
284 The footage_start corresponds to the 'ArchiveTCIN', which is when
285 the play starts. Since there is no record of when a play stops, the
286 duration is computed by subtracting the start time from the start
287 time of the next play. If it's the last play recorded, then the
288 duration is None.
289
290 The play id is the foreign key that maps to play data stored in nflgame.
291 """
292 - def __init__(self, start, duration, playid):
293 self.start, self.duration, self.playid = start, duration, playid
294
296 """Returns a string play id padded with zeroes."""
297 return '%04d' % int(self.playid)
298
300 return '(%s, %s, %s)' % (self.playid, self.start, self.duration)
301
302
304 """
305 Represents a footage time point, in the format HH:MM:SS:MM
306 """
308 self.point = point
309
310 try:
311 parts = map(int, self.point.split(':'))
312 except ValueError:
313 assert False, 'Bad play time format: %s' % self.point
314
315 if len(parts) != 4:
316 assert False, 'Expected 4 parts but got %d in: %s' \
317 % (len(parts), self.point)
318
319 self.hh, self.mm, self.ss, self.milli = parts
320
321
322 self.milli *= 10
323
325 """
326 Returns this time point rounded to the nearest second.
327 """
328 secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
329 if self.milli >= 50:
330 secs += 1
331 return secs
332
334 """
335 Returns this time point as fractional seconds based on milliseconds.
336 """
337 secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
338 secs = (1000 * secs) + self.milli
339 return float(secs) / 1000.0
340
343
345 """
346 Returns the difference rounded to nearest second between
347 two time points. The 'other' time point must take place before the
348 current time point.
349 """
350 assert other <= self, '%s is not <= than %s' % (other, self)
351 return int(round(self.fractional() - other.fractional()))
352
355
356
358 """
359 Parses the XML raw data given into an ordered dictionary of Play
360 objects. The dictionary is keyed by play id.
361 """
362 if data is None:
363 return None
364
365
366
367 rows = []
368 for row in bs4.BeautifulSoup(data).find_all('row'):
369 playid = row.find('id')
370 if not playid or not row.find('catin'):
371 continue
372 playid = playid.get_text().strip()
373
374 start = row.find('archivetcin')
375 if not start:
376 continue
377 start = PlayTime(start.get_text().strip())
378
379
380 if len(rows) > 0 and start < rows[-1][1]:
381 continue
382 rows.append((playid, start))
383
384 d = OrderedDict()
385 for i, (playid, start) in enumerate(rows):
386 duration = None
387 if i < len(rows) - 1:
388 duration = rows[i+1][1] - start
389 d[playid] = Play(start, duration, playid)
390 return d
391
392
394 """
395 Returns the XML play data corresponding to the game given. A game must
396 be specified as a tuple: the first element should be an eid and the second
397 element should be a game key. For example, ('2012102108', '55604').
398
399 If the XML data is already on disk, it is read, decompressed and returned.
400
401 Otherwise, the XML data is downloaded from the NFL web site. If the data
402 doesn't exist yet or there was an error, _get_xml_data returns None.
403
404 If game is None, then the XML data is read from the file at fpath.
405 """
406 assert game is not None or fpath is not None
407
408 if fpath is not None:
409 return gzip.open(fpath).read()
410
411 fpath = _xmlf % (game[0], game[1])
412 if os.access(fpath, os.R_OK):
413 return gzip.open(fpath).read()
414 try:
415 year = int(game[0][0:4])
416 month = int(game[0][4:6])
417 if month <= 3:
418 year -= 1
419 u = _xml_base_url % (year, game[1])
420 return urllib2.urlopen(u, timeout=10).read()
421 except urllib2.HTTPError, e:
422 print >> sys.stderr, e
423 except socket.timeout, e:
424 print >> sys.stderr, e
425 return None
426