1 """
2 Introduction
3 ============
4 A simple library to download, slice and search NFL game footage on a
5 play-by-play basis.
6
7 This library comes with preloaded play-by-play meta data, which describes the
8 start time of each play in the game footage. However, the actual footage does
9 not come with this library and is not released by me. This package therefore
10 provides utilities to batch download NFL Game Footage from the original source.
11
12 Once game footage is downloaded, you can use this library to search plays and
13 construct a playlist to play in any video player.
14 """
15
16 import gzip
17 import os
18 import os.path as path
19 import socket
20 import sys
21 import urllib2
22
23 import bs4
24
25 import eventlet
26 httplib2 = eventlet.import_patched('httplib2')
27 import eventlet.green.subprocess as subprocess
28
29 from nflgame import OrderedDict
30
31 _xmlf = path.join(path.split(__file__)[0], 'pbp-xml', '%s-%s.xml.gz')
32 _xml_base_url = 'http://e2.cdnl3.neulion.com/nfl/edl/nflgr/%d/%s.xml'
33
34 _footage_url = 'http://nlds82.cdnl3nl.neulion.com/nlds_vod/nfl/vod/' \
35 '%s/%s/%s/%s/2_%s_%s_%s_%s_h_whole_1_%s.mp4.m3u8'
36
37 __play_cache = {}
38
39
45
46
58
59
73
74
76 return path.join(footage_dir, '%s-%s.mp4' % (g.eid, g.gamekey))
77
78
80 return path.join(footage_play_dir, '%s-%s' % (g.eid, g.gamekey))
81
82
84 return '(Season: %s, Week: %s, %s)' \
85 % (gobj.schedule['year'], gobj.schedule['week'], gobj)
86
87
89 """
90 Scans the game directory inside footage_play_dir and returns a list
91 of plays that haven't been sliced yet. In particular, a play is only
92 considered sliced if the following file is readable, assuming {playid}
93 is its play id::
94
95 {footage_play_dir}/{eid}-{gamekey}/{playid}.mp4
96
97 All plays for the game given that don't fit this criteria will be
98 returned in the list.
99
100 If the list is empty, then all plays for the game have been sliced.
101 Alternatively, None can be returned if there was a problem retrieving
102 the play-by-play meta data.
103
104 If dry_run is true, then only the first 10 plays of the game are
105 sliced.
106 """
107 ps = plays(gobj)
108 outdir = _play_path(footage_play_dir, gobj)
109
110 unsliced = []
111 if ps is None:
112 return None
113 for i, p in enumerate(ps.values()):
114 if dry_run and i >= 10:
115 break
116 pid = p.idstr()
117 if not os.access(path.join(outdir, '%s.mp4' % pid), os.R_OK):
118 unsliced.append(p)
119 return unsliced
120
121
def slice(footage_play_dir, full_footage_file, gobj, threads=4, dry_run=False):
    """
    Uses ffmpeg to slice the given footage file into play-by-play pieces.
    The full_footage_file should point to a full game downloaded with
    nflvid-footage and gobj should be the corresponding nflgame.game.Game
    object.

    The footage_play_dir is where the pieces will be saved::

        {footage_play_dir}/{eid}-{gamekey}/{playid}.mp4

    This function will not duplicate work. If a video file exists for
    a particular play, then slice will not regenerate it.

    Note that this function uses an eventlet green pool to run multiple
    ffmpeg instances simultaneously. The maximum number of threads to
    use is specified by threads. This function only terminates when all
    threads have finished processing.

    If dry_run is true, then only the first 10 plays of the game are
    sliced.
    """
    outdir = _play_path(footage_play_dir, gobj)
    # EAFP: attempt the creation and tolerate an already-existing
    # directory. The previous `os.access(outdir, os.R_OK)` guard raced
    # with concurrent invocations and raised an uncaught OSError when the
    # directory existed but was not readable.
    try:
        os.makedirs(outdir)
    except OSError:
        if not path.isdir(outdir):
            raise

    # unsliced_plays returns None when play meta data is unavailable;
    # `or []` turns that into a no-op instead of a TypeError.
    pool = eventlet.greenpool.GreenPool(threads)
    for p in unsliced_plays(footage_play_dir, gobj, dry_run) or []:
        pool.spawn_n(slice_play, footage_play_dir, full_footage_file, gobj, p)
    pool.waitall()
152
153
def slice_play(footage_play_dir, full_footage_file, gobj, play):
    """
    This is just like slice, but it only slices the play provided.
    In typical cases, slice should be used since it makes sure not
    to duplicate work.

    This function will not check if the play-by-play directory for
    gobj has been created.
    """
    outdir = _play_path(footage_play_dir, gobj)
    st = play.start
    # BUG FIX: the fractional-seconds field must be zero padded to three
    # digits. st.milli is a millisecond count (PlayTime multiplies the
    # hundredths field by 10), so '.%d' rendered 50 ms as '.50', which
    # ffmpeg parses as 500 ms — every slice could start up to ~0.9s off.
    start_time = '%02d:%02d:%02d.%03d' % (st.hh, st.mm, st.ss, st.milli)
    outpath = path.join(outdir, '%s.mp4' % play.idstr())

    # -ss before -i makes ffmpeg seek in the input before decoding;
    # -t bounds the slice at the next play's start (None for the last
    # recorded play, in which case ffmpeg runs to end of input).
    cmd = ['ffmpeg',
           '-ss', start_time,
           '-i', full_footage_file]
    if play.duration is not None:
        cmd += ['-t', '%d' % play.duration]
    cmd += ['-map', '0',
            '-strict', '-2',
            outpath]
    _run_command(cmd)
177
178
def download(footage_dir, gobj, quality='1600', dry_run=False):
    """
    Starts an ffmpeg process to download the full footage of the given
    game with the quality provided. The qualities available are:
    400, 800, 1200, 1600, 2400, 3000, 4500 with 4500 being the best.

    The footage will be saved to the following path::

        footage_dir/{eid}-{gamekey}.mp4

    If footage is already at that path, then a LookupError is raised.

    A full game's worth of footage at a quality of 1600 is about 2GB.
    """
    save_path = _full_path(footage_dir, gobj)
    if os.access(save_path, os.R_OK):
        raise LookupError('Footage path "%s" already exists.' % save_path)

    url = footage_url(gobj, quality)

    # Probe the URL with a cheap HEAD request first so a dead link is
    # reported immediately rather than surfacing as an ffmpeg failure.
    resp, _ = httplib2.Http().request(url, 'HEAD')
    status = resp['status']
    if status != '200':
        print >> sys.stderr, 'BAD URL (http status %s) for game %s: %s' \
            % (status, _nice_game(gobj), url)
        print >> sys.stderr, 'FAILED to download game %s' % _nice_game(gobj)
        return

    # A dry run grabs only the first 30 seconds of footage.
    cmd = ['ffmpeg', '-i', url]
    if dry_run:
        cmd += ['-t', '30']
    cmd += ['-strict', '-2', save_path]

    print >> sys.stderr, 'Downloading game %s %s' \
        % (gobj.eid, _nice_game(gobj))
    ok = _run_command(cmd)
    if ok:
        print >> sys.stderr, 'DONE with game %s' % _nice_game(gobj)
    else:
        print >> sys.stderr, 'FAILED to download game %s' % _nice_game(gobj)
220
221
223 try:
224 p = subprocess.Popen(cmd,
225 stdout=subprocess.PIPE,
226 stderr=subprocess.STDOUT)
227 output = p.communicate()[0].strip()
228
229 if p.returncode > 0:
230 err = subprocess.CalledProcessError(p.returncode, cmd)
231 err.output = output
232 raise err
233 except subprocess.CalledProcessError, e:
234 indent = lambda s: '\n'.join(map(lambda l: ' %s' % l, s.split('\n')))
235 print >> sys.stderr, "Could not run '%s' (exit code %d):\n%s" \
236 % (' '.join(cmd), e.returncode, indent(e.output))
237 return False
238 except OSError, e:
239 print >> sys.stderr, "Could not run '%s' (errno: %d): %s" \
240 % (' '.join(cmd), e.errno, e.strerror)
241 return False
242 return True
243
244
246 """
247 Returns an ordered dictionary of all plays for a particular game.
248
249 The game must be a nflgame.game.Game object.
250
251 If there is a problem retrieving the data, None is returned.
252
253 If the game is over, then the XML data is saved to disk.
254 """
255 if gobj.game_over() and gobj.eid in __play_cache:
256 return __play_cache[gobj.eid]
257
258 rawxml = _get_xml_data((gobj.eid, gobj.gamekey))
259 ps = _xml_play_data(rawxml)
260 if ps is None:
261 return None
262 if len(ps) == 0:
263 print >> sys.stderr, 'Could not find ArchiveTCIN field in XML data. ' \
264 'This field provides the start time of each play.'
265 return None
266 __play_cache[gobj.eid] = ps
267
268
269 if gobj.game_over():
270 fp = _xmlf % (gobj.eid, gobj.gamekey)
271 try:
272 print >> gzip.open(fp, 'w+'), rawxml,
273 except IOError:
274 print >> sys.stderr, 'Could not cache XML data. Please make ' \
275 '"%s" writable.' % path.dirname(fp)
276 return ps
277
278
def play(gobj, playid):
    """
    Returns a Play object given a game and a play id. The game must be
    a nflgame.game.Game object.

    If a play with the given id does not exist, None is returned.
    """
    ps = plays(gobj)
    # plays() returns None when the play-by-play meta data could not be
    # retrieved; report that as "no such play" instead of letting
    # None.get blow up with an AttributeError.
    if ps is None:
        return None
    return ps.get(playid, None)
287
288
289 -class Play (object):
290 """
291 Represents a single play with meta data that ties it to game footage.
292 The footage_start corresponds to the 'ArchiveTCIN', which is when
293 the play starts. Since there is no record of when a play stops, the
294 duration is computed by subtracting the start time from the start
295 time of the next play. If it's the last play recorded, then the
296 duration is None.
297
298 The play id is the foreign key that maps to play data stored in nflgame.
299 """
    def __init__(self, start, duration, playid):
        # start: a PlayTime for the play's 'ArchiveTCIN' (when the play
        # begins in the footage). duration: seconds until the next play's
        # start, or None for the last recorded play. playid: the foreign
        # key mapping to play data stored in nflgame.
        self.start, self.duration, self.playid = start, duration, playid
302
304 """Returns a string play id padded with zeroes."""
305 return '%04d' % int(self.playid)
306
308 return '(%s, %s, %s)' % (self.playid, self.start, self.duration)
309
310
312 """
313 Represents a footage time point, in the format HH:MM:SS:MM
314 """
316 self.point = point
317
318 try:
319 parts = map(int, self.point.split(':'))
320 except ValueError:
321 assert False, 'Bad play time format: %s' % self.point
322
323 if len(parts) != 4:
324 assert False, 'Expected 4 parts but got %d in: %s' \
325 % (len(parts), self.point)
326
327 self.hh, self.mm, self.ss, self.milli = parts
328
329
330 self.milli *= 10
331
333 """
334 Returns this time point rounded to the nearest second.
335 """
336 secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
337 if self.milli >= 50:
338 secs += 1
339 return secs
340
342 """
343 Returns this time point as fractional seconds based on milliseconds.
344 """
345 secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
346 secs = (1000 * secs) + self.milli
347 return float(secs) / 1000.0
348
351
353 """
354 Returns the difference rounded to nearest second between
355 two time points. The 'other' time point must take place before the
356 current time point.
357 """
358 assert other <= self, '%s is not <= than %s' % (other, self)
359 return int(round(self.fractional() - other.fractional()))
360
363
364
366 """
367 Parses the XML raw data given into an ordered dictionary of Play
368 objects. The dictionary is keyed by play id.
369 """
370 if data is None:
371 return None
372
373
374
375 rows = []
376 for row in bs4.BeautifulSoup(data).find_all('row'):
377 playid = row.find('id')
378 if not playid or not row.find('catin'):
379 continue
380 playid = playid.get_text().strip()
381
382 start = row.find('archivetcin')
383 if not start:
384 continue
385 start = PlayTime(start.get_text().strip())
386
387
388 if len(rows) > 0 and start < rows[-1][1]:
389 continue
390 rows.append((playid, start))
391
392 d = OrderedDict()
393 for i, (playid, start) in enumerate(rows):
394 duration = None
395 if i < len(rows) - 1:
396 duration = rows[i+1][1] - start
397 d[playid] = Play(start, duration, playid)
398 return d
399
400
402 """
403 Returns the XML play data corresponding to the game given. A game must
404 be specified as a tuple: the first element should be an eid and the second
405 element should be a game key. For example, ('2012102108', '55604').
406
407 If the XML data is already on disk, it is read, decompressed and returned.
408
409 Otherwise, the XML data is downloaded from the NFL web site. If the data
410 doesn't exist yet or there was an error, _get_xml_data returns None.
411
412 If game is None, then the XML data is read from the file at fpath.
413 """
414 assert game is not None or fpath is not None
415
416 if fpath is not None:
417 return gzip.open(fpath).read()
418
419 fpath = _xmlf % (game[0], game[1])
420 if os.access(fpath, os.R_OK):
421 return gzip.open(fpath).read()
422 try:
423 year = int(game[0][0:4])
424 month = int(game[0][4:6])
425 if month <= 3:
426 year -= 1
427 u = _xml_base_url % (year, game[1])
428 return urllib2.urlopen(u, timeout=10).read()
429 except urllib2.HTTPError, e:
430 print >> sys.stderr, e
431 except socket.timeout, e:
432 print >> sys.stderr, e
433 return None
434