1 """
2 Introduction
3 ============
4 A simple library to download, slice and search NFL game footage on a
5 play-by-play basis.
6
7 This library comes with preloaded play-by-play meta data, which describes the
8 start time of each play in the game footage. However, the actual footage does
9 not come with this library and is not released by me. This package therefore
10 provides utilities to batch download NFL Game Footage from the original source.
11
12 Once game footage is downloaded, you can use this library to search plays and
13 construct a playlist to play in any video player.
14 """
15
16 import gzip
17 import os
18 import os.path as path
19 import socket
20 import sys
21 import urllib2
22
23 import bs4
24
25 import eventlet
26 httplib2 = eventlet.import_patched('httplib2')
27 import eventlet.green.subprocess as subprocess
28
29 from nflgame import OrderedDict
30
31 _xmlf = path.join(path.split(__file__)[0], 'pbp-xml', '%s-%s.xml.gz')
32 _xml_base_url = 'http://e2.cdnl3.neulion.com/nfl/edl/nflgr/%d/%s.xml'
33
34 _footage_url = 'http://nlds82.cdnl3nl.neulion.com/nlds_vod/nfl/vod/' \
35 '%s/%s/%s/%s/2_%s_%s_%s_%s_h_whole_1_%s.mp4.m3u8'
36
37 __play_cache = {}
38
39
45
46
58
59
61 return path.join(_game_path(footage_dir, g), 'full.mp4')
62
63
65 return path.join(footage_dir, '%s-%s' % (g.eid, g.gamekey))
66
67
69 return '(Season: %s, Week: %s, %s)' \
70 % (gobj.schedule['year'], gobj.schedule['week'], gobj)
71
72
86
87
def download(footage_dir, gobj, quality='1600', dry_run=False):
    """
    Starts an ffmpeg process to download the full footage of the given
    game with the quality provided. The qualities available are:
    400, 800, 1200, 1600, 2400, 3000, 4500 with 4500 being the best.

    The footage will be saved to the following path::

        footage_dir/{eid}-{gamekey}/full.mp4

    If footage is already at that path, then a LookupError is raised.
    The game directory itself may already exist (for example after an
    earlier attempt that failed); it is only created when missing.

    If dry_run is true, '-t 30' is added to the ffmpeg command line.

    Nothing is returned. A footage URL that does not answer a HEAD request
    with status 200, a failure to start ffmpeg, or a non-zero ffmpeg exit
    status is reported on stderr instead of being raised.

    A full game's worth of footage at a quality of 1600 is about 2GB.
    """
    fp = _full_path(footage_dir, gobj)
    if os.access(fp, os.R_OK):
        raise LookupError('Footage path "%s" already exists.' % fp)

    url = footage_url(gobj, quality)

    # Check that the playlist exists before creating any directories or
    # starting ffmpeg.
    resp, _ = httplib2.Http().request(url, 'HEAD')
    if resp['status'] != '200':
        print >> sys.stderr, 'BAD URL (http status %s) for game %s: %s' \
            % (resp['status'], _nice_game(gobj), url)
        print >> sys.stderr, 'FAILED to download game %s' % _nice_game(gobj)
        return

    # os.makedirs raises OSError when the directory is already there, which
    # happens whenever a previous download of this game did not finish.
    game_dir = _game_path(footage_dir, gobj)
    if not path.isdir(game_dir):
        os.makedirs(game_dir)

    cmd = ['ffmpeg', '-i', url]
    if dry_run:
        cmd += ['-t', '30']
    cmd += ['-strict', '-2', fp]

    print >> sys.stderr, 'Downloading game %s %s' \
        % (gobj.eid, _nice_game(gobj))
    try:
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        output = p.communicate()[0].strip()
    except OSError as e:
        print >> sys.stderr, "Could not run '%s' (errno: %d): %s" \
            % (' '.join(cmd), e.errno, e.strerror)
        print >> sys.stderr, 'FAILED to download game %s' % _nice_game(gobj)
        return

    # A negative return code means ffmpeg was terminated by a signal; that
    # is a failure too, so compare against zero rather than "> 0".
    if p.returncode != 0:
        indented = '\n'.join([' %s' % line for line in output.split('\n')])
        print >> sys.stderr, "Could not run '%s' (exit code %d):\n%s" \
            % (' '.join(cmd), p.returncode, indented)
        print >> sys.stderr, 'FAILED to download game %s' % _nice_game(gobj)
        return

    print >> sys.stderr, 'DONE with game %s' % _nice_game(gobj)
146
147
def plays(gobj):
    """
    Returns an ordered dictionary of all plays for a particular game.

    The game must be a nflgame.game.Game object.

    If there is a problem retrieving the data, None is returned.

    Results are remembered per eid in the module-level play cache, but the
    cache is only consulted once the game is over; a game still in progress
    is looked up again on every call.

    If the game is over, then the XML data is saved to disk. A failure to
    write that file is reported on stderr and does not affect the result.
    """
    if gobj.game_over() and gobj.eid in __play_cache:
        return __play_cache[gobj.eid]

    rawxml = _get_xml_data((gobj.eid, gobj.gamekey))
    ps = _xml_play_data(rawxml)
    if ps is None:
        return None
    __play_cache[gobj.eid] = ps

    if gobj.game_over():
        fp = _xmlf % (gobj.eid, gobj.gamekey)
        try:
            # Close the gzip stream explicitly so the compressed data is
            # flushed to disk now instead of whenever the object happens to
            # be garbage collected.
            f = gzip.open(fp, 'w+')
            try:
                f.write(rawxml)
            finally:
                f.close()
        except IOError:
            print >> sys.stderr, 'Could not cache XML data. Please make ' \
                '"%s" writable.' % path.dirname(fp)
    return ps
176
177
def play(gobj, playid):
    """
    Returns a Play object given a game and a play id. The game must be
    a nflgame.game.Game object.

    If a play with the given id does not exist, None is returned. None is
    also returned when the play data for the game could not be retrieved.
    """
    game_plays = plays(gobj)
    if game_plays is None:
        # plays() signals a retrieval problem with None; there is nothing
        # to look the play id up in.
        return None
    return game_plays.get(playid, None)
186
187
class Play (object):
    """
    A single play together with the meta data that locates it in the game
    footage.

    start is the point in the footage where the play begins (it comes from
    the 'ArchiveTCIN' field of the play-by-play data). The data has no
    record of when a play stops, so duration is the difference between this
    play's start and the start of the next play; for the last recorded play
    the duration is None.

    playid is the foreign key that maps to play data stored in nflgame.
    """
    def __init__(self, start, duration, playid):
        self.playid = playid
        self.start = start
        self.duration = duration

    def __str__(self):
        fields = (self.playid, self.start, self.duration)
        return '(%s, %s, %s)' % fields
204
205
207 """
208 Represents a footage time point, in the format HH:MM:SS:MM
209 """
211 self.point = point
212
213 try:
214 parts = map(int, self.point.split(':'))
215 except ValueError:
216 assert False, 'Bad play time format: %s' % self.point
217
218 if len(parts) != 4:
219 assert False, 'Expected 4 parts but got %d in: %s' \
220 % (len(parts), self.point)
221
222 self.hh, self.mm, self.ss, self.milli = parts
223
224
225 self.milli *= 10
226
228 """
229 Returns this time point rounded to the nearest second.
230 """
231 secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
232 if self.milli >= 50:
233 secs += 1
234 return secs
235
237 """
238 Returns this time point as fractional seconds based on milliseconds.
239 """
240 secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss
241 secs = (1000 * secs) + self.milli
242 return float(secs) / 1000.0
243
246
248 """
249 Returns the difference rounded to nearest second between
250 two time points. The 'other' time point must take place before the
251 current time point.
252 """
253 assert other <= self, '%s is not <= than %s' % (other, self)
254 return int(round(self.fractional() - other.fractional()))
255
258
259
def _xml_play_data(data):
    """
    Parses the XML raw data given into an ordered dictionary of Play
    objects. The dictionary is keyed by play id.

    If data is None, None is returned.

    Rows without an 'id', 'CATIN' or 'ArchiveTCIN' element are skipped, as
    are rows whose start time is earlier than that of the last row kept.
    """
    if data is None:
        return None

    # Collect (playid, start) pairs first: a play's duration can only be
    # computed once the start time of the following play is known.
    rows = []
    for row in bs4.BeautifulSoup(data).find_all('row'):
        # BeautifulSoup is built without naming a parser, so one of its
        # HTML tree builders is used, and those lower-case every tag name.
        # The elements must therefore be looked up in lower case.
        playid = row.find('id')
        if playid is None or row.find('catin') is None:
            continue
        # get_text() is a method; the 'text' attribute is a plain string
        # property and cannot be called.
        playid = playid.get_text().strip()

        start = row.find('archivetcin')
        if start is None:
            continue
        start = PlayTime(start.get_text().strip())

        # Drop rows that would make the start times go backwards.
        if len(rows) > 0 and start < rows[-1][1]:
            continue
        rows.append((playid, start))

    d = OrderedDict()
    for i, (playid, start) in enumerate(rows):
        duration = None
        if i + 1 < len(rows):
            duration = rows[i + 1][1] - start
        d[playid] = Play(start, duration, playid)
    return d
294
295
def _get_xml_data(game=None, fpath=None):
    """
    Returns the XML play data corresponding to the game given. A game must
    be specified as a tuple: the first element should be an eid and the second
    element should be a game key. For example, ('2012102108', '55604').

    If the XML data is already on disk, it is read, decompressed and returned.

    Otherwise, the XML data is downloaded from the NFL web site. If the data
    doesn't exist yet or there was an error (HTTP error status, unreachable
    server, timeout), the error is printed to stderr and _get_xml_data
    returns None.

    If fpath is given, the XML data is read from the gzip file at fpath and
    game is ignored. At least one of game and fpath must not be None.
    """
    assert game is not None or fpath is not None

    # Decide which gzip file, if any, holds the data: the explicit fpath,
    # or the package's cached copy for this game when it is readable.
    cached = fpath
    if cached is None:
        candidate = _xmlf % (game[0], game[1])
        if os.access(candidate, os.R_OK):
            cached = candidate

    if cached is not None:
        f = gzip.open(cached)
        try:
            return f.read()
        finally:
            f.close()

    try:
        # The eid starts with the game date as YYYYMM. The URL uses one
        # year less when the month is March or earlier (presumably because
        # those games belong to the season that started the year before).
        year = int(game[0][0:4])
        month = int(game[0][4:6])
        if month <= 3:
            year -= 1
        u = _xml_base_url % (year, game[1])
        resp = urllib2.urlopen(u, timeout=10)
        try:
            return resp.read()
        finally:
            resp.close()
    except urllib2.URLError as e:
        # URLError is the base class of HTTPError, so this covers bad HTTP
        # statuses as well as connection level failures.
        print >> sys.stderr, e
    except socket.timeout as e:
        print >> sys.stderr, e
    return None
329