1 from collections import namedtuple
2 import os
3 import os.path as path
4 import gzip
5 import json
6 import sys
7 import urllib2
8
9 from nflgame import OrderedDict
10 import nflgame.player
11 import nflgame.schedule
12 import nflgame.seq
13 import nflgame.statmap
14
# Largest native integer. It is negated elsewhere in this module to serve as
# a "smaller than any real value" default when taking the max of two stats.
_MAX_INT = sys.maxint

# Path template of the gzipped JSON cache, kept in the 'gamecenter-json'
# directory next to this module. The '%s' is filled in with a game id.
_jsonf = path.join(path.split(__file__)[0], 'gamecenter-json', '%s.json.gz')
# URL template of the GameCenter JSON feed. Both '%s' placeholders are filled
# in with the same game id.
_json_base_url = "http://www.nfl.com/liveupdate/game-center/%s/%s_gtd.json"
19
GameDiff = namedtuple('GameDiff', 'before after plays players')
"""
Captures what changed between two snapshots of the same game: the two
snapshots themselves plus the plays and player statistics that differ.
"""

TeamStats = namedtuple(
    'TeamStats',
    'first_downs total_yds passing_yds rushing_yds penalty_cnt penalty_yds '
    'turnovers punt_cnt punt_yds punt_avg pos_time')
"""Team-level statistics covering one whole game."""
35 """
36 Represents field position.
37
38 The representation here is an integer offset where the 50 yard line
39 corresponds to '0'. Being in the own territory corresponds to a negative
40 offset while being in the opponent's territory corresponds to a positive
41 offset.
42
43 e.g., NE has the ball on the NE 45, the offset is -5.
44 e.g., NE has the ball on the NYG 2, the offset is 48.
45
46 This representation allows for gains in any particular play to be added
47 to the field offset to get the new field position as the result of the
48 play.
49 """
50 - def __new__(cls, pos_team=None, yardline=None, offset=None):
51 if not yardline and offset is None:
52 return None
53 return object.__new__(cls)
54
55 - def __init__(self, pos_team=None, yardline=None, offset=None):
56 """
57 pos_team is the team on offense, and yardline is a string formatted
58 like 'team-territory yard-line'. e.g., "NE 32".
59
60 An offset can be given directly by specifying an integer for offset.
61 """
62 if isinstance(offset, int):
63 self.offset = offset
64 return
65 if yardline == '50':
66 self.offset = 0
67 return
68
69 territory, yd_str = yardline.split()
70 yd = int(yd_str)
71 if territory == pos_team:
72 self.offset = -(50 - yd)
73 else:
74 self.offset = 50 - yd
75
77 if isinstance(other, int):
78 return cmp(self.offset, other)
79 return cmp(self.offset, other.offset)
80
82 if self.offset > 0:
83 return 'OPP %d' % (50 - self.offset)
84 elif self.offset < 0:
85 return 'OWN %d' % (50 + self.offset)
86 else:
87 return 'MIDFIELD'
88
90 """
91 Returns a new field position with the yards added to self.
92 Yards may be negative.
93 """
94 newoffset = max(-50, min(50, self.offset + yards))
95 return FieldPosition(offset=newoffset)
96
99 """
100 Represents the amount of time a drive lasted in (minutes, seconds).
101 """
103 self.clock = clock
104
105 try:
106 self.minutes, self.seconds = map(int, self.clock.split(':'))
107 except ValueError:
108 self.minutes, self.seconds = 0, 0
109
111 """
112 Returns the total number of seconds that this possession lasted for.
113 """
114 return self.seconds + self.minutes * 60
115
117 a, b = (self.minutes, self.seconds), (other.minutes, other.seconds)
118 return cmp(a, b)
119
127
136
139
142 """
143 Represents the current time in a game. Namely, it keeps track of the
144 quarter and clock time. Also, GameClock can represent whether
145 the game hasn't started yet, is half time or if it's over.
146 """
148 self.qtr = qtr
149 self.clock = clock
150
151 try:
152 self.__minutes, self.__seconds = map(int, self.clock.split(':'))
153 except ValueError:
154 self.__minutes, self.__seconds = 0, 0
155 except AttributeError:
156 self.__minutes, self.__seconds = 0, 0
157 try:
158 self.__qtr = int(self.qtr)
159 if self.__qtr >= 3:
160 self.__qtr += 1
161 except ValueError:
162 if self.is_pregame():
163 self.__qtr = 0
164 elif self.is_halftime():
165 self.__qtr = 3
166 elif self.is_final():
167 self.__qtr = sys.maxint
168 else:
169 assert False, 'Unknown QTR value: "%s"' % self.qtr
170
171 @property
174
175 @quarter.setter
177 if isinstance(value, int):
178 assert value >= 0 and value <= 4
179 self.qtr = str(value)
180 self.__qtr = value
181 else:
182 self.qtr = value
183 self.__qtr = 0
184
186 return self.qtr == 'Pregame'
187
189 return self.qtr == 'Halftime'
190
192 return 'final' in self.qtr.lower()
193
195 if self.__qtr != other.__qtr:
196 return cmp(self.__qtr, other.__qtr)
197 elif self.__minutes != other.__minutes:
198 return cmp(other.__minutes, self.__minutes)
199 return cmp(other.__seconds, self.__seconds)
200
202 """
203 Returns a nicely formatted string indicating the current time of the
204 game. Examples include "Q1 10:52", "Q4 1:25", "Pregame", "Halftime"
205 and "Final".
206 """
207 try:
208 q = int(self.qtr)
209 return 'Q%d %s' % (q, self.clock)
210 except ValueError:
211 return self.qtr
212
213
214 -class Game (object):
215 """
216 Game represents a single pre- or regular-season game. It provides a window
217 into the statistics of every player that played into the game, along with
218 the winner of the game, the score and a list of all the scoring plays.
219 """
220
def __new__(cls, eid=None, fpath=None):
    """
    Loads the raw JSON for a game and allocates the Game that wraps it.

    eid is the GameCenter game identifier and fpath the path of a gzipped
    JSON file; both are handed to _get_json_data. When eid is None, the
    game id is taken from the first top-level key of the JSON whose value
    is a dictionary.

    On success the new instance carries rawData (the undecoded JSON
    text), eid and data (the decoded entry for this game).

    Returns None when the data cannot be downloaded, is missing or empty,
    is not valid JSON, or holds no entry for the game.
    """
    try:
        rawData = _get_json_data(eid, fpath)
    except urllib2.URLError:
        return None
    if rawData is None or rawData.strip() == '{}':
        return None

    try:
        payload = json.loads(rawData)
    except ValueError:
        return None

    game = object.__new__(cls)
    game.rawData = rawData

    if eid is not None:
        # A payload without the requested game is treated like any other
        # "no usable data" case instead of leaking a KeyError.
        try:
            game.data = payload[eid]
        except KeyError:
            return None
        game.eid = eid
    else:
        game.eid = None
        for key, value in payload.iteritems():
            if isinstance(value, dict):
                game.eid, game.data = key, value
                break
        # This used to be an `assert`, which disappears under -O and
        # would let a game without an id reach __init__.
        if game.eid is None:
            return None

    return game
249
def __init__(self, eid=None, fpath=None):
    """
    Creates a new Game instance given a game identifier, or the path of
    a gzipped JSON file when eid is None. By the time this runs, __new__
    has already stored the JSON on self.rawData / self.data and the game
    id on self.eid.

    The game identifier is used by NFL.com's GameCenter live update web
    pages. It is used to construct a URL to download JSON data for the
    game.

    If the game has been completed, the JSON data will be cached to disk
    so that subsequent accesses will not re-download the data but instead
    read it from disk.

    When the JSON data is written to disk, it is compressed using gzip.
    """
    # Schedule entry for this game, or None if the id is not scheduled.
    self.schedule = nflgame.schedule.games_byid.get(self.eid, None)

    # Team abbreviations and whole-game team statistics.
    self.home = self.data['home']['abbr']
    self.away = self.data['away']['abbr']
    self.stats_home = _json_team_stats(self.data['home']['stats']['team'])
    self.stats_away = _json_team_stats(self.data['away']['stats']['team'])

    # NOTE(review): unlike self.schedule above, this lookup raises
    # KeyError for a game id missing from the schedule -- confirm that
    # this is intended.
    self.gamekey = nflgame.schedule.games_byid[self.eid]['gamekey']
    self.time = GameClock(self.data['qtr'], self.data['clock'])
    self.down = _tryint(self.data['down'])
    self.togo = _tryint(self.data['togo'])
    self.score_home = int(self.data['home']['score']['T'])
    self.score_away = int(self.data['away']['score']['T'])
    # Per-period scores become attributes score_home_q1 .. score_away_q5.
    for q in (1, 2, 3, 4, 5):
        for team in ('home', 'away'):
            score = self.data[team]['score'][str(q)]
            self.__dict__['score_%s_q%d' % (team, q)] = int(score)

    if not self.game_over():
        self.winner = None
        # Set explicitly so winner and loser are always both defined.
        self.loser = None
    else:
        if self.score_home > self.score_away:
            self.winner = self.home
            self.loser = self.away
        elif self.score_away > self.score_home:
            self.winner = self.away
            self.loser = self.home
        else:
            # A tie: both teams are reported as winner and as loser.
            self.winner = '%s/%s' % (self.home, self.away)
            self.loser = '%s/%s' % (self.home, self.away)

    # Scoring summary as a list of strings, ordered by numeric key.
    self.scores = []
    for k in sorted(map(int, self.data['scrsummary'])):
        play = self.data['scrsummary'][str(k)]
        s = '%s - Q%d - %s - %s' \
            % (play['team'], play['qtr'], play['type'], play['desc'])
        self.scores.append(s)

    # Cache finished games. The check must use self.eid, which is the
    # path save() writes to; the eid argument is None when the game was
    # loaded from fpath, which made the check look at "None.json.gz" and
    # re-write the cache on every load.
    if self.game_over() and not os.access(_jsonf % self.eid, os.R_OK):
        self.save()
309
311 """Returns true if team (i.e., 'NE') is the home team."""
312 return team == self.home
313
315 """Returns the year of the season this game belongs to."""
316 year = int(self.eid[0:4])
317 month = int(self.eid[4:6])
318 if month <= 3:
319 year -= 1
320 return year
321
323 """game_over returns true if the game is no longer being played."""
324 return self.time.is_final()
325
327 """playing returns true if the game is currently being played."""
328 return not self.time.is_pregame() and not self.time.is_final()
329
def save(self, fpath=None):
    """
    Save the JSON data, gzip-compressed, to fpath. When fpath is None,
    the cache path built from the game id is used. This is done
    automatically if the game is over.

    If the file cannot be written, a message is printed to stderr and
    the error is otherwise ignored; nothing is returned either way.
    """
    if fpath is None:
        fpath = _jsonf % self.eid
    try:
        cache = gzip.open(fpath, 'wb')
        # Close explicitly: the gzip trailer is only written on close, and
        # relying on garbage collection to do that is not dependable.
        try:
            cache.write(self.rawData)
        finally:
            cache.close()
    except IOError:
        sys.stderr.write("Could not cache JSON data. Please "
                         "make '%s' writable.\n" % os.path.dirname(fpath))
343
345 """
346 Returns a string of the score of the game.
347 e.g., "NE (32) vs. NYG (0)".
348 """
349 return '%s (%d) at %s (%d)' \
350 % (self.away, self.score_away, self.home, self.score_home)
351
353 """
354 Returns a GenPlayers sequence of player statistics that combines
355 game statistics and play statistics by taking the max value of
356 each corresponding statistic.
357
358 This is useful when accuracy is desirable. Namely, using only
359 play-by-play data or using only game statistics can be unreliable.
360 That is, both are inconsistently correct.
361
362 Taking the max values of each statistic reduces the chance of being
363 wrong (particularly for stats that are in both play-by-play data
364 and game statistics), but does not eliminate them.
365 """
366 game_players = list(self.players)
367 play_players = list(self.drives.plays().players())
368 max_players = OrderedDict()
369
370
371
372
373
374
375 for pplay in play_players:
376 newp = nflgame.player.GamePlayerStats(pplay.playerid,
377 pplay.name, pplay.home,
378 pplay.team)
379 maxstats = {}
380 for stat, val in pplay._stats.iteritems():
381 maxstats[stat] = val
382
383 newp._overwrite_stats(maxstats)
384 max_players[pplay.playerid] = newp
385
386 for newp in max_players.itervalues():
387 for pgame in game_players:
388 if pgame.playerid != newp.playerid:
389 continue
390
391 maxstats = {}
392 for stat, val in pgame._stats.iteritems():
393 maxstats[stat] = max([val,
394 newp._stats.get(stat, -_MAX_INT)])
395
396 newp._overwrite_stats(maxstats)
397 break
398 return nflgame.seq.GenPlayerStats(max_players)
399
401 if name == 'players':
402 self.__players = _json_game_player_stats(self, self.data)
403 self.players = nflgame.seq.GenPlayerStats(self.__players)
404 return self.players
405 if name == 'drives':
406 self.__drives = _json_drives(self, self.home, self.data['drives'])
407 self.drives = nflgame.seq.GenDrives(self.__drives)
408 return self.drives
409
411 return diff(other, self)
412
415
416
def diff(before, after):
    """
    Returns the difference between two points of time in a game in terms of
    plays and player statistics. The return value is a GameDiff namedtuple
    with four attributes: before and after (the two games as given), and
    plays and players, which contain *only* the data that is in the after
    game but not in the before game.

    plays is a list of the plays of after that do not compare equal to any
    play of before. players is a player statistics sequence holding, for
    each player of after, either the result of subtracting the player's
    before statistics (omitted when that subtraction yields None) or, for
    players absent from before, the after statistics unchanged.

    Both games must have the same eid.

    This is useful for sending alerts where you're guaranteed to see each
    play statistic only once (assuming NFL.com behaves itself).
    """
    assert after.eid == before.eid

    after_plays = list(after.drives.plays())
    before_plays = list(before.drives.plays())
    # Membership is decided by play equality, so a plain list scan is kept
    # here rather than hashing the plays.
    plays = [play for play in after_plays if play not in before_plays]

    after_players = list(after.max_player_stats())
    # Index the before statistics by player id once, instead of scanning
    # them again for every player of the after game.
    before_by_id = dict((bplayer.playerid, bplayer)
                        for bplayer in before.max_player_stats())

    _players = OrderedDict()
    for aplayer in after_players:
        bplayer = before_by_id.get(aplayer.playerid)
        if bplayer is None:
            # New since the before snapshot: report everything.
            _players[aplayer.playerid] = aplayer
            continue
        pdiff = aplayer - bplayer
        if pdiff is not None:
            _players[aplayer.playerid] = pdiff
    players = nflgame.seq.GenPlayerStats(_players)

    return GameDiff(before=before, after=after, plays=plays, players=players)
457
458
459 -class Drive (object):
460 """
461 Drive represents a single drive in an NFL game. It contains a list
462 of all plays that happened in the drive, in chronological order.
463 It also contains meta information about the drive such as the start
464 and stop times and field position, length of possession, the number
465 of first downs and a short descriptive string of the result of the
466 drive.
467
468 """
469 - def __init__(self, game, drive_num, home_team, data):
470 if data is None:
471 return
472 self.game = game
473 self.drive_num = drive_num
474 self.team = data['posteam']
475 self.home = self.team == home_team
476 self.first_downs = int(data['fds'])
477 self.result = data['result']
478 self.penalty_yds = int(data['penyds'])
479 self.total_yds = int(data['ydsgained'])
480 self.pos_time = PossessionTime(data['postime'])
481 self.play_cnt = int(data['numplays'])
482 self.field_start = FieldPosition(self.team, data['start']['yrdln'])
483 self.time_start = GameClock(data['start']['qtr'],
484 data['start']['time'])
485
486
487
488 if data['end']['yrdln'].strip():
489 self.field_end = FieldPosition(self.team, data['end']['yrdln'])
490 else:
491 self.field_end = None
492 playids = sorted(map(int, data['plays'].keys()), reverse=True)
493 for pid in playids:
494 yrdln = data['plays'][str(pid)]['yrdln'].strip()
495 if yrdln:
496 self.field_end = FieldPosition(self.team, yrdln)
497 break
498 if self.field_end is None:
499 self.field_end = FieldPosition(self.team, '50')
500
501
502
503
504
505
506
507 maxq = str(max(map(int, [p['qtr'] for p in data['plays'].values()])))
508 self.time_end = GameClock(maxq, data['end']['time'])
509
510
511
512
513
514 if self.time_end <= self.time_start \
515 and self.time_end.quarter in (1, 3):
516 self.time_end.quarter += 1
517
518 self.__plays = _json_plays(self, data['plays'])
519 self.plays = nflgame.seq.GenPlays(self.__plays)
520
522 """
523 Adds the statistics of two drives together.
524
525 Note that once two drives are added, the following fields
526 automatically get None values: result, field_start, field_end,
527 time_start and time_end.
528 """
529 assert self.team == other.team, \
530 'Cannot add drives from different teams "%s" and "%s".' \
531 % (self.team, other.team)
532 new_drive = Drive(None, 0, '', None)
533 new_drive.team = self.team
534 new_drive.home = self.home
535 new_drive.first_downs = self.first_downs + other.first_downs
536 new_drive.penalty_yds = self.penalty_yds + other.penalty_yds
537 new_drive.total_yds = self.total_yds + other.total_yds
538 new_drive.pos_time = self.pos_time + other.pos_time
539 new_drive.play_cnt = self.play_cnt + other.play_cnt
540 new_drive.__plays = self.__plays + other.__plays
541 new_drive.result = None
542 new_drive.field_start = None
543 new_drive.field_end = None
544 new_drive.time_start = None
545 new_drive.time_end = None
546 return new_drive
547
549 return '%s (Start: %s, End: %s) %s' \
550 % (self.team, self.time_start, self.time_end, self.result)
551
552
553 -class Play (object):
554 """
555 Play represents a single play. It contains a list of all players
556 that participated in the play (including offense, defense and special
557 teams). The play also includes meta information about what down it
558 is, field position, clock time, etc.
559
560 Play objects also contain team-level statistics, such as whether the
561 play was a first down, a fourth down failure, etc.
562 """
def __init__(self, drive, playid, data):
    """
    Builds a play from its JSON entry.

    drive is the Drive this play belongs to, playid the identifier of the
    play and data the JSON entry of the play. Besides the descriptive
    attributes, every statistic found in the entry is stored both as an
    attribute of the play and in the self._stats dictionary.
    """
    self.data = data
    self.drive = drive
    self.playid = playid
    self.team = data['posteam']
    self.home = self.drive.home
    self.desc = data['desc']
    self.note = data['note']
    self.down = int(data['down'])
    self.yards_togo = int(data['ydstogo'])
    self.touchdown = 'touchdown' in self.desc.lower()
    self._stats = {}

    # Without a team in possession there is no clock or field position.
    if not self.team:
        self.time, self.yardline = None, None
    else:
        self.time = GameClock(data['qtr'], data['time'])
        self.yardline = FieldPosition(self.team, data['yrdln'])

    # Entries filed under player id '0' are folded into the play itself;
    # presumably these are team-level statistics not credited to any
    # player. Repeated statistics are summed here.
    if '0' in data['players']:
        for info in data['players']['0']:
            if info['statId'] not in nflgame.statmap.idmap:
                continue
            statvals = nflgame.statmap.values(info['statId'],
                                              info['yards'])
            for k, v in statvals.iteritems():
                v = self.__dict__.get(k, 0) + v
                self.__dict__[k] = v
                self._stats[k] = v

    # The statistic events of the play, as built by _json_play_events.
    self.events = _json_play_events(data['players'])

    # Copy every player's statistics onto the play. Unlike the '0' entries
    # above, a statistic that shows up more than once is overwritten, not
    # summed, so the last player providing it wins.
    self.__players = _json_play_players(self, data['players'])
    self.players = nflgame.seq.GenPlayerStats(self.__players)
    for p in self.players:
        for k, v in p.stats.iteritems():
            self.__dict__[k] = v
            self._stats[k] = v
612
614 """Whether a player with id playerid participated in this play."""
615 return playerid in self.__players
616
618 if self.team:
619 if self.down != 0:
620 return '(%s, %s, %d and %d) %s' \
621 % (self.team, self.data['yrdln'],
622 self.down, self.yards_togo, self.desc)
623 else:
624 return '(%s, %s) %s' \
625 % (self.team, self.data['yrdln'], self.desc)
626 return self.desc
627
629 """
630 We use the play description to determine equality because the
631 play description can be changed. (Like when a play is reversed.)
632 """
633 return self.playid == other.playid and self.desc == other.desc
634
637
640 """
641 Takes a team stats JSON entry and converts it to a TeamStats namedtuple.
642 """
643 return TeamStats(
644 first_downs=int(data['totfd']),
645 total_yds=int(data['totyds']),
646 passing_yds=int(data['pyds']),
647 rushing_yds=int(data['ryds']),
648 penalty_cnt=int(data['pen']),
649 penalty_yds=int(data['penyds']),
650 turnovers=int(data['trnovr']),
651 punt_cnt=int(data['pt']),
652 punt_yds=int(data['ptyds']),
653 punt_avg=int(data['ptavg']),
654 pos_time=PossessionTime(data['top']))
655
658 """
659 Takes a home or away JSON entry and converts it to a list of Drive
660 objects.
661 """
662 drive_nums = []
663 for drive_num in data:
664 try:
665 drive_nums.append(int(drive_num))
666 except:
667 pass
668 drives = []
669 for i, drive_num in enumerate(sorted(drive_nums), 1):
670 drives.append(Drive(game, i, home_team, data[str(drive_num)]))
671 return drives
672
675 """
676 Takes a single JSON drive entry (data) and converts it to a list
677 of Play objects. This includes trying to resolve duplicate play
678 conflicts by only taking the first instance of a play.
679 """
680 plays = []
681 seen_ids = set()
682 seen_desc = set()
683 for playid in map(str, sorted(map(int, data))):
684 p = data[playid]
685 desc = (p['desc'], p['time'], p['yrdln'], p['qtr'])
686 if playid in seen_ids or desc in seen_desc:
687 continue
688 seen_ids.add(playid)
689 seen_desc.add(desc)
690 plays.append(Play(drive, playid, data[playid]))
691 return plays
692
695 """
696 Takes a single JSON play entry (data) and converts it to an OrderedDict
697 of player statistics.
698
699 play is the instance of Play that this data is part of. It is used
700 to determine whether the player belong to the home team or not.
701 """
702 players = OrderedDict()
703 for playerid, statcats in data.iteritems():
704 if playerid == '0':
705 continue
706 for info in statcats:
707 if info['statId'] not in nflgame.statmap.idmap:
708 continue
709 if playerid not in players:
710 home = play.drive.game.is_home(info['clubcode'])
711 if home:
712 team_name = play.drive.game.home
713 else:
714 team_name = play.drive.game.away
715 stats = nflgame.player.PlayPlayerStats(playerid,
716 info['playerName'],
717 home, team_name)
718 players[playerid] = stats
719 statvals = nflgame.statmap.values(info['statId'], info['yards'])
720 players[playerid]._add_stats(statvals)
721 return players
722
725 """
726 Takes a single JSON play entry (data) and converts it to a list of events.
727 """
728 temp = list()
729 for playerid, statcats in data.iteritems():
730 for info in statcats:
731 if info['statId'] not in nflgame.statmap.idmap:
732 continue
733 statvals = nflgame.statmap.values(info['statId'], info['yards'])
734 statvals['playerid'] = None if playerid == '0' else playerid
735 statvals['playername'] = info['playerName'] or None
736 statvals['team'] = info['clubcode']
737 temp.append((int(info['sequence']), statvals))
738 return [t[1] for t in sorted(temp, key=lambda t: t[0])]
739
742 """
743 Parses the 'home' and 'away' team stats and returns an OrderedDict
744 mapping player id to their total game statistics as instances of
745 nflgame.player.GamePlayerStats.
746 """
747 players = OrderedDict()
748 for team in ('home', 'away'):
749 for category in nflgame.statmap.categories:
750 if category not in data[team]['stats']:
751 continue
752 for pid, raw in data[team]['stats'][category].iteritems():
753 stats = {}
754 for k, v in raw.iteritems():
755 if k == 'name':
756 continue
757 stats['%s_%s' % (category, k)] = v
758 if pid not in players:
759 home = team == 'home'
760 if home:
761 team_name = game.home
762 else:
763 team_name = game.away
764 players[pid] = nflgame.player.GamePlayerStats(pid,
765 raw['name'],
766 home,
767 team_name)
768 players[pid]._add_stats(stats)
769 return players
770
773 """
774 Returns the JSON data corresponding to the game represented by eid.
775
776 If the JSON data is already on disk, it is read, decompressed and returned.
777
778 Otherwise, the JSON data is downloaded from the NFL web site. If the data
779 doesn't exist yet or there was an error, _get_json_data returns None.
780
781 If eid is None, then the JSON data is read from the file at fpath.
782 """
783 assert eid is not None or fpath is not None
784
785 if fpath is not None:
786 return gzip.open(fpath).read()
787
788 fpath = _jsonf % eid
789 if os.access(fpath, os.R_OK):
790 return gzip.open(fpath).read()
791 try:
792 return urllib2.urlopen(_json_base_url % (eid, eid), timeout=5).read()
793 except urllib2.HTTPError:
794 pass
795 return None
796
799 """
800 Tries to convert v to an integer. If it fails, return 0.
801 """
802 try:
803 return int(v)
804 except:
805 return 0
806