1+ #!/usr/bin/env python
2+
3+ import sys
4+ # Later when complexity will arise we will use option parser
5+ # import argparse
6+ from BeautifulSoup import BeautifulSoup
7+ import requests
8+
# Request headers: spoof a desktop Chrome user agent so ESPNcricinfo
# serves the normal HTML page instead of blocking the script.
htmlclient = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.56 Safari/536.5"
}
# Series hrefs scraped from the live-scores page (populated in main()).
urilist = []
# Scorecard hrefs for the selected series (populated in scrapresults()).
scorecards = []
# Site root; scraped hrefs are relative and get joined onto this.
espnurl = "http://www.espncricinfo.com"
# Canned user-facing messages keyed by string code
# ("0" = bad input, "1" = network error, "2" = unused placeholder).
voice = {
    "0": "Seems you are good @English. Use the index number.",
    "1": "Network Error. Try again later.",
    "2": ""
}
20+
21+ def prompt ():
22+ '''
23+ Returns response from the user
24+ '''
25+ try :
26+ return int (raw_input ("Select series: " ))
27+ except ValueError :
28+ print voice ["0" ]
29+ scrapresults (prompt ())
30+
31+ def prompt1 ():
32+ '''
33+ Returns response from the user
34+ '''
35+ try :
36+ return int (raw_input ("Enter the Scorecard index to view full scorecard: " ))
37+ except ValueError :
38+ print voice ["0" ]
39+ scrap_scorecard (prompt1 ())
40+
41+ def scrapresults (choice ):
42+ try :
43+ global htmlclient , urilist , espnurl , scorecards
44+ url = espnurl + urilist [choice - 1 ].replace ("content/current/" , "engine/" )
45+ response = requests .get (url , headers = htmlclient )
46+
47+ # Scraping Results page for the selected series
48+ if response .status_code == 200 :
49+ soup = BeautifulSoup (response .text )
50+ print soup .title .text + "\n "
51+
52+ # More work it to be done in this loop as fetch scorecards, man of the match
53+ for matchestable in soup .findAll ("div" , attrs = {"class" :"div630Pad" }):
54+ scores = [ score .text for score in matchestable .findAll ("p" , attrs = {"class" :"potMatchText mat_scores" }) ]
55+ statuses = [ status .text for status in matchestable .findAll ("p" , attrs = {"class" :"potMatchText mat_status" }) ]
56+ matches = [ matches .text for matches in matchestable .findAll ("p" , attrs = {"class" :"potMatchHeading" }) ]
57+ scorecards = [ tempo .a .get ("href" ) for tempo in matchestable .findAll ("span" , attrs = {"class" :"potMatchLink" })]
58+
59+ for num , (score , status , match ) in enumerate (zip (scores , statuses , matches )):
60+ if not score :
61+ continue
62+ else :
63+ print match
64+ print score
65+ print status
66+ print "Scorecard index: " , num + 1 , "\n "
67+ return
68+ else :
69+ print voice ["1" ]
70+ except IndexError :
71+ print "Use the index number left side. Try again."
72+ scrapresults (prompt ())
73+ except TypeError :
74+ # Too many function call creating this.
75+ #
76+ #Traceback (most recent call last):
77+ # File "cricinfo.py", line 113, in <module>
78+ # main()
79+ # File "cricinfo.py", line 108, in main
80+ # scrapresults(prompt())
81+ # File "cricinfo.py", line 29, in prompt
82+ # scrapresults(prompt())
83+ # File "cricinfo.py", line 29, in prompt
84+ # scrapresults(prompt())
85+ # File "cricinfo.py", line 48, in scrapresults
86+ # url = espnurl + urilist[choice-1].replace("content/current/", "engine/")
87+ #TypeError: unsupported operand type(s) for -: 'NoneType' and 'int'
88+ pass
89+
90+ def scrap_scorecard (choice ):
91+ try :
92+ global scorecards
93+ url = espnurl + scorecards [choice - 1 ]
94+
95+ print url
96+ except IndexError :
97+ print "Use Scorecard index. Try again."
98+ scrap_scorecard (prompt1 ())
99+ except TypeError :
100+ # Same as above
101+ pass
102+
103+ def main ():
104+ global htmlclient , urilist , espnurl
105+ url = espnurl + "/ci/engine/match/scores/live.html"
106+ response = requests .get (url , headers = htmlclient )
107+
108+ # Scraping Live matches page and getting the info for current international series as a list
109+ if response .status_code == 200 :
110+ soup = BeautifulSoup (response .text )
111+ for mainNav in soup .findAll ("div" , attrs = {"id" :"mainNav" }):
112+ for table in mainNav .findAll ("table" , attrs = {"width" :"270" , "border" :"0" , "cellspacing" :"0" , "cellpadding" :"0" }):
113+ urilist = [ uri .get ("href" ) for uri in table .findAll ("a" ) ]
114+ else :
115+ print voice ["1" ]
116+
117+ # Removing unnecessary 'ci' entry from list
118+ del urilist [- 1 ]
119+
120+ # View the series to user for selection
121+ for num , url in enumerate (urilist ):
122+ print num + 1 , url .split ("/" )[1 ]
123+
124+ scrapresults (prompt ())
125+ scrap_scorecard (prompt1 ())
126+
if __name__ == "__main__":
    # Run the scraper; exit quietly on Ctrl-C or Ctrl-D at any prompt.
    try:
        main()
    except (EOFError, KeyboardInterrupt):
        sys.exit(0)
132+ # http://www.espncricinfo.com/ci/engine/match/654033.html