Skip to content

Commit 89da12e

Browse files
viz to chp3, plus github example scraper + csv
1 parent f6b484e commit 89da12e

File tree

3 files changed

+270
-60
lines changed

3 files changed

+270
-60
lines changed

Chapter3_MCMC/IntroMCMC.ipynb

Lines changed: 61 additions & 60 deletions
Large diffs are not rendered by default.

Chapter3_MCMC/data/github_data.csv

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
Python, Javascript, Ruby, Java, Shell, PHP, days_since_creation, has_wiki, author_following, author_followers, starred_count, forked_count
2+
0,1,0,0,0,0,523,1,0,0,2,1
3+
0,1,0,0,0,0,778,1,193,95,1,0
4+
0,0,0,1,0,0,531,1,1,4,3,0
5+
0,0,1,0,0,0,396,1,1,2,2,0
6+
0,0,0,0,1,0,846,1,3,5,1,0
7+
0,0,1,0,0,0,520,1,155,56,1,0
8+
0,1,0,0,0,0,384,1,0,0,2,0
9+
1,0,0,0,0,0,705,1,33,9,1,0
10+
0,0,0,0,0,1,726,1,0,7,11,1
11+
1,0,0,0,0,0,413,1,0,2,1,0
12+
1,0,0,0,0,0,141,1,3,0,1,0
13+
1,0,0,0,0,0,1410,1,65,82,1,0
14+
0,0,0,0,0,1,175,1,0,0,0,0
15+
0,0,1,0,0,0,365,1,49,85,1,0
16+
0,0,0,0,0,1,580,1,9,4,1,0
17+
0,0,0,1,0,0,471,1,1,0,1,0
18+
0,1,0,0,0,0,353,1,5,2,2,1
19+
0,0,1,0,0,0,973,1,0,0,1,0
20+
0,0,1,0,0,0,591,1,0,0,1,0
21+
0,0,1,0,0,0,85,1,0,0,0,0
22+
0,1,0,0,0,0,431,1,0,0,1,0
23+
1,0,0,0,0,0,188,1,1,1,0,0
24+
0,0,1,0,0,0,144,1,0,0,0,0
25+
0,1,0,0,0,0,102,1,0,0,0,0
26+
0,1,0,0,0,0,249,1,0,0,2,0
27+
0,1,0,0,0,0,395,1,2,0,1,0
28+
0,1,0,0,0,0,433,1,12,69,1,0
29+
0,0,0,1,0,0,615,1,0,0,1,0
30+
0,0,1,0,0,0,707,1,0,0,1,0
31+
0,0,0,1,0,0,256,1,0,1,1,0
32+
0,0,1,0,0,0,363,1,0,0,1,0
33+
0,0,1,0,0,0,1818,1,98,162,16,6
34+
0,1,0,0,0,0,86,1,13,7,0,0
35+
0,1,0,0,0,0,284,1,1,3,2,1
36+
0,0,0,1,0,0,867,1,9,5,2,0
37+
0,0,1,0,0,0,1203,1,1,0,1,0
38+
0,1,0,0,0,0,436,1,34,4,0,0
39+
0,1,0,0,0,0,720,1,6,73,6,0
40+
0,0,1,0,0,0,948,1,0,84,18,4
41+
1,0,0,0,0,0,383,1,0,0,2,1
42+
0,0,0,1,0,0,62,1,0,0,0,0
43+
0,0,0,1,0,0,927,1,0,0,40,17
44+
0,1,0,0,0,0,1303,1,0,0,3,0
45+
0,1,0,0,0,0,113,1,0,0,0,0
46+
0,0,1,0,0,0,810,1,4,5,1,0
47+
0,0,0,0,0,1,43,1,7,11,2,0
48+
0,0,0,0,0,1,369,1,0,0,1,0
49+
0,1,0,0,0,0,449,1,106,4398,399,60
50+
0,1,0,0,0,0,200,1,0,0,0,0
51+
0,1,0,0,0,0,122,1,4,2,0,0
52+
0,0,1,0,0,0,738,1,0,0,1,0
53+
0,1,0,0,0,0,705,1,0,0,2,0
54+
0,0,1,0,0,0,291,1,0,0,1,0
55+
0,1,0,0,0,0,621,1,3,23,1,0
56+
0,0,1,0,0,0,368,1,0,0,1,0
57+
0,0,1,0,0,0,851,1,0,0,1,0
58+
0,0,0,1,0,0,58,1,0,0,0,0
59+
0,0,0,1,0,0,706,1,0,8,2,1
60+
0,0,0,0,0,1,817,1,0,1,1,1
61+
0,0,0,0,1,0,1475,1,0,4,1,0
62+
0,0,1,0,0,0,584,1,0,0,1,0
63+
1,0,0,0,0,0,841,1,0,7,1,0
64+
0,0,0,0,0,1,382,1,0,74,1,0
65+
0,0,1,0,0,0,213,1,1,0,1,0
66+
0,0,1,0,0,0,93,1,0,0,0,0
67+
0,1,0,0,0,0,356,1,1,10,2,0
68+
0,0,1,0,0,0,123,1,0,1,0,0
69+
0,0,1,0,0,0,113,1,35,63,0,0
70+
0,0,0,0,0,1,302,1,13,2,3,1
71+
0,1,0,0,0,0,550,0,19,18,2,0
72+
0,0,1,0,0,0,721,1,2,0,1,0
73+
0,0,1,0,0,0,160,1,5,6,11,6
74+
1,0,0,0,0,0,399,1,7,6,1,0
75+
0,1,0,0,0,0,206,1,15,116,2,0
76+
0,0,0,1,0,0,319,1,0,0,2,1
77+
0,1,0,0,0,0,111,1,2,3,0,0
78+
0,0,0,0,0,1,632,1,10,3,2,1
79+
0,0,0,0,0,1,1044,1,16,23,3,1
80+
0,0,0,0,0,1,194,1,0,0,0,0
81+
0,1,0,0,0,0,908,1,56,503,43,8
82+
0,0,1,0,0,0,89,1,0,0,0,0
83+
0,1,0,0,0,0,1311,1,107,119,4,1
84+
0,1,0,0,0,0,78,1,0,0,0,0
85+
0,0,1,0,0,0,564,1,0,2,1,0
86+
0,0,1,0,0,0,1049,1,0,0,1,0
87+
0,0,0,1,0,0,322,1,0,0,2,1
88+
0,0,1,0,0,0,1443,1,11,7,3,8
89+
0,0,1,0,0,0,310,1,1,0,1,0
90+
0,0,0,0,0,1,515,1,0,2,1,0
91+
0,0,1,0,0,0,563,1,0,0,1,0
92+
1,0,0,0,0,0,344,1,0,0,1,0
93+
0,1,0,0,0,0,132,1,2,2,0,0
94+
0,0,1,0,0,0,837,1,0,0,1,0
95+
0,1,0,0,0,0,459,1,1,4,1,0
96+
0,0,0,0,0,1,768,1,8,2,3,0
97+
0,0,0,0,0,1,410,1,0,4,2,1
98+
1,0,0,0,0,0,380,1,0,0,9,0
99+
0,0,0,1,0,0,859,1,14,22,1,0
100+
1,0,0,0,0,0,628,1,0,1,1,0
101+
0,0,1,0,0,0,206,0,55,50,21,2
102+
0,0,0,1,0,0,117,1,1,2,0,0
103+
0,0,0,0,1,0,237,1,0,0,1,0
104+
0,0,1,0,0,0,462,1,0,1,1,0
105+
1,0,0,0,0,0,628,1,2,3,1,0
106+
0,0,0,1,0,0,504,1,1,6,1,0
107+
0,0,1,0,0,0,283,1,0,0,1,0
108+
0,1,0,0,0,0,654,0,4,7,1,0
109+
0,1,0,0,0,0,97,1,0,0,0,0
110+
1,0,0,0,0,0,396,1,3,11,1,0
111+
0,0,0,0,1,0,179,1,0,0,0,0
112+
0,1,0,0,0,0,1743,1,3,0,1,0
113+
0,0,1,0,0,0,949,1,1,4,75,24
114+
0,0,0,1,0,0,307,1,0,0,1,1
115+
0,0,1,0,0,0,264,1,0,0,1,0
116+
0,0,1,0,0,0,193,1,4,5,0,0
117+
0,0,0,0,1,0,1148,1,4,37,2,1
118+
0,0,1,0,0,0,150,1,0,0,0,0
119+
0,0,0,0,1,0,250,1,19,9,1,0
120+
0,0,1,0,0,0,1726,1,7,287,1,0
121+
0,1,0,0,0,0,340,1,0,0,1,0

Chapter3_MCMC/github_pull.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# github data scraper
"""
Sample N random, non-fork GitHub repositories whose main language is one of
the keys of `language_mappings`, and save one row per repository to
data/github_data.csv.

Columns of the data matrix X:

indep. variables
    0-5 - language, one-hot encoded: 6 positions, one for each of the
          6 languages in `language_mappings`
    6   - number of days since the repository was created, 1 position
    7   - has wiki? Boolean stored as 0/1, 1 position
    8   - number of users the owner is following, 1 position
    9   - number of followers of the owner, 1 position

dep. variables
    10  - stars/watchers
    11  - forks

No constant (intercept) column is stored in X.
"""
from requests import get
from json import loads
import datetime
import numpy as np


MAX = 8000000  # upper bound of the random `since` value sent to the listing endpoint
today = datetime.datetime.today()
randint = np.random.randint
N = 120  # sample size.
# Placeholder credentials: replace before running, and never commit real ones.
auth = ("username", "password")

language_mappings = {"Python": 0, "JavaScript": 1, "Ruby": 2, "Java": 3, "Shell": 4, "PHP": 5}

# Header row of the csv, one name per column of X (same text as the
# header of the committed data/github_data.csv).
column_names = ("Python, Javascript, Ruby, Java, Shell, PHP, days_since_creation, "
                "has_wiki, author_following, author_followers, starred_count, forked_count")

# define data matrix:
X = np.zeros((N, 12), dtype=int)

for i in range(N):
    # Keep drawing random repositories until one is not a fork and is
    # written in one of the tracked languages.
    while True:
        r = get("https://api.github.com/repositories",
                params={"since": randint(0, MAX)}, auth=auth)
        # Bad credentials or rate limiting return an error object, not a
        # list; fail with the HTTP error instead of an opaque KeyError.
        r.raise_for_status()
        listing = loads(r.text)
        if not listing:
            # empty page for this `since` value: draw again
            continue

        # only the first repository of the page is of interest
        results = listing[0]
        if results["fork"]:
            # forks are rejected; skip before spending a second request
            continue

        r = get(results["url"], auth=auth)
        if r.status_code != 200:
            # details unavailable for this repository: draw again
            continue

        # check the language (missing or null language means "not tracked")
        repo_results = loads(r.text)
        if repo_results.get("language") in language_mappings:
            break

    # languages
    repo_language = repo_results["language"]
    X[i, language_mappings[repo_language]] = 1

    # delta time: only the YYYY-MM-DD prefix of the timestamp is parsed
    created = datetime.datetime.strptime(repo_results["created_at"][:10], "%Y-%m-%d")
    X[i, 6] = (today - created).days

    # haswiki
    X[i, 7] = repo_results["has_wiki"]

    # get user information
    r = get(results["owner"]["url"], auth=auth)
    r.raise_for_status()
    user_results = loads(r.text)
    X[i, 8] = user_results["following"]
    X[i, 9] = user_results["followers"]

    # get dep. data
    X[i, 10] = repo_results["watchers_count"]
    X[i, 11] = repo_results["forks_count"]

    # Progress report; single-string prints give the same output on
    # Python 2 and Python 3.
    print("")
    print(" -------------- ")
    print("%d :  %s %s %s %s" % (i, results["full_name"], repo_language,
                                 repo_results["watchers_count"],
                                 repo_results["forks_count"]))
    print(" -------------- ")
    print("")

# comments="" stops numpy from prefixing the header line with "# ".
np.savetxt("data/github_data.csv", X, delimiter=",", fmt="%d",
           header=column_names, comments="")
86+
87+
88+

0 commit comments

Comments
 (0)