|
8 | 8 | {
|
9 | 9 | "cell_type": "code",
|
10 | 10 | "execution_count": 1,
|
11 |
| - "metadata": { |
12 |
| - "collapsed": true |
13 |
| - }, |
| 11 | + "metadata": {}, |
14 | 12 | "outputs": [],
|
15 | 13 | "source": [
|
16 | 14 | "import os\n",
|
17 | 15 | "import numpy as np\n",
|
18 | 16 | "import pandas as pd\n",
|
19 |
| - "home_folder = os.path.expanduser(\"~\")\n", |
20 |
| - "data_folder = os.path.join(home_folder, \"Data\", \"basketball\")\n", |
| 17 | + "home_folder = \".\"\n", |
| 18 | + "data_folder = os.path.join(home_folder, \"data\")\n", |
21 | 19 | "data_filename = os.path.join(data_folder, \"leagues_NBA_2014_games_games.csv\")"
|
22 | 20 | ]
|
23 | 21 | },
|
|
409 | 407 | "name": "stdout",
|
410 | 408 | "output_type": "stream",
|
411 | 409 | "text": [
|
412 |
| - "Home Win percentage: 58.0%\n" |
| 410 | + "Home Win 百分比: 58.0%\n" |
413 | 411 | ]
|
414 | 412 | },
|
415 | 413 | {
|
|
550 | 548 | }
|
551 | 549 | ],
|
552 | 550 | "source": [
|
553 |
| - "print(\"Home Win percentage: {0:.1f}%\".format(100 * results[\"HomeWin\"].sum() / results[\"HomeWin\"].count()))\n", |
| 551 | + "print(\"Home Win 百分比: {0:.1f}%\".format(100 * results[\"HomeWin\"].sum() / results[\"HomeWin\"].count()))\n", |
554 | 552 | "results[\"HomeLastWin\"] = False\n",
|
555 | 553 | "results[\"VisitorLastWin\"] = False\n",
|
556 | 554 | "# This creates two new columns, all set to False\n",
|
|
757 | 755 | "cell_type": "code",
|
758 | 756 | "execution_count": 8,
|
759 | 757 | "metadata": {},
|
760 |
| - "outputs": [ |
761 |
| - { |
762 |
| - "name": "stderr", |
763 |
| - "output_type": "stream", |
764 |
| - "text": [ |
765 |
| - "/home/dlinking-lxy/more-space/pyworks/venv/lib/python3.5/site-packages/ipykernel_launcher.py:13: DeprecationWarning: \n", |
766 |
| - ".ix is deprecated. Please use\n", |
767 |
| - ".loc for label based indexing or\n", |
768 |
| - ".iloc for positional indexing\n", |
769 |
| - "\n", |
770 |
| - "See the documentation here:\n", |
771 |
| - "http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix\n", |
772 |
| - " del sys.path[0]\n" |
773 |
| - ] |
774 |
| - } |
775 |
| - ], |
| 758 | + "outputs": [], |
776 | 759 | "source": [
|
777 | 760 | "# What about win streaks?\n",
|
778 | 761 | "results[\"HomeWinStreak\"] = 0\n",
|
|
786 | 769 | " visitor_team = row[\"Visitor Team\"]\n",
|
787 | 770 | " row[\"HomeWinStreak\"] = win_streak[home_team]\n",
|
788 | 771 | " row[\"VisitorWinStreak\"] = win_streak[visitor_team]\n",
|
789 |
| - " results.ix[index] = row \n", |
| 772 | + " results.loc[index] = row \n", |
790 | 773 | " # Set current win\n",
|
791 | 774 | " if row[\"HomeWin\"]:\n",
|
792 | 775 | " win_streak[home_team] += 1\n",
|
|
820 | 803 | },
|
821 | 804 | {
|
822 | 805 | "cell_type": "code",
|
823 |
| - "execution_count": 10, |
| 806 | + "execution_count": 13, |
824 | 807 | "metadata": {},
|
825 | 808 | "outputs": [
|
826 | 809 | {
|
|
1693 | 1676 | "[30 rows x 24 columns]"
|
1694 | 1677 | ]
|
1695 | 1678 | },
|
1696 |
| - "execution_count": 10, |
| 1679 | + "execution_count": 13, |
1697 | 1680 | "metadata": {},
|
1698 | 1681 | "output_type": "execute_result"
|
1699 | 1682 | }
|
|
1707 | 1690 | },
|
1708 | 1691 | {
|
1709 | 1692 | "cell_type": "code",
|
1710 |
| - "execution_count": 11, |
| 1693 | + "execution_count": 15, |
1711 | 1694 | "metadata": {},
|
1712 | 1695 | "outputs": [
|
1713 | 1696 | {
|
|
1860 | 1843 | "4 False 0 0 0 "
|
1861 | 1844 | ]
|
1862 | 1845 | },
|
1863 |
| - "execution_count": 11, |
| 1846 | + "execution_count": 15, |
1864 | 1847 | "metadata": {},
|
1865 | 1848 | "output_type": "execute_result"
|
1866 | 1849 | }
|
|
1884 | 1867 | },
|
1885 | 1868 | {
|
1886 | 1869 | "cell_type": "code",
|
1887 |
| - "execution_count": 12, |
| 1870 | + "execution_count": 26, |
1888 | 1871 | "metadata": {},
|
1889 | 1872 | "outputs": [
|
1890 | 1873 | {
|
1891 | 1874 | "name": "stdout",
|
1892 | 1875 | "output_type": "stream",
|
1893 | 1876 | "text": [
|
1894 | 1877 | "Using whether the home team is ranked higher\n",
|
1895 |
| - "Accuracy: 60.2%\n" |
| 1878 | + "准确率: 60.2%\n" |
1896 | 1879 | ]
|
1897 | 1880 | }
|
1898 | 1881 | ],
|
|
1901 | 1884 | "clf = DecisionTreeClassifier(random_state=14)\n",
|
1902 | 1885 | "scores = cross_val_score(clf, X_homehigher, y_true, scoring='accuracy')\n",
|
1903 | 1886 | "print(\"Using whether the home team is ranked higher\")\n",
|
1904 |
| - "print(\"Accuracy: {0:.1f}%\".format(np.mean(scores) * 100))" |
| 1887 | + "print(\"准确率: {0:.1f}%\".format(np.mean(scores) * 100))" |
1905 | 1888 | ]
|
1906 | 1889 | },
|
1907 | 1890 | {
|
1908 | 1891 | "cell_type": "code",
|
1909 |
| - "execution_count": 13, |
| 1892 | + "execution_count": 25, |
1910 | 1893 | "metadata": {},
|
1911 | 1894 | "outputs": [
|
1912 | 1895 | {
|
1913 | 1896 | "name": "stdout",
|
1914 | 1897 | "output_type": "stream",
|
1915 | 1898 | "text": [
|
1916 |
| - "Accuracy: 60.5%\n" |
1917 |
| - ] |
1918 |
| - }, |
1919 |
| - { |
1920 |
| - "name": "stderr", |
1921 |
| - "output_type": "stream", |
1922 |
| - "text": [ |
1923 |
| - "/home/dlinking-lxy/more-space/pyworks/venv/lib/python3.5/site-packages/sklearn/grid_search.py:43: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. This module will be removed in 0.20.\n", |
1924 |
| - " DeprecationWarning)\n" |
| 1899 | + "准确率: 60.5%\n" |
1925 | 1900 | ]
|
1926 | 1901 | }
|
1927 | 1902 | ],
|
|
1934 | 1909 | "clf = DecisionTreeClassifier(random_state=14)\n",
|
1935 | 1910 | "grid = GridSearchCV(clf, parameter_space)\n",
|
1936 | 1911 | "grid.fit(X_homehigher, y_true)\n",
|
1937 |
| - "print(\"Accuracy: {0:.1f}%\".format(grid.best_score_ * 100))" |
| 1912 | + "print(\"准确率: {0:.1f}%\".format(grid.best_score_ * 100))" |
1938 | 1913 | ]
|
1939 | 1914 | },
|
1940 | 1915 | {
|
1941 | 1916 | "cell_type": "code",
|
1942 |
| - "execution_count": 14, |
| 1917 | + "execution_count": 18, |
1943 | 1918 | "metadata": {},
|
1944 | 1919 | "outputs": [
|
1945 | 1920 | {
|
|
2127 | 2102 | "5 0 "
|
2128 | 2103 | ]
|
2129 | 2104 | },
|
2130 |
| - "execution_count": 14, |
| 2105 | + "execution_count": 18, |
2131 | 2106 | "metadata": {},
|
2132 | 2107 | "output_type": "execute_result"
|
2133 | 2108 | }
|
|
2152 | 2127 | },
|
2153 | 2128 | {
|
2154 | 2129 | "cell_type": "code",
|
2155 |
| - "execution_count": 15, |
| 2130 | + "execution_count": 19, |
2156 | 2131 | "metadata": {},
|
2157 | 2132 | "outputs": [
|
2158 | 2133 | {
|
2159 | 2134 | "name": "stdout",
|
2160 | 2135 | "output_type": "stream",
|
2161 | 2136 | "text": [
|
2162 | 2137 | "Using whether the home team is ranked higher\n",
|
2163 |
| - "Accuracy: 60.5%\n" |
| 2138 | + "准确率: 60.5%\n" |
2164 | 2139 | ]
|
2165 | 2140 | }
|
2166 | 2141 | ],
|
|
2169 | 2144 | "clf = DecisionTreeClassifier(random_state=14)\n",
|
2170 | 2145 | "scores = cross_val_score(clf, X_home_higher, y_true, scoring='accuracy')\n",
|
2171 | 2146 | "print(\"Using whether the home team is ranked higher\")\n",
|
2172 |
| - "print(\"Accuracy: {0:.1f}%\".format(np.mean(scores) * 100))\n" |
| 2147 | + "print(\"准确率: {0:.1f}%\".format(np.mean(scores) * 100))\n" |
2173 | 2148 | ]
|
2174 | 2149 | },
|
2175 | 2150 | {
|
2176 | 2151 | "cell_type": "code",
|
2177 |
| - "execution_count": 16, |
| 2152 | + "execution_count": 20, |
2178 | 2153 | "metadata": {},
|
2179 | 2154 | "outputs": [
|
2180 | 2155 | {
|
2181 | 2156 | "name": "stdout",
|
2182 | 2157 | "output_type": "stream",
|
2183 | 2158 | "text": [
|
2184 |
| - "Accuracy: 61.2%\n" |
| 2159 | + "准确率: 61.2%\n" |
2185 | 2160 | ]
|
2186 | 2161 | }
|
2187 | 2162 | ],
|
|
2198 | 2173 | "\n",
|
2199 | 2174 | "clf = DecisionTreeClassifier(random_state=14)\n",
|
2200 | 2175 | "scores = cross_val_score(clf, X_teams, y_true, scoring='accuracy')\n",
|
2201 |
| - "print(\"Accuracy: {0:.1f}%\".format(np.mean(scores) * 100))" |
| 2176 | + "print(\"准确率: {0:.1f}%\".format(np.mean(scores) * 100))" |
2202 | 2177 | ]
|
2203 | 2178 | },
|
2204 | 2179 | {
|
2205 | 2180 | "cell_type": "code",
|
2206 |
| - "execution_count": 17, |
| 2181 | + "execution_count": 21, |
2207 | 2182 | "metadata": {},
|
2208 | 2183 | "outputs": [
|
2209 | 2184 | {
|
2210 | 2185 | "name": "stdout",
|
2211 | 2186 | "output_type": "stream",
|
2212 | 2187 | "text": [
|
2213 | 2188 | "Using full team labels is ranked higher\n",
|
2214 |
| - "Accuracy: 60.5%\n" |
| 2189 | + "准确率: 60.5%\n" |
2215 | 2190 | ]
|
2216 | 2191 | }
|
2217 | 2192 | ],
|
|
2220 | 2195 | "clf = RandomForestClassifier(random_state=14)\n",
|
2221 | 2196 | "scores = cross_val_score(clf, X_teams, y_true, scoring='accuracy')\n",
|
2222 | 2197 | "print(\"Using full team labels is ranked higher\")\n",
|
2223 |
| - "print(\"Accuracy: {0:.1f}%\".format(np.mean(scores) * 100))" |
| 2198 | + "print(\"准确率: {0:.1f}%\".format(np.mean(scores) * 100))" |
2224 | 2199 | ]
|
2225 | 2200 | },
|
2226 | 2201 | {
|
2227 | 2202 | "cell_type": "code",
|
2228 |
| - "execution_count": 18, |
| 2203 | + "execution_count": 22, |
2229 | 2204 | "metadata": {},
|
2230 | 2205 | "outputs": [
|
2231 | 2206 | {
|
|
2243 | 2218 | },
|
2244 | 2219 | {
|
2245 | 2220 | "cell_type": "code",
|
2246 |
| - "execution_count": 19, |
| 2221 | + "execution_count": 23, |
2247 | 2222 | "metadata": {},
|
2248 | 2223 | "outputs": [
|
2249 | 2224 | {
|
2250 | 2225 | "name": "stdout",
|
2251 | 2226 | "output_type": "stream",
|
2252 | 2227 | "text": [
|
2253 | 2228 | "Using whether the home team is ranked higher\n",
|
2254 |
| - "Accuracy: 60.9%\n" |
| 2229 | + "准确率: 60.9%\n" |
2255 | 2230 | ]
|
2256 | 2231 | }
|
2257 | 2232 | ],
|
2258 | 2233 | "source": [
|
2259 | 2234 | "clf = RandomForestClassifier(random_state=14)\n",
|
2260 | 2235 | "scores = cross_val_score(clf, X_all, y_true, scoring='accuracy')\n",
|
2261 | 2236 | "print(\"Using whether the home team is ranked higher\")\n",
|
2262 |
| - "print(\"Accuracy: {0:.1f}%\".format(np.mean(scores) * 100))" |
| 2237 | + "print(\"准确率: {0:.1f}%\".format(np.mean(scores) * 100))" |
2263 | 2238 | ]
|
2264 | 2239 | },
|
2265 | 2240 | {
|
2266 | 2241 | "cell_type": "code",
|
2267 |
| - "execution_count": 20, |
| 2242 | + "execution_count": 24, |
2268 | 2243 | "metadata": {},
|
2269 | 2244 | "outputs": [
|
2270 | 2245 | {
|
2271 | 2246 | "name": "stdout",
|
2272 | 2247 | "output_type": "stream",
|
2273 | 2248 | "text": [
|
2274 |
| - "Accuracy: 63.8%\n", |
| 2249 | + "准确率: 63.8%\n", |
2275 | 2250 | "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
|
2276 | 2251 | " max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
|
2277 | 2252 | " min_impurity_split=1e-07, min_samples_leaf=6,\n",
|
|
2297 | 2272 | "clf = RandomForestClassifier(random_state=14)\n",
|
2298 | 2273 | "grid = GridSearchCV(clf, parameter_space)\n",
|
2299 | 2274 | "grid.fit(X_all, y_true)\n",
|
2300 |
| - "print(\"Accuracy: {0:.1f}%\".format(grid.best_score_ * 100))\n", |
| 2275 | + "print(\"准确率: {0:.1f}%\".format(grid.best_score_ * 100))\n", |
2301 | 2276 | "print(grid.best_estimator_)"
|
2302 | 2277 | ]
|
| 2278 | + }, |
| 2279 | + { |
| 2280 | + "cell_type": "code", |
| 2281 | + "execution_count": null, |
| 2282 | + "metadata": {}, |
| 2283 | + "outputs": [], |
| 2284 | + "source": [] |
2303 | 2285 | }
|
2304 | 2286 | ],
|
2305 | 2287 | "metadata": {
|
|
0 commit comments