@@ -45,7 +45,7 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
4545
4646 The KDD Cup '99 dataset was created by processing the tcpdump portions
4747 of the 1998 DARPA Intrusion Detection System (IDS) Evaluation dataset,
48- created by MIT Lincoln Lab [1] . The artificial data was generated using
48+ created by MIT Lincoln Lab [1]. The artificial data was generated using
4949 a closed network and hand-injected attacks to produce a large number of
5050 different types of attack with normal activity in the background.
5151 As the initial goal was to produce a large training set for supervised
@@ -134,7 +134,7 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
134134 shuffle : bool, default=False
135135 Whether to shuffle the dataset.
136136
137- percent10 : bool, default=False
137+ percent10 : bool, default=True
138138 Whether to load only 10 percent of the data.
139139
140140 download_if_missing : bool, default=True
@@ -155,9 +155,11 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
155155 Detection Evaluation Richard Lippmann, Joshua W. Haines,
156156 David J. Fried, Jonathan Korba, Kumar Das
157157
158- .. [2] A Geometric Framework for Unsupervised Anomaly Detection: Detecting
159- Intrusions in Unlabeled Data (2002) by Eleazar Eskin, Andrew Arnold,
160- Michael Prerau, Leonid Portnoy, Sal Stolfo
158+ .. [2] K. Yamanishi, J.-I. Takeuchi, G. Williams, and P. Milne. Online
159+ unsupervised outlier detection using finite mixtures with
160+ discounting learning algorithms. In Proceedings of the sixth
161+ ACM SIGKDD international conference on Knowledge discovery
162+ and data mining, pages 320-324. ACM Press, 2000.
161163
162164 """
163165 kddcup99 = _fetch_brute_kddcup99 (shuffle = shuffle , percent10 = percent10 ,
@@ -214,7 +216,7 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
214216
215217def _fetch_brute_kddcup99 (subset = None , data_home = None ,
216218 download_if_missing = True , random_state = None ,
217- shuffle = False , percent10 = False ):
219+ shuffle = False , percent10 = True ):
218220
219221 """Load the kddcup99 dataset, downloading it if necessary.
220222
@@ -242,7 +244,7 @@ def _fetch_brute_kddcup99(subset=None, data_home=None,
242244 shuffle : bool, default=False
243245 Whether to shuffle the dataset.
244246
245- percent10 : bool, default=False
247+ percent10 : bool, default=True
246248 Whether to load only 10 percent of the data.
247249
248250 Returns
0 commit comments