File tree: 1 file changed, +9 -25 lines changed
Original file line number | Diff line number | Diff line change
66# It is made available under the MIT License
77
88from __future__ import print_function
9- import numpy as np
109import logging
1110import gensim
11+
12+ # Set up logging in order to get progress information as the model is being built:
1213logging .basicConfig (
1314 format = '%(asctime)s : %(levelname)s : %(message)s' ,
1415 level = logging .INFO )
16+
17+ # Load the preprocessed corpus (id2word & mm):
1518id2word = gensim .corpora .Dictionary .load_from_text (
1619 'data/wiki_en_output_wordids.txt.bz2' )
1720mm = gensim .corpora .MmCorpus ('data/wiki_en_output_tfidf.mm' )
21+
22+ # Calling the constructor is enough to build the model
23+ # This call will take a few hours!
1824model = gensim .models .ldamodel .LdaModel (
1925 corpus = mm ,
2026 id2word = id2word ,
2127 num_topics = 100 ,
2228 update_every = 1 ,
2329 chunksize = 10000 ,
2430 passes = 1 )
25- model .save ('wiki_lda.pkl' )
26- topics = [model [doc ] for doc in mm ]
27- lens = np .array ([len (t ) for t in topics ])
28- print (np .mean (lens <= 10 ))
29- print (np .mean (lens ))
3031
31- counts = np .zeros (100 )
32- for doc_top in topics :
33- for ti , _ in doc_top :
34- counts [ti ] += 1
35-
36- for doc_top in topics :
37- for ti , _ in doc_top :
38- counts [ti ] += 1
39-
40- words = model .show_topic (counts .argmax (), 64 )
41- print (words )
42- print ()
43- print ()
44- print ()
45- words = model .show_topic (counts .argmin (), 64 )
46- print (words )
47- print ()
48- print ()
49- print ()
32+ # Save the model so we do not need to learn it again.
33+ model .save ('wiki_lda.pkl' )
You can’t perform that action at this time.
0 commit comments