File tree Expand file tree Collapse file tree 1 file changed +19
-1
lines changed Expand file tree Collapse file tree 1 file changed +19
-1
lines changed Original file line number Diff line number Diff line change 88from __future__ import print_function
99import logging
1010import gensim
11+ import numpy as np
12+
13+ NR_OF_TOPICS = 100
1114
1215# Set up logging in order to get progress information as the model is being built:
1316logging .basicConfig (
# Train the LDA topic model. `mm` (the corpus) and `id2word` (the vocabulary
# mapping) are defined outside this excerpt; the number of topics comes from
# the NR_OF_TOPICS constant near the top of the script.
model = gensim.models.ldamodel.LdaModel(
    corpus=mm,
    id2word=id2word,
    num_topics=NR_OF_TOPICS,
    passes=1,
    chunksize=10000,
    update_every=1,
)

# Persist the trained model so later runs can load it instead of retraining.
model.save('wiki_lda.pkl')
37+
# Build the dense document/topic matrix: one row per document in `mm`, one
# column per topic of the model. Entries start at zero, so any topic the
# model does not report for a document simply stays 0.
topics = np.zeros((len(mm), model.num_topics))
for doc_index, document in enumerate(mm):
    # model[document] yields (topic index, weight) pairs for this document.
    for topic_index, weight in model[document]:
        topics[doc_index, topic_index] += weight

# Store the full matrix on disk in NumPy's binary format.
np.save('topics.npy', topics)
45+
46+ # Alternatively, we could create a sparse matrix and save that instead. This
47+ # alternative saves disk space, at the cost of slightly more complex code:
48+
49+ ## from scipy import sparse, io
50+ ## sp = sparse.csr_matrix(topics)
51+ ## io.savemat('topics.mat', {'topics': sp})
You can’t perform that action at this time.
0 commit comments