Skip to content

Commit 1d2560f

Browse files
Author: moneyDboat
Commit message: modify .sh
1 parent 5062907 commit 1d2560f

File tree

4 files changed: +58 / -32 lines changed
40 Bytes
Binary file not shown.

data_manager.py

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -21,27 +21,27 @@ class Data_Factory():
2121

2222
def load(self, path):
2323
R = pickle.load(open(path + "/ratings.all", "rb"))
24-
print ("Load preprocessed rating data - %s" % (path + "/ratings.all"))
24+
print("Load preprocessed rating data - %s" % (path + "/ratings.all"))
2525
D_all = pickle.load(open(path + "/document.all", "rb"))
26-
print ("Load preprocessed document data - %s" % (path + "/document.all"))
26+
print("Load preprocessed document data - %s" % (path + "/document.all"))
2727
return R, D_all
2828

2929
def save(self, path, R, D_all):
3030
if not os.path.exists(path):
3131
os.makedirs(path)
32-
print ("Saving preprocessed rating data - %s" % (path + "/ratings.all"))
32+
print("Saving preprocessed rating data - %s" % (path + "/ratings.all"))
3333
pickle.dump(R, open(path + "/ratings.all", "wb"))
34-
print ("Done!")
35-
print ("Saving preprocessed document data - %s" % (path + "/document.all"))
34+
print("Done!")
35+
print("Saving preprocessed document data - %s" % (path + "/document.all"))
3636
pickle.dump(D_all, open(path + "/document.all", "wb"))
37-
print ("Done!")
37+
print("Done!")
3838

3939
def read_rating(self, path):
4040
results = []
4141
if os.path.isfile(path):
4242
raw_ratings = open(path, 'r')
4343
else:
44-
print ("Path (preprocessed) is wrong!")
44+
print("Path (preprocessed) is wrong!")
4545
sys.exit()
4646
index_list = []
4747
rating_list = []
@@ -109,7 +109,7 @@ def read_pretrained_word2vec(self, path, vocab, dim):
109109
return W
110110

111111
def split_data(self, ratio, R):
112-
print ("Randomly splitting rating data into training set (%.1f) and test set (%.1f)..." % (1 - ratio, ratio))
112+
print("Randomly splitting rating data into training set (%.1f) and test set (%.1f)..." % (1 - ratio, ratio))
113113
train = []
114114
for i in range(R.shape[0]):
115115
user_rating = R[i].nonzero()[1]
@@ -132,7 +132,7 @@ def split_data(self, ratio, R):
132132

133133
num_addition = int((1 - ratio) * total_size) - len(train)
134134
if num_addition < 0:
135-
print ('this ratio cannot be handled')
135+
print('this ratio cannot be handled')
136136
sys.exit()
137137
else:
138138
train.extend(remain_rating_list[:num_addition])
@@ -247,7 +247,7 @@ def generate_train_valid_test_file_from_R(self, path, R, ratio):
247247
f_train_user.close()
248248
f_valid_user.close()
249249
f_test_user.close()
250-
print ("\ttrain_user.dat, valid_user.dat, test_user.dat files are generated.")
250+
print("\ttrain_user.dat, valid_user.dat, test_user.dat files are generated.")
251251

252252
f_train_item = open(path + "/train_item.dat", "w")
253253
f_valid_item = open(path + "/valid_item.dat", "w")
@@ -331,34 +331,37 @@ def preprocess(self, path_rating, path_itemtext, min_rating,
331331
# Validate data paths
332332
if os.path.isfile(path_rating):
333333
raw_ratings = open(path_rating, 'r')
334-
print ("Path - rating data: %s" % path_rating)
334+
print("Path - rating data: %s" % path_rating)
335335
else:
336-
print ("Path(rating) is wrong!")
336+
print("Path(rating) is wrong!")
337337
sys.exit()
338338

339339
if os.path.isfile(path_itemtext):
340340
raw_content = open(path_itemtext, 'r')
341-
print ("Path - document data: %s" % path_itemtext)
341+
print("Path - document data: %s" % path_itemtext)
342342
else:
343-
print ("Path(item text) is wrong!")
343+
print("Path(item text) is wrong!")
344344
sys.exit()
345345

346346
# 1st scan document file to filter items which have documents
347347
tmp_id_plot = set()
348348
all_line = raw_content.read().splitlines()
349-
#content format:(1::a little boy |)
349+
# content format:(1::a little boy |)
350350
for line in all_line:
351351
tmp = line.split('::')
352352
i = tmp[0]
353-
tmp_plot = tmp[1].split('|')
353+
try:
354+
tmp_plot = tmp[1].split('|')
355+
except:
356+
print(tmp[0])
354357
if tmp_plot[0] == '':
355358
continue
356-
#tmp_id_plot to remove rating that has no content
359+
# tmp_id_plot to remove rating that has no content
357360
tmp_id_plot.add(i)
358361
raw_content.close()
359362

360-
print ("Preprocessing rating data...")
361-
print ("\tCounting # ratings of each user and removing users having less than %d ratings..." % min_rating)
363+
print("Preprocessing rating data...")
364+
print("\tCounting # ratings of each user and removing users having less than %d ratings..." % min_rating)
362365
# 1st scan rating file to check # ratings of each user
363366
all_line = raw_ratings.read().splitlines()
364367
tmp_user = {}
@@ -420,11 +423,11 @@ def preprocess(self, path_rating, path_itemtext, min_rating,
420423
# sparse matrix
421424
R = csr_matrix((rating, (user, item)))
422425

423-
print ("Finish preprocessing rating data - # user: %d, # item: %d, # ratings: %d" % (R.shape[0], R.shape[1], R.nnz))
426+
print("Finish preprocessing rating data - # user: %d, # item: %d, # ratings: %d" % (R.shape[0], R.shape[1], R.nnz))
424427

425428
# 2nd scan document file to make idx2plot dictionary according to
426429
# indices of items in rating matrix
427-
print ("Preprocessing item document...")
430+
print("Preprocessing item document...")
428431

429432
# Read Document File
430433
raw_content = open(path_itemtext, 'r')
@@ -439,8 +442,8 @@ def preprocess(self, path_rating, path_itemtext, min_rating,
439442
eachid_plot = (' '.join(tmp_plot)).split()[:max_length]
440443
map_idtoplot[i] = ' '.join(eachid_plot)
441444

442-
print ("\tRemoving stop words...")
443-
print ("\tFiltering words by TF-IDF score with max_df: %.1f, vocab_size: %d" % (_max_df, _vocab_size))
445+
print("\tRemoving stop words...")
446+
print("\tFiltering words by TF-IDF score with max_df: %.1f, vocab_size: %d" % (_max_df, _vocab_size))
444447

445448
# Make vocabulary by document
446449
vectorizer = TfidfVectorizer(max_df=_max_df, stop_words={
@@ -466,6 +469,6 @@ def preprocess(self, path_rating, path_itemtext, min_rating,
466469
'X_vocab': X_vocab,
467470
}
468471

469-
print ("Finish preprocessing document data!")
472+
print("Finish preprocessing document data!")
470473

471474
return R, D_all

run_test_ConvMF.sh

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,21 @@
1+
##!/usr/bin/env bash
2+
#python ./run.py \
3+
#-d ./data/preprocessed/ml-1m/0.2/ \
4+
#-a ./data/preprocessed/ml-1m/ \
5+
#-o ./result/ml-1m/1_100_200 \
6+
#-e 50 \
7+
#-p ./data/glove/glove.6B.50d.txt \
8+
#-u 10 \
9+
#-v 100 \
10+
#-g True
11+
12+
113
#!/usr/bin/env bash
214
python ./run.py \
3-
-d ./data/preprocessed/ml-1m/0.2/ \
4-
-a ./data/preprocessed/ml-1m/ \
5-
-o ./result/ml-1m/1_100_200 \
15+
-d ./data/preprocessed/aiv/0.2/ \
16+
-a ./data/preprocessed/aiv/ \
17+
-o ./result/ml-1
18+
m/1_100_200 \
619
-e 50 \
720
-p ./data/glove/glove.6B.50d.txt \
821
-u 10 \

run_test_preprocess.sh

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,18 @@
1+
##!/usr/bin/env bash
2+
#python ./run.py \
3+
#-d ./data/preprocessed/ml-1m/0.2/ \
4+
#-a ./data/preprocessed/ml-1m/ \
5+
#-c True \
6+
#-r ./data/rare/movielens/ml-1m_ratings.dat \
7+
#-i ./data/rare/movielens/ml_plot.dat \
8+
#-m 1
9+
10+
111
#!/usr/bin/env bash
212
python ./run.py \
3-
-d ./data/preprocessed/ml-1m/0.2/ \
4-
-a ./data/preprocessed/ml-1m/ \
13+
-d ./data/preprocessed/aiv/0.2/ \
14+
-a ./data/preprocessed/aiv/ \
515
-c True \
6-
-r ./data/rare/movielens/ml-1m_ratings.dat \
7-
-i ./data/rare/movielens/ml_plot.dat \
8-
-m 1
16+
-r ./data/rare/aiv/Amazon_Instant_Video_ratings.txt \
17+
-i ./data/rare/aiv/Amazon_Instant_Video_items.txt \
18+
-m 1

0 commit comments

Comments
 (0)