"""
Train and evaluate the TextCNN model on the rt-polarity data.
2016/12/22

The script loads the positive/negative sentence files, maps every sentence
to a fixed-length sequence of vocabulary ids, holds out 10% of the samples
for evaluation, and trains with Adam, printing the held-out loss and
accuracy after each epoch.
"""
import os
import sys
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.contrib import learn

from data_helpers import load_data_and_labels, batch_iter
from text_cnn import TextCNN

# Load original data.
# sys.path[0] is normally the directory of the script being run, so the
# data files are resolved relative to this file rather than the CWD.
path = sys.path[0]
pos_filename = os.path.join(path, "data", "rt-polarity.pos")
neg_filename = os.path.join(path, "data", "rt-polarity.neg")

X_data, y_data = load_data_and_labels(pos_filename, neg_filename)
# Longest sentence, measured in space-separated tokens; every document is
# mapped to a sequence of this length by the vocabulary processor.
max_document_length = max([len(sen.split(" ")) for sen in X_data])
print("Max_document_length:,", max_document_length)
# Create the vocabulary
vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
# The idx data
# NOTE(review): the word ids are cast to float32 here; presumably the model's
# input placeholder is an integer type and the feed converts back -- confirm
# against TextCNN before changing the dtype.
x = np.array(list(vocab_processor.fit_transform(X_data)), dtype=np.float32)
y = np.array(y_data, dtype=np.int32)
vocabulary_size = len(vocab_processor.vocabulary_)
print("The size of vocabulary:", vocabulary_size)
# Split the data: 10% held out for evaluation, fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=1111)
print("X_train shape {0}, y_train shape {1}".format(X_train.shape, y_train.shape))
print("X_test shape {0}, y_test shape {1}".format(X_test.shape, y_test.shape))

# The parameters of the CNN
seq_len = X_train.shape[1]
vocab_size = vocabulary_size
embedding_size = 128
filter_sizes = [2, 3, 4]
num_filters = 128
# y is indexed as 2-D here; presumably one row of per-class labels per
# sample (one-hot) -- verify against load_data_and_labels.
num_classes = y_train.shape[1]
# NOTE(review): l2_reg_lambda is defined but not passed to TextCNN below.
l2_reg_lambda = 0.0

# Construct the CNN model.  The variable keeps its historical
# ``text_rnn_model`` name even though the class instantiated is TextCNN.
text_rnn_model = TextCNN(seq_len=seq_len, vocab_size=vocab_size, embedding_size=embedding_size,
                         filter_sizes=filter_sizes, num_filters=num_filters, num_classes=num_classes)
loss = text_rnn_model.loss
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
accuracy = text_rnn_model.accuracy
# The parameters for training
batch_size = 64
training_epochs = 10
dispaly_every = 1
dropout_keep_prob = 0.5

# Number of full batches per epoch; the trailing
# X_train.shape[0] % batch_size samples are not visited.
batch_num = int(X_train.shape[0]/batch_size)

# The context manager closes the session (and releases its resources) even
# if training raises.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("Starting training...")
    for epoch in range(training_epochs):
        # NOTE(review): summed over the epoch's batches but never reported.
        avg_cost = 0
        for batch in range(batch_num):
            start = batch * batch_size
            end = start + batch_size
            _, cost = sess.run([train_op, loss],
                               feed_dict={text_rnn_model.x: X_train[start:end],
                                          text_rnn_model.y: y_train[start:end],
                                          text_rnn_model.dropout_keep_prob: dropout_keep_prob})
            avg_cost += cost
        if epoch % dispaly_every == 0:
            # Evaluate on the whole held-out set with dropout disabled.
            cost, acc = sess.run([loss, accuracy],
                                 feed_dict={text_rnn_model.x: X_test,
                                            text_rnn_model.y: y_test,
                                            text_rnn_model.dropout_keep_prob: 1.0})
            print("\nEpoch {0} : loss {1}, accuracy {2}".format(epoch, cost, acc))