 """
 Test the TextCNN class
 2016/12/22
 """
 import os
 import sys
 import numpy as np
 import tensorflow as tf
 from sklearn.model_selection import train_test_split
 from tensorflow.contrib import learn
 
 from data_helpers import load_data_and_labels, batch_iter
 from text_cnn import TextCNN
-
+import pudb; pu.db  # debugging breakpoint: `pu.db` opens the pudb debugger at this point
 
 # Load original data
 path = sys.path[0]
 pos_filename = path + "/data/rt-polarity.pos"
 neg_filename = path + "/data/rt-polarity.neg"
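 # Assumption: rt-polarity.pos / rt-polarity.neg are the Pang & Lee movie-review
 # polarity files, one sentence per line (positive and negative examples)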
 
 X_data, y_data = load_data_and_labels(pos_filename, neg_filename)
 max_document_length = max([len(sen.split(" ")) for sen in X_data])
 print("Max document length:", max_document_length)
 # Build the vocabulary
 vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
 # Map each sentence to a sequence of word ids, padded to max_document_length
 x = np.array(list(vocab_processor.fit_transform(X_data)), dtype=np.int64)  # word ids are integer indices, not floats
 y = np.array(y_data, dtype=np.int32)
 vocabulary_size = len(vocab_processor.vocabulary_)
 print("Vocabulary size:", vocabulary_size)
 # Split the data
 X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=1111)
 print("X_train shape {0}, y_train shape {1}".format(X_train.shape, y_train.shape))
 print("X_test shape {0}, y_test shape {1}".format(X_test.shape, y_test.shape))
 
 # The hyperparameters of the CNN
 seq_len = X_train.shape[1]
 vocab_size = vocabulary_size
 embedding_size = 128
 filter_sizes = [2, 3, 4]
 num_filters = 128
 num_classes = y_train.shape[1]
 l2_reg_lambda = 0.0  # defined but never passed to the model below
 
 # Construct the CNN model
 text_cnn_model = TextCNN(seq_len=seq_len, vocab_size=vocab_size, embedding_size=embedding_size,
                          filter_sizes=filter_sizes, num_filters=num_filters, num_classes=num_classes)
 loss = text_cnn_model.loss
 train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
 accuracy = text_cnn_model.accuracy
 # The parameters for training
 batch_size = 64
 training_epochs = 10
 display_every = 1
 dropout_keep_prob = 0.5
 
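 # Note: batch_iter is imported but unused. Assuming it follows the usual
 # data_helpers signature, batch_iter(data, batch_size, num_epochs, shuffle=True),
 # the manual slicing below could be replaced with a sketch like:
 #   for batch in batch_iter(list(zip(X_train, y_train)), batch_size, training_epochs):
 #       x_batch, y_batch = zip(*batch)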
 batch_num = int(X_train.shape[0] / batch_size)
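 # The last X_train.shape[0] % batch_size samples are dropped each epoch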
 
 sess = tf.Session()
 sess.run(tf.global_variables_initializer())
 print("Starting training...")
 for epoch in range(training_epochs):
     avg_cost = 0
     for batch in range(batch_num):
         _, cost = sess.run([train_op, loss],
                            feed_dict={text_cnn_model.x: X_train[batch*batch_size:(batch+1)*batch_size],
                                       text_cnn_model.y: y_train[batch*batch_size:(batch+1)*batch_size],
                                       text_cnn_model.dropout_keep_prob: dropout_keep_prob})
         avg_cost += cost
     if epoch % display_every == 0:
         # Evaluate on the held-out test set with dropout disabled
         cost, acc = sess.run([loss, accuracy],
                              feed_dict={text_cnn_model.x: X_test,
                                         text_cnn_model.y: y_test,
                                         text_cnn_model.dropout_keep_prob: 1.0})
         print("\nEpoch {0}: train loss {1}, test loss {2}, test accuracy {3}".format(
             epoch, avg_cost / batch_num, cost, acc))
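 
 # Release the session's resources once training finishes
 sess.close()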