|
1 | | -# to use CPU uncomment below code |
2 | | -# import os |
3 | | -# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152 |
4 | | -# os.environ["CUDA_VISIBLE_DEVICES"] = "-1" |
5 | | - |
6 | | -# import tensorflow as tf |
7 | | - |
8 | | -# config = tf.ConfigProto(intra_op_parallelism_threads=5, |
9 | | -# inter_op_parallelism_threads=5, |
10 | | -# allow_soft_placement=True, |
11 | | -# device_count = {'CPU' : 1, |
12 | | -# 'GPU' : 0} |
13 | | -# ) |
14 | | - |
15 | | - |
16 | | -from keras.preprocessing.text import Tokenizer |
17 | | -from keras.preprocessing.sequence import pad_sequences |
18 | | -from keras.utils import to_categorical |
19 | | -from keras.callbacks import ModelCheckpoint, TensorBoard |
| 1 | +import tensorflow as tf |
| 2 | +gpus = tf.config.experimental.list_physical_devices('GPU') |
| 3 | +if gpus: |
 | 4 | + # enable memory growth on the first GPU so memory is allocated only as needed, instead of reserving all of it up front |
| 5 | + tf.config.experimental.set_memory_growth(gpus[0], enable=True) |
| 6 | + |
| 7 | +from tensorflow.keras.preprocessing.text import Tokenizer |
| 8 | +from tensorflow.keras.preprocessing.sequence import pad_sequences |
| 9 | +from tensorflow.keras.utils import to_categorical |
| 10 | +from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard |
20 | 11 | from sklearn.model_selection import train_test_split |
21 | 12 | import time |
22 | 13 | import numpy as np |
23 | 14 | import pickle |
24 | 15 |
|
25 | | -from utils import get_embedding_vectors, get_model, SEQUENCE_LENGTH, EMBEDDING_SIZE, TEST_SIZE |
26 | | -from utils import BATCH_SIZE, EPOCHS, int2label, label2int |
| 16 | +from utils import get_model, SEQUENCE_LENGTH, TEST_SIZE |
| 17 | +from utils import BATCH_SIZE, EPOCHS, label2int |
27 | 18 |
|
28 | 19 |
|
29 | 20 | def load_data(): |
@@ -69,26 +60,25 @@ def load_data(): |
69 | 60 |
|
70 | 61 | y = [ label2int[label] for label in y ] |
71 | 62 | y = to_categorical(y) |
72 | | - |
73 | 63 | print(y[0]) |
74 | 64 |
|
75 | 65 | # split and shuffle |
76 | 66 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=7) |
77 | | - |
| 67 | +# print our data shapes |
| 68 | +print("X_train.shape:", X_train.shape) |
| 69 | +print("X_test.shape:", X_test.shape) |
| 70 | +print("y_train.shape:", y_train.shape) |
| 71 | +print("y_test.shape:", y_test.shape) |
78 | 72 | # constructs the model with 128 LSTM units |
79 | 73 | model = get_model(tokenizer=tokenizer, lstm_units=128) |
80 | 74 |
|
81 | 75 | # initialize our ModelCheckpoint and TensorBoard callbacks |
82 | 76 | # model checkpoint for saving best weights |
83 | | -model_checkpoint = ModelCheckpoint("results/spam_classifier_{val_loss:.2f}", save_best_only=True, |
| 77 | +model_checkpoint = ModelCheckpoint("results/spam_classifier_{val_loss:.2f}.h5", save_best_only=True, |
84 | 78 | verbose=1) |
85 | 79 | # TensorBoard callback for visualizing training; logs go to a timestamped directory |
86 | 80 | tensorboard = TensorBoard(f"logs/spam_classifier_{time.time()}") |
87 | | -# print our data shapes |
88 | | -print("X_train.shape:", X_train.shape) |
89 | | -print("X_test.shape:", X_test.shape) |
90 | | -print("y_train.shape:", y_train.shape) |
91 | | -print("y_test.shape:", y_test.shape) |
| 81 | + |
92 | 82 | # train the model |
93 | 83 | model.fit(X_train, y_train, validation_data=(X_test, y_test), |
94 | 84 | batch_size=BATCH_SIZE, epochs=EPOCHS, |
|
0 commit comments