diff --git a/captioning/__pycache__/__init__.cpython-38.pyc b/captioning/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 00000000..cf48a2d5
Binary files /dev/null and b/captioning/__pycache__/__init__.cpython-38.pyc differ
diff --git a/captioning/data/__pycache__/__init__.cpython-38.pyc b/captioning/data/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 00000000..e5c5dbe2
Binary files /dev/null and b/captioning/data/__pycache__/__init__.cpython-38.pyc differ
diff --git a/captioning/data/__pycache__/dataloader.cpython-38.pyc b/captioning/data/__pycache__/dataloader.cpython-38.pyc
new file mode 100644
index 00000000..dc50ffe8
Binary files /dev/null and b/captioning/data/__pycache__/dataloader.cpython-38.pyc differ
diff --git a/captioning/data/dataloader.py b/captioning/data/dataloader.py
index 7f2ed030..e1558b82 100644
--- a/captioning/data/dataloader.py
+++ b/captioning/data/dataloader.py
@@ -107,6 +107,7 @@ def __init__(self, opt):
         # load the json file which contains additional information about the dataset
         print('DataLoader loading json file: ', opt.input_json)
         self.info = json.load(open(self.opt.input_json))
+        self.images = self.info['images']
         if 'ix_to_word' in self.info:
             self.ix_to_word = self.info['ix_to_word']
             self.vocab_size = len(self.ix_to_word)
@@ -136,13 +137,12 @@ def __init__(self, opt):
         self.att_loader = HybridLoader(self.opt.input_att_dir, '.npz', in_memory=self.data_in_memory)
         self.box_loader = HybridLoader(self.opt.input_box_dir, '.npy', in_memory=self.data_in_memory)
 
-        self.num_images = len(self.info['images']) # self.label_start_ix.shape[0]
+        self.num_images = len(self.images) # self.label_start_ix.shape[0]
         print('read %d image features' %(self.num_images))
 
         # separate out indexes for each of the provided splits
         self.split_ix = {'train': [], 'val': [], 'test': []}
-        for ix in range(len(self.info['images'])):
-            img = self.info['images'][ix]
+        for ix, img in enumerate(self.images):
             if not 'split' in img:
                 self.split_ix['train'].append(ix)
                 self.split_ix['val'].append(ix)
@@ -156,14 +156,58 @@ def __init__(self, opt):
             elif opt.train_only == 0: # restval
                 self.split_ix['train'].append(ix)
 
+        if opt.data_augmentation:
+            self.seq_per_img_da = opt.seq_per_img_da
+
+            # load the json file which contains additional information about the dataset
+            print('DataLoader da loading json file: ', opt.input_json_da)
+            self.info_da = json.load(open(self.opt.input_json_da))
+            self.images_da = self.info_da['images']
+            # if 'ix_to_word' in self.info_da:
+            #     self.ix_to_word_da = self.info_da['ix_to_word']
+            #     self.vocab_size_da = len(self.ix_to_word_da)
+            #     print('da vocab size is ', self.vocab_size_da)
+
+            # open the hdf5 file
+            print('DataLoader da loading h5 file: ', opt.input_fc_dir_da, opt.input_att_dir_da, opt.input_label_h5_da)
+
+            if self.opt.input_label_h5_da != 'none':
+                self.h5_label_file_da = h5py.File(self.opt.input_label_h5_da, 'r', driver='core')
+                # load in the sequence data
+                seq_size = self.h5_label_file_da['labels'].shape
+                self.label_da = self.h5_label_file_da['labels'][:]
+                self.seq_length_da = seq_size[1]
+                print('max sequence length in da data is', self.seq_length_da)
+                # load the pointers in full to RAM (should be small enough)
+                self.label_start_ix_da = self.h5_label_file_da['label_start_ix'][:]
+                self.label_end_ix_da = self.h5_label_file_da['label_end_ix'][:]
+            else:
+                self.seq_length_da = 1
+
+            self.fc_loader_da = HybridLoader(self.opt.input_fc_dir_da, '.npy', in_memory=self.data_in_memory)
+            self.att_loader_da = HybridLoader(self.opt.input_att_dir_da, '.npz', in_memory=self.data_in_memory)
+
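+            # note: augmented images are indexed after the real ones, so a global
+            # index ix >= self.num_images refers to self.images_da[ix - self.num_images]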
+            self.num_images_da = len(self.images_da) # self.label_start_ix.shape[0]
+            print('read %d da image features' %(self.num_images_da))
+
+            for ix, img in enumerate(self.images_da):
+                # data augmentation only for training
+                if img['split'] == 'train' or img['split'] == 'restval':
+                    self.split_ix['train'].append(ix + self.num_images)
+
         print('assigned %d images to split train' %len(self.split_ix['train']))
         print('assigned %d images to split val' %len(self.split_ix['val']))
         print('assigned %d images to split test' %len(self.split_ix['test']))
 
+
     def get_captions(self, ix, seq_per_img):
         # fetch the sequence labels
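+        # note: for augmented indices (ix >= self.num_images) the branches below read
+        # from label_da; slicing with self.seq_length assumes the da labels were built
+        # with the same max caption length as the original ones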
-        ix1 = self.label_start_ix[ix] - 1 #label_start_ix starts from 1
-        ix2 = self.label_end_ix[ix] - 1
+        if self.opt.data_augmentation and ix >= self.num_images:
+            ix1 = self.label_start_ix_da[ix - self.num_images] - 1 #label_start_ix starts from 1
+            ix2 = self.label_end_ix_da[ix - self.num_images] - 1
+        else:
+            ix1 = self.label_start_ix[ix] - 1 #label_start_ix starts from 1
+            ix2 = self.label_end_ix[ix] - 1
         ncap = ix2 - ix1 + 1 # number of captions available for this image
         assert ncap > 0, 'an image does not have any label. this can be handled but right now isn\'t'
 
@@ -172,10 +216,16 @@ def get_captions(self, ix, seq_per_img):
             seq = np.zeros([seq_per_img, self.seq_length], dtype = 'int')
             for q in range(seq_per_img):
                 ixl = random.randint(ix1,ix2)
-                seq[q, :] = self.label[ixl, :self.seq_length]
+                if self.opt.data_augmentation and ix >= self.num_images:
+                    seq[q, :] = self.label_da[ixl, :self.seq_length]
+                else:
+                    seq[q, :] = self.label[ixl, :self.seq_length]
         else:
             ixl = random.randint(ix1, ix2 - seq_per_img + 1)
-            seq = self.label[ixl: ixl + seq_per_img, :self.seq_length]
+            if self.opt.data_augmentation and ix >= self.num_images:
+                seq = self.label_da[ixl: ixl + seq_per_img, :self.seq_length]
+            else:
+                seq = self.label[ixl: ixl + seq_per_img, :self.seq_length]
 
         return seq
 
@@ -210,15 +260,22 @@ def collate_func(self, batch, split):
 
             # Used for reward evaluation
             if hasattr(self, 'h5_label_file'):
                 # if there is ground truth
-                gts.append(self.label[self.label_start_ix[ix] - 1: self.label_end_ix[ix]])
+                if self.opt.data_augmentation and ix >= self.num_images:
+                    gts.append(self.label_da[self.label_start_ix_da[ix - self.num_images] - 1: self.label_end_ix_da[ix - self.num_images]])
+                else:
+                    gts.append(self.label[self.label_start_ix[ix] - 1: self.label_end_ix[ix]])
             else:
                 gts.append([])
 
             # record associated info as well
             info_dict = {}
             info_dict['ix'] = ix
-            info_dict['id'] = self.info['images'][ix]['id']
-            info_dict['file_path'] = self.info['images'][ix].get('file_path', '')
+            if self.opt.data_augmentation and ix >= self.num_images:
+                info_dict['id'] = self.images_da[ix - self.num_images]['id']
+                # info_dict['file_path'] = self.images_da[ix - self.num_images].get('file_path', '')
+            else:
+                info_dict['id'] = self.images[ix]['id']
+                info_dict['file_path'] = self.info['images'][ix].get('file_path', '')
             infos.append(info_dict)
 
         # #sort by att_feat length
@@ -264,16 +321,19 @@ def __getitem__(self, index):
         """
         ix, it_pos_now, wrapped = index #self.split_ix[index]
         if self.use_att:
-            att_feat = self.att_loader.get(str(self.info['images'][ix]['id']))
+            if self.opt.data_augmentation and ix >= self.num_images:
+                att_feat = self.att_loader_da.get(str(self.images_da[ix - self.num_images]['id']))
+            else:
+                att_feat = self.att_loader.get(str(self.images[ix]['id']))
             # Reshape to K x C
             att_feat = att_feat.reshape(-1, att_feat.shape[-1])
             if self.norm_att_feat:
                 att_feat = att_feat / np.linalg.norm(att_feat, 2, 1, keepdims=True)
             if self.use_box:
-                box_feat = self.box_loader.get(str(self.info['images'][ix]['id']))
+                box_feat = self.box_loader.get(str(self.images[ix]['id']))
                 # devided by image width and height
                 x1,y1,x2,y2 = np.hsplit(box_feat, 4)
-                h,w = self.info['images'][ix]['height'], self.info['images'][ix]['width']
+                h,w = self.images[ix]['height'], self.images[ix]['width']
                 box_feat = np.hstack((x1/w, y1/h, x2/w, y2/h, (x2-x1)*(y2-y1)/(w*h))) # question? x2-x1+1??
                 if self.norm_box_feat:
                     box_feat = box_feat / np.linalg.norm(box_feat, 2, 1, keepdims=True)
@@ -283,11 +343,14 @@ def __getitem__(self, index):
         else:
             att_feat = np.zeros((0,0), dtype='float32')
         if self.use_fc:
-            try:
-                fc_feat = self.fc_loader.get(str(self.info['images'][ix]['id']))
-            except:
+            # try:
+            if self.opt.data_augmentation and ix >= self.num_images:
+                fc_feat = self.fc_loader_da.get(str(self.images_da[ix - self.num_images]['id']))
+            else:
+                fc_feat = self.fc_loader.get(str(self.images[ix]['id']))
+            # except:
                 # Use average of attention when there is no fc provided (For bottomup feature)
-                fc_feat = att_feat.mean(0)
+                # fc_feat = att_feat.mean(0)
         else:
             fc_feat = np.zeros((0), dtype='float32')
         if hasattr(self, 'h5_label_file'):
@@ -299,7 +362,10 @@ def __getitem__(self, index):
                 ix, it_pos_now, wrapped)
 
     def __len__(self):
-        return len(self.info['images'])
+        if self.opt.data_augmentation:
+            return len(self.images) + len(self.images_da)
+        else:
+            return len(self.images)
 
 class DataLoader:
     def __init__(self, opt):
diff --git a/captioning/models/__pycache__/AoAModel.cpython-38.pyc b/captioning/models/__pycache__/AoAModel.cpython-38.pyc
new file mode 100644
index 00000000..7c09cb8b
Binary files /dev/null and b/captioning/models/__pycache__/AoAModel.cpython-38.pyc differ
diff --git a/captioning/models/__pycache__/AttModel.cpython-38.pyc b/captioning/models/__pycache__/AttModel.cpython-38.pyc
new file mode 100644
index 00000000..2395612c
Binary files /dev/null and b/captioning/models/__pycache__/AttModel.cpython-38.pyc differ
diff --git a/captioning/models/__pycache__/BertCapModel.cpython-38.pyc b/captioning/models/__pycache__/BertCapModel.cpython-38.pyc
new file mode 100644
index 00000000..4dd920f7
Binary files /dev/null and b/captioning/models/__pycache__/BertCapModel.cpython-38.pyc differ
diff --git a/captioning/models/__pycache__/CaptionModel.cpython-38.pyc b/captioning/models/__pycache__/CaptionModel.cpython-38.pyc
new file mode 100644
index 00000000..dfbc99f3
Binary files /dev/null and b/captioning/models/__pycache__/CaptionModel.cpython-38.pyc differ
diff --git a/captioning/models/__pycache__/FCModel.cpython-38.pyc b/captioning/models/__pycache__/FCModel.cpython-38.pyc
new file mode 100644
index 00000000..c4214186
Binary files /dev/null and b/captioning/models/__pycache__/FCModel.cpython-38.pyc differ
diff --git a/captioning/models/__pycache__/M2Transformer.cpython-38.pyc b/captioning/models/__pycache__/M2Transformer.cpython-38.pyc
new file mode 100644
index 00000000..24df91b5
Binary files /dev/null and b/captioning/models/__pycache__/M2Transformer.cpython-38.pyc differ
diff --git a/captioning/models/__pycache__/ShowTellModel.cpython-38.pyc b/captioning/models/__pycache__/ShowTellModel.cpython-38.pyc
new file mode 100644
index 00000000..561a02bc
Binary files /dev/null and b/captioning/models/__pycache__/ShowTellModel.cpython-38.pyc differ
diff --git a/captioning/models/__pycache__/TransformerModel.cpython-38.pyc b/captioning/models/__pycache__/TransformerModel.cpython-38.pyc
new file mode 100644
index 00000000..441cc364
Binary files /dev/null and b/captioning/models/__pycache__/TransformerModel.cpython-38.pyc differ
diff --git a/captioning/models/__pycache__/__init__.cpython-38.pyc b/captioning/models/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 00000000..cdaf1888
Binary files /dev/null and b/captioning/models/__pycache__/__init__.cpython-38.pyc differ
diff --git a/captioning/models/__pycache__/cachedTransformer.cpython-38.pyc b/captioning/models/__pycache__/cachedTransformer.cpython-38.pyc
new file mode 100644
index 00000000..25b04332
Binary files /dev/null and b/captioning/models/__pycache__/cachedTransformer.cpython-38.pyc differ
diff --git a/captioning/models/__pycache__/utils.cpython-38.pyc b/captioning/models/__pycache__/utils.cpython-38.pyc
new file mode 100644
index 00000000..e3b72448
Binary files /dev/null and b/captioning/models/__pycache__/utils.cpython-38.pyc differ
diff --git a/captioning/modules/__pycache__/loss_wrapper.cpython-38.pyc b/captioning/modules/__pycache__/loss_wrapper.cpython-38.pyc
new file mode 100644
index 00000000..98df0a3e
Binary files /dev/null and b/captioning/modules/__pycache__/loss_wrapper.cpython-38.pyc differ
diff --git a/captioning/modules/__pycache__/losses.cpython-38.pyc b/captioning/modules/__pycache__/losses.cpython-38.pyc
new file mode 100644
index 00000000..260eb6f6
Binary files /dev/null and b/captioning/modules/__pycache__/losses.cpython-38.pyc differ
diff --git a/captioning/utils/__pycache__/__init__.cpython-38.pyc b/captioning/utils/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 00000000..390d0779
Binary files /dev/null and b/captioning/utils/__pycache__/__init__.cpython-38.pyc differ
diff --git a/captioning/utils/__pycache__/config.cpython-38.pyc b/captioning/utils/__pycache__/config.cpython-38.pyc
new file mode 100644
index 00000000..d5690ae1
Binary files /dev/null and b/captioning/utils/__pycache__/config.cpython-38.pyc differ
diff --git a/captioning/utils/__pycache__/eval_utils.cpython-38.pyc b/captioning/utils/__pycache__/eval_utils.cpython-38.pyc
new file mode 100644
index 00000000..43c8e924
Binary files /dev/null and b/captioning/utils/__pycache__/eval_utils.cpython-38.pyc differ
diff --git a/captioning/utils/__pycache__/misc.cpython-38.pyc b/captioning/utils/__pycache__/misc.cpython-38.pyc
new file mode 100644
index 00000000..90eed6bc
Binary files /dev/null and b/captioning/utils/__pycache__/misc.cpython-38.pyc differ
diff --git a/captioning/utils/__pycache__/opts.cpython-38.pyc b/captioning/utils/__pycache__/opts.cpython-38.pyc
new file mode 100644
index 00000000..ddb2ab62
Binary files /dev/null and b/captioning/utils/__pycache__/opts.cpython-38.pyc differ
diff --git a/captioning/utils/__pycache__/resnet.cpython-38.pyc b/captioning/utils/__pycache__/resnet.cpython-38.pyc
new file mode 100644
index 00000000..2ed6fcca
Binary files /dev/null and b/captioning/utils/__pycache__/resnet.cpython-38.pyc differ
diff --git a/captioning/utils/__pycache__/resnet_utils.cpython-38.pyc b/captioning/utils/__pycache__/resnet_utils.cpython-38.pyc
new file mode 100644
index 00000000..cd133081
Binary files /dev/null and b/captioning/utils/__pycache__/resnet_utils.cpython-38.pyc differ
diff --git a/captioning/utils/__pycache__/rewards.cpython-38.pyc b/captioning/utils/__pycache__/rewards.cpython-38.pyc
new file mode 100644
index 00000000..0f12b33d
Binary files /dev/null and b/captioning/utils/__pycache__/rewards.cpython-38.pyc differ
diff --git a/captioning/utils/eval_utils.py b/captioning/utils/eval_utils.py
index c4bc7f44..78abffc8 100644
--- a/captioning/utils/eval_utils.py
+++ b/captioning/utils/eval_utils.py
@@ -17,12 +17,9 @@ from . import misc as utils
 
 # load coco-caption if available
-try:
-    sys.path.append("coco-caption")
-    from pycocotools.coco import COCO
-    from pycocoevalcap.eval import COCOEvalCap
-except:
-    print('Warning: coco-caption not available')
+from pycocotools.coco import COCO
+from pycocoevalcap.eval import COCOEvalCap
+
 
 bad_endings = ['a','an','the','in','for','at','of','with','before','after','on','upon','near','to','is','are','am']
 bad_endings += ['the']
diff --git a/captioning/utils/opts.py b/captioning/utils/opts.py
index 282bd72c..f260a8d2 100644
--- a/captioning/utils/opts.py
+++ b/captioning/utils/opts.py
@@ -38,6 +38,20 @@ def parse_opt():
     parser.add_argument('--cached_tokens', type=str, default='coco-train-idxs',
                     help='Cached token file for calculating cider score during self critical training.')
 
+    # Data Augmentation
+    parser.add_argument('--data_augmentation', type=bool, default=False,
+                    help='use generated data as augmentation if True')
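+    # note: argparse's type=bool turns any non-empty string (including 'False') into True,
+    # so this flag is effectively set through the yaml config rather than on the command line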
+    parser.add_argument('--input_json_da', type=str, default='data/coco.json',
+                    help='path to the json file containing additional info and vocab')
+    parser.add_argument('--input_fc_dir_da', type=str, default='data/cocotalk_fc',
+                    help='path to the directory containing the preprocessed fc feats')
+    parser.add_argument('--input_att_dir_da', type=str, default='data/cocotalk_att',
+                    help='path to the directory containing the preprocessed att feats')
+    parser.add_argument('--input_label_h5_da', type=str, default='data/coco_label.h5',
+                    help='path to the h5file containing the preprocessed dataset')
+    parser.add_argument('--seq_per_img_da', type=int, default=5,
+                    help='number of captions to sample for each image during training. Done for efficiency since CNN forward pass is expensive. E.g. coco has 5 sents/image')
+
     # Model settings
     parser.add_argument('--caption_model', type=str, default="show_tell",
                     help='show_tell, show_attend_tell, all_img, fc, att2in, att2in2, att2all2, adaatt, adaattmo, updown, stackatt, denseatt, transformer')
diff --git a/configs/fc.yml b/configs/fc.yml
index 979b69ee..0359e186 100644
--- a/configs/fc.yml
+++ b/configs/fc.yml
@@ -1,16 +1,24 @@
 caption_model: newfc
-input_json: data/cocotalk.json
-input_fc_dir: data/cocotalk_fc
-input_att_dir: data/cocotalk_att
-input_label_h5: data/cocotalk_label.h5
+input_json: /data/share/image-caption/cocotalk.json
+input_fc_dir: /data/share/image-caption/cocotalk_fc
+input_att_dir: /data/share/image-caption/cocotalk_att
+input_label_h5: /data/share/image-caption/cocotalk_label.h5
 learning_rate: 0.0005
 learning_rate_decay_start: 0
 scheduled_sampling_start: 0
-# checkpoint_path: $ckpt_path
-# $start_from
+checkpoint_path: /data/private/mxy/exp/image-caption/da/coco_lostgan_sub
+# start_from: /data/share/image-caption/fc_nsc
 language_eval: 1
-save_checkpoint_every: 3000
+save_checkpoint_every: 1000
 val_images_use: 5000
-batch_size: 10
-max_epochs: 30
\ No newline at end of file
+batch_size: 100
+max_epochs: 30
+
+# configs for da
+data_augmentation: True
+input_json_da: /data/share/image-caption/cocotalk_da_lostgan_sub.json
+input_fc_dir_da: /data/share/image-caption/cocotalk_da_lostgan_sub_fc
+input_att_dir_da: /data/share/image-caption/cocotalk_da_lostgan_sub_att
+input_label_h5_da: /data/share/image-caption/cocotalk_da_lostgan_label.h5
+seq_per_img_da: 1
\ No newline at end of file
diff --git a/prepro.sh b/prepro.sh
new file mode 100644
index 00000000..752c508f
--- /dev/null
+++ b/prepro.sh
@@ -0,0 +1,49 @@
+set -ex
+
+DATA_ROOT=/data/share/image-caption
+MODEL=/data/private/mxy/data/image-caption
+
+# original
+JSON=$DATA_ROOT/dataset_flickr30k.json
+OUT_DIR=$DATA_ROOT/f30ktalk
+IMG_ROOT=/data/share/UNITER/origin_imgs/flickr30k/flickr30k-images
+
+# da
+# JSON_DA=$DATA_ROOT/dataset_coco.json
+JSON_DA=$DATA_ROOT/dataset_coco_lostgan_sub.json
+# JSON_DA=/data/share/data/coco2017/annotations/dataset_coco_lostgan.json # sub
+OUT_DIR_DA=$DATA_ROOT/cocotalk_da_lostgan_sub
+# IMG_ROOT_DA=/data/private/mxy/code/T2I_CL/DM-GAN+CL/output/coco_DMGAN_2021_08_15_14_19_42/Model/netG_epoch_120/f30k
+# IMG_ROOT_DA=/data/share/Seg-Backtranslation/data/gen_imgs/train2017_2
+# IMG_ROOT_DA=/data/share/Seg-Backtranslation/data/gen_imgs_lostgan_train
+IMG_ROOT_DA=/data/share/Seg-Backtranslation/data/gen_imgs_lostgan_train_sub
+LBL_JSON_DA=$DATA_ROOT/cocotalk_da_lostgan_sub.json
+LBL_H5_DA=$DATA_ROOT/cocotalk_da_lostgan_sub
+
+# pre labels for da
+python scripts/prepro_labels_da.py \
+    --input_json $JSON_DA \
+    --output_json $LBL_JSON_DA \
+    --output_h5 $LBL_H5_DA
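+# (note) prepro_labels_da.py writes ${LBL_H5_DA}_label.h5 plus $LBL_JSON_DA, with the
+# captions re-encoded using the original cocotalk vocabulary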
+
+# pre ngram for da
+# python scripts/prepro_ngrams.py \
+#     --input_json $JSON_DA \
+#     --dict_json $LBL_JSON_DA \
+#     --output_pkl $DATA_ROOT/f30k_da-train \
+#     --split train
+
+
+# pre feats for da
+# python scripts/prepro_feats_da.py \
+#     --input_json $JSON_DA \
+#     --output_dir $OUT_DIR_DA \
+#     --images_root $IMG_ROOT_DA \
+#     --model_root $MODEL
+
+# pre feats
+# python scripts/prepro_feats.py \
+#     --input_json $JSON \
+#     --output_dir $OUT_DIR \
+#     --images_root $IMG_ROOT \
+#     --model_root $MODEL
\ No newline at end of file
diff --git a/scripts/prepro_feats.py b/scripts/prepro_feats.py
index a59ccb43..d39e2314 100644
--- a/scripts/prepro_feats.py
+++ b/scripts/prepro_feats.py
@@ -34,7 +34,8 @@
         #trn.ToTensor(),
         trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
 ])
-
+import sys
+sys.path.append('/data/private/mxy/code/ImageCaptioning.pytorch')
 from captioning.utils.resnet_utils import myResnet
 import captioning.utils.resnet as resnet
diff --git a/scripts/prepro_feats_da.py b/scripts/prepro_feats_da.py
new file mode 100644
index 00000000..9ed572f9
--- /dev/null
+++ b/scripts/prepro_feats_da.py
@@ -0,0 +1,136 @@
+"""
+Preprocess a raw json dataset into features files for use in data_loader.py
+
+Input: json file that has the form
+[{ file_path: 'path/img.jpg', captions: ['a caption', ...] }, ...]
+example element in this list would look like
+{'captions': [u'A man with a red helmet on a small moped on a dirt road. ', u'Man riding a motor bike on a dirt road on the countryside.', u'A man riding on the back of a motorcycle.', u'A dirt path with a young person on a motor bike rests to the foreground of a verdant area with a bridge and a background of cloud-wreathed mountains. ', u'A man in a red shirt and a red hat is on a motorcycle on a hill side.'], 'file_path': u'val2014/COCO_val2014_000000391895.jpg', 'id': 391895}
+
+This script reads this json and extracts ResNet fc/att features for each image
+
+Output: two folders of features
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import json
+import argparse
+from random import shuffle, seed
+import string
+# non-standard dependencies:
+import h5py
+from six.moves import cPickle
+import numpy as np
+import torch
+import torchvision.models as models
+import skimage.io
+
+from torchvision import transforms as trn
+preprocess = trn.Compose([
+    #trn.ToTensor(),
+    trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+])
+import sys
+sys.path.append('/data/private/mxy/code/ImageCaptioning.pytorch')
+from captioning.utils.resnet_utils import myResnet
+import captioning.utils.resnet as resnet
+
+
+def main(params):
+    net = getattr(resnet, params['model'])()
+    net.load_state_dict(torch.load(os.path.join(params['model_root'],params['model']+'.pth')))
+    my_resnet = myResnet(net)
+    my_resnet.cuda()
+    my_resnet.eval()
+
+    imgs = json.load(open(params['input_json'], 'r'))
+    imgs = imgs['images']
+    N = len(imgs)
+
+    seed(123) # make reproducible
+
+    dir_fc = params['output_dir']+'_fc'
+    dir_att = params['output_dir']+'_att'
+    if not os.path.isdir(dir_fc):
+        os.mkdir(dir_fc)
+    if not os.path.isdir(dir_att):
+        os.mkdir(dir_att)
+
+    total_cnt = 0
+    for i,img in enumerate(imgs):
+        # flickr
+        # if i != img['imgid']:
+        #     print(f"Error: id {img['imgid']} != index {i}")
+        #     continue
+        # for j in range(len(img['sentences'])):
+        #     img_name = f"{params['images_root']}/{i}/0_s_{j}_g2.png"
+        #     if not os.path.isfile(img_name):
+        #         print(f'{img_name} not exist!')
+        #         break
+        #     # load the image
+        #     I = skimage.io.imread(img_name)
+        #     # handle grayscale input images
+        #     if len(I.shape) == 2:
+        #         I = I[:,:,np.newaxis]
+        #         I = np.concatenate((I,I,I), axis=2)
+
+        #     I = I.astype('float32')/255.0
+        #     I = torch.from_numpy(I.transpose([2,0,1])).cuda()
+        #     I = preprocess(I)
+        #     with torch.no_grad():
+        #         tmp_fc, tmp_att = my_resnet(I, params['att_size'])
+        #     # write to pkl
+        #     np.save(f"{dir_fc}/{i}_{j}", tmp_fc.data.cpu().float().numpy())
+        #     np.savez_compressed(f"{dir_att}/{i}_{j}", feat=tmp_att.data.cpu().float().numpy())
+        #     total_cnt += 1
+
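+        # (note) the generated images are expected under images_root, named by their
+        # zero-padded COCO id; features are saved under that same id, which is the key
+        # the dataloader later uses for lookup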
+        # coco
+        img_name = f"{params['images_root']}/{str(img['cocoid']).zfill(12)}.jpg"
+        if not os.path.isfile(img_name):
+            print(f'{img_name} does not exist!')
+            continue
+        # load the image
+        I = skimage.io.imread(img_name)
+        # handle grayscale input images
+        if len(I.shape) == 2:
+            I = I[:,:,np.newaxis]
+            I = np.concatenate((I,I,I), axis=2)
+
+        I = I.astype('float32')/255.0
+        I = torch.from_numpy(I.transpose([2,0,1])).cuda()
+        I = preprocess(I)
+        with torch.no_grad():
+            tmp_fc, tmp_att = my_resnet(I, params['att_size'])
+        # write to pkl
+        np.save(f"{dir_fc}/{img['cocoid']}", tmp_fc.data.cpu().float().numpy())
+        np.savez_compressed(f"{dir_att}/{img['cocoid']}", feat=tmp_att.data.cpu().float().numpy())
+        total_cnt += 1
+
+        if i % 1000 == 0:
+            print('processing %d/%d (%.2f%% done) total images: %d' % (i, N, i*100.0/N, total_cnt))
+    print('wrote ', params['output_dir'])
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser()
+
+    # input json
+    parser.add_argument('--input_json', required=True, help='input json file to process into hdf5')
+    parser.add_argument('--output_dir', default='data', help='output directory prefix for the fc/att feature folders')
+
+    # options
+    parser.add_argument('--batch_size', default=14, type=int, help='batch size (unused by this script)')
+    parser.add_argument('--images_root', default='', help='root location in which images are stored, to be prepended to file_path in input json')
+    parser.add_argument('--att_size', default=14, type=int, help='14x14 or 7x7')
+    parser.add_argument('--model', default='resnet101', type=str, help='resnet101, resnet152')
+    parser.add_argument('--model_root', default='./data/imagenet_weights', type=str, help='model root')
+
+    args = parser.parse_args()
+    params = vars(args) # convert to ordinary dict
+    print('parsed input parameters:')
+    print(json.dumps(params, indent = 2))
+    main(params)
diff --git a/scripts/prepro_labels_da.py b/scripts/prepro_labels_da.py
new file mode 100644
index 00000000..4ea92c20
--- /dev/null
+++ b/scripts/prepro_labels_da.py
@@ -0,0 +1,164 @@
+"""
+Preprocess a raw json dataset into hdf5/json files for use in data_loader.py
+
+Input: json file that has the form
+[{ file_path: 'path/img.jpg', captions: ['a caption', ...] }, ...]
+example element in this list would look like
+{'captions': [u'A man with a red helmet on a small moped on a dirt road. ', u'Man riding a motor bike on a dirt road on the countryside.', u'A man riding on the back of a motorcycle.', u'A dirt path with a young person on a motor bike rests to the foreground of a verdant area with a bridge and a background of cloud-wreathed mountains. ', u'A man in a red shirt and a red hat is on a motorcycle on a hill side.'], 'file_path': u'val2014/COCO_val2014_000000391895.jpg', 'id': 391895}
+
+This script reads this json, does some basic preprocessing on the captions
+(e.g. lowercase, etc.), creates a special UNK token, and encodes everything to arrays
+
+Output: a json file and an hdf5 file
+The hdf5 file contains several fields:
+/labels is (M,max_length) uint32 array of encoded labels, zero padded
+/label_start_ix and /label_end_ix are (N,) uint32 arrays of pointers to the
+  first and last indices (in range 1..M) of labels for each image
+/label_length stores the length of the sequence for each of the M sequences
+
+The json file has a dict that contains:
+- an 'ix_to_word' field storing the vocab in form {ix:'word'}, where ix is 1-indexed
+- an 'images' field that is a list holding auxiliary information for each image,
+  such as in particular the 'split' it was assigned to.
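+
+For illustration: if the first image has 5 captions and the second has 3, then
+label_start_ix = [1, 6] and label_end_ix = [5, 8] (pointers are 1-indexed and inclusive).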
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import json +import argparse +from random import shuffle, seed +import string +# non-standard dependencies: +import h5py +import numpy as np +import torch +import torchvision.models as models +import skimage.io +from PIL import Image + + +def encode_captions(imgs, params, wtoi): + """ + encode all captions into one large array, which will be 1-indexed. + also produces label_start_ix and label_end_ix which store 1-indexed + and inclusive (Lua-style) pointers to the first and last caption for + each image in the dataset. + """ + + max_length = params['max_length'] + N = len(imgs) + M = sum(len(img['final_captions']) for img in imgs) # total number of captions + + label_arrays = [] + label_start_ix = np.zeros(N, dtype='uint32') # note: these will be one-indexed + label_end_ix = np.zeros(N, dtype='uint32') + label_length = np.zeros(M, dtype='uint32') + caption_counter = 0 + counter = 1 + for i,img in enumerate(imgs): + n = len(img['final_captions']) + assert n > 0, 'error: some image has no captions' + + Li = np.zeros((n, max_length), dtype='uint32') + for j,s in enumerate(img['final_captions']): + label_length[caption_counter] = min(max_length, len(s)) # record the length of this sequence + caption_counter += 1 + for k,w in enumerate(s): + if k < max_length: + Li[j,k] = wtoi[w] + + # note: word indices are 1-indexed, and captions are padded with zeros + label_arrays.append(Li) + label_start_ix[i] = counter + label_end_ix[i] = counter + n - 1 + + counter += n + + L = np.concatenate(label_arrays, axis=0) # put all the labels together + assert L.shape[0] == M, 'lengths don\'t match? that\'s weird' + assert np.all(label_length > 0), 'error: some caption had no words?' 
+    # create the vocab
+    # vocab = build_vocab(imgs, params)
+    with open('/data/share/image-caption/cocotalk.json') as f:
+        talk = json.load(f)
+    itow = talk['ix_to_word']
+    vocab = list(itow.values())
+    itow = {i+1:w for i,w in enumerate(vocab)} # a 1-indexed vocab translation table
+    wtoi = {w:i+1 for i,w in enumerate(vocab)} # inverse table
+
+    for img in imgs:
+        img['final_captions'] = []
+        for sent in img['sentences']:
+            txt = sent['tokens']
+            caption = [w if w in vocab else 'UNK' for w in txt]
+            img['final_captions'].append(caption)
+
+    # encode captions in large arrays, ready to ship to hdf5 file
+    L, label_start_ix, label_end_ix, label_length = encode_captions(imgs, params, wtoi)
+
+    # create output h5 file
+    N = len(imgs)
+    f_lb = h5py.File(params['output_h5']+'_label.h5', "w")
+    f_lb.create_dataset("labels", dtype='uint32', data=L)
+    f_lb.create_dataset("label_start_ix", dtype='uint32', data=label_start_ix)
+    f_lb.create_dataset("label_end_ix", dtype='uint32', data=label_end_ix)
+    f_lb.create_dataset("label_length", dtype='uint32', data=label_length)
+    f_lb.close()
+
+    # create output json file
+    out = {}
+    out['ix_to_word'] = itow # encode the (1-indexed) vocab
+    out['images'] = []
+    for i,img in enumerate(imgs):
+
+        jimg = {}
+        jimg['split'] = img['split']
+        if 'filename' in img: jimg['file_path'] = os.path.join(img.get('filepath', ''), img['filename']) # copy it over, might need
+        if 'cocoid' in img:
+            jimg['id'] = img['cocoid'] # copy over & maintain an id, if present (e.g. coco ids, useful)
+        elif 'id' in img:
+            jimg['id'] = img['id']
+
+        if params['images_root'] != '':
+            with Image.open(os.path.join(params['images_root'], img['filepath'], img['filename'])) as _img:
+                jimg['width'], jimg['height'] = _img.size
+
+        out['images'].append(jimg)
+
+    json.dump(out, open(params['output_json'], 'w'))
+    print('wrote ', params['output_json'])
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser()
+
+    # input json
+    parser.add_argument('--input_json', required=True, help='input json file to process into hdf5')
+    parser.add_argument('--output_json', default='data.json', help='output json file')
+    parser.add_argument('--output_h5', default='data', help='output h5 file')
+    parser.add_argument('--images_root', default='', help='root location in which images are stored, to be prepended to file_path in input json')
+
+    # options
+    parser.add_argument('--max_length', default=16, type=int, help='max length of a caption, in number of words. captions longer than this get clipped.')
+    parser.add_argument('--word_count_threshold', default=5, type=int, help='only words that occur more than this number of times will be put in vocab')
+
+    args = parser.parse_args()
+    params = vars(args) # convert to ordinary dict
+    print('parsed input parameters:')
+    print(json.dumps(params, indent = 2))
+    main(params)
diff --git a/scripts/prepro_ngrams.py b/scripts/prepro_ngrams.py
index f7cdce47..d1328193 100644
--- a/scripts/prepro_ngrams.py
+++ b/scripts/prepro_ngrams.py
@@ -6,6 +6,8 @@
 import json
 import argparse
 from six.moves import cPickle
+import sys
+sys.path.append('/data/private/mxy/code/ImageCaptioning.pytorch')
 
 import captioning.utils.misc as utils
 from collections import defaultdict
diff --git a/scripts/write_coco_da.py b/scripts/write_coco_da.py
new file mode 100644
index 00000000..4eeea08c
--- /dev/null
+++ b/scripts/write_coco_da.py
@@ -0,0 +1,85 @@
+import os
+import copy
+import json
+
+def write_json_fs(root, ratio):
+    '''
+    write json for few-shot training
+    '''
+    with open(f'{root}/dataset_coco.json', 'r') as fin:
+        dataset = json.load(fin)
+        images = dataset['images']
+
+    num_imgs = int(len(images) * ratio)
+
+    dataset_da = {
+        'dataset': f'coco_da_{ratio}',
+        'images': [],
+    }
+
+    i = 0
+    for img in images:
+        if img['split'] == 'train' or img['split'] == 'restval':
+            dataset_da['images'].append(img)
+            i += 1
+            if i == num_imgs:
+                break
+
+    with open(f'{root}/dataset_coco_fs.json', 'w') as fout:
+        json.dump(dataset_da, fout)
+
+def write_json_lostgan(root, img_root):
+    '''
+    write json for LostGAN, which only generates about 70k images
+    '''
+    with open(f'/data/share/data/coco2017/annotations/dataset_coco_lostgan.json', 'r') as fin:
+        dataset = json.load(fin)
+        images = dataset['images']
+
+    dataset_da = {
+        'dataset': f'coco_da_lostgan',
+        'images': [],
+    }
+
+    for img in images:
+        img_name = f"{img_root}/{str(img['cocoid']).zfill(12)}.jpg"
+        if os.path.isfile(img_name):
+            new_img = copy.deepcopy(img)
+            new_img['sentences'] = [img['sentences']]
+            dataset_da['images'].append(new_img)
+
+    print(f"wrote {len(dataset_da['images'])} images")
+
+    with open(f'{root}/dataset_coco_lostgan_sub.json', 'w') as fout:
+        json.dump(dataset_da, fout)
+
+def write_json(root, ratio):
+    '''
+    write json for da
+    param: ratio    # of synthetic data / # of real data,
+                    typically 1:1, 2:1 or 5:1
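+                    e.g. ratio=3 keeps every real entry and appends two extra entries
+                    per image, with cocoids suffixed _1 and _2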
+    '''
+    with open(f'{root}/dataset_coco.json', 'r') as fin:
+        dataset = json.load(fin)
+        images = dataset['images']
+
+    dataset_da = {
+        'dataset': f'coco_da_{ratio}',
+        'images': copy.deepcopy(images),
+    }
+
+    for i in range(1, ratio):
+        for img in images:
+            new_img = copy.deepcopy(img)
+            new_img['cocoid'] = f"{img['cocoid']}_{i}"
+            dataset_da['images'].append(new_img)
+
+    with open(f'{root}/dataset_coco_da_{ratio}.json', 'w') as fout:
+        json.dump(dataset_da, fout)
+
+if __name__ == '__main__':
+    root = '/data/share/image-caption'
+    img_root = '/data/share/Seg-Backtranslation/data/gen_imgs_lostgan_train_sub'
+    # ratio = 0.1
+    # write_json_fs(root, ratio)
+    write_json_lostgan(root, img_root)
\ No newline at end of file
diff --git a/scripts/write_f30k_da.py b/scripts/write_f30k_da.py
new file mode 100644
index 00000000..7e664744
--- /dev/null
+++ b/scripts/write_f30k_da.py
@@ -0,0 +1,36 @@
+import json
+
+def write_json(root, img_num):
+    '''
+    split the original json file for da
+    param: img_num    # of original images used for generation;
+                      # of generated images is expected to be 5*img_num, since 5 captions were given per image
+    '''
+    with open(f'{root}/dataset_flickr30k.json', 'r') as fin:
+        dataset = json.load(fin)
+        images = dataset['images'][:img_num]
+
+    dataset_da = {
+        'dataset': 'flickr30k_da',
+        'images': []
+    }
+    for img in images:
+        for i,s in enumerate(img['sentences']):
+            new_img = {
+                'sentids': [img['sentids'][i]],
+                'imgid': img['imgid'],
+                'sentences': [s],
+                'split': img['split'],
+                'id': f"{img['imgid']}_{i}"
+            }
+            dataset_da['images'].append(new_img)
+
+    with open(f'{root}/dataset_flickr30k_da.json', 'w') as fout:
+        json.dump(dataset_da, fout)
+
+if __name__ == '__main__':
+    # root = '/data/private/mxy/code/T2I_CL/DM-GAN+CL/output/coco_DMGAN_2021_08_15_14_19_42/Model/netG_epoch_120/f30k'
+    # out_path = '/data/private/mxy/data'
+    root = '/data/share/image-caption'
+    img_num = 22684
+    write_json(root, img_num)
\ No newline at end of file
diff --git a/tools/train.py b/tools/train.py
index 4d015a81..49e1944d 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -14,7 +14,9 @@ from six.moves import cPickle
 import traceback
 from collections import defaultdict
 
-
+# for import error
+import sys
+sys.path.append('/data/private/mxy/code/ImageCaptioning.pytorch')
 import captioning.utils.opts as opts
 import captioning.models as models
 from captioning.data.dataloader import *