diff --git a/.gitignore b/.gitignore index e1b77839b..f297d1f09 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ repos.cfg repos_init.cfg nvtool* +.idea/ \ No newline at end of file diff --git a/PyTorch/Detection/SSD/Dockerfile b/PyTorch/Detection/SSD/Dockerfile index 822683b70..721aa1b62 100755 --- a/PyTorch/Detection/SSD/Dockerfile +++ b/PyTorch/Detection/SSD/Dockerfile @@ -4,6 +4,10 @@ FROM ${FROM_IMAGE_NAME} # Set working directory WORKDIR /workspace/ssd +# Add custom CA certificates +# Ensure that my_custom_ca.crt is in the same context directory as your Dockerfile +RUN update-ca-certificates + # Copy the model files COPY . . diff --git a/PyTorch/Detection/SSD/__init__.py b/PyTorch/Detection/SSD/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/PyTorch/Detection/SSD/fpr_trp_eval.py b/PyTorch/Detection/SSD/fpr_trp_eval.py new file mode 100644 index 000000000..18afe18e0 --- /dev/null +++ b/PyTorch/Detection/SSD/fpr_trp_eval.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +""" Compute False Positive and True Positive Rates from output jsons folder + + Usage: + - python3 fpr_trp_eval.py + + Copyright 2023-, Cosmo Intelligent Medical Devices +""" +import json +import os +import sys + + +def fpr_tpr(base_path): + """ + Compute False Positive and True Positive Rates from output jsons folder. + + Args: + base_path (str): The folder path where annotation JSON files are located. + + Returns: + """ + # Specify the path to the JSON files saved by SSD evaluation code + json_results_path = os.path.join(base_path, "predictions.json") + json_gt_path = os.path.join(base_path, "ground_truth.json") + + threshold = 0.5 # threshold on prediction class scores + + # Load the JSON file + with open(json_results_path, "r") as json_file: + prediction_results = json.load(json_file) + print(f"Opened prediction file {json_results_path}") + with open(json_gt_path, "r") as json_file: + gt_file = json.load(json_file) + print(f"Opened ground truth file {json_gt_path}") + + id_of_negative_frames = [] + id_of_frames_with_predictions = [] + id_of_positive_frames = [] + for gt in gt_file: + if len(gt['annotations']) == 0: + id_of_negative_frames.append(gt['id']) + else: + id_of_positive_frames.append(gt['id']) + + for pred in prediction_results: + if pred["score"] > threshold: + id_of_frames_with_predictions.append(pred['id']) + + # Calculating FPR + set1 = set(id_of_negative_frames) + set2 = set(id_of_frames_with_predictions) + intersection = set1 & set2 + print("False Positive Rate: ", len(intersection) / len(id_of_negative_frames)) + + # Calculating TPR + true_positives = [pred for pred in prediction_results if pred["score"] > threshold and + pred['id'] in id_of_positive_frames] + tpr = len(true_positives) / len(id_of_positive_frames) if id_of_positive_frames else 0 + print("True Positive Rate: ", tpr) + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: fpr_trp_eval.py ") + sys.exit(1) + + folder_path = sys.argv[1] + fpr_tpr(folder_path) diff --git a/PyTorch/Detection/SSD/main.py b/PyTorch/Detection/SSD/main.py index 4c3fc3e69..52af7f32c 100644 --- a/PyTorch/Detection/SSD/main.py +++ b/PyTorch/Detection/SSD/main.py @@ -19,13 +19,16 @@ import numpy as np from torch.optim.lr_scheduler import MultiStepLR import torch.utils.data.distributed +import torch.multiprocessing +torch.multiprocessing.set_sharing_strategy('file_system') from ssd.model import SSD300, ResNet, Loss from ssd.utils import dboxes300_coco, Encoder from ssd.logger import Logger, BenchLogger from 
ssd.evaluate import evaluate from ssd.train import train_loop, tencent_trick, load_checkpoint, benchmark_train_loop, benchmark_inference_loop -from ssd.data import get_train_loader, get_val_dataset, get_val_dataloader, get_coco_ground_truth +from ssd.data import get_train_loader, get_val_dataset, get_val_dataloader, get_coco_ground_truth_validation, \ + get_coco_ground_truth_test, get_test_dataset import dllogger as DLLogger @@ -35,6 +38,7 @@ except ImportError: raise ImportError("Please install APEX from https://github.com/nvidia/apex") + def generate_mean_std(args): mean_val = [0.485, 0.456, 0.406] std_val = [0.229, 0.224, 0.225] @@ -54,7 +58,13 @@ def make_parser(): parser = ArgumentParser(description="Train Single Shot MultiBox Detector" " on COCO") parser.add_argument('--data', '-d', type=str, default='/coco', required=True, - help='path to test and training data files') + help='path to validation and training data files') + parser.add_argument('--dataset-name', '--dn', type=str, default='coco', + choices=['real_colon', 'coco'], + help='The type of data being used') + parser.add_argument("--negatives_sampling", dest='negatives_sampling', action="http://github.com/store_true", + help="Enable negatives frames sampling at every epoch during training.") + parser.set_defaults(negatives_sampling=False) parser.add_argument('--epochs', '-e', type=int, default=65, help='number of epochs for training') parser.add_argument('--batch-size', '--bs', type=int, default=32, @@ -73,7 +83,7 @@ def make_parser(): parser.add_argument('--save', type=str, default=None, help='save model checkpoints in the specified directory') parser.add_argument('--mode', type=str, default='training', - choices=['training', 'evaluation', 'benchmark-training', 'benchmark-inference']) + choices=['training', 'evaluation', 'benchmark-training', 'benchmark-inference', 'testing']) parser.add_argument('--evaluation', nargs='*', type=int, default=[21, 31, 37, 42, 48, 53, 59, 64], help='epochs at which to evaluate') parser.add_argument('--multistep', nargs='*', type=int, default=[43, 54], @@ -110,6 +120,9 @@ def make_parser(): help="Allow TF32 computations on supported GPUs.") parser.add_argument("--no-allow-tf32", dest='allow_tf32', action="http://github.com/store_false", help="Disable TF32 computations.") + parser.add_argument("--no-skip-empty", dest='skip_empty', action="http://github.com/store_false", + help="Also use the empty images") + parser.set_defaults(skip_empty=True) parser.set_defaults(allow_tf32=True) parser.add_argument('--data-layout', default="channels_last", choices=['channels_first', 'channels_last'], help="Model data layout. It's recommended to use channels_first with --no-amp") @@ -118,9 +131,12 @@ def make_parser(): parser.add_argument('--json-summary', type=str, default=None, help='If provided, the json summary will be written to' 'the specified file.') + parser.add_argument('--inference_jsons', type=str, default=None, + help='If provided, the json summary will be written to' + 'the specified file.') # Distributed - parser.add_argument('--local_rank', default=os.getenv('LOCAL_RANK',0), type=int, + parser.add_argument('--local_rank', default=os.getenv('LOCAL_RANK', 0), type=int, help='Used for multi-process training. 
Can either be manually set ' + 'or automatically set by using \'python -m multiproc\'.') @@ -147,25 +163,29 @@ def train(train_loop_func, logger, args): args.seed = np.random.randint(1e4) if args.distributed: - args.seed = (args.seed + torch.distributed.get_rank()) % 2**32 + args.seed = (args.seed + torch.distributed.get_rank()) % 2 ** 32 print("Using seed = {}".format(args.seed)) torch.manual_seed(args.seed) np.random.seed(seed=args.seed) + os.makedirs(args.save, exist_ok=True) # Setup data, defaults dboxes = dboxes300_coco() encoder = Encoder(dboxes) - cocoGt = get_coco_ground_truth(args) - - train_loader = get_train_loader(args, args.seed - 2**31) - + cocoGt_val = get_coco_ground_truth_validation(args) + train_loader = get_train_loader(args, args.seed - 2 ** 31) val_dataset = get_val_dataset(args) val_dataloader = get_val_dataloader(val_dataset, args) + if args.dataset_name == "real_colon": + label_num = 2 + else: + label_num = 81 ssd300 = SSD300(backbone=ResNet(backbone=args.backbone, backbone_path=args.backbone_path, - weights=args.torchvision_weights_version)) + weights=args.torchvision_weights_version), + label_num=label_num) args.learning_rate = args.learning_rate * args.N_gpu * (args.batch_size / 32) start_epoch = 0 iteration = 0 @@ -200,7 +220,16 @@ def train(train_loop_func, logger, args): total_time = 0 if args.mode == 'evaluation': - acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args) + acc = evaluate(ssd300, val_dataloader, cocoGt_val, encoder, inv_map, args) + if args.local_rank == 0: + print('Model precision {} mAP'.format(acc)) + return + + if args.mode == 'testing' and args.dataset_name == 'real_colon': + cocoGt_test = get_coco_ground_truth_test(args) + test_dataset = get_test_dataset(args) + test_dataloader = get_val_dataloader(test_dataset, args) + acc = evaluate(ssd300, test_dataloader, cocoGt_test, encoder, inv_map, args) if args.local_rank == 0: print('Model precision {} mAP'.format(acc)) return @@ -208,6 +237,7 @@ def train(train_loop_func, logger, args): scaler = torch.cuda.amp.GradScaler(enabled=args.amp) mean, std = generate_mean_std(args) + for epoch in range(start_epoch, args.epochs): start_epoch_time = time.time() iteration = train_loop_func(ssd300, loss_func, scaler, @@ -222,7 +252,7 @@ def train(train_loop_func, logger, args): logger.update_epoch_time(epoch, end_epoch_time) if epoch in args.evaluation: - acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args) + acc = evaluate(ssd300, val_dataloader, cocoGt_val, encoder, inv_map, args) if args.local_rank == 0: logger.update_epoch(epoch, acc) @@ -238,15 +268,57 @@ def train(train_loop_func, logger, args): obj['model'] = ssd300.module.state_dict() else: obj['model'] = ssd300.state_dict() - os.makedirs(args.save, exist_ok=True) save_path = os.path.join(args.save, f'epoch_{epoch}.pt') torch.save(obj, save_path) logger.log('model path', save_path) train_loader.reset() - DLLogger.log((), { 'total time': total_time }) + + if args.dataset_name == "real_colon" and args.negatives_sampling: + print("Creating a new train loader...") + train_loader = get_train_loader(args, args.seed - 2 ** 31) + print("... 
done.") + + DLLogger.log((), {'total time': total_time}) logger.log_summary() +def testing(args): + # Check that GPUs are actually available + args.N_gpu = 1 + args.distributed = False + + # Setup data, defaults + dboxes = dboxes300_coco() + encoder = Encoder(dboxes) + + if args.dataset_name == "real_colon": + label_num = 2 + else: + label_num = 81 + ssd300 = SSD300(backbone=ResNet(backbone=args.backbone, + backbone_path=args.backbone_path, + weights=args.torchvision_weights_version), + label_num=label_num) + + if not args.no_cuda: + ssd300.cuda() + + if args.checkpoint is not None: + if os.path.isfile(args.checkpoint): + load_checkpoint(ssd300, args.checkpoint) + else: + print('Provided checkpoint is not path to a file') + return + + cocoGt_test = get_coco_ground_truth_test(args) + test_dataset = get_test_dataset(args) + inv_map = {v: k for k, v in test_dataset.label_map.items()} + test_dataloader = get_val_dataloader(test_dataset, args) + + acc = evaluate(ssd300, test_dataloader, cocoGt_test, encoder, inv_map, args) + print('Model precision {} mAP'.format(acc)) + + def log_params(logger, args): logger.log_params({ "dataset path": args.data, @@ -268,8 +340,10 @@ def log_params(logger, args): "num workers": args.num_workers, "AMP": args.amp, "precision": 'amp' if args.amp else 'fp32', + "skip_empty": args.skip_empty }) + if __name__ == "__main__": parser = make_parser() args = parser.parse_args() @@ -284,21 +358,24 @@ def log_params(logger, args): # write json only on the main thread args.json_summary = args.json_summary if args.local_rank == 0 else None - if args.mode == 'benchmark-training': - train_loop_func = benchmark_train_loop - logger = BenchLogger('Training benchmark', log_interval=args.log_interval, - json_output=args.json_summary) - args.epochs = 1 - elif args.mode == 'benchmark-inference': - train_loop_func = benchmark_inference_loop - logger = BenchLogger('Inference benchmark', log_interval=args.log_interval, - json_output=args.json_summary) - args.epochs = 1 + if args.mode == 'testing': + testing(args) else: - train_loop_func = train_loop - logger = Logger('Training logger', log_interval=args.log_interval, - json_output=args.json_summary) + if args.mode == 'benchmark-training': + train_loop_func = benchmark_train_loop + logger = BenchLogger('Training benchmark', log_interval=args.log_interval, + json_output=args.json_summary) + args.epochs = 1 + elif args.mode == 'benchmark-inference': + train_loop_func = benchmark_inference_loop + logger = BenchLogger('Inference benchmark', log_interval=args.log_interval, + json_output=args.json_summary) + args.epochs = 1 + else: + train_loop_func = train_loop + logger = Logger('Training logger', log_interval=args.log_interval, + json_output=args.json_summary) - log_params(logger, args) + log_params(logger, args) - train(train_loop_func, logger, args) + train(train_loop_func, logger, args) diff --git a/PyTorch/Detection/SSD/result_visualisation.py b/PyTorch/Detection/SSD/result_visualisation.py new file mode 100644 index 000000000..788e92c6c --- /dev/null +++ b/PyTorch/Detection/SSD/result_visualisation.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python + +""" Starting from inference output jsons folder, produce videos with model predictions and GT boxes to + assess model performance. 
+ + Usage: + - python3 result_visualisation.py + + Copyright 2023-, Cosmo Intelligent Medical Devices +""" +import json +import sys +import cv2 +import os +from PIL import Image, ImageDraw + + +def rescale_bbox(bbox, orig_width, orig_height, target_width, target_height): + """ + Rescale bounding box coordinates to fit a new image size. + + Args: + bbox (list): List containing bounding box coordinates in the format [x_min, y_min, x_max, y_max]. + orig_width (int): Original width of the image. + orig_height (int): Original height of the image. + target_width (int): Target width to which the bounding box coordinates will be rescaled. + target_height (int): Target height to which the bounding box coordinates will be rescaled. + + Returns: + list: Rescaled bounding box coordinates in the format [x_min_scaled, y_min_scaled, x_max_scaled, y_max_scaled]. + """ + # Rescale the bounding box coordinates + x_min, y_min, x_max, y_max = bbox + x_min_scaled = int((x_min / orig_width) * target_width) + y_min_scaled = int((y_min / orig_height) * target_height) + x_max_scaled = int((x_max / orig_width) * target_width) + y_max_scaled = int((y_max / orig_height) * target_height) + return [x_min_scaled, y_min_scaled, x_max_scaled, y_max_scaled] + + +def visual(base_path, videos_output_folder): + """ + Compute False Positive and True Positive Rates from output jsons folder. + + Args: + base_path (str): The folder path where annotation JSON files are located. + videos_output_folder (str): The folder path where to save output videos. + + Returns: + """ + # Specify the path to the JSON files + json_results_path = os.path.join(base_path, "predictions.json") + json_gt_path = os.path.join(base_path, "ground_truth.json") + + # Load the JSON file + with open(json_results_path, "r") as json_file: + prediction_results = json.load(json_file) + print(f"Opened prediction file {json_results_path}") + with open(json_gt_path, "r") as json_file: + gt_file = json.load(json_file) + print(f"Opened ground truth file {json_gt_path}") + + # Specify the test videos here + video_names = ["001-013", "002-013", "003-013", "004-013", + "001-014", "002-014", "003-014", "004-014", + "001-015", "002-015", "003-015", "004-015"] + for vn in video_names: + new_gt_file = [] + for gt in gt_file: + if vn in gt["file_name"]: + new_gt_file.append(gt) + max_id = gt["id"] + + new_pred_file = [] + for pred in prediction_results: + if pred["id"] <= max_id: + new_pred_file.append(pred) + + # Convert gt_file into a dictionary for faster access + gt_dict = {item['id']: item for item in new_gt_file} + + # Iterate over prediction_results and add the predictions to the corresponding new_gt_file entry + for prediction in new_pred_file: + image_id = prediction['id'] + # Check if the image_id exists in gt_dict (new_gt_file) + if image_id in gt_dict: + # If the 'preds' key does not exist, create it + if 'preds' not in gt_dict[image_id]: + gt_dict[image_id]['preds'] = [] + + if prediction['score'] > 0.5: + # Add the bbox prediction to the 'annotations' list + gt_dict[image_id]['preds'].append(prediction['bbox']) + + # Optionally, convert gt_dict back to a list if needed for further processing + updated_new_gt_file = list(gt_dict.values()) + + def extract_frame_number(entry): + # Assuming file_name format is always like '001-013_framenumber.jpg' + # Split by '_' then take the second part and split by '.' 
to remove the extension, + # finally convert to integer + _, frame_str = entry['file_name'].split('_') + frame_number = int(frame_str.split('.')[0]) + return frame_number + + # Sort the updated_new_gt_file list by frame number + updated_new_gt_file = sorted(updated_new_gt_file, key=extract_frame_number) + + # Drawing, frame per frame + base_image_path = videos_output_folder + "/test_images/" + temp_dir = videos_output_folder + "/temp_images_" + vn # Temporary directory to save annotated images + os.makedirs(temp_dir, exist_ok=True) + for gt in updated_new_gt_file: + file_name = gt["file_name"] + image_path = os.path.join(base_image_path, file_name) + + # Load and rescale the image + image = Image.open(image_path) + orig_width, orig_height = image.size + image_rescaled = image.resize((300, 300)) + + # Create a copy for drawing + draw = image_rescaled.copy() + draw_draw = ImageDraw.Draw(draw) + + # Rescale and draw the bounding boxes + for annotation in gt.get("annotations", []): + bbox_scaled = rescale_bbox(annotation, orig_width, orig_height, 300, 300) + draw_draw.rectangle(bbox_scaled, outline="green", width=5) + + for predn in gt.get("preds", []): + bbox_scaled = rescale_bbox(predn, orig_width, orig_height, 300, 300) + draw_draw.rectangle(bbox_scaled, outline="red", width=5) + + # Save the annotated image + temp_image_path = os.path.join(temp_dir, file_name) + draw.save(temp_image_path) + + # Step 4: Create a video from the annotated images + video_name = videos_output_folder + vn + ".mp4" + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + video = cv2.VideoWriter(video_name, fourcc, 20.0, (300, 300)) + + for file_name in sorted(os.listdir(temp_dir)): + img_path = os.path.join(temp_dir, file_name) + img = cv2.imread(img_path) + video.write(img) + + video.release() + print(f"Video saved as {video_name}") + + +if __name__ == "__main__": + if len(sys.argv) < 3: + print("Usage: per_video_eval.py ") + sys.exit(1) + + folder_path = sys.argv[1] + output_folder = sys.argv[2] + visual(folder_path, output_folder) diff --git a/PyTorch/Detection/SSD/ssd/__init__.py b/PyTorch/Detection/SSD/ssd/__init__.py deleted file mode 100644 index f5ad37a90..000000000 --- a/PyTorch/Detection/SSD/ssd/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .entrypoints import nvidia_ssd, nvidia_ssd_processing_utils diff --git a/PyTorch/Detection/SSD/ssd/coco_pipeline.py b/PyTorch/Detection/SSD/ssd/coco_pipeline.py index 88a844422..d29054650 100644 --- a/PyTorch/Detection/SSD/ssd/coco_pipeline.py +++ b/PyTorch/Detection/SSD/ssd/coco_pipeline.py @@ -28,7 +28,7 @@ class COCOPipeline(Pipeline): def __init__(self, batch_size, file_root, annotations_file, default_boxes, device_id, num_shards, output_fp16=False, output_nhwc=False, pad_output=False, - num_threads=1, seed=15): + num_threads=1, seed=15, skip_empty=True): super(COCOPipeline, self).__init__(batch_size=batch_size, device_id=device_id, num_threads=num_threads, @@ -47,7 +47,7 @@ def __init__(self, batch_size, file_root, annotations_file, default_boxes, ratio=True, ltrb=True, shuffle_after_epoch=True, - skip_empty=True) + skip_empty=skip_empty) self.decode_slice = dali.ops.decoders.ImageSlice(device="cpu", output_type=dali.types.RGB) diff --git a/PyTorch/Detection/SSD/ssd/data.py b/PyTorch/Detection/SSD/ssd/data.py index 22fb99739..f6cc4b1fe 100644 --- a/PyTorch/Detection/SSD/ssd/data.py +++ b/PyTorch/Detection/SSD/ssd/data.py @@ -13,33 +13,68 @@ # limitations under the License. 
import os - +import json import torch +import random from torch.utils.data import DataLoader from ssd.utils import dboxes300_coco, COCODetection from ssd.utils import SSDTransformer from pycocotools.coco import COCO -#DALI import +# DALI import from ssd.coco_pipeline import COCOPipeline, DALICOCOIterator + def get_train_loader(args, local_seed): - train_annotate = os.path.join(args.data, "annotations/instances_train2017.json") - train_coco_root = os.path.join(args.data, "train2017") + if args.dataset_name == 'real_colon': + train_annotate = os.path.join(args.data, "train_ann.json") + train_coco_root = os.path.join(args.data, "train_images") + else: + train_annotate = os.path.join(args.data, "annotations/instances_train2017.json") + train_coco_root = os.path.join(args.data, "train2017") + + if args.dataset_name == 'real_colon' and args.negatives_sampling: + # Load original COCO annotations + with open(train_annotate) as f: + json_data = json.load(f) + + # Separate images with and without annotations + images_with_annotations = set() + for ann in json_data['annotations']: + images_with_annotations.add(ann['image_id']) + all_images = {img['id']: img for img in json_data['images']} + empty_images = [img for img_id, img in all_images.items() if img_id not in images_with_annotations] + images_with_annotations = [img for img_id, img in all_images.items() if img_id in images_with_annotations] + + # Randomly sample the same number of negative images and positive images + num_to_sample = len(images_with_annotations) + sampled_empty_images = random.sample(empty_images, num_to_sample) + json_data['images'] = images_with_annotations + sampled_empty_images + + train_annotate = os.path.join(args.save, "train_temp.json") + with open(train_annotate, 'w') as f: + json.dump(json_data, f) + + num_train_images = len(json_data['images']) + else: + num_train_images = len(os.listdir(train_coco_root)) + print(f"Setting epoch size to {num_train_images}, 'skip_empty': {args.skip_empty}") train_pipe = COCOPipeline(batch_size=args.batch_size, - file_root=train_coco_root, - annotations_file=train_annotate, - default_boxes=dboxes300_coco(), - device_id=args.local_rank, - num_shards=args.N_gpu, - output_fp16=args.amp, - output_nhwc=False, - pad_output=False, - num_threads=args.num_workers, seed=local_seed) + file_root=train_coco_root, + annotations_file=train_annotate, + default_boxes=dboxes300_coco(), + device_id=args.local_rank, + num_shards=args.N_gpu, + output_fp16=args.amp, + output_nhwc=False, + pad_output=False, + num_threads=args.num_workers, + seed=local_seed, + skip_empty=args.skip_empty) train_pipe.build() - test_run = train_pipe.schedule_run(), train_pipe.share_outputs(), train_pipe.release_outputs() - train_loader = DALICOCOIterator(train_pipe, 118287 / args.N_gpu) + train_loader = DALICOCOIterator(train_pipe, num_train_images / args.N_gpu) + return train_loader @@ -47,10 +82,28 @@ def get_val_dataset(args): dboxes = dboxes300_coco() val_trans = SSDTransformer(dboxes, (300, 300), val=True) - val_annotate = os.path.join(args.data, "annotations/instances_val2017.json") - val_coco_root = os.path.join(args.data, "val2017") + if args.dataset_name == 'real_colon': + val_annotate = os.path.join(args.data, "validation_ann.json") + val_coco_root = os.path.join(args.data, "validation_images") + else: + val_annotate = os.path.join(args.data, "annotations/instances_val2017.json") + val_coco_root = os.path.join(args.data, "val2017") - val_coco = COCODetection(val_coco_root, val_annotate, val_trans) + val_coco = 
COCODetection(val_coco_root, val_annotate, val_trans, skip_empty=args.skip_empty) + return val_coco + + +def get_test_dataset(args): + dboxes = dboxes300_coco() + val_trans = SSDTransformer(dboxes, (300, 300), val=True) + if args.dataset_name == 'real_colon': + val_annotate = os.path.join(args.data, "test_ann.json") + val_coco_root = os.path.join(args.data, "test_images") + else: + val_annotate = os.path.join(args.data, "annotations/instances_val2017.json") + val_coco_root = os.path.join(args.data, "val2017") + + val_coco = COCODetection(val_coco_root, val_annotate, val_trans, skip_empty=args.skip_empty) return val_coco @@ -68,7 +121,23 @@ def get_val_dataloader(dataset, args): return val_dataloader -def get_coco_ground_truth(args): - val_annotate = os.path.join(args.data, "annotations/instances_val2017.json") + +def get_coco_ground_truth_validation(args): + val_annotate = None + print(args.data) + if args.dataset_name == 'real_colon': + val_annotate = os.path.join(args.data, "validation_ann.json") + else: + val_annotate = os.path.join(args.data, "annotations/instances_val2017.json") + cocoGt = COCO(annotation_file=val_annotate, use_ext=True) return cocoGt + + +def get_coco_ground_truth_test(args): + if args.dataset_name == 'real_colon': + test_annotate = os.path.join(args.data, "test_ann.json") + else: + test_annotate = os.path.join(args.data, "annotations/instances_val2017.json") + cocoGt = COCO(annotation_file=test_annotate, use_ext=True) + return cocoGt diff --git a/PyTorch/Detection/SSD/ssd/evaluate.py b/PyTorch/Detection/SSD/ssd/evaluate.py index e96df0aaf..8bf9fcfd9 100644 --- a/PyTorch/Detection/SSD/ssd/evaluate.py +++ b/PyTorch/Detection/SSD/ssd/evaluate.py @@ -17,6 +17,8 @@ import numpy as np from contextlib import redirect_stdout import io +import json +import os from pycocotools.cocoeval import COCOeval @@ -114,6 +116,69 @@ def evaluate(model, coco, cocoGt, encoder, inv_map, args): print("") print("Predicting Ended, total time: {:.2f} s".format(time.time() - start)) + # Create jsons with per-frame predictions and ground truths to compute whole-video statistics + if args.dataset_name == "real_colon": + + json_start_time = time.time() + # Specify the path where you want to save the JSON file + output_result_path = os.path.join(args.inference_jsons, "predictions.json") + output_ground_truth_path = os.path.join(args.inference_jsons, "ground_truth.json") + print("Creating json file for predictions...") + + # Convert final_results and cocoGt.dataset to a serializable format + serializable_results = [] + serializable_gts = [] + # Populate serializable_results + for result in final_results: + serializable_result = { + "id": int(result[0]), + "bbox": [float(result[1]), float(result[2]), + float(result[1]) + float(result[3]), + float(result[2]) + float(result[4])], + "score": float(result[5]), + "label": int(result[6]), + } + serializable_results.append(serializable_result) + + # Save to a JSON file + with open(output_result_path, "w") as json_file: + json.dump(serializable_results, json_file) + print(f"Created predictions json file of size {len(serializable_results)}.") + print("Creating json file for ground truth...") + + # Populate serializable_gt + for image_info in cocoGt.dataset['images']: + img_id = image_info['id'] + annotations_list = [] + + # Collect all annotations for the current image + for annotation in cocoGt.dataset['annotations']: + if annotation['image_id'] == img_id: + bbox_converted = [annotation['bbox'][0], annotation['bbox'][1], + annotation['bbox'][0] + 
annotation['bbox'][2], + annotation['bbox'][1] + annotation['bbox'][3]] + annotations_list.append({ + "unique_id": annotation['unique_id'], + "bbox": bbox_converted, + "label": 1 # or the actual label if available + }) + + serializable_gt = { + "id": img_id, + "file_name": image_info['file_name'], + "annotations": annotations_list + } + + serializable_gts.append(serializable_gt) + + # Save to a JSON file + with open(output_ground_truth_path, "w") as json_file: + json.dump(serializable_gts, json_file) + print(f"Created ground truth json file of size {len(serializable_gts)}") + + json_end_time = time.time() + print(f"Finished creating jsons in {json_end_time - json_start_time} seconds.") + cocoDt = cocoGt.loadRes(final_results, use_ext=True) E = COCOeval(cocoGt, cocoDt, iouType='bbox', use_ext=True) diff --git a/PyTorch/Detection/SSD/ssd/model.py b/PyTorch/Detection/SSD/ssd/model.py index 18a269d83..33b71f452 100644 --- a/PyTorch/Detection/SSD/ssd/model.py +++ b/PyTorch/Detection/SSD/ssd/model.py @@ -53,12 +53,12 @@ def forward(self, x): class SSD300(nn.Module): - def __init__(self, backbone=ResNet('resnet50')): + def __init__(self, backbone=ResNet('resnet50'), label_num=81): super().__init__() self.feature_extractor = backbone - self.label_num = 81 # number of COCO classes + self.label_num = label_num # number of output classes self._build_additional_features(self.feature_extractor.out_channels) self.num_defaults = [4, 6, 6, 6, 4, 4] self.loc = [] diff --git a/PyTorch/Detection/SSD/ssd/utils.py b/PyTorch/Detection/SSD/ssd/utils.py index 27c2dd1c2..3ce9ecfd0 100644 --- a/PyTorch/Detection/SSD/ssd/utils.py +++ b/PyTorch/Detection/SSD/ssd/utils.py @@ -443,8 +443,9 @@ def __call__(self, img, img_size, bbox=None, label=None, max_num=200): if self.val: bbox_out = torch.zeros(max_num, 4) label_out = torch.zeros(max_num, dtype=torch.long) - bbox_out[:bbox.size(0), :] = bbox - label_out[:label.size(0)] = label + if bbox.size(0) > 0: + bbox_out[:bbox.size(0), :] = bbox + label_out[:label.size(0)] = label return self.trans_val(img), img_size, bbox_out, label_out img, img_size, bbox, label = self.crop(img, img_size, bbox, label) @@ -460,7 +461,7 @@ def __call__(self, img, img_size, bbox=None, label=None, max_num=200): # Implement a datareader for COCO dataset class COCODetection(data.Dataset): - def __init__(self, img_folder, annotate_file, transform=None): + def __init__(self, img_folder, annotate_file, transform=None, skip_empty=True): self.img_folder = img_folder self.annotate_file = annotate_file @@ -497,12 +498,14 @@ def __init__(self, img_folder, annotate_file, transform=None): bbox_label = self.label_map[bboxes["category_id"]] self.images[img_id][2].append((bbox, bbox_label)) - for k, v in list(self.images.items()): - if len(v[2]) == 0: - self.images.pop(k) + if skip_empty: + for k, v in list(self.images.items()): + if len(v[2]) == 0: + self.images.pop(k) self.img_keys = list(self.images.keys()) self.transform = transform + print(f"Loaded dataset with {len(self.images)} images, skip_empty : {skip_empty}") @property def labelnum(self): @@ -593,3 +596,23 @@ def draw_patches(img, bboxes, labels, order="xywh", label_map={}): ax.text(cx-0.5*w, cy-0.5*h, label, ha="center", va="center", size=15, bbox=bbox_props) plt.show() + +if __name__ == "__main__": + # Run this code to test the loading of the Real-Colon validation and test splits for dataset converted in COCO format + dataset_folder = "/path/to/dataset" + dboxes = dboxes300_coco() + val_trans = SSDTransformer(dboxes, (300, 300), val=True) + + 
    val_annotate = os.path.join(dataset_folder, "validation_ann.json")
+    val_coco_root = os.path.join(dataset_folder, "validation_images")
+
+    val_coco = COCODetection(val_coco_root, val_annotate, val_trans, skip_empty=False)
+    val_coco_no_empty = COCODetection(val_coco_root, val_annotate, val_trans, skip_empty=True)
+    print(f"Loaded validation set with {len(val_coco)} images, {len(val_coco_no_empty)} with boxes")
+
+    test_annotate = os.path.join(dataset_folder, "test_ann.json")
+    test_coco_root = os.path.join(dataset_folder, "test_images")
+
+    test_coco = COCODetection(test_coco_root, test_annotate, val_trans, skip_empty=False)
+    test_coco_no_empty = COCODetection(test_coco_root, test_annotate, val_trans, skip_empty=True)
+    print(f"Loaded test set with {len(test_coco)} images, {len(test_coco_no_empty)} with boxes")
diff --git a/README.md b/README.md
index fb2b6b841..c8cadd13a 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,36 @@
+# Polyp Detection using NVIDIA Deep Learning Examples for Tensor Cores
+
+This repository is a fork of https://github.com/NVIDIA/DeepLearningExamples and provides all the code used to train and test the SSD polyp detection models in the paper "REAL-Colon: A dataset for developing real-world AI applications in colonoscopy".
+The REAL-Colon (Real-world multi-center Endoscopy Annotated video Library) dataset is composed of 60 recordings of real-world colonoscopies. Full details and the code to download the dataset and prepare the data for model training and testing can be found here: https://github.com/cosmoimd/real-colon-dataset
+For full details on the dataset and to cite this work, please refer to: Carlo Biffi, Giulio Antonelli, Sebastian Bernhofer, Cesare Hassan, Daizen Hirata, Mineo Iwatate, Andreas Maieron, Pietro Salvagnini, and Andrea Cherubini. "REAL-Colon: A dataset for developing real-world AI applications in colonoscopy." arXiv preprint arXiv:2403.02163 (2024). Available at: https://arxiv.org/abs/2403.02163.
+
+## SSD Model Training and Evaluation
+The SSD model is defined at https://github.com/cosmoimd/DeepLearningExamples/tree/master/PyTorch/Detection/SSD/
+
+### Training
+- Build and run the docker container with `docker build . -t nvidia_ssd` and then `docker run --rm -it --gpus=all --ipc=host nvidia_ssd`. Here you can also mount any paths the code needs using the `-v` flag.
+- Set the `dataset_folder` in `PyTorch/Detection/SSD/ssd/utils.py` to the `output_folder` path obtained from the `export_coco_format.py` script run in the previous step. In this way, the model will be trained with a user-defined train/valid/test split of the data according to the user's needs (see the layout sketch below).
+- To start training, run: `CUDA_VISIBLE_DEVICES=0 python main.py --dataset-name real_colon --backbone resnet50 --warmup 300 --bs 64 --epochs 65 --data /coco --save ./models`.
This will also save the model checkpoints in `./models`.
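For `--dataset-name real_colon`, `ssd/data.py` resolves the splits relative to the `--data` folder as `train_ann.json`/`train_images`, `validation_ann.json`/`validation_images`, and `test_ann.json`/`test_images`. The snippet below is a minimal sketch for checking that the `export_coco_format.py` output follows this layout before launching training; the `/coco` path is a placeholder for whatever folder you mount and pass to `--data`.

```python
import os

dataset_folder = "/coco"  # placeholder: the same folder passed to --data
for split in ("train", "validation", "test"):
    ann = os.path.join(dataset_folder, f"{split}_ann.json")
    images = os.path.join(dataset_folder, f"{split}_images")
    # ssd/data.py builds its loaders from exactly these file and folder names
    if not (os.path.isfile(ann) and os.path.isdir(images)):
        raise FileNotFoundError(f"missing {ann} or {images}")
print("Dataset layout matches what --dataset-name real_colon expects")
```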
+
+### Validation
+To evaluate the trained models:
+- In the docker container, run `python ./main.py --backbone resnet50 --dataset-name real_colon --inference_jsons /path/to/save/json/files --mode testing --no-skip-empty --checkpoint /your/model/path --data /path/to/dir/containing/test/set/`
+- To compute False Positive and True Positive Rates (FPR and TPR) from the output jsons folder defined with `--inference_jsons` in the previous step, run: `python3 PyTorch/Detection/SSD/fpr_trp_eval.py <path_to_jsons_folder>`
+- To create output videos with model predictions and GT boxes, run: `python3 result_visualisation.py <path_to_jsons_folder> <videos_output_folder>`
+
+## Contact
+Andrea Cherubini - acherubini@cosmoimd.com
+Carlo Biffi - cbiffi@cosmoimd.com
+
 # NVIDIA Deep Learning Examples for Tensor Cores
 
-## Introduction
+### Introduction
 
 This repository provides State-of-the-Art Deep Learning examples that are easy to train and deploy, achieving the best reproducible accuracy and performance with NVIDIA CUDA-X software stack running on NVIDIA Volta, Turing and Ampere GPUs.
 
-## NVIDIA GPU Cloud (NGC) Container Registry
+### NVIDIA GPU Cloud (NGC) Container Registry
 These examples, along with our NVIDIA deep learning software stack, are provided in a monthly updated Docker container on the NGC container registry (https://ngc.nvidia.com). These containers include:
 
 - The latest NVIDIA examples from this repository
@@ -12,7 +39,7 @@ These examples, along with our NVIDIA deep learning software stack, are provided
 - [Monthly release notes](https://docs.nvidia.com/deeplearning/dgx/index.html#nvidia-optimized-frameworks-release-notes) for each of the NVIDIA optimized containers
 
-## Computer Vision
+### Computer Vision
 | Models | Framework | AMP | Multi-GPU | Multi-Node | TensorRT | ONNX | Triton | DLC | NB |
 |----------------------------------------------------------------------------------------------------------------------------------------|--------------|----------------|-----------|------------|----------|------|------------------------------------------------------------------------------------------------------------------------------|------|------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | [EfficientNet-B0](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Classification/ConvNets/efficientnet) | PyTorch | Yes | Yes | - | Supported | - | Supported | Yes | - |
@@ -38,7 +65,7 @@ These examples, along with our NVIDIA deep learning software stack, are provided
 | [SSD](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Detection/SSD) | TensorFlow | Yes | Yes | - | Supported | - | Supported | Yes | [Yes](https://github.com/NVIDIA/DeepLearningExamples/blob/master/TensorFlow/Detection/SSD/models/research/object_detection/object_detection_tutorial.ipynb) |
 | [U-Net Med](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/UNet_Medical) | TensorFlow2 | Yes | Yes | - | Example | - | Supported | Yes | - |
 
-## Natural Language Processing
+### Natural Language Processing
 | Models | Framework | AMP | Multi-GPU | Multi-Node | TensorRT | ONNX | Triton | DLC | NB |
|------------------------------------------------------------------------------------------------------------------------|-------------|------|-----------|------------|----------|------|-----------------------------------------------------------------------------------------------------------|------|---------------------------------------------------------------------------------------------------------------------------------------------| | [BERT](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/LanguageModeling/BERT) | PyTorch | Yes | Yes | Yes | [Example](https://github.com/NVIDIA/TensorRT/tree/main/demo/BERT) | - | [Example](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/LanguageModeling/BERT/triton) | Yes | - | @@ -49,8 +76,7 @@ These examples, along with our NVIDIA deep learning software stack, are provided | [GNMT](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Translation/GNMT) | TensorFlow | Yes | Yes | - | Supported | - | Supported | - | - | | [Faster Transformer](https://github.com/NVIDIA/DeepLearningExamples/tree/master/FasterTransformer) | Tensorflow | - | - | - | Example | - | Supported | - | - | - -## Recommender Systems +### Recommender Systems | Models | Framework | AMP | Multi-GPU | Multi-Node | ONNX | Triton | DLC | NB | |----------------------------------------------------------------------------------------------------------------|-------------|-------|-----------|--------------|--------|------------------------------------------------------------------------------------------------------|------|--------------------------------------------------------------------------------------------------------| | [DLRM](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Recommendation/DLRM) | PyTorch | Yes | Yes | - | Yes | [Example](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Recommendation/DLRM/triton) | Yes | [Yes](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Recommendation/DLRM/notebooks) | @@ -62,14 +88,13 @@ These examples, along with our NVIDIA deep learning software stack, are provided | [VAE-CF](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Recommendation/VAE-CF) | TensorFlow | Yes | Yes | - | - | Supported | - | - | | [SIM](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Recommendation/SIM) | TensorFlow2 | Yes | Yes | - | - | Supported | Yes | - | - -## Speech to Text +### Speech to Text | Models | Framework | AMP | Multi-GPU | Multi-Node | TensorRT | ONNX | Triton | DLC | NB | |--------------------------------------------------------------------------------------------------------------|-------------|------|------------|--------------|----------|--------|----------------------------------------------------------------------------------------------------------|-------|--------------------------------------------------------------------------------------------------------------| | [Jasper](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechRecognition/Jasper) | PyTorch | Yes | Yes | - | Example | Yes | [Example](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechRecognition/Jasper/trtis) | Yes | [Yes](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechRecognition/Jasper/notebooks) | | [QuartzNet](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechRecognition/QuartzNet) | PyTorch | Yes | Yes | - | Supported | - | 
Supported | Yes | - | -## Text to Speech +### Text to Speech | Models | Framework | AMP | Multi-GPU | Multi-Node | TensorRT | ONNX | Triton | DLC | NB | |-------------------------------------------------------------------------------------------------------------------------|-------------|------|------------|-------------|----------|--------|---------------------------------------------------------------------------------------------------------------|-------|-----| | [FastPitch](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/FastPitch) | PyTorch | Yes | Yes | - | Example | - | [Example](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/FastPitch/triton) | Yes | Yes | @@ -77,21 +102,21 @@ These examples, along with our NVIDIA deep learning software stack, are provided | [Tacotron 2 and WaveGlow](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2) | PyTorch | Yes | Yes | - | Example | Yes | [Example](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2/trtis_cpp) | Yes | - | | [HiFi-GAN](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/HiFiGAN) | PyTorch | Yes | Yes | - | Supported | - | Supported | Yes | - | -## Graph Neural Networks +### Graph Neural Networks | Models | Framework | AMP | Multi-GPU | Multi-Node | ONNX | Triton | DLC | NB | |-------------------------------------------------------------------------------------------------------------------------|------------|------|------------|--------------|--------|----------|------|------| | [SE(3)-Transformer](https://github.com/NVIDIA/DeepLearningExamples/tree/master/DGLPyTorch/DrugDiscovery/SE3Transformer) | PyTorch | Yes | Yes | - | - | Supported | - | - | | [MoFlow](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/DrugDiscovery/MoFlow) | PyTorch | Yes | Yes | - | - | Supported | - | - | -## Time-Series Forecasting +### Time-Series Forecasting | Models | Framework | AMP | Multi-GPU | Multi-Node | TensorRT | ONNX | Triton | DLC | NB | |-------------------------------------------------------------------------------------------------------------------|------------|------|-------------|--------------|----------|--------|--------------------------------------------------------------------------------------------------|-------|-----| | [Temporal Fusion Transformer](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Forecasting/TFT) | PyTorch | Yes | Yes | - | Example | Yes | [Example](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Forecasting/TFT/triton) | Yes | - | -## NVIDIA support +### NVIDIA support In each of the network READMEs, we indicate the level of support that will be provided. The range is from ongoing updates and improvements to a point-in-time release for thought leadership. -## Glossary +### Glossary **Multinode Training** Supported on a pyxis/enroot Slurm cluster.