Uses the resnext101_32x8d_wsl model with pretrained weights from PyTorch Hub (facebookresearch/WSL-Images).
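As a sanity check, this backbone can be loaded directly from PyTorch Hub (a minimal sketch; the entry point name is the one published by facebookresearch/WSL-Images):
import torch
# ResNeXt-101 32x8d backbone with weakly-supervised (WSL) pretrained weights
backbone = torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl")
backbone.eval()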
The ResNeXt-50 block diagram (right panel) uses cardinality C = 32 with bottleneck width 4d, i.e. the residual-stage channels start at 128 (4xC = 4x32 = 128). MiDaS uses a similar but larger backbone, ResNeXt-101 32x8d, whose residual-stage channels start at 256 (8xC = 8x32 = 256); like ResNeXt-50 it has 4 residual stages.
The following is a simplified diagram of its architecture:
!git clone https://github.com/ashxjain/planercnn
!git clone https://github.com/intel-isl/MiDaS.git
!git clone https://github.com/ashxjain/YoloV3.git
from google.colab import drive
drive.mount('/content/drive')
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
%%shell
cd /content/planercnn/roialign; python setup.py install; ./test.sh
cp -r /usr/local/lib/python3.6/dist-packages/roi_align-0.0.2-py3.6-linux-x86_64.egg/roi_align /usr/local/lib/python3.6/dist-packages/
%%shell
cd /content/planercnn/
mkdir -p checkpoint
#wget https://www.dropbox.com/s/yjcg6s57n581sk0/checkpoint.zip?dl=0
#mv "checkpoint.zip?dl=0" "planercnn_refine.zip"
fileId=1o2wZG0swF-HImZbQGPC7cHONkCThFVQZ
fileName=planercnn_refine.zip
curl -sc /tmp/cookie "https://drive.google.com/uc?export=download&id=${fileId}" > /dev/null
code="$(awk '/_warning_/ {print $NF}' /tmp/cookie)"
curl -Lb /tmp/cookie "https://drive.google.com/uc?export=download&confirm=${code}&id=${fileId}" -o ${fileName}
mv planercnn_refine.zip checkpoint/
cd checkpoint/
unzip planercnn_refine.zip
rm planercnn_refine.zip
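# If the cookie-based Drive download above ever breaks, the gdown package is a
# simpler alternative (a sketch, not part of the original flow; same file id).
# Assumes: pip install gdown
import gdown
gdown.download("https://drive.google.com/uc?id=1o2wZG0swF-HImZbQGPC7cHONkCThFVQZ",
               "/content/planercnn/checkpoint/planercnn_refine.zip")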
%%shell
cd /content/planercnn
echo "Running Tests"
python evaluate.py --methods=f --suffix=warping_refine --dataset=inference --customDataFolder=example_images
%cd /content/planercnn
import sys
from config import InferenceConfig, PlaneConfig
from options import parse_args
from evaluate import PlaneRCNNDetector
from datasets.plane_stereo_dataset import PlaneDataset
from datasets.inference_dataset import InferenceDataset
from models.model import SamePad2d, MaskRCNN
from models.refinement_net import RefineModel
sys.argv[1:] = ["--methods=f", "--suffix=warping_refine", "--dataset=inference", "--customDataFolder=example_images"]
options = parse_args()
plane_config = PlaneConfig(options)
%cd /content
from MiDaS.midas.midas_net import MidasNet
from MiDaS.midas.transforms import Resize, NormalizeImage, PrepareForNet
import MiDaS.utils as midas_utils
from torch import nn
from torch.autograd import Variable # used by the PlaneRCNN branch of FinalModel
import torch
import torch.nn.functional as F # FPN upsampling inside FinalModel
import numpy as np
%cd /content/YoloV3
from models import Darknet, YOLOLayer
from utils import torch_utils # scale_img is used for augmented inference
!wget https://github.com/intel-isl/MiDaS/releases/download/v2_1/model-f6b98070.pt -O /content/model-f6b98070.pt
class FinalModel(nn.Module):
def __init__(self):
super(FinalModel, self).__init__()
# MiDaS Layers
# ============
# Load MiDas Model and its weights
midas_model = MidasNet('/content/model-f6b98070.pt', non_negative=True).to(device)
# Freeze all layers of MiDaS network
for param in midas_model.parameters():
param.requires_grad = False
self.resNext = midas_model.pretrained
self.midas_scratch = midas_model.scratch
# YoloV3 Layers
# =============
yolo_model = Darknet("/content/YoloV3/cfg/yolov3-ppe.cfg", img_size=512).to(device)
#yolo_chkpt = torch.load('/content/YoloV3/weights/yolov3-spp-ultralytics.pt', map_location=device)
yolo_chkpt = torch.load('/content/YoloV3/weights/yolov3_best_300.pt', map_location=device)
# load model weights for Darknet
yolo_chkpt['model'] = {k: v for k, v in yolo_chkpt['model'].items() if yolo_model.state_dict()[k].numel() == v.numel()}
yolo_model.load_state_dict(yolo_chkpt['model'], strict=False)
anchors = [(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]
numclasses = 4
self.yolo_start_1 = nn.Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
self.yolo_start_2 = nn.Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
self.yolo_start_3 = nn.Conv2d(2048, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
# Following are used to fetch anchor vectors
self.yolo_layers = yolo_model.yolo_layers
self.module_list = yolo_model.module_list
for j in self.yolo_layers:
# move this YOLO layer's anchor vector onto the selected device
self.module_list[j].anchor_vec = self.module_list[j].anchor_vec.to(device)
# Yolo scale-1
self.yolo_scale_1_conv_upsample = nn.Sequential(
nn.Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False),
nn.BatchNorm2d(128, eps=0.0001, momentum=0.03, affine=True, track_running_stats=True),
nn.LeakyReLU(negative_slope=0.1, inplace=True),
nn.Upsample(scale_factor=2.0),
)
self.yolo_scale_1_pretrained = nn.Sequential(*list(yolo_model.module_list)[106:113])
self.yolo_scale_1_out = YOLOLayer(anchors[:3], numclasses, 512, -1, [], 1)
# Yolo scale-2
self.yolo_scale_2_conv_upsample = nn.Sequential(
nn.Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False),
nn.BatchNorm2d(256, eps=0.0001, momentum=0.03, affine=True, track_running_stats=True),
nn.LeakyReLU(negative_slope=0.1, inplace=True),
nn.Upsample(scale_factor=2.0),
)
self.yolo_scale_2_pretrained = nn.Sequential(*list(yolo_model.module_list)[94:100])
self.yolo_scale_2_out = YOLOLayer(anchors[3:6], numclasses, 512, -1, [], 1)
self.yolo_scale_2_pretrained_last = yolo_model.module_list[100]
# Yolo scale-3
self.yolo_scale_3_conv = nn.Conv2d(1024, 27, kernel_size=(1, 1), stride=(1, 1))
self.yolo_scale_3_out = YOLOLayer(anchors[6:], numclasses, 512, -1, [], 1)
# PlaneRCNN Layers
# ================
# Load MaskRCNN model
maskrcnn_model = MaskRCNN(plane_config)
# Load MaskRCNN model weights
checkpoint_dir = '/content/planercnn/checkpoint/planercnn_' + options.anchorType
if options.suffix != '':
checkpoint_dir += '_' + options.suffix
maskrcnn_state_dict = torch.load(checkpoint_dir + '/checkpoint.pth')
maskrcnn_model.load_state_dict(maskrcnn_state_dict)
# Load Refine Model
refine_model = RefineModel(options)
# Load Refine Model weights
refine_model.load_state_dict(torch.load(checkpoint_dir + '/checkpoint_refine.pth'))
self.planercnn_maskrcnn_fpn = maskrcnn_model.fpn
# forward_once below also uses the MaskRCNN heads, anchors and config directly
self.rpn = maskrcnn_model.rpn
self.classifier = maskrcnn_model.classifier
self.mask = maskrcnn_model.mask
self.anchors = maskrcnn_model.anchors
self.config = maskrcnn_model.config
self.planercnn_refine_model = refine_model
def forward(self, x, augment=False):
if augment: # Augment images (inference and test only)
img_size = x.shape[-2:] # height, width
s = [0.83, 0.67] # scales
y = []
for i, xi in enumerate((x,
torch_utils.scale_img(x.flip(3), s[0], same_shape=False), # flip-lr and scale
torch_utils.scale_img(x, s[1], same_shape=False), # scale
)):
# cv2.imwrite('img%g.jpg' % i, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1])
y.append(self.forward_once(xi)[0])
y[1][..., :4] /= s[0] # scale
y[1][..., 0] = img_size[1] - y[1][..., 0] # flip lr
y[2][..., :4] /= s[1] # scale
y = torch.cat(y, 1)
return y, None
return self.forward_once(x)
def forward_once(self, x):
# MiDaS
# =====
resNext_layer_1 = self.resNext.layer1(x)
resNext_layer_2 = self.resNext.layer2(resNext_layer_1)
resNext_layer_3 = self.resNext.layer3(resNext_layer_2)
resNext_layer_4 = self.resNext.layer4(resNext_layer_3)
layer_1_rn = self.midas_scratch.layer1_rn(resNext_layer_1)
layer_2_rn = self.midas_scratch.layer2_rn(resNext_layer_2)
layer_3_rn = self.midas_scratch.layer3_rn(resNext_layer_3)
layer_4_rn = self.midas_scratch.layer4_rn(resNext_layer_4)
path_4 = self.midas_scratch.refinenet4(layer_4_rn)
path_3 = self.midas_scratch.refinenet3(path_4, layer_3_rn)
path_2 = self.midas_scratch.refinenet2(path_3, layer_2_rn)
path_1 = self.midas_scratch.refinenet1(path_2, layer_1_rn)
# MiDAS out
midas_out = self.midas_scratch.output_conv(path_1) # Output: 224x224x1
midas_out = torch.squeeze(midas_out, dim=1)
# YoloV3
# ======
yolo_out = []
# Yolo Scale-3 out
yolo_scale3_start3 = self.yolo_start_3(resNext_layer_4)
yolo_scale3_conv = self.yolo_scale_3_conv(yolo_scale3_start3)
yolo_scale3_out = self.yolo_scale_3_out(yolo_scale3_conv, None) # Output: 13x13x27
yolo_out.append(yolo_scale3_out)
# Yolo Scale-2 Out
yolo_scale2_start3 = self.yolo_start_3(resNext_layer_4)
yolo_scale2_conv_upsample = self.yolo_scale_2_conv_upsample(yolo_scale2_start3)
yolo_scale2_start2 = self.yolo_start_2(resNext_layer_3)
yolo_scale2_cat = torch.cat([yolo_scale2_start2, yolo_scale2_conv_upsample], 1)
yolo_scale2_pretrained = self.yolo_scale_2_pretrained(yolo_scale2_cat)
yolo_scale2 = self.yolo_scale_2_pretrained_last(yolo_scale2_pretrained)
yolo_scale2_out = self.yolo_scale_2_out(yolo_scale2, None) # Output: 26x26x27
yolo_out.append(yolo_scale2_out)
# Yolo Scale-1 Out
yolo_scale1_conv_upsample = self.yolo_scale_1_conv_upsample(yolo_scale2_pretrained)
yolo_scale1_start = self.yolo_start_1(resNext_layer_2)
yolo_scale1 = torch.cat([yolo_scale1_start, yolo_scale1_conv_upsample], 1)
yolo_scale1_pretrained = self.yolo_scale_1_pretrained(yolo_scale1)
yolo_scale1_out = self.yolo_scale_1_out(yolo_scale1_pretrained, None) # Output: 52x52x27
yolo_out.append(yolo_scale1_out)
# PlaneRCNN
# =========
# NOTE: this branch is adapted from planercnn's MaskRCNN predict(); mode,
# ranges, image_metas, use_refinement, depth_np and (for training) the
# gt_* entries of `input` are expected to be supplied as in planercnn.
# MaskRCNN FPN
c2_out = resNext_layer_1
c3_out = resNext_layer_2
c4_out = resNext_layer_3
p5_out = self.planercnn_maskrcnn_fpn.P5_conv1(resNext_layer_4)
if self.planercnn_maskrcnn_fpn.bilinear_upsampling:
p4_out = self.planercnn_maskrcnn_fpn.P4_conv1(c4_out) + F.upsample(p5_out, scale_factor=2, mode='bilinear')
p3_out = self.planercnn_maskrcnn_fpn.P3_conv1(c3_out) + F.upsample(p4_out, scale_factor=2, mode='bilinear')
p2_out = self.planercnn_maskrcnn_fpn.P2_conv1(c2_out) + F.upsample(p3_out, scale_factor=2, mode='bilinear')
else:
p4_out = self.planercnn_maskrcnn_fpn.P4_conv1(c4_out) + F.upsample(p5_out, scale_factor=2)
p3_out = self.planercnn_maskrcnn_fpn.P3_conv1(c3_out) + F.upsample(p4_out, scale_factor=2)
p2_out = self.planercnn_maskrcnn_fpn.P2_conv1(c2_out) + F.upsample(p3_out, scale_factor=2)
pass
p5_out = self.planercnn_maskrcnn_fpn.P5_conv2(p5_out)
p4_out = self.planercnn_maskrcnn_fpn.P4_conv2(p4_out)
p3_out = self.planercnn_maskrcnn_fpn.P3_conv2(p3_out)
p2_out = self.planercnn_maskrcnn_fpn.P2_conv2(p2_out)
## P6 is used for the 5th anchor scale in RPN. Generated by
## subsampling from P5 with stride of 2.
p6_out = self.planercnn_maskrcnn_fpn.P6(p5_out)
rpn_feature_maps = [p2_out, p3_out, p4_out, p5_out, p6_out]
mrcnn_feature_maps = [p2_out, p3_out, p4_out, p5_out]
feature_maps = [feature_map for index, feature_map in enumerate(rpn_feature_maps[::-1])]
## Loop through pyramid layers
layer_outputs = [] ## list of lists
for p in rpn_feature_maps:
layer_outputs.append(self.rpn(p))
## Concatenate layer outputs
## Convert from list of lists of level outputs to list of lists
## of outputs across levels.
## e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
outputs = list(zip(*layer_outputs))
outputs = [torch.cat(list(o), dim=1) for o in outputs]
rpn_class_logits, rpn_class, rpn_bbox = outputs
## Generate proposals
## Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates
## and zero padded.
proposal_count = self.config.POST_NMS_ROIS_TRAINING if 'training' in mode and use_refinement == False \
else self.config.POST_NMS_ROIS_INFERENCE
rpn_rois = proposal_layer([rpn_class, rpn_bbox],
proposal_count=proposal_count,
nms_threshold=self.config.RPN_NMS_THRESHOLD,
anchors=self.anchors,
config=self.config)
if mode == 'inference':
## Network Heads
## Proposal classifier and BBox regressor heads
mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_parameters = self.classifier(mrcnn_feature_maps, rpn_rois, ranges)
## Detections
## output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in image coordinates
detections = detection_layer(self.config, rpn_rois, mrcnn_class, mrcnn_bbox, mrcnn_parameters, image_metas)
if len(detections) == 0:
return [[]], [[]], depth_np
## Convert boxes to normalized coordinates
## TODO: let DetectionLayer return normalized coordinates to avoid
## unnecessary conversions
h, w = self.config.IMAGE_SHAPE[:2]
scale = Variable(torch.from_numpy(np.array([h, w, h, w])).float(), requires_grad=False)
if self.config.GPU_COUNT:
scale = scale.cuda()
detection_boxes = detections[:, :4] / scale
## Add back batch dimension
detection_boxes = detection_boxes.unsqueeze(0)
## Create masks for detections
mrcnn_mask, roi_features = self.mask(mrcnn_feature_maps, detection_boxes)
## Add back batch dimension
detections = detections.unsqueeze(0)
mrcnn_mask = mrcnn_mask.unsqueeze(0)
planercnn_out = (detections, mrcnn_mask)
else:
gt_class_ids = input[2]
gt_boxes = input[3]
gt_masks = input[4]
gt_parameters = input[5]
## Normalize coordinates
h, w = self.config.IMAGE_SHAPE[:2]
scale = Variable(torch.from_numpy(np.array([h, w, h, w])).float(), requires_grad=False)
if self.config.GPU_COUNT:
scale = scale.cuda()
gt_boxes = gt_boxes / scale
## Generate detection targets
## Subsamples proposals and generates target outputs for training
## Note that proposal class IDs, gt_boxes, and gt_masks are zero
## padded. Equally, returned rois and targets are zero padded.
rois, target_class_ids, target_deltas, target_mask, target_parameters = \
detection_target_layer(rpn_rois, gt_class_ids, gt_boxes, gt_masks, gt_parameters, self.config)
if len(rois) == 0:
mrcnn_class_logits = Variable(torch.FloatTensor())
mrcnn_class = Variable(torch.IntTensor())
mrcnn_bbox = Variable(torch.FloatTensor())
mrcnn_mask = Variable(torch.FloatTensor())
mrcnn_parameters = Variable(torch.FloatTensor())
if self.config.GPU_COUNT:
mrcnn_class_logits = mrcnn_class_logits.cuda()
mrcnn_class = mrcnn_class.cuda()
mrcnn_bbox = mrcnn_bbox.cuda()
mrcnn_mask = mrcnn_mask.cuda()
mrcnn_parameters = mrcnn_parameters.cuda()
else:
## Network Heads
## Proposal classifier and BBox regressor heads
#print([maps.shape for maps in mrcnn_feature_maps], target_parameters.shape)
mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_parameters = self.classifier(mrcnn_feature_maps, rois, ranges, target_parameters)
## Create masks for detections
mrcnn_mask, _ = self.mask(mrcnn_feature_maps, rois)
planercnn_out = [rpn_class_logits, rpn_bbox, target_class_ids, mrcnn_class_logits, target_deltas, mrcnn_bbox, target_mask, mrcnn_mask, target_parameters, mrcnn_parameters, rois, depth_np]
return [midas_out, yolo_out, planercnn_out]
%%shell
cd /content
ls -altrh drive/My\ Drive/EVA/Datasets/YoloV3-PPE/YoloV3_Dataset.zip
unzip drive/My\ Drive/EVA/Datasets/YoloV3-PPE/YoloV3_Dataset.zip
srcFolder="YoloV3_Dataset"
targetDataFolder="YoloV3/data/ppedata"
mkdir -p $targetDataFolder
# create ppe.data file
cat > $targetDataFolder/ppe.data <<EOF
classes=4
train=data/ppedata/ppetrain.txt
valid=data/ppedata/ppetest.txt
names=data/ppedata/ppe.names
EOF
# copy required files from dataset
cp $srcFolder/classes.txt $targetDataFolder/ppe.names
mkdir -p $targetDataFolder/images && cp $srcFolder/Images/* $targetDataFolder/images/
mkdir -p $targetDataFolder/labels && cp $srcFolder/Labels/* $targetDataFolder/labels/
# YOLO label values are normalized to [0,1]; round any 1.xxxxx values down to 1.0
sed -i 's/1\.[0-9]*/1.0/g' $targetDataFolder/labels/*
echo "> ppe.data"
cat $targetDataFolder/ppe.data
echo ""; echo "> ppe.names"
cat $targetDataFolder/ppe.names; echo ""
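# Each label file holds one "class x_center y_center width height" row per box,
# all normalized to [0, 1]. Quick sanity check on one arbitrary file (a sketch):
import os
label_dir = "/content/YoloV3/data/ppedata/labels"
sample = os.path.join(label_dir, os.listdir(label_dir)[0])
with open(sample) as f:
    for line in f:
        cls, x, y, w, h = line.split()
        assert 0 <= int(cls) < 4 # 4 PPE classes
        assert all(0.0 <= float(v) <= 1.0 for v in (x, y, w, h))
print(sample, "looks valid")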
%%shell
cd /content
# Copy 'yolov3-spp.cfg' to a new file 'yolov3-ppe.cfg' in the cfg folder
cp YoloV3/cfg/yolov3-spp.cfg YoloV3/cfg/yolov3-ppe.cfg
# Search for 'filters=255' (you should find three entries). Change 255 to 27 = (4 classes + 1 objectness + 4 box coords) * 3 anchors
sed -i 's/filters=255/filters=27/g' YoloV3/cfg/yolov3-ppe.cfg
# Search for 'classes=80' and change all three entries to 'classes=4'
sed -i 's/classes=80/classes=4/g' YoloV3/cfg/yolov3-ppe.cfg
sed -i 's/burn_in.*/burn_in=100/g' YoloV3/cfg/yolov3-ppe.cfg
sed -i 's/max_batches.*/max_batches=5000/g' YoloV3/cfg/yolov3-ppe.cfg
sed -i 's/steps=.*/steps=4000,4500/g' YoloV3/cfg/yolov3-ppe.cfg
# Verify if changes took place successfully
grep "filters=27" YoloV3/cfg/yolov3-ppe.cfg
grep "classes" YoloV3/cfg/yolov3-ppe.cfg
grep "burn_in" YoloV3/cfg/yolov3-ppe.cfg
grep "max_batches" YoloV3/cfg/yolov3-ppe.cfg
grep "steps=" YoloV3/cfg/yolov3-ppe.cfg
# Create a folder called weights in the root (YoloV3) folder
!mkdir -p /content/YoloV3/weights
!cp /content/drive/My\ Drive/EVA/Models/YoloV3PPE/*.pt /content/YoloV3/weights/
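# With the cfg, the PPE weights and all checkpoints in place, FinalModel can be
# instantiated. Quick sanity check that the MiDaS backbone is frozen (a sketch;
# assumes the downloads above all succeeded):
sanity_model = FinalModel().to(device)
n_total = sum(p.numel() for p in sanity_model.parameters())
n_train = sum(p.numel() for p in sanity_model.parameters() if p.requires_grad)
print(f"total: {n_total:,}  trainable: {n_train:,}  frozen: {n_total - n_train:,}")
del sanity_model # free memory before training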
%cd /content
import re
import os
import imagesize
srcFolder="YoloV3_Dataset"
targetDataFolder="YoloV3/data/ppedata"
trainFile = open(f'{targetDataFolder}/ppetrain.txt', 'w')
testFile = open(f'{targetDataFolder}/ppetest.txt', 'w')
trainShapesFile = open(f'{targetDataFolder}/ppetrain.shapes', 'w')
testShapesFile = open(f'{targetDataFolder}/ppetest.shapes', 'w')
labelFiles = os.listdir(srcFolder + "/Labels")
imgFiles = os.listdir(srcFolder + "/Images")
count = 0
testCnt = len(labelFiles) // 10 # hold out ~10% of the images for the test split
trainCnt = len(labelFiles) - testCnt
for file in labelFiles:
imgParts = file.split(".txt")
r = re.compile(re.escape(imgParts[0])+".*")
found = False
for imgFile in imgFiles:
if r.match(imgFile):
shape = imagesize.get(f'{srcFolder}/Images/{imgFile}')
if count < trainCnt:
trainFile.write(f'./data/ppedata/images/{imgFile}\n')
if shape: trainShapesFile.write(f'{shape[0]} {shape[1]}\n')
else:
testFile.write(f'./data/ppedata/images/{imgFile}\n')
if shape: testShapesFile.write(f'{shape[0]} {shape[1]}\n')
count+=1
found = True
break
trainFile.close()
testFile.close()
trainShapesFile.close()
testShapesFile.close()
!echo "Total train images: $(cat YoloV3/data/ppedata/ppetrain.txt | wc -l)"
!echo "Total train image shapes: $(cat YoloV3/data/ppedata/ppetrain.shapes | wc -l)"
!echo "Top 5 lines of train file: $(head -n 5 YoloV3/data/ppedata/ppetrain.txt)"
!echo "";
!echo "Total test images: $(cat YoloV3/data/ppedata/ppetest.txt | wc -l)"
!echo "Total test image shapes: $(cat YoloV3/data/ppedata/ppetest.shapes | wc -l)"
!echo "Top 5 lines of test file: $(head -n 5 YoloV3/data/ppedata/ppetest.txt)"
def yolo_test(cfg,
data,
weights=None,
batch_size=16,
img_size=416,
conf_thres=0.001,
iou_thres=0.6, # for nms
save_json=False,
single_cls=False,
augment=False,
model=None,
dataloader=None):
# Initialize/load model and set device
if model is None:
device = torch_utils.select_device('', batch_size=batch_size) # '' -> first CUDA device if available
verbose = True # standalone run: print per-class results
# Remove previous
for f in glob.glob('test_batch*.png'):
os.remove(f)
# Initialize model
model = FinalModel().to(device)
model.to(device)
if device.type != 'cpu' and torch.cuda.device_count() > 1:
model = nn.DataParallel(model)
else: # called by train.py
device = next(model.parameters()).device # get model device
verbose = False
# Configure run
data = parse_data_cfg(data)
nc = 1 if single_cls else int(data['classes']) # number of classes
path = data['valid'] # path to test images
names = load_classes(data['names']) # class names
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
iouv = iouv[0].view(1) # comment out this line to evaluate mAP@0.5:0.95
niou = iouv.numel()
# Dataloader
if dataloader is None:
dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=True, single_cls=single_cls)
batch_size = min(batch_size, len(dataset))
dataloader = DataLoader(dataset,
batch_size=batch_size,
num_workers=min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]),
pin_memory=True,
collate_fn=dataset.collate_fn)
seen = 0
model.eval()
_ = model(torch.zeros((1, 3, img_size, img_size), device=device)) if device.type != 'cpu' else None # run once
coco91class = coco80_to_coco91_class()
s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@0.5', 'F1')
p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
loss = torch.zeros(3, device=device)
jdict, stats, ap, ap_class = [], [], [], []
for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
imgs = imgs.to(device).float() / 255.0 # uint8 to float32, 0 - 255 to 0.0 - 1.0
targets = targets.to(device)
nb, _, height, width = imgs.shape # batch size, channels, height, width
whwh = torch.Tensor([width, height, width, height]).to(device)
# Plot images with bounding boxes
f = 'test_batch%g.png' % batch_i # filename
if batch_i < 1 and not os.path.exists(f):
plot_images(imgs=imgs, targets=targets, paths=paths, fname=f)
# Disable gradients
with torch.no_grad():
# Run model
t = torch_utils.time_synchronized()
midas_out, yolo_out, _ = model(imgs, augment=augment) # FinalModel returns [midas, yolo, planercnn]; planercnn output unused here
t0 += torch_utils.time_synchronized() - t
inf_out, train_out = zip(*yolo_out)
inf_out = torch.cat(inf_out, 1)
# Compute loss
if hasattr(model, 'hyp'): # if model has loss hyperparameters
loss += compute_loss(train_out, targets, model)[1][:3] # GIoU, obj, cls
# Run NMS
t = torch_utils.time_synchronized()
output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres) # nms
t1 += torch_utils.time_synchronized() - t
# Statistics per image
for si, pred in enumerate(output):
labels = targets[targets[:, 0] == si, 1:]
nl = len(labels)
tcls = labels[:, 0].tolist() if nl else [] # target class
seen += 1
if pred is None:
if nl:
stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
continue
# Append to text file
# with open('test.txt', 'a') as file:
# [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]
# Clip boxes to image bounds
clip_coords(pred, (height, width))
# Append to pycocotools JSON dictionary
if save_json:
# [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
image_id = int(Path(paths[si]).stem.split('_')[-1])
box = pred[:, :4].clone() # xyxy
scale_coords(imgs[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape
box = xyxy2xywh(box) # xywh
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
for p, b in zip(pred.tolist(), box.tolist()):
jdict.append({'image_id': image_id,
'category_id': coco91class[int(p[5])],
'bbox': [round(x, 3) for x in b],
'score': round(p[4], 5)})
# Assign all predictions as incorrect
correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
if nl:
detected = [] # target indices
tcls_tensor = labels[:, 0]
# target boxes
tbox = xywh2xyxy(labels[:, 1:5]) * whwh
# Per target class
for cls in torch.unique(tcls_tensor):
ti = (cls == tcls_tensor).nonzero().view(-1) # target indices
pi = (cls == pred[:, 5]).nonzero().view(-1) # prediction indices
# Search for detections
if pi.shape[0]:
# Prediction to target ious
ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices
# Append detections
for j in (ious > iouv[0]).nonzero():
d = ti[i[j]] # detected target
if d not in detected:
detected.append(d)
correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn
if len(detected) == nl: # all targets already located in image
break
# Append statistics (correct, conf, pcls, tcls)
stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
# Compute statistics
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
if len(stats):
p, r, ap, f1, ap_class = ap_per_class(*stats)
if niou > 1:
p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(1), ap[:, 0] # [P, R, AP@0.5:0.95, AP@0.5]
mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
else:
nt = torch.zeros(1)
# Print results
pf = '%20s' + '%10.3g' * 6 # print format
print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))
# Print results per class
if verbose and nc > 1 and len(stats):
for i, c in enumerate(ap_class):
print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
# Print speeds
if verbose or save_json:
t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (img_size, img_size, batch_size) # tuple
print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
maps = np.zeros(nc) + map
for i, c in enumerate(ap_class):
maps[c] = ap[i]
return (mp, mr, map, mf1, *(loss.cpu() / len(dataloader)).tolist()), maps
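# yolo_test can also run standalone once the dataset files exist; called this
# way it constructs FinalModel and its own dataloader internally (a sketch;
# run from /content/YoloV3 so the relative cfg/data paths resolve):
# (mp, mr, mAP50, mf1, *val_losses), per_class_map = yolo_test(
#     cfg="cfg/yolov3-ppe.cfg", data="data/ppedata/ppe.data",
#     batch_size=4, img_size=512)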
%cd /content/YoloV3
import torch.distributed as dist
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import test # upstream test.py (kept for reference; this notebook uses the local yolo_test above)
from models import *
from utils.datasets import *
from utils.utils import *
mixed_precision = True
try: # Mixed precision training https://github.com/NVIDIA/apex
from apex import amp
except:
# print('Apex recommended for mixed precision and faster training: https://github.com/NVIDIA/apex')
mixed_precision = False # not installed
wdir = '/content/YoloV3/weights/' # weights dir
last = wdir + 'last.pt'
best = wdir + 'best.pt'
results_file = 'results.txt'
# Hyperparameters https://github.com/ultralytics/yolov3/issues/310
hyp = {'giou': 3.54, # giou loss gain
'cls': 37.4, # cls loss gain
'cls_pw': 1.0, # cls BCELoss positive_weight
'obj': 64.3, # obj loss gain (*=img_size/320 if img_size != 320)
'obj_pw': 1.0, # obj BCELoss positive_weight
'iou_t': 0.225, # iou training threshold
'lr0': 0.01, # initial learning rate (SGD=5E-3, Adam=5E-4)
'lrf': 0.0005, # final learning rate (with cos scheduler)
'momentum': 0.937, # SGD momentum
'weight_decay': 0.000484, # optimizer weight decay
'fl_gamma': 0.0, # focal loss gamma (efficientDet default is gamma=1.5)
'hsv_h': 0.0138, # image HSV-Hue augmentation (fraction)
'hsv_s': 0.678, # image HSV-Saturation augmentation (fraction)
'hsv_v': 0.36, # image HSV-Value augmentation (fraction)
'degrees': 1.98 * 0, # image rotation (+/- deg)
'translate': 0.05 * 0, # image translation (+/- fraction)
'scale': 0.05 * 0, # image scale (+/- gain)
'shear': 0.641 * 0} # image shear (+/- deg)
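# These gains were tuned on 80-class COCO; yolo_train below rescales the
# classification gain to the current dataset via hyp['cls'] *= nc / 80, so for
# the 4-class PPE data the effective value is:
print('effective cls gain for nc=4:', 37.4 * 4 / 80) # -> 1.87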
def yolo_train():
cfg = opt_cfg
data = opt_data
epochs = opt_epochs
batch_size = opt_batch_size
accumulate = opt_accumulate # effective batch size = batch_size * accumulate
weights = last if opt_resume else opt_weights # initial training weights
imgsz_min, imgsz_max, imgsz_test = opt_img_size # img sizes (min, max, test)
multi_scale = opt_multi_scale
# Image Sizes
gs = 64 # (pixels) grid size
assert math.fmod(imgsz_min, gs) == 0, '--img-size %g must be a %g-multiple' % (imgsz_min, gs)
multi_scale |= imgsz_min != imgsz_max # multi if different (min, max)
if multi_scale:
if imgsz_min == imgsz_max:
imgsz_min //= 1.5
imgsz_max //= 0.667
grid_min, grid_max = imgsz_min // gs, imgsz_max // gs
imgsz_min, imgsz_max = grid_min * gs, grid_max * gs
img_size = imgsz_max # initialize with max size
# Configure run
init_seeds()
data_dict = parse_data_cfg(data)
train_path = data_dict['train']
test_path = data_dict['valid']
nc = int(data_dict['classes']) # number of classes
hyp['cls'] *= nc / 80 # update coco-tuned hyp['cls'] to current dataset
# Remove previous results
for f in glob.glob('*_batch*.png') + glob.glob(results_file):
os.remove(f)
# Initialize model
model = FinalModel().to(device)
# Optimizer
pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
for k, v in dict(model.named_parameters()).items():
if '.bias' in k:
pg2 += [v] # biases
elif 'Conv2d.weight' in k:
pg1 += [v] # apply weight_decay
else:
pg0 += [v] # all else
optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay
optimizer.add_param_group({'params': pg2}) # add pg2 (biases)
del pg0, pg1, pg2
start_epoch = 0
best_fitness = 0.0
#attempt_download(weights)
if opt_resume and weights.endswith('.pt'): # pytorch format
# possible weights are '*.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc.
chkpt = torch.load(weights, map_location=device)
# load model
try:
chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
model.load_state_dict(chkpt['model'], strict=False)
except KeyError as e:
s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
"See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights)
raise KeyError(s) from e
# load optimizer
if chkpt['optimizer'] is not None:
optimizer.load_state_dict(chkpt['optimizer'])
best_fitness = chkpt['best_fitness']
# load results
if chkpt.get('training_results') is not None:
with open(results_file, 'w') as file:
file.write(chkpt['training_results']) # write results.txt
start_epoch = chkpt['epoch'] + 1
del chkpt
elif opt_resume and len(weights) > 0: # darknet format
# possible weights are '*.weights', 'yolov3-tiny.conv.15', 'darknet53.conv.74' etc.
load_darknet_weights(model, weights)
# Mixed precision training https://github.com/NVIDIA/apex
if mixed_precision:
model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
# Scheduler https://github.com/ultralytics/yolov3/issues/238
lf = lambda x: (((1 + math.cos(
x * math.pi / epochs)) / 2) ** 1.0) * 0.95 + 0.05 # cosine https://arxiv.org/pdf/1812.01187.pdf
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf, last_epoch=start_epoch - 1)
# Initialize distributed training
if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
dist.init_process_group(backend='nccl', # 'distributed backend'
init_method='tcp://127.0.0.1:9999', # distributed training init method
world_size=1, # number of nodes for distributed training
rank=0) # distributed training node rank
model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)
model.yolo_layers = model.module.yolo_layers # move yolo layer indices to top level
# Dataset
dataset = LoadImagesAndLabels(train_path, img_size, batch_size,
augment=True,
hyp=hyp, # augmentation hyperparameters
rect=opt_rect, # rectangular training
cache_images=opt_cache_images,
single_cls=opt_single_cls)
# Dataloader
batch_size = min(batch_size, len(dataset))
nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers
dataloader = torch.utils.data.DataLoader(dataset,
batch_size=batch_size,
num_workers=nw,
shuffle=not opt_rect, # Shuffle=True unless rectangular training is used
pin_memory=True,
collate_fn=dataset.collate_fn)
# Testloader
testloader = torch.utils.data.DataLoader(LoadImagesAndLabels(test_path, imgsz_test, batch_size,
hyp=hyp,
rect=True,
cache_images=opt_cache_images,
single_cls=opt_single_cls),
batch_size=batch_size,
num_workers=nw,
pin_memory=True,
collate_fn=dataset.collate_fn)
# Model parameters
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou)
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights
# Model EMA
ema = torch_utils.ModelEMA(model)
# Start training
nb = len(dataloader) # number of batches
n_burn = max(3 * nb, 500) # burn-in iterations, max(3 epochs, 500 iterations)
maps = np.zeros(nc) # mAP per class
# torch.autograd.set_detect_anomaly(True)
results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
t0 = time.time()
print('Image sizes %g - %g train, %g test' % (imgsz_min, imgsz_max, imgsz_test))
print('Using %g dataloader workers' % nw)
print('Starting training for %g epochs...' % epochs)
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
model.train()
# Update image weights (optional)
if dataset.image_weights:
w = model.class_weights.cpu().numpy() * (1 - maps) ** 2 # class weights
image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n) # rand weighted idx
mloss = torch.zeros(4).to(device) # mean losses
print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
pbar = tqdm(enumerate(dataloader), total=nb) # progress bar
for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device).float() / 255.0 # uint8 to float32, 0 - 255 to 0.0 - 1.0
targets = targets.to(device)
# Burn-in
if ni <= n_burn * 2:
model.gr = np.interp(ni, [0, n_burn * 2], [0.0, 1.0]) # giou loss ratio (obj_loss = 1.0 or giou)
if ni == n_burn: # burnin complete
print_model_biases(model)
for j, x in enumerate(optimizer.param_groups):
# bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
x['lr'] = np.interp(ni, [0, n_burn], [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
if 'momentum' in x:
x['momentum'] = np.interp(ni, [0, n_burn], [0.9, hyp['momentum']])
# Multi-Scale training
if opt_multi_scale:
if ni / accumulate % 1 == 0: # adjust img_size (67% - 150%) every 1 batch
img_size = random.randrange(grid_min, grid_max + 1) * gs
sf = img_size / max(imgs.shape[2:]) # scale factor
if sf != 1:
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to 32-multiple)
imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
# Run model
pred = model(imgs)
# Compute loss
loss, loss_items = compute_loss(pred[1], targets, model)
if not torch.isfinite(loss):
print('WARNING: non-finite loss, ending training ', loss_items)
return results
# Scale loss by nominal batch_size of 64
loss *= batch_size / 64
# Compute gradient
if mixed_precision:
with amp.scale_loss(loss, optimizer) as scaled_loss:
scaled_loss.backward()
else:
loss.backward()
# Optimize accumulated gradient
if ni % accumulate == 0:
optimizer.step()
optimizer.zero_grad()
ema.update(model)
# Print batch results
mloss = (mloss * i + loss_items) / (i + 1) # update mean losses
mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0) # (GB)
s = ('%10s' * 2 + '%10.3g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, len(targets), img_size)
pbar.set_description(s)
# end batch ------------------------------------------------------------------------------------------------
# Update scheduler
scheduler.step()
# Process epoch results
ema.update_attr(model)
final_epoch = epoch + 1 == epochs
if not opt_notest or final_epoch: # Calculate mAP
is_coco = any([x in data for x in ['coco.data', 'coco2014.data', 'coco2017.data']]) and model.nc == 80
results, maps = yolo_test(cfg,
data,
batch_size=batch_size,
img_size=imgsz_test,
model=ema.ema,
save_json=final_epoch and is_coco,
single_cls=opt_single_cls,
dataloader=testloader)
# Write epoch results
with open(results_file, 'a') as f:
f.write(s + '%10.3g' * 7 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
if len(opt_name) and opt_bucket:
os.system('gsutil cp results.txt gs://%s/results/results%s.txt' % (opt_bucket, opt_name))
# Update best mAP
fi = fitness(np.array(results).reshape(1, -1)) # fitness_i = weighted combination of [P, R, mAP, F1]
if fi > best_fitness:
best_fitness = fi
# Save training results
save = (not opt_nosave) or (final_epoch and not opt_evolve)
if save:
with open(results_file, 'r') as f:
# Create checkpoint
chkpt = {'epoch': epoch,
'best_fitness': best_fitness,
'training_results': f.read(),
'model': ema.ema.module.state_dict() if hasattr(model, 'module') else ema.ema.state_dict(),
'optimizer': optimizer.state_dict()}
# Save last checkpoint
torch.save(chkpt, last)
# Save best checkpoint
if (best_fitness == fi) and not final_epoch:
torch.save(chkpt, best)
# Save backup every 10 epochs (optional)
# if epoch > 0 and epoch % 10 == 0:
# torch.save(chkpt, wdir + 'backup%g.pt' % epoch)
# Delete checkpoint
del chkpt
# end epoch ----------------------------------------------------------------------------------------------------
# end training
n = opt_name
if len(n):
n = '_' + n if not n.isnumeric() else n
fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]):
if os.path.exists(f1):
os.rename(f1, f2) # rename
ispt = f2.endswith('.pt') # is *.pt
strip_optimizer(f2) if ispt else None # strip optimizer
os.system('gsutil cp %s gs://%s/weights' % (f2, opt_bucket)) if opt_bucket and ispt else None # upload
if not opt_evolve:
plot_results() # save as results.png
print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
torch.cuda.empty_cache()
%cd /content/YoloV3
opt_cfg = "cfg/yolov3-ppe.cfg"
opt_data = "data/ppedata/ppe.data"
opt_epochs = 5
opt_batch_size = 4
opt_accumulate = 4 # effective batch size = batch_size * accumulate = 4 * 4 = 16
opt_weights = "weights/yolov3_best_300.pt" # initial training weights
opt_img_size = [512,512,512] # img sizes (min, max, test)
opt_rect = False
opt_cache_images = True
opt_single_cls = False
opt_multi_scale = False
opt_evolve = False
opt_nosave = False
opt_name = ''
opt_bucket = ''
opt_notest = False
opt_resume = False
yolo_train()
!cp /content/YoloV3/weights/best.pt /content/drive/My\ Drive/EVA/Models/MiDaSYolo/
!cp /content/YoloV3/weights/last.pt /content/drive/My\ Drive/EVA/Models/MiDaSYolo/
opt_resume = True
opt_batch_size = 12
opt_epochs = 20
yolo_train()
!cp /content/YoloV3/weights/best.pt /content/drive/My\ Drive/EVA/Models/MiDaSYolo/
!cp /content/YoloV3/weights/last.pt /content/drive/My\ Drive/EVA/Models/MiDaSYolo/
%%shell
cd /content
mkdir /content/input_imgs
mkdir /content/output_imgs
# Load a sample image
cp /content/planercnn/example_images/image_1.png /content/input_imgs/input_img1.png
from pathlib import Path
def yolo_inference(img, img_path, pred):
save_img = True
out, view_img, save_txt = opt_output, opt_view_img, opt_save_txt
path = str(Path(img_path)) # os-agnostic
im0s = cv2.imread(img_path) # BGR
# Get names and colors
names = load_classes(opt_names)
colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]
pred = non_max_suppression(pred, opt_conf_thres, opt_iou_thres,
multi_label=False, classes=opt_classes, agnostic=opt_agnostic_nms)
# Process detections
for i, det in enumerate(pred): # detections per image
p, s, im0 = path, '', im0s
save_path = str(Path(out) / Path(p).name)
s += '%gx%g ' % img.shape[2:] # print string
if det is not None and len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += '%g %ss, ' % (n, names[int(c)]) # add to string
# Write results
for *xyxy, conf, cls in det:
if save_txt: # Write to file
with open(save_path + '.txt', 'a') as file:
file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf))
if save_img or view_img: # Add bbox to image
label = '%s %.2f' % (names[int(cls)], conf)
plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])
# Save results (image with detections)
if save_img:
filepath = os.path.join(
os.path.dirname(save_path), os.path.splitext(os.path.basename(save_path))[0] + "_yolo" + os.path.splitext(os.path.basename(save_path))[1]
)
cv2.imwrite(filepath, im0)
opt_output = '/content/output_imgs'
opt_view_img = False
opt_save_txt = False
opt_names = '/content/YoloV3/data/ppedata/ppe.names'
opt_conf_thres = 0.1
opt_iou_thres = 0.6
opt_classes = None
opt_agnostic_nms = False
opt_optimize = False
opt_weights = '/content/YoloV3/weights/best.pt'
%cd /content/
from torchvision.transforms import Compose
import cv2
import glob
import os
input_path = "/content/input_imgs"
output_path = "/content/output_imgs"
model = FinalModel().to(device)
# load model
print("Load saved model weights")
chkpt = torch.load(opt_weights, map_location=device)
chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
model.load_state_dict(chkpt['model'], strict=False)
net_w, net_h = 416, 416 # YOLO input size (the MiDaS transform below uses 384x384)
midas_net_w, midas_net_h = 384, 384
midas_transform = Compose(
[
Resize(
midas_net_w,
midas_net_h,
resize_target=None,
keep_aspect_ratio=True,
ensure_multiple_of=32,
resize_method="upper_bound",
image_interpolation_method=cv2.INTER_CUBIC,
),
NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
PrepareForNet(),
]
)
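# With resize_method="upper_bound", the transform scales the image so neither
# side exceeds 384 while keeping aspect ratio, then snaps both sides to
# multiples of 32; PrepareForNet transposes HWC -> CHW. Quick shape check
# (a sketch on a random image; not part of the original pipeline):
import numpy as np
_dummy = {"image": np.random.rand(480, 640, 3).astype(np.float32)}
print(midas_transform(_dummy)["image"].shape) # -> (3, 288, 384) for a 480x640 input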
print("Model eval")
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# get input
img_names = glob.glob(os.path.join(input_path, "*"))
num_images = len(img_names)
# create output folder
os.makedirs(output_path, exist_ok=True)
print("start processing")
for ind, img_name in enumerate(img_names):
print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))
# input
img = midas_utils.read_image(img_name)
img_input = midas_transform({"image": img})["image"]
# compute
with torch.no_grad():
sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
if opt_optimize and device == torch.device("cuda"):
sample = sample.to(memory_format=torch.channels_last)
sample = sample.half()
prediction = model.forward(sample)
midas_prediction = (
torch.nn.functional.interpolate(
prediction[0].unsqueeze(1),
size=img.shape[:2],
mode="bicubic",
align_corners=False,
)
.squeeze()
.cpu()
.numpy()
)
# MiDaS output
filename = os.path.join(
output_path, os.path.splitext(os.path.basename(img_name))[0] + "_midas"
)
midas_utils.write_depth(filename, midas_prediction, bits=2)
# Yolo output
yolo_out = prediction[1]
yolo_inf_out, _ = zip(*yolo_out)
yolo_inf_out = torch.cat(yolo_inf_out, 1)
yolo_inference(sample, img_name, yolo_inf_out)
print("finished")
from IPython.display import Image, clear_output
Image(filename='/content/input_imgs/input_img1.png', width=300)
Image(filename='/content/output_imgs/input_img1_midas.png', width=300)
Image(filename='/content/output_imgs/input_img1_yolo.png', width=300)