Uses the resnext101_32x8d_wsl model with pretrained weights from PyTorch Hub (facebookresearch/WSL-Images).
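As a sanity check, this backbone can be loaded directly from PyTorch Hub (a minimal sketch; the entry point name is the one published by facebookresearch/WSL-Images):
import torch
# ResNeXt-101 32x8d backbone with weakly-supervised (WSL) pretrained weights
backbone = torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl")
backbone.eval()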
The ResNeXt-50 block diagram (right panel) uses cardinality C = 32 with bottleneck width 4d, i.e. the residual-stage channels start at 128 (4xC = 4x32 = 128). MiDaS uses a similar but larger backbone, ResNeXt-101 32x8d, whose residual-stage channels start at 256 (8xC = 8x32 = 256); like ResNeXt-50 it has 4 residual stages.
The following is a simplified diagram of its architecture:
!git clone https://github.com/ashxjain/planercnn
!git clone https://github.com/intel-isl/MiDaS.git
!git clone https://github.com/ashxjain/YoloV3.git
from google.colab import drive
drive.mount('/content/drive')
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
%%shell
cd /content/planercnn/roialign; python setup.py install; ./test.sh
cp -r /usr/local/lib/python3.6/dist-packages/roi_align-0.0.2-py3.6-linux-x86_64.egg/roi_align /usr/local/lib/python3.6/dist-packages/
%%shell
cd /content/planercnn/
mkdir -p checkpoint
#wget https://www.dropbox.com/s/yjcg6s57n581sk0/checkpoint.zip?dl=0
#mv "checkpoint.zip?dl=0" "planercnn_refine.zip"
fileId=1o2wZG0swF-HImZbQGPC7cHONkCThFVQZ
fileName=planercnn_refine.zip
curl -sc /tmp/cookie "https://drive.google.com/uc?export=download&id=${fileId}" > /dev/null
code="$(awk '/_warning_/ {print $NF}' /tmp/cookie)"
curl -Lb /tmp/cookie "https://drive.google.com/uc?export=download&confirm=${code}&id=${fileId}" -o ${fileName}
mv planercnn_refine.zip checkpoint/
cd checkpoint/
unzip planercnn_refine.zip
rm planercnn_refine.zip
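# If the cookie-based Drive download above ever breaks, the gdown package is a
# simpler alternative (a sketch, not part of the original flow; same file id).
# Assumes: pip install gdown
import gdown
gdown.download("https://drive.google.com/uc?id=1o2wZG0swF-HImZbQGPC7cHONkCThFVQZ",
               "/content/planercnn/checkpoint/planercnn_refine.zip")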
%%shell
cd /content/planercnn
echo "Running Tests"
python evaluate.py --methods=f --suffix=warping_refine --dataset=inference --customDataFolder=example_images
%cd /content/planercnn
import sys
from config import InferenceConfig, PlaneConfig
from options import parse_args
from evaluate import PlaneRCNNDetector
from datasets.plane_stereo_dataset import PlaneDataset
from datasets.inference_dataset import InferenceDataset
from models.model import SamePad2d, MaskRCNN
from models.refinement_net import RefineModel
sys.argv[1:] = ["--methods=f", "--suffix=warping_refine", "--dataset=inference", "--customDataFolder=example_images"]
options = parse_args()
plane_config = PlaneConfig(options)
%cd /content
from MiDaS.midas.midas_net import MidasNet
from MiDaS.midas.transforms import Resize, NormalizeImage, PrepareForNet
import MiDaS.utils as midas_utils
from torch import nn
from torch.autograd import Variable # used by the PlaneRCNN branch of FinalModel
import torch
import torch.nn.functional as F # FPN upsampling inside FinalModel
import numpy as np
%cd /content/YoloV3
from models import Darknet, YOLOLayer
from utils import torch_utils # scale_img is used for augmented inference
!wget https://github.com/intel-isl/MiDaS/releases/download/v2_1/model-f6b98070.pt -O /content/model-f6b98070.pt
class FinalModel(nn.Module):
def __init__(self):
super(FinalModel, self).__init__()
# MiDaS Layers
# ============
# Load MiDas Model and its weights
midas_model = MidasNet('/content/model-f6b98070.pt', non_negative=True).to(device)
# Freeze all layers of MiDaS network
for param in midas_model.parameters():
param.requires_grad = False
self.resNext = midas_model.pretrained
self.midas_scratch = midas_model.scratch
# YoloV3 Layers
# =============
yolo_model = Darknet("/content/YoloV3/cfg/yolov3-ppe.cfg", img_size=512).to(device)
#yolo_chkpt = torch.load('/content/YoloV3/weights/yolov3-spp-ultralytics.pt', map_location=device)
yolo_chkpt = torch.load('/content/YoloV3/weights/yolov3_best_300.pt', map_location=device)
# load model weights for Darknet
yolo_chkpt['model'] = {k: v for k, v in yolo_chkpt['model'].items() if yolo_model.state_dict()[k].numel() == v.numel()}
yolo_model.load_state_dict(yolo_chkpt['model'], strict=False)
anchors = [(10,13), (16,30), (33,23), (30,61), (62,45), (59,119), (116,90), (156,198), (373,326)]
numclasses = 4
self.yolo_start_1 = nn.Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
self.yolo_start_2 = nn.Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
self.yolo_start_3 = nn.Conv2d(2048, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
# Following are used to fetch anchor vectors
self.yolo_layers = yolo_model.yolo_layers
self.module_list = yolo_model.module_list
for j in self.yolo_layers:
# move this YOLO layer's anchor vector onto the selected device
self.module_list[j].anchor_vec = self.module_list[j].anchor_vec.to(device)
# Yolo scale-1
self.yolo_scale_1_conv_upsample = nn.Sequential(
nn.Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False),
nn.BatchNorm2d(128, eps=0.0001, momentum=0.03, affine=True, track_running_stats=True),
nn.LeakyReLU(negative_slope=0.1, inplace=True),
nn.Upsample(scale_factor=2.0),
)
self.yolo_scale_1_pretrained = nn.Sequential(*list(yolo_model.module_list)[106:113])
self.yolo_scale_1_out = YOLOLayer(anchors[:3], numclasses, 512, -1, [], 1)
# Yolo scale-2
self.yolo_scale_2_conv_upsample = nn.Sequential(
nn.Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False),
nn.BatchNorm2d(256, eps=0.0001, momentum=0.03, affine=True, track_running_stats=True),
nn.LeakyReLU(negative_slope=0.1, inplace=True),
nn.Upsample(scale_factor=2.0),
)
self.yolo_scale_2_pretrained = nn.Sequential(*list(yolo_model.module_list)[94:100])
self.yolo_scale_2_out = YOLOLayer(anchors[3:6], numclasses, 512, -1, [], 1)
self.yolo_scale_2_pretrained_last = yolo_model.module_list[100]
# Yolo scale-3
self.yolo_scale_3_conv = nn.Conv2d(1024, 27, kernel_size=(1, 1), stride=(1, 1))
self.yolo_scale_3_out = YOLOLayer(anchors[6:], numclasses, 512, -1, [], 1)
# PlaneRCNN Layers
# ================
# Load MaskRCNN model
maskrcnn_model = MaskRCNN(plane_config)
# Load MaskRCNN model weights
checkpoint_dir = '/content/planercnn/checkpoint/planercnn_' + options.anchorType
if options.suffix != '':
checkpoint_dir += '_' + options.suffix
maskrcnn_state_dict = torch.load(checkpoint_dir + '/checkpoint.pth')
maskrcnn_model.load_state_dict(maskrcnn_state_dict)
# Load Refine Model
refine_model = RefineModel(options)
# Load Refine Model weights
refine_model.load_state_dict(torch.load(checkpoint_dir + '/checkpoint_refine.pth'))
self.planercnn_maskrcnn_fpn = maskrcnn_model.fpn
# forward_once below also uses the MaskRCNN heads, anchors and config directly
self.rpn = maskrcnn_model.rpn
self.classifier = maskrcnn_model.classifier
self.mask = maskrcnn_model.mask
self.anchors = maskrcnn_model.anchors
self.config = maskrcnn_model.config
self.planercnn_refine_model = refine_model
def forward(self, x, augment=False):
if augment: # Augment images (inference and test only)
img_size = x.shape[-2:] # height, width
s = [0.83, 0.67] # scales
y = []
for i, xi in enumerate((x,
torch_utils.scale_img(x.flip(3), s[0], same_shape=False), # flip-lr and scale
torch_utils.scale_img(x, s[1], same_shape=False), # scale
)):
# cv2.imwrite('img%g.jpg' % i, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1])
y.append(self.forward_once(xi)[0])
y[1][..., :4] /= s[0] # scale
y[1][..., 0] = img_size[1] - y[1][..., 0] # flip lr
y[2][..., :4] /= s[1] # scale
y = torch.cat(y, 1)
return y, None
return self.forward_once(x)
def forward_once(self, x):
# MiDaS
# =====
resNext_layer_1 = self.resNext.layer1(x)
resNext_layer_2 = self.resNext.layer2(resNext_layer_1)
resNext_layer_3 = self.resNext.layer3(resNext_layer_2)
resNext_layer_4 = self.resNext.layer4(resNext_layer_3)
layer_1_rn = self.midas_scratch.layer1_rn(resNext_layer_1)
layer_2_rn = self.midas_scratch.layer2_rn(resNext_layer_2)
layer_3_rn = self.midas_scratch.layer3_rn(resNext_layer_3)
layer_4_rn = self.midas_scratch.layer4_rn(resNext_layer_4)
path_4 = self.midas_scratch.refinenet4(layer_4_rn)
path_3 = self.midas_scratch.refinenet3(path_4, layer_3_rn)
path_2 = self.midas_scratch.refinenet2(path_3, layer_2_rn)
path_1 = self.midas_scratch.refinenet1(path_2, layer_1_rn)
# MiDAS out
midas_out = self.midas_scratch.output_conv(path_1) # Output: 224x224x1
midas_out = torch.squeeze(midas_out, dim=1)
# YoloV3
# ======
yolo_out = []
# Yolo Scale-3 out
yolo_scale3_start3 = self.yolo_start_3(resNext_layer_4)
yolo_scale3_conv = self.yolo_scale_3_conv(yolo_scale3_start3)
yolo_scale3_out = self.yolo_scale_3_out(yolo_scale3_conv, None) # Output: 13x13x27
yolo_out.append(yolo_scale3_out)
# Yolo Scale-2 Out
yolo_scale2_start3 = self.yolo_start_3(resNext_layer_4)
yolo_scale2_conv_upsample = self.yolo_scale_2_conv_upsample(yolo_scale2_start3)
yolo_scale2_start2 = self.yolo_start_2(resNext_layer_3)
yolo_scale2_cat = torch.cat([yolo_scale2_start2, yolo_scale2_conv_upsample], 1)
yolo_scale2_pretrained = self.yolo_scale_2_pretrained(yolo_scale2_cat)
yolo_scale2 = self.yolo_scale_2_pretrained_last(yolo_scale2_pretrained)
yolo_scale2_out = self.yolo_scale_2_out(yolo_scale2, None) # Output: 26x26x27
yolo_out.append(yolo_scale2_out)
# Yolo Scale-1 Out
yolo_scale1_conv_upsample = self.yolo_scale_1_conv_upsample(yolo_scale2_pretrained)
yolo_scale1_start = self.yolo_start_1(resNext_layer_2)
yolo_scale1 = torch.cat([yolo_scale1_start, yolo_scale1_conv_upsample], 1)
yolo_scale1_pretrained = self.yolo_scale_1_pretrained(yolo_scale1)
yolo_scale1_out = self.yolo_scale_1_out(yolo_scale1_pretrained, None) # Output: 52x52x27
yolo_out.append(yolo_scale1_out)
# PlaneRCNN
# =========
# NOTE: this branch is adapted from planercnn's MaskRCNN predict(); mode,
# ranges, image_metas, use_refinement, depth_np and (for training) the
# gt_* entries of `input` are expected to be supplied as in planercnn.
# MaskRCNN FPN
c2_out = resNext_layer_1
c3_out = resNext_layer_2
c4_out = resNext_layer_3
p5_out = self.planercnn_maskrcnn_fpn.P5_conv1(resNext_layer_4)
if self.planercnn_maskrcnn_fpn.bilinear_upsampling:
p4_out = self.planercnn_maskrcnn_fpn.P4_conv1(c4_out) + F.upsample(p5_out, scale_factor=2, mode='bilinear')
p3_out = self.planercnn_maskrcnn_fpn.P3_conv1(c3_out) + F.upsample(p4_out, scale_factor=2, mode='bilinear')
p2_out = self.planercnn_maskrcnn_fpn.P2_conv1(c2_out) + F.upsample(p3_out, scale_factor=2, mode='bilinear')
else:
p4_out = self.planercnn_maskrcnn_fpn.P4_conv1(c4_out) + F.upsample(p5_out, scale_factor=2)
p3_out = self.planercnn_maskrcnn_fpn.P3_conv1(c3_out) + F.upsample(p4_out, scale_factor=2)
p2_out = self.planercnn_maskrcnn_fpn.P2_conv1(c2_out) + F.upsample(p3_out, scale_factor=2)
pass
p5_out = self.planercnn_maskrcnn_fpn.P5_conv2(p5_out)
p4_out = self.planercnn_maskrcnn_fpn.P4_conv2(p4_out)
p3_out = self.planercnn_maskrcnn_fpn.P3_conv2(p3_out)
p2_out = self.planercnn_maskrcnn_fpn.P2_conv2(p2_out)
## P6 is used for the 5th anchor scale in RPN. Generated by
## subsampling from P5 with stride of 2.
p6_out = self.planercnn_maskrcnn_fpn.P6(p5_out)
rpn_feature_maps = [p2_out, p3_out, p4_out, p5_out, p6_out]
mrcnn_feature_maps = [p2_out, p3_out, p4_out, p5_out]
feature_maps = [feature_map for index, feature_map in enumerate(rpn_feature_maps[::-1])]
## Loop through pyramid layers
layer_outputs = [] ## list of lists
for p in rpn_feature_maps:
layer_outputs.append(self.rpn(p))
## Concatenate layer outputs
## Convert from list of lists of level outputs to list of lists
## of outputs across levels.
## e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
outputs = list(zip(*layer_outputs))
outputs = [torch.cat(list(o), dim=1) for o in outputs]
rpn_class_logits, rpn_class, rpn_bbox = outputs
## Generate proposals
## Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates
## and zero padded.
proposal_count = self.config.POST_NMS_ROIS_TRAINING if 'training' in mode and use_refinement == False \
else self.config.POST_NMS_ROIS_INFERENCE
rpn_rois = proposal_layer([rpn_class, rpn_bbox],
proposal_count=proposal_count,
nms_threshold=self.config.RPN_NMS_THRESHOLD,
anchors=self.anchors,
config=self.config)
if mode == 'inference':
## Network Heads
## Proposal classifier and BBox regressor heads
mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_parameters = self.classifier(mrcnn_feature_maps, rpn_rois, ranges)
## Detections
## output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in image coordinates
detections = detection_layer(self.config, rpn_rois, mrcnn_class, mrcnn_bbox, mrcnn_parameters, image_metas)
if len(detections) == 0:
return [[]], [[]], depth_np
## Convert boxes to normalized coordinates
## TODO: let DetectionLayer return normalized coordinates to avoid
## unnecessary conversions
h, w = self.config.IMAGE_SHAPE[:2]
scale = Variable(torch.from_numpy(np.array([h, w, h, w])).float(), requires_grad=False)
if self.config.GPU_COUNT:
scale = scale.cuda()
detection_boxes = detections[:, :4] / scale
## Add back batch dimension
detection_boxes = detection_boxes.unsqueeze(0)
## Create masks for detections
mrcnn_mask, roi_features = self.mask(mrcnn_feature_maps, detection_boxes)
## Add back batch dimension
detections = detections.unsqueeze(0)
mrcnn_mask = mrcnn_mask.unsqueeze(0)
planercnn_out = (detections, mrcnn_mask)
else:
gt_class_ids = input[2]
gt_boxes = input[3]
gt_masks = input[4]
gt_parameters = input[5]
## Normalize coordinates
h, w = self.config.IMAGE_SHAPE[:2]
scale = Variable(torch.from_numpy(np.array([h, w, h, w])).float(), requires_grad=False)
if self.config.GPU_COUNT:
scale = scale.cuda()
gt_boxes = gt_boxes / scale
## Generate detection targets
## Subsamples proposals and generates target outputs for training
## Note that proposal class IDs, gt_boxes, and gt_masks are zero
## padded. Equally, returned rois and targets are zero padded.
rois, target_class_ids, target_deltas, target_mask, target_parameters = \
detection_target_layer(rpn_rois, gt_class_ids, gt_boxes, gt_masks, gt_parameters, self.config)
if len(rois) == 0:
mrcnn_class_logits = Variable(torch.FloatTensor())
mrcnn_class = Variable(torch.IntTensor())
mrcnn_bbox = Variable(torch.FloatTensor())
mrcnn_mask = Variable(torch.FloatTensor())
mrcnn_parameters = Variable(torch.FloatTensor())
if self.config.GPU_COUNT:
mrcnn_class_logits = mrcnn_class_logits.cuda()
mrcnn_class = mrcnn_class.cuda()
mrcnn_bbox = mrcnn_bbox.cuda()
mrcnn_mask = mrcnn_mask.cuda()
mrcnn_parameters = mrcnn_parameters.cuda()
else:
## Network Heads
## Proposal classifier and BBox regressor heads
#print([maps.shape for maps in mrcnn_feature_maps], target_parameters.shape)
mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_parameters = self.classifier(mrcnn_feature_maps, rois, ranges, target_parameters)
## Create masks for detections
mrcnn_mask, _ = self.mask(mrcnn_feature_maps, rois)
planercnn_out = [rpn_class_logits, rpn_bbox, target_class_ids, mrcnn_class_logits, target_deltas, mrcnn_bbox, target_mask, mrcnn_mask, target_parameters, mrcnn_parameters, rois, depth_np]
return [midas_out, yolo_out, planercnn_out]
%%shell
cd /content
ls -altrh drive/My\ Drive/EVA/Datasets/YoloV3-PPE/YoloV3_Dataset.zip
unzip drive/My\ Drive/EVA/Datasets/YoloV3-PPE/YoloV3_Dataset.zip
srcFolder="YoloV3_Dataset"
targetDataFolder="YoloV3/data/ppedata"
mkdir -p $targetDataFolder
# create ppe.data file
cat > $targetDataFolder/ppe.data <<EOF
classes=4
train=data/ppedata/ppetrain.txt
valid=data/ppedata/ppetest.txt
names=data/ppedata/ppe.names
EOF
# copy required files from dataset
cp $srcFolder/classes.txt $targetDataFolder/ppe.names
mkdir -p $targetDataFolder/images && cp $srcFolder/Images/* $targetDataFolder/images/
mkdir -p $targetDataFolder/labels && cp $srcFolder/Labels/* $targetDataFolder/labels/
# YOLO label values are normalized to [0,1]; round any 1.xxxxx values down to 1.0
sed -i 's/1\.[0-9]*/1.0/g' $targetDataFolder/labels/*
echo "> ppe.data"
cat $targetDataFolder/ppe.data
echo ""; echo "> ppe.names"
cat $targetDataFolder/ppe.names; echo ""
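# Each label file holds one "class x_center y_center width height" row per box,
# all normalized to [0, 1]. Quick sanity check on one arbitrary file (a sketch):
import os
label_dir = "/content/YoloV3/data/ppedata/labels"
sample = os.path.join(label_dir, os.listdir(label_dir)[0])
with open(sample) as f:
    for line in f:
        cls, x, y, w, h = line.split()
        assert 0 <= int(cls) < 4 # 4 PPE classes
        assert all(0.0 <= float(v) <= 1.0 for v in (x, y, w, h))
print(sample, "looks valid")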
%%shell
cd /content
# Copy 'yolov3-spp.cfg' to a new file 'yolov3-ppe.cfg' in the cfg folder
cp YoloV3/cfg/yolov3-spp.cfg YoloV3/cfg/yolov3-ppe.cfg
# Search for 'filters=255' (you should find three entries). Change 255 to 27 = (4 classes + 1 objectness + 4 box coords) * 3 anchors
sed -i 's/filters=255/filters=27/g' YoloV3/cfg/yolov3-ppe.cfg
# Search for 'classes=80' and change all three entries to 'classes=4'
sed -i 's/classes=80/classes=4/g' YoloV3/cfg/yolov3-ppe.cfg
sed -i 's/burn_in.*/burn_in=100/g' YoloV3/cfg/yolov3-ppe.cfg
sed -i 's/max_batches.*/max_batches=5000/g' YoloV3/cfg/yolov3-ppe.cfg
sed -i 's/steps=.*/steps=4000,4500/g' YoloV3/cfg/yolov3-ppe.cfg
# Verify if changes took place successfully
grep "filters=27" YoloV3/cfg/yolov3-ppe.cfg
grep "classes" YoloV3/cfg/yolov3-ppe.cfg
grep "burn_in" YoloV3/cfg/yolov3-ppe.cfg
grep "max_batches" YoloV3/cfg/yolov3-ppe.cfg
grep "steps=" YoloV3/cfg/yolov3-ppe.cfg
# Create a folder called weights in the root (YoloV3) folder
!mkdir -p /content/YoloV3/weights
!cp /content/drive/My\ Drive/EVA/Models/YoloV3PPE/*.pt /content/YoloV3/weights/
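# With the cfg, the PPE weights and all checkpoints in place, FinalModel can be
# instantiated. Quick sanity check that the MiDaS backbone is frozen (a sketch;
# assumes the downloads above all succeeded):
sanity_model = FinalModel().to(device)
n_total = sum(p.numel() for p in sanity_model.parameters())
n_train = sum(p.numel() for p in sanity_model.parameters() if p.requires_grad)
print(f"total: {n_total:,}  trainable: {n_train:,}  frozen: {n_total - n_train:,}")
del sanity_model # free memory before training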
%cd /content
import re
import os
import imagesize
srcFolder="YoloV3_Dataset"
targetDataFolder="YoloV3/data/ppedata"
trainFile = open(f'{targetDataFolder}/ppetrain.txt', 'w')
testFile = open(f'{targetDataFolder}/ppetest.txt', 'w')
trainShapesFile = open(f'{targetDataFolder}/ppetrain.shapes', 'w')
testShapesFile = open(f'{targetDataFolder}/ppetest.shapes', 'w')
labelFiles = os.listdir(srcFolder + "/Labels")
imgFiles = os.listdir(srcFolder + "/Images")
count = 0
testCnt = len(labelFiles) // 10 # hold out ~10% of the images for the test split
trainCnt = len(labelFiles) - testCnt
for file in labelFiles:
imgParts = file.split(".txt")
r = re.compile(re.escape(imgParts[0])+".*")
found = False
for imgFile in imgFiles:
if r.match(imgFile):
shape = imagesize.get(f'{srcFolder}/Images/{imgFile}')
if count < trainCnt:
trainFile.write(f'./data/ppedata/images/{imgFile}\n')
if shape: trainShapesFile.write(f'{shape[0]} {shape[1]}\n')
else:
testFile.write(f'./data/ppedata/images/{imgFile}\n')
if shape: testShapesFile.write(f'{shape[0]} {shape[1]}\n')
count+=1
found = True
break
trainFile.close()
testFile.close()
trainShapesFile.close()
testShapesFile.close()
!echo "Total train images: $(cat YoloV3/data/ppedata/ppetrain.txt | wc -l)"
!echo "Total train image shapes: $(cat YoloV3/data/ppedata/ppetrain.shapes | wc -l)"
!echo "Top 5 lines of train file: $(head -n 5 YoloV3/data/ppedata/ppetrain.txt)"
!echo "";
!echo "Total test images: $(cat YoloV3/data/ppedata/ppetest.txt | wc -l)"
!echo "Total test image shapes: $(cat YoloV3/data/ppedata/ppetest.shapes | wc -l)"
!echo "Top 5 lines of test file: $(head -n 5 YoloV3/data/ppedata/ppetest.txt)"
def yolo_test(cfg,
data,
weights=None,
batch_size=16,
img_size=416,
conf_thres=0.001,
iou_thres=0.6, # for nms
save_json=False,
single_cls=False,
augment=False,
model=None,
dataloader=None):
# Initialize/load model and set device
if model is None:
device = torch_utils.select_device('', batch_size=batch_size) # '' -> first CUDA device if available
verbose = True # standalone run: print per-class results
# Remove previous
for f in glob.glob('test_batch*.png'):
os.remove(f)
# Initialize model
model = FinalModel().to(device)
model.to(device)
if device.type != 'cpu' and torch.cuda.device_count() > 1:
model = nn.DataParallel(model)
else: # called by train.py
device = next(model.parameters()).device # get model device
verbose = False
# Configure run
data = parse_data_cfg(data)
nc = 1 if single_cls else int(data['classes']) # number of classes
path = data['valid'] # path to test images
names = load_classes(data['names']) # class names
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
iouv = iouv[0].view(1) # comment out this line to evaluate mAP@0.5:0.95
niou = iouv.numel()
# Dataloader
if dataloader is None:
dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=True, single_cls=single_cls)
batch_size = min(batch_size, len(dataset))
dataloader = DataLoader(dataset,
batch_size=batch_size,
num_workers=min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]),
pin_memory=True,
collate_fn=dataset.collate_fn)
seen = 0
model.eval()
_ = model(torch.zeros((1, 3, img_size, img_size), device=device)) if device.type != 'cpu' else None # run once
coco91class = coco80_to_coco91_class()
s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@0.5', 'F1')
p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
loss = torch.zeros(3, device=device)
jdict, stats, ap, ap_class = [], [], [], []
for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
imgs = imgs.to(device).float() / 255.0 # uint8 to float32, 0 - 255 to 0.0 - 1.0
targets = targets.to(device)
nb, _, height, width = imgs.shape # batch size, channels, height, width
whwh = torch.Tensor([width, height, width, height]).to(device)
# Plot images with bounding boxes
f = 'test_batch%g.png' % batch_i # filename
if batch_i < 1 and not os.path.exists(f):
plot_images(imgs=imgs, targets=targets, paths=paths, fname=f)
# Disable gradients
with torch.no_grad():
# Run model
t = torch_utils.time_synchronized()
midas_out, yolo_out, _ = model(imgs, augment=augment) # FinalModel returns [midas, yolo, planercnn]; planercnn output unused here
t0 += torch_utils.time_synchronized() - t
inf_out, train_out = zip(*yolo_out)
inf_out = torch.cat(inf_out, 1)
# Compute loss
if hasattr(model, 'hyp'): # if model has loss hyperparameters
loss += compute_loss(train_out, targets, model)[1][:3] # GIoU, obj, cls
# Run NMS
t = torch_utils.time_synchronized()
output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres) # nms
t1 += torch_utils.time_synchronized() - t
# Statistics per image
for si, pred in enumerate(output):
labels = targets[targets[:, 0] == si, 1:]
nl = len(labels)
tcls = labels[:, 0].tolist() if nl else [] # target class
seen += 1
if pred is None:
if nl:
stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
continue
# Append to text file
# with open('test.txt', 'a') as file:
# [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]
# Clip boxes to image bounds
clip_coords(pred, (height, width))
# Append to pycocotools JSON dictionary
if save_json:
# [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
image_id = int(Path(paths[si]).stem.split('_')[-1])
box = pred[:, :4].clone() # xyxy
scale_coords(imgs[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape
box = xyxy2xywh(box) # xywh
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
for p, b in zip(pred.tolist(), box.tolist()):
jdict.append({'image_id': image_id,
'category_id': coco91class[int(p[5])],
'bbox': [round(x, 3) for x in b],
'score': round(p[4], 5)})
# Assign all predictions as incorrect
correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
if nl:
detected = [] # target indices
tcls_tensor = labels[:, 0]
# target boxes
tbox = xywh2xyxy(labels[:, 1:5]) * whwh
# Per target class
for cls in torch.unique(tcls_tensor):
ti = (cls == tcls_tensor).nonzero().view(-1) # target indices
pi = (cls == pred[:, 5]).nonzero().view(-1) # prediction indices
# Search for detections
if pi.shape[0]:
# Prediction to target ious
ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices
# Append detections
for j in (ious > iouv[0]).nonzero():
d = ti[i[j]] # detected target
if d not in detected:
detected.append(d)
correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn
if len(detected) == nl: # all targets already located in image
break
# Append statistics (correct, conf, pcls, tcls)
stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
# Compute statistics
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
if len(stats):
p, r, ap, f1, ap_class = ap_per_class(*stats)
if niou > 1:
p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(1), ap[:, 0] # [P, R, AP@0.5:0.95, AP@0.5]
mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
else:
nt = torch.zeros(1)
# Print results
pf = '%20s' + '%10.3g' * 6 # print format
print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))
# Print results per class
if verbose and nc > 1 and len(stats):
for i, c in enumerate(ap_class):
print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
# Print speeds
if verbose or save_json:
t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (img_size, img_size, batch_size) # tuple
print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
maps = np.zeros(nc) + map
for i, c in enumerate(ap_class):
maps[c] = ap[i]
return (mp, mr, map, mf1, *(loss.cpu() / len(dataloader)).tolist()), maps
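# yolo_test can also run standalone once the dataset files exist; called this
# way it constructs FinalModel and its own dataloader internally (a sketch;
# run from /content/YoloV3 so the relative cfg/data paths resolve):
# (mp, mr, mAP50, mf1, *val_losses), per_class_map = yolo_test(
#     cfg="cfg/yolov3-ppe.cfg", data="data/ppedata/ppe.data",
#     batch_size=4, img_size=512)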
%cd /content/YoloV3
import torch.distributed as dist
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import test # upstream test.py (kept for reference; this notebook uses the local yolo_test above)
from models import *
from utils.datasets import *
from utils.utils import *
mixed_precision = True
try: # Mixed precision training https://github.com/NVIDIA/apex
from apex import amp
except:
# print('Apex recommended for mixed precision and faster training: https://github.com/NVIDIA/apex')
mixed_precision = False # not installed
wdir = '/content/YoloV3/weights/' # weights dir
last = wdir + 'last.pt'
best = wdir + 'best.pt'
results_file = 'results.txt'
# Hyperparameters https://github.com/ultralytics/yolov3/issues/310
hyp = {'giou': 3.54, # giou loss gain
'cls': 37.4, # cls loss gain
'cls_pw': 1.0, # cls BCELoss positive_weight
'obj': 64.3, # obj loss gain (*=img_size/320 if img_size != 320)
'obj_pw': 1.0, # obj BCELoss positive_weight
'iou_t': 0.225, # iou training threshold
'lr0': 0.01, # initial learning rate (SGD=5E-3, Adam=5E-4)
'lrf': 0.0005, # final learning rate (with cos scheduler)
'momentum': 0.937, # SGD momentum
'weight_decay': 0.000484, # optimizer weight decay
'fl_gamma': 0.0, # focal loss gamma (efficientDet default is gamma=1.5)
'hsv_h': 0.0138, # image HSV-Hue augmentation (fraction)
'hsv_s': 0.678, # image HSV-Saturation augmentation (fraction)
'hsv_v': 0.36, # image HSV-Value augmentation (fraction)
'degrees': 1.98 * 0, # image rotation (+/- deg)
'translate': 0.05 * 0, # image translation (+/- fraction)
'scale': 0.05 * 0, # image scale (+/- gain)
'shear': 0.641 * 0} # image shear (+/- deg)
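# These gains were tuned on 80-class COCO; yolo_train below rescales the
# classification gain to the current dataset via hyp['cls'] *= nc / 80, so for
# the 4-class PPE data the effective value is:
print('effective cls gain for nc=4:', 37.4 * 4 / 80) # -> 1.87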
def yolo_train():
cfg = opt_cfg
data = opt_data
epochs = opt_epochs
batch_size = opt_batch_size
accumulate = opt_accumulate # effective batch size = batch_size * accumulate
weights = last if opt_resume else opt_weights # initial training weights
imgsz_min, imgsz_max, imgsz_test = opt_img_size # img sizes (min, max, test)
multi_scale = opt_multi_scale
# Image Sizes
gs = 64 # (pixels) grid size
assert math.fmod(imgsz_min, gs) == 0, '--img-size %g must be a %g-multiple' % (imgsz_min, gs)
multi_scale |= imgsz_min != imgsz_max # multi if different (min, max)
if multi_scale:
if imgsz_min == imgsz_max:
imgsz_min //= 1.5
imgsz_max //= 0.667
grid_min, grid_max = imgsz_min // gs, imgsz_max // gs
imgsz_min, imgsz_max = grid_min * gs, grid_max * gs
img_size = imgsz_max # initialize with max size
# Configure run
init_seeds()
data_dict = parse_data_cfg(data)
train_path = data_dict['train']
test_path = data_dict['valid']
nc = int(data_dict['classes']) # number of classes
hyp['cls'] *= nc / 80 # update coco-tuned hyp['cls'] to current dataset
# Remove previous results
for f in glob.glob('*_batch*.png') + glob.glob(results_file):
os.remove(f)
# Initialize model
model = FinalModel().to(device)
# Optimizer
pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
for k, v in dict(model.named_parameters()).items():
if '.bias' in k:
pg2 += [v] # biases
elif 'Conv2d.weight' in k:
pg1 += [v] # apply weight_decay
else:
pg0 += [v] # all else
optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay
optimizer.add_param_group({'params': pg2}) # add pg2 (biases)
del pg0, pg1, pg2
start_epoch = 0
best_fitness = 0.0
#attempt_download(weights)
if opt_resume and weights.endswith('.pt'): # pytorch format
# possible weights are '*.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc.
chkpt = torch.load(weights, map_location=device)
# load model
try:
chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
model.load_state_dict(chkpt['model'], strict=False)
except KeyError as e:
s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
"See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights)
raise KeyError(s) from e
# load optimizer
if chkpt['optimizer'] is not None:
optimizer.load_state_dict(chkpt['optimizer'])
best_fitness = chkpt['best_fitness']
# load results
if chkpt.get('training_results') is not None:
with open(results_file, 'w') as file:
file.write(chkpt['training_results']) # write results.txt
start_epoch = chkpt['epoch'] + 1
del chkpt
elif opt_resume and len(weights) > 0: # darknet format
# possible weights are '*.weights', 'yolov3-tiny.conv.15', 'darknet53.conv.74' etc.
load_darknet_weights(model, weights)
# Mixed precision training https://github.com/NVIDIA/apex
if mixed_precision:
model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
# Scheduler https://github.com/ultralytics/yolov3/issues/238
lf = lambda x: (((1 + math.cos(
x * math.pi / epochs)) / 2) ** 1.0) * 0.95 + 0.05 # cosine https://arxiv.org/pdf/1812.01187.pdf
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf, last_epoch=start_epoch - 1)
# Initialize distributed training
if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
dist.init_process_group(backend='nccl', # 'distributed backend'
init_method='tcp://127.0.0.1:9999', # distributed training init method
world_size=1, # number of nodes for distributed training
rank=0) # distributed training node rank
model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)
model.yolo_layers = model.module.yolo_layers # move yolo layer indices to top level
# Dataset
dataset = LoadImagesAndLabels(train_path, img_size, batch_size,
augment=True,
hyp=hyp, # augmentation hyperparameters
rect=opt_rect, # rectangular training
cache_images=opt_cache_images,
single_cls=opt_single_cls)
# Dataloader
batch_size = min(batch_size, len(dataset))
nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers
dataloader = torch.utils.data.DataLoader(dataset,
batch_size=batch_size,
num_workers=nw,
shuffle=not opt_rect, # Shuffle=True unless rectangular training is used
pin_memory=True,
collate_fn=dataset.collate_fn)
# Testloader
testloader = torch.utils.data.DataLoader(LoadImagesAndLabels(test_path, imgsz_test, batch_size,
hyp=hyp,
rect=True,
cache_images=opt_cache_images,
single_cls=opt_single_cls),
batch_size=batch_size,
num_workers=nw,
pin_memory=True,
collate_fn=dataset.collate_fn)
# Model parameters
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou)
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights
# Model EMA
ema = torch_utils.ModelEMA(model)
# Start training
nb = len(dataloader) # number of batches
n_burn = max(3 * nb, 500) # burn-in iterations, max(3 epochs, 500 iterations)
maps = np.zeros(nc) # mAP per class
# torch.autograd.set_detect_anomaly(True)
results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
t0 = time.time()
print('Image sizes %g - %g train, %g test' % (imgsz_min, imgsz_max, imgsz_test))
print('Using %g dataloader workers' % nw)
print('Starting training for %g epochs...' % epochs)
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
model.train()
# Update image weights (optional)
if dataset.image_weights:
w = model.class_weights.cpu().numpy() * (1 - maps) ** 2 # class weights
image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n) # rand weighted idx
mloss = torch.zeros(4).to(device) # mean losses
print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
pbar = tqdm(enumerate(dataloader), total=nb) # progress bar
for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device).float() / 255.0 # uint8 to float32, 0 - 255 to 0.0 - 1.0
targets = targets.to(device)
# Burn-in
if ni <= n_burn * 2:
model.gr = np.interp(ni, [0, n_burn * 2], [0.0, 1.0]) # giou loss ratio (obj_loss = 1.0 or giou)
if ni == n_burn: # burnin complete
print_model_biases(model)
for j, x in enumerate(optimizer.param_groups):
# bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
x['lr'] = np.interp(ni, [0, n_burn], [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
if 'momentum' in x:
x['momentum'] = np.interp(ni, [0, n_burn], [0.9, hyp['momentum']])
# Multi-Scale training
if opt_multi_scale:
if ni / accumulate % 1 == 0: # adjust img_size (67% - 150%) every 1 batch
img_size = random.randrange(grid_min, grid_max + 1) * gs
sf = img_size / max(imgs.shape[2:]) # scale factor
if sf != 1:
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to 32-multiple)
imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
# Run model
pred = model(imgs)
# Compute loss
loss, loss_items = compute_loss(pred[1], targets, model)
if not torch.isfinite(loss):
print('WARNING: non-finite loss, ending training ', loss_items)
return results
# Scale loss by nominal batch_size of 64
loss *= batch_size / 64
# Compute gradient
if mixed_precision:
with amp.scale_loss(loss, optimizer) as scaled_loss:
scaled_loss.backward()
else:
loss.backward()
# Optimize accumulated gradient
if ni % accumulate == 0:
optimizer.step()
optimizer.zero_grad()
ema.update(model)
# Print batch results
mloss = (mloss * i + loss_items) / (i + 1) # update mean losses
mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0) # (GB)
s = ('%10s' * 2 + '%10.3g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, len(targets), img_size)
pbar.set_description(s)
# end batch ------------------------------------------------------------------------------------------------
# Update scheduler
scheduler.step()
# Process epoch results
ema.update_attr(model)
final_epoch = epoch + 1 == epochs
if not opt_notest or final_epoch: # Calculate mAP
is_coco = any([x in data for x in ['coco.data', 'coco2014.data', 'coco2017.data']]) and model.nc == 80
results, maps = yolo_test(cfg,
data,
batch_size=batch_size,
img_size=imgsz_test,
model=ema.ema,
save_json=final_epoch and is_coco,
single_cls=opt_single_cls,
dataloader=testloader)
# Write epoch results
with open(results_file, 'a') as f:
f.write(s + '%10.3g' * 7 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
if len(opt_name) and opt_bucket:
os.system('gsutil cp results.txt gs://%s/results/results%s.txt' % (opt_bucket, opt_name))
# Update best mAP
fi = fitness(np.array(results).reshape(1, -1)) # fitness_i = weighted combination of [P, R, mAP, F1]
if fi > best_fitness:
best_fitness = fi
# Save training results
save = (not opt_nosave) or (final_epoch and not opt_evolve)
if save:
with open(results_file, 'r') as f:
# Create checkpoint
chkpt = {'epoch': epoch,
'best_fitness': best_fitness,
'training_results': f.read(),
'model': ema.ema.module.state_dict() if hasattr(model, 'module') else ema.ema.state_dict(),
'optimizer': optimizer.state_dict()}
# Save last checkpoint
torch.save(chkpt, last)
# Save best checkpoint
if (best_fitness == fi) and not final_epoch:
torch.save(chkpt, best)
# Save backup every 10 epochs (optional)
# if epoch > 0 and epoch % 10 == 0:
# torch.save(chkpt, wdir + 'backup%g.pt' % epoch)
# Delete checkpoint
del chkpt
# end epoch ----------------------------------------------------------------------------------------------------
# end training
n = opt_name
if len(n):
n = '_' + n if not n.isnumeric() else n
fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]):
if os.path.exists(f1):
os.rename(f1, f2) # rename
ispt = f2.endswith('.pt') # is *.pt
strip_optimizer(f2) if ispt else None # strip optimizer
os.system('gsutil cp %s gs://%s/weights' % (f2, opt_bucket)) if opt_bucket and ispt else None # upload
if not opt_evolve:
plot_results() # save as results.png
print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
torch.cuda.empty_cache()
%cd /content/YoloV3
opt_cfg = "cfg/yolov3-ppe.cfg"
opt_data = "data/ppedata/ppe.data"
opt_epochs = 5
opt_batch_size = 4
opt_accumulate = 4 # effective batch size = batch_size * accumulate = 4 * 4 = 16
opt_weights = "weights/yolov3_best_300.pt" # initial training weights
opt_img_size = [512,512,512] # img sizes (min, max, test)
opt_rect = False
opt_cache_images = True
opt_single_cls = False
opt_multi_scale = False
opt_evolve = False
opt_nosave = False
opt_name = ''
opt_bucket = ''
opt_notest = False
opt_resume = False
yolo_train()
!cp /content/YoloV3/weights/best.pt /content/drive/My\ Drive/EVA/Models/MiDaSYolo/
!cp /content/YoloV3/weights/last.pt /content/drive/My\ Drive/EVA/Models/MiDaSYolo/
opt_resume = True
opt_batch_size = 12
opt_epochs = 20
yolo_train()
!cp /content/YoloV3/weights/best.pt /content/drive/My\ Drive/EVA/Models/MiDaSYolo/
!cp /content/YoloV3/weights/last.pt /content/drive/My\ Drive/EVA/Models/MiDaSYolo/
%%shell
cd /content
mkdir /content/input_imgs
mkdir /content/output_imgs
# Load a sample image
cp /content/planercnn/example_images/image_1.png /content/input_imgs/input_img1.png
from pathlib import Path
def yolo_inference(img, img_path, pred):
save_img = True
out, view_img, save_txt = opt_output, opt_view_img, opt_save_txt
path = str(Path(img_path)) # os-agnostic
im0s = cv2.imread(img_path) # BGR
# Get names and colors
names = load_classes(opt_names)
colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]
pred = non_max_suppression(pred, opt_conf_thres, opt_iou_thres,
multi_label=False, classes=opt_classes, agnostic=opt_agnostic_nms)
# Process detections
for i, det in enumerate(pred): # detections per image
p, s, im0 = path, '', im0s
save_path = str(Path(out) / Path(p).name)
s += '%gx%g ' % img.shape[2:] # print string
if det is not None and len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += '%g %ss, ' % (n, names[int(c)]) # add to string
# Write results
for *xyxy, conf, cls in det:
if save_txt: # Write to file
with open(save_path + '.txt', 'a') as file:
file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf))
if save_img or view_img: # Add bbox to image
label = '%s %.2f' % (names[int(cls)], conf)
plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])
# Save results (image with detections)
if save_img:
filepath = os.path.join(
os.path.dirname(save_path), os.path.splitext(os.path.basename(save_path))[0] + "_yolo" + os.path.splitext(os.path.basename(save_path))[1]
)
cv2.imwrite(filepath, im0)
opt_output = '/content/output_imgs'
opt_view_img = False
opt_save_txt = False
opt_names = '/content/YoloV3/data/ppedata/ppe.names'
opt_conf_thres = 0.1
opt_iou_thres = 0.6
opt_classes = None
opt_agnostic_nms = False
opt_optimize = False
opt_weights = '/content/YoloV3/weights/best.pt'
%cd /content/
from torchvision.transforms import Compose
import cv2
import glob
import os
input_path = "/content/input_imgs"
output_path = "/content/output_imgs"
model = FinalModel().to(device)
# load model
print("Load saved model weights")
chkpt = torch.load(opt_weights, map_location=device)
chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
model.load_state_dict(chkpt['model'], strict=False)
net_w, net_h = 416, 416 # YOLO input size (the MiDaS transform below uses 384x384)
midas_net_w, midas_net_h = 384, 384
midas_transform = Compose(
[
Resize(
midas_net_w,
midas_net_h,
resize_target=None,
keep_aspect_ratio=True,
ensure_multiple_of=32,
resize_method="upper_bound",
image_interpolation_method=cv2.INTER_CUBIC,
),
NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
PrepareForNet(),
]
)
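# With resize_method="upper_bound", the transform scales the image so neither
# side exceeds 384 while keeping aspect ratio, then snaps both sides to
# multiples of 32; PrepareForNet transposes HWC -> CHW. Quick shape check
# (a sketch on a random image; not part of the original pipeline):
import numpy as np
_dummy = {"image": np.random.rand(480, 640, 3).astype(np.float32)}
print(midas_transform(_dummy)["image"].shape) # -> (3, 288, 384) for a 480x640 input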
print("Model eval")
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# get input
img_names = glob.glob(os.path.join(input_path, "*"))
num_images = len(img_names)
# create output folder
os.makedirs(output_path, exist_ok=True)
print("start processing")
for ind, img_name in enumerate(img_names):
print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))
# input
img = midas_utils.read_image(img_name)
img_input = midas_transform({"image": img})["image"]
# compute
with torch.no_grad():
sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
if opt_optimize and device == torch.device("cuda"):
sample = sample.to(memory_format=torch.channels_last)
sample = sample.half()
prediction = model.forward(sample)
midas_prediction = (
torch.nn.functional.interpolate(
prediction[0].unsqueeze(1),
size=img.shape[:2],
mode="bicubic",
align_corners=False,
)
.squeeze()
.cpu()
.numpy()
)
# MiDaS output
filename = os.path.join(
output_path, os.path.splitext(os.path.basename(img_name))[0] + "_midas"
)
midas_utils.write_depth(filename, midas_prediction, bits=2)
# Yolo output
yolo_out = prediction[1]
yolo_inf_out, _ = zip(*yolo_out)
yolo_inf_out = torch.cat(yolo_inf_out, 1)
yolo_inference(sample, img_name, yolo_inf_out)
print("finished")
from IPython.display import Image, clear_output
Image(filename='/content/input_imgs/input_img1.png', width=300)
Image(filename='/content/output_imgs/input_img1_midas.png', width=300)
Image(filename='/content/output_imgs/input_img1_yolo.png', width=300)