%reload_ext autoreload
%autoreload 2

#export
from nb_006a import *

PATH = Path('data/pascal')
JPEG_PATH = PATH/'VOCdevkit'/'VOC2007'/'JPEGImages'

import json
trn_j = json.load((PATH/'pascal_train2007.json').open())
classes   = {o['id']:o['name'] for o in trn_j['categories']}
filenames = {o['id']:JPEG_PATH/o['file_name'] for o in trn_j['images']}
annotations = [{'img_id': o['image_id'], 'class': classes[o['category_id']], 'bbox': o['bbox']}
               for o in trn_j['annotations'] if not o['ignore']]
len(annotations)

annot_by_img = collections.defaultdict(list)
for annot in annotations:
    annot_by_img[annot['img_id']].append({'class': annot['class'], 'bbox': annot['bbox']})
len(annot_by_img)

# For each image, keep the annotation with the largest area (width * height).
biggest_bb = {}
for img_id in filenames.keys():
    size,best = 0,0
    for i,o in enumerate(annot_by_img[img_id]):
        o_sz = o['bbox'][2] * o['bbox'][3]
        if size < o_sz: size,best = o_sz,i
    biggest_bb[img_id] = annot_by_img[img_id][best]

# Random 80/20 train/validation split.
ids = np.random.permutation(np.array(list(filenames.keys())))
split = int(len(filenames) * 0.2)
train_fns = [filenames[i] for i in ids[split:]]
valid_fns = [filenames[i] for i in ids[:split]]

# Convert the COCO-style [x,y,width,height] boxes to [top,left,bottom,right].
bboxes = {}
for i in filenames.keys():
    bb = biggest_bb[i]['bbox']
    bboxes[i] = [[bb[1], bb[0], bb[3]+bb[1], bb[2]+bb[0]]]
train_bbs = [bboxes[i] for i in ids[split:]]
valid_bbs = [bboxes[i] for i in ids[:split]]

# Same conversion, but keeping every box per image.
all_bboxes = collections.defaultdict(list)
for i in filenames.keys():
    for o in annot_by_img[i]:
        bb = o['bbox']
        all_bboxes[i].append([bb[1], bb[0], bb[3]+bb[1], bb[2]+bb[0]])
train_all_bbs = [all_bboxes[i] for i in ids[split:]]
valid_all_bbs = [all_bboxes[i] for i in ids[:split]]

#export
class ImageBBox(ImageMask):
    "Image class for bbox-style annotations"
    def clone(self): return self.__class__(self.px.clone())

    @classmethod
    def create(cls, bboxes:Collection[Collection[int]], h:int, w:int) -> 'ImageBBox':
        "Create an ImageBBox from [top,left,bottom,right] boxes, one mask plane per box"
        pxls = torch.zeros(len(bboxes), h, w).long()
        for i,bbox in enumerate(bboxes):
            pxls[i, bbox[0]:bbox[2]+1, bbox[1]:bbox[3]+1] = 1
        return cls(pxls.float())

    @property
    def data(self) -> LongTensor:
        "Recover the box corners from each mask plane, so boxes follow any transform applied to the mask"
        bboxes = []
        for i in range(self.px.size(0)):
            idxs = torch.nonzero(self.px[i])
            if len(idxs) != 0:
                bboxes.append(torch.tensor([idxs[:,0].min(), idxs[:,1].min(),
                                            idxs[:,0].max(), idxs[:,1].max()])[None])
        return torch.cat(bboxes, 0).squeeze()

#export
from matplotlib import patches, patheffects
from matplotlib.patches import Patch

def bb2hw(a:Collection[int]) -> np.ndarray:
    "Convert a [top,left,bottom,right] box to the [x,y,width,height] form matplotlib expects"
    return np.array([a[1], a[0], a[3]-a[1], a[2]-a[0]])

def draw_outline(o:Patch, lw:int):
    "Draw a black outline around `o` so it stays visible on any background"
    o.set_path_effects([patheffects.Stroke(linewidth=lw, foreground='black'),
                        patheffects.Normal()])

def draw_rect(ax:plt.Axes, b:Collection[int], color:str='white'):
    "Draw bounding box `b` ([x,y,width,height]) on `ax`"
    patch = ax.add_patch(patches.Rectangle(b[:2], *b[-2:], fill=False, edgecolor=color, lw=2))
    draw_outline(patch, 4)

def _show_image(img:Image, ax:plt.Axes=None, figsize:tuple=(3,3), hide_axis:bool=True,
                cmap:str='binary', alpha:float=None) -> plt.Axes:
    if ax is None: fig,ax = plt.subplots(figsize=figsize)
    ax.imshow(image2np(img), cmap=cmap, alpha=alpha)
    if hide_axis: ax.axis('off')
    return ax

def show_image(x:Image, y:Image=None, ax:plt.Axes=None, figsize:tuple=(3,3), alpha:float=0.5,
               hide_axis:bool=True, cmap:str='viridis'):
    ax1 = _show_image(x, ax=ax, hide_axis=hide_axis, cmap=cmap)
    if y is not None: _show_image(y, ax=ax1, alpha=alpha, hide_axis=hide_axis, cmap=cmap)
    if hide_axis: ax1.axis('off')
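# A minimal standalone check (toy values, torch only) of the mask round-trip the
# ImageBBox class above relies on: `create` rasterises a [top,left,bottom,right]
# box into a binary mask plane, and `data` recovers the corners with nonzero().
# Because boxes live as masks in between, any spatial transform applied to the
# mask moves the recovered boxes along with the image. The box below is made up.
import torch

box = [1, 2, 3, 5]                       # top, left, bottom, right (illustrative values)
mask = torch.zeros(1, 8, 8)
mask[0, box[0]:box[2]+1, box[1]:box[3]+1] = 1.
idxs = torch.nonzero(mask[0])            # (row, col) pairs of the filled pixels
recovered = [idxs[:,0].min().item(), idxs[:,1].min().item(),
             idxs[:,0].max().item(), idxs[:,1].max().item()]
assert recovered == box                  # the corners survive the round-trip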
def _show(self:Image, ax:plt.Axes=None, y:Image=None, **kwargs):
    is_bb = isinstance(y, ImageBBox)   # decide before unwrapping (also safe when y is None)
    if y is not None: y = y.data
    if not is_bb: return show_image(self.data, ax=ax, y=y, **kwargs)
    ax = _show_image(self.data, ax=ax)
    # a single box comes back as a 1-d tensor, multiple boxes as a 2-d one
    if len(y.size()) == 1: draw_rect(ax, bb2hw(y))
    else:
        for i in range(y.size(0)): draw_rect(ax, bb2hw(y[i]))

Image.show = _show

#export
@dataclass
class CoordTargetDataset(Dataset):
    "A dataset of images annotated with bounding boxes"
    x_fns:Collection[Path]
    bbs:Collection[Collection[int]]
    def __post_init__(self): assert len(self.x_fns)==len(self.bbs)
    def __repr__(self) -> str: return f'{type(self).__name__} of len {len(self.x_fns)}'
    def __len__(self) -> int: return len(self.x_fns)
    def __getitem__(self, i:int) -> Tuple[Image,ImageBBox]:
        x = open_image(self.x_fns[i])
        return x, ImageBBox.create(self.bbs[i], *x.size)

train_ds = CoordTargetDataset(train_fns, train_all_bbs)
valid_ds = CoordTargetDataset(valid_fns, valid_all_bbs)

# Look at one example (the index depends on the random split above).
train_fns.index(JPEG_PATH/'000012.jpg')
x,y = train_ds[1477]
y.data
x.show(y=y)
x.show(y=ImageMask(y.px[0].unsqueeze(0)))   # the first box's mask plane on its own
y.data, valid_all_bbs[1]

# Check that the boxes follow the image through data augmentation.
tfms = get_transforms(do_flip=True, max_rotate=4, max_lighting=0.2)
train_tds = DatasetTfm(train_ds, tfms=tfms[0], tfm_y=True, size=128, padding_mode='border')
x,y = train_tds[0]
fig,axs = plt.subplots(4,4, figsize=(10,10))
for ax in axs.flatten():
    x,y = train_tds[0]
    x.show(ax=ax, y=y)

# Train a regressor that predicts the largest box in each image.
bs,sz = 4,224
tfms = get_transforms(do_flip=True, max_rotate=4, max_lighting=0.2)
train_ds = CoordTargetDataset(train_fns, train_bbs)
valid_ds = CoordTargetDataset(valid_fns, valid_bbs)
data = DataBunch.create(train_ds, valid_ds, path=PATH, bs=bs, num_workers=0, ds_tfms=tfms,
                        size=sz, tfms=imagenet_norm, padding_mode='border')

arch = tvm.resnet34
model = create_body(arch(), -2)
num_features(model)   # 512 for a resnet34 body

def custom_loss(output, target):
    "L1 loss between the sigmoid outputs in [0,1] and the target coordinates scaled to the same range"
    target = target.float().div_(sz)
    return F.l1_loss(output, target)

head_reg4 = nn.Sequential(Flatten(), nn.Linear(512*7*7, 4), nn.Sigmoid())
learn = ConvLearner(data, arch, custom_head=head_reg4)   # accuracy is meaningless for coordinate regression, so no metrics
learn.loss_fn = custom_loss
learn.lr_find()
learn.recorder.plot()
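# A quick standalone sanity check (dummy tensors, assumed batch shape) of what
# custom_loss above computes: the head's Sigmoid keeps predictions in [0,1], the
# targets are pixel coordinates in [0,sz], so the target is divided by sz before
# taking the mean absolute error between the two.
import torch
import torch.nn.functional as F

sz = 224
output = torch.sigmoid(torch.randn(4, 4))        # fake head output: 4 boxes of 4 coords in [0,1]
target = torch.randint(0, sz, (4, 4)).float()    # fake targets in pixel space
loss = F.l1_loss(output, target.div(sz))         # the same quantity custom_loss returns
print(loss.item())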