import pickle,gzip,math,os,time,shutil,torch,random,timm,torchvision,io,PIL
import fastcore.all as fc,matplotlib as mpl,numpy as np,matplotlib.pyplot as plt
from collections.abc import Mapping
from pathlib import Path
from operator import attrgetter,itemgetter
from functools import partial
from copy import copy
from contextlib import contextmanager

import torchvision.transforms.functional as TF,torch.nn.functional as F
from torchvision import transforms
from torch import tensor,nn,optim
from torch.utils.data import DataLoader,default_collate
from torch.nn import init
from torch.optim import lr_scheduler
from torcheval.metrics import MulticlassAccuracy
from datasets import load_dataset,load_dataset_builder
from fastcore.foundation import L, store_attr

from miniai.datasets import *
from miniai.conv import *
from miniai.learner import *
from miniai.activations import *
from miniai.init import *
from miniai.sgd import *
from miniai.resnet import *

# Image URLs for demos. Change as desired.
face_url = "https://images.pexels.com/photos/2690323/pexels-photo-2690323.jpeg?w=256"
spiderweb_url = "https://images.pexels.com/photos/34225/spider-web-with-water-beads-network-dewdrop.jpg?w=256"


def download_image(url):
    """Fetch the image at `url` and return it as a float tensor scaled by 1/255.

    The raw bytes are read with `fc.urlread(..., decode=False)`, decoded with
    `torchvision.io.decode_image`, converted to float and divided by 255.
    The channel count and layout are whatever `decode_image` yields for the
    file (presumably channels-first RGB for the JPEG demo URLs - confirm if
    you swap in other formats).
    """
    raw = fc.urlread(url, decode=False)
    # Wrap the bytes directly instead of building a Python list with one int
    # object per byte. `bytearray` gives frombuffer a writable buffer, which
    # avoids PyTorch's warning about non-writable memory.
    encoded = torch.frombuffer(bytearray(raw), dtype=torch.uint8)
    return torchvision.io.decode_image(encoded).float() / 255.
content_im = download_image(face_url).to(def_device)
print('content_im.shape:', content_im.shape)
show_image(content_im);

content_im.min(),content_im.max() # Check bounds


class LengthDataset():
    """Dummy dataset of a given length; every item is the pair `(0, 0)`."""

    def __init__(self, length=1):
        self.length = length

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        return 0,0


def get_dummy_dls(length=100):
    """Return `DataLoaders` over dummy data, both loaders with batch size 1.

    The training loader has `length` items and the validation loader has one.
    The items are never used by `ImageOptCB` (it calls the model with no
    input), so `length` presumably just sets the number of optimisation steps
    per epoch - confirm against `Learner`.
    """
    return DataLoaders(DataLoader(LengthDataset(length), batch_size=1), # Train
                       DataLoader(LengthDataset(1), batch_size=1))      # Valid (length 1)


class TensorModel(nn.Module):
    """A 'model' whose only parameter is a copy of the tensor `t`."""

    def __init__(self, t):
        super().__init__()
        # Clone so that optimising the parameter never modifies the caller's tensor.
        self.t = nn.Parameter(t.clone())

    def forward(self, x=0):
        """Ignore `x` and return the parameter tensor itself."""
        return self.t


model = TensorModel(torch.rand_like(content_im))
show_image(model());

[p.shape for p in model.parameters()]


class ImageOptCB(TrainCB):
    """Training callback for optimising an image: no inputs and no targets."""

    def predict(self, learn):
        # The model takes no input; its output is the image being optimised.
        learn.preds = learn.model()

    def get_loss(self, learn):
        # The loss function receives only the predicted image.
        learn.loss = learn.loss_func(learn.preds)


def loss_fn_mse(im):
    """Mean squared error between `im` and the module-level `content_im`."""
    return F.mse_loss(im, content_im)


model = TensorModel(torch.rand_like(content_im))
cbs = [ImageOptCB(), ProgressCB(), MetricsCB(), DeviceCB()]
learn = Learner(model, get_dummy_dls(100), loss_fn_mse, lr=1e-2, cbs=cbs, opt_func=torch.optim.Adam)
learn.fit(1)

# Result (left) vs target image (right):
show_images([learn.model().clip(0, 1), content_im]);


class ImageLogCB(Callback):
    """Record the clipped prediction every `log_every` batches and show them after fit."""

    # Presumably makes this callback run after ProgressCB - confirm against Callback ordering.
    order = ProgressCB.order + 1

    def __init__(self, log_every=10):
        store_attr()
        self.images = []
        self.i = 0

    def after_batch(self, learn):
        if self.i%self.log_every == 0:
            self.images.append(to_cpu(learn.preds.clip(0, 1)))
        self.i += 1

    def after_fit(self, learn):
        show_images(self.images)


model = TensorModel(torch.rand_like(content_im))
learn = Learner(model, get_dummy_dls(150), loss_fn_mse, lr=1e-2, cbs=cbs, opt_func=torch.optim.Adam)
learn.fit(1, cbs=[ImageLogCB(30)])

print(timm.list_models('*vgg*'))

# Only the image is optimised, never the network. Freezing the feature
# extractor stops every backward() from computing gradients for all the VGG
# weights and accumulating them in `.grad`, which nothing ever zeroes.
# Gradients with respect to the input image are unaffected.
vgg16 = timm.create_model('vgg16', pretrained=True).to(def_device).features.eval().requires_grad_(False)

# vgg16

imagenet_mean = tensor([0.485, 0.456, 0.406])
imagenet_std = tensor([0.229, 0.224, 0.225])

# Try 1 (won't work):
# (content_im - imagenet_mean) / imagenet_std
# imagenet_mean has shape (3,), so it would broadcast against the last axis
# of the image rather than the channel axis.

imagenet_mean.shape
content_im.shape
imagenet_mean[:,None,None].shape


# Try 2:
def normalize(im):
    """Subtract the ImageNet per-channel mean and divide by the per-channel std.

    The statistics are reshaped to (3, 1, 1) so they broadcast over the two
    trailing axes of `im`, and are moved to `im`'s device.
    """
    mean = tensor([0.485, 0.456, 0.406])[:,None,None].to(im.device)
    std = tensor([0.229, 0.224, 0.225])[:,None,None].to(im.device)
    return (im - mean) / std


normalize(content_im).min(), normalize(content_im).max()

normalize(content_im).mean(dim=(1, 2))

# And with torchvision transforms (this rebinds `normalize`, used by calc_features below):
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

normalize(content_im).min(), normalize(content_im).max()


def calc_features(imgs, target_layers=(18, 25)):
    """Return the VGG16 activations of `imgs` at the layer indices in `target_layers`.

    `imgs` is normalised, then passed through `vgg16` one layer at a time, up
    to and including the largest requested index. Activations are returned in
    network order as a list.
    """
    x = normalize(imgs)
    feats = []
    for i, layer in enumerate(vgg16[:max(target_layers)+1]):
        x = layer(x)
        if i in target_layers:
            # Store a copy so later layers cannot alter the saved activation
            # (presumably guards against in-place ops in the network - confirm).
            feats.append(x.clone())
    return feats


# Testing it out to see the shapes of the resulting feature maps:
feats = calc_features(content_im)
[f.shape for f in feats]

# Homework: Can you do this using hooks?


class ContentLossToTarget():
    """Loss comparing an image's VGG16 features with those of a fixed target image."""

    def __init__(self, target_im, target_layers=(18, 25)):
        fc.store_attr()
        # The target features are constants, so compute them once without tracking gradients.
        with torch.no_grad():
            self.target_features = calc_features(target_im, target_layers)

    def __call__(self, input_im):
        """Sum over target layers of the mean squared feature difference."""
        return sum((f1-f2).pow(2).mean() for f1, f2 in
                   zip(calc_features(input_im, self.target_layers), self.target_features))


loss_function_perceptual = ContentLossToTarget(content_im)
model = TensorModel(torch.rand_like(content_im))
learn = Learner(model, get_dummy_dls(150), loss_function_perceptual, lr=1e-2, cbs=cbs, opt_func=torch.optim.Adam)
learn.fit(1, cbs=[ImageLogCB(log_every=30)])

# Same again, but matching features from much earlier layers:
loss_function_perceptual = ContentLossToTarget(content_im, target_layers=(1, 6))
model = TensorModel(torch.rand_like(content_im))
learn = Learner(model, get_dummy_dls(150), loss_function_perceptual, lr=1e-2, cbs=cbs, opt_func=torch.optim.Adam)
learn.fit(1, cbs=[ImageLogCB(log_every=30)])

t = tensor([[0, 1, 0, 1, 1, 0, 0, 1, 1],
            [0, 1, 0, 1, 0, 0, 0, 0, 1],
            [1, 0, 1, 1, 1, 1, 1, 1, 0],
            [1, 0, 1, 1, 0, 1, 1, 0, 0]])
# Dot product of every row of `t` with every row of `t`:
torch.einsum('fs, gs -> fg', t, t)

t.matmul(t.T) # Alternate approach

style_im = download_image(spiderweb_url).to(def_device)
show_image(style_im);


def calc_grams(img, target_layers=(1, 6, 11, 18, 25)):
    """Return an `L` of Gram matrices, one per layer in `target_layers`.

    For each feature map from `calc_features`, the Gram matrix holds the inner
    product of every channel with every other channel over all spatial
    positions, divided by height * width. The leading `...` in the einsum lets
    the same code handle feature maps with extra leading (batch) dimensions;
    for unbatched (c, h, w) features it is identical to 'chw, dhw -> cd'.
    """
    return L(torch.einsum('...chw, ...dhw -> ...cd', x, x) / (x.shape[-2]*x.shape[-1])
             for x in calc_features(img, target_layers))


style_grams = calc_grams(style_im)
[g.shape for g in style_grams] # The gram matrices for features from different layers

style_grams.attrgot('shape') # The magic of fastcore's L


class StyleLossToTarget():
    """Loss comparing an image's Gram matrices with those of a fixed target image."""

    def __init__(self, target_im, target_layers=(1, 6, 11, 18, 25)):
        fc.store_attr()
        # The target Gram matrices are constants, so compute them once without tracking gradients.
        with torch.no_grad():
            self.target_grams = calc_grams(target_im, target_layers)

    def __call__(self, input_im):
        """Sum over target layers of the mean squared Gram-matrix difference."""
        return sum((f1-f2).pow(2).mean() for f1, f2 in
                   zip(calc_grams(input_im, self.target_layers), self.target_grams))


style_loss = StyleLossToTarget(style_im)

style_loss(content_im)

model = TensorModel(content_im) # Start from content image
style_loss = StyleLossToTarget(style_im)
content_loss = ContentLossToTarget(content_im)


def combined_loss(x):
    """Unweighted sum of the style and content losses."""
    return style_loss(x) + content_loss(x)


learn = Learner(model, get_dummy_dls(150), combined_loss, lr=1e-2, cbs=cbs, opt_func=torch.optim.Adam)
learn.fit(1, cbs=[ImageLogCB(30)])

show_image(learn.model().clip(0, 1)); # View the final result

model = TensorModel(torch.rand_like(content_im))
style_loss = StyleLossToTarget(style_im)
content_loss = ContentLossToTarget(content_im, target_layers=(6, 18, 25))


def combined_loss(x):
    """Content loss plus the style loss scaled by 0.2."""
    return style_loss(x) * 0.2 + content_loss(x)


learn = Learner(model, get_dummy_dls(300), combined_loss, lr=5e-2, cbs=cbs, opt_func=torch.optim.Adam)
learn.fit(1, cbs=[ImageLogCB(60)])

# The image to be optimized.
# It must have the same shape as content_im, otherwise the feature maps
# compared in the content loss will not line up, so take shape, dtype and
# device from content_im rather than hard-coding 3x256x256.
im = torch.rand_like(content_im)
im.requires_grad_(True)

# Set up the optimizer
opt = torch.optim.Adam([im], lr=5e-2)

# Define the loss function
style_loss = StyleLossToTarget(style_im)
content_loss = ContentLossToTarget(content_im, target_layers=[6, 18, 25])


def combined_loss(x):
    """Content loss plus the style loss scaled by 0.2."""
    return style_loss(x) * 0.2 + content_loss(x)


# Optimization loop
for i in range(300):
    loss = combined_loss(im)
    loss.backward()
    opt.step()
    opt.zero_grad()

# Show the result
show_image(im.clip(0, 1));