from time import perf_counter, time

import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim
import torchvision.models as models
from torch.autograd import Variable
from torchvision.models import vgg16, densenet121, resnet152
# Ask cuDNN to benchmark its convolution algorithms and keep the fastest one.
cudnn.benchmark = True

# Alphabetical list of every lowercase, non-dunder callable exposed by
# torchvision.models.
model_names = sorted(
    name
    for name, entry in models.__dict__.items()
    if name.islower() and not name.startswith("__") and callable(entry)
)

# Report the CUDA / cuDNN versions this PyTorch build is using.
print('cuda version= {}'.format(torch.version.cuda))
print('cudnn version= {}'.format(torch.backends.cudnn.version()))
def time_training_steps(model, half=False, num_runs=100, batch_size=16,
                        input_shape=(3, 224, 224), use_cuda=True):
    """Time complete SGD training steps of ``model`` on random input.

    Every step feeds a fresh random batch through the model, computes a
    cross-entropy loss against a constant target class (index 1), runs the
    backward pass and applies one SGD update. One extra, untimed warm-up
    step is executed before the timed ones.

    Args:
        model: ``nn.Module`` to benchmark; it must produce at least two
            output classes because the target class index is 1. The module
            is modified in place: moved to the GPU when ``use_cuda`` is
            true, cast to FP16 when ``half`` is true, and its parameters
            are updated by the optimizer.
        half: Run the model and its input in half precision.
        num_runs: Number of timed steps (must be at least 1).
        batch_size: Number of samples per batch.
        input_shape: Shape of one sample, without the batch dimension.
        use_cuda: Run on the current CUDA device and synchronize around the
            timed region. Pass ``False`` to run on the CPU instead.

    Returns:
        A list with ``num_runs`` wall-clock durations, in seconds.

    Raises:
        ValueError: If ``num_runs`` is smaller than 1.
    """
    if num_runs < 1:
        raise ValueError('num_runs must be at least 1, got {}'.format(num_runs))

    if use_cuda:
        model = model.cuda()
    if half:
        model = model.half()
    criterion = nn.CrossEntropyLoss()
    if use_cuda:
        criterion = criterion.cuda()
    # Build the optimizer only after the move/cast so it holds the final
    # (GPU / FP16) parameters.
    optimizer = torch.optim.SGD(model.parameters(), 0.001,
                                momentum=0.9,
                                weight_decay=1e-5)

    durations = []
    for step in range(num_runs + 1):
        batch = torch.rand(batch_size, *input_shape)
        target = torch.LongTensor(batch_size).fill_(1)
        if use_cuda:
            batch = batch.cuda()
            target = target.cuda()
        if half:
            batch = batch.half()
        # Variable is kept so the script still runs on PyTorch releases that
        # require it; newer releases treat it as a plain tensor.
        batch = Variable(batch)
        target = Variable(target)

        # Clear the gradients of the previous step so every iteration is an
        # independent SGD step instead of accumulating over the whole run.
        # Done before the timer starts so the timed region stays
        # forward + loss + backward + optimizer step.
        optimizer.zero_grad()

        # CUDA kernels are launched asynchronously: wait for pending work
        # before starting the clock and again before stopping it.
        if use_cuda:
            torch.cuda.synchronize()
        start = perf_counter()
        out = model(batch)
        err = criterion(out, target)
        err.backward()
        optimizer.step()
        if use_cuda:
            torch.cuda.synchronize()
        stop = perf_counter()

        # treat the initial run as warm up and don't count
        if step > 0:
            durations.append(stop - start)
    return durations


def format_result(arch, precision, durations):
    """Return the one-line report for a benchmark run.

    Args:
        arch: Architecture name shown at the start of the line.
        precision: Precision label, e.g. ``'FP 32'`` or ``'FP 16'``.
        durations: Non-empty sequence of step durations in seconds.

    Returns:
        A string of the form ``'<arch> <precision> avg over <n> runs: <t> ms'``
        where ``<t>`` is the mean duration in milliseconds.

    Raises:
        ValueError: If ``durations`` is empty.
    """
    if not durations:
        raise ValueError('durations must not be empty')
    mean_ms = sum(durations) / len(durations) * 1000
    return '{} {} avg over {} runs: {} ms'.format(
        arch, precision, len(durations), mean_ms)


def main():
    """Benchmark FP32 and FP16 training steps for each architecture."""
    for arch in ['densenet121', 'vgg16', 'resnet152']:
        for precision, half in (('FP 32', False), ('FP 16', True)):
            # A fresh, randomly initialised network is built per precision.
            model = models.__dict__[arch]()
            durations = time_training_steps(model, half=half)
            print(format_result(arch, precision, durations))


if __name__ == "__main__":
    main()
# Sample output recorded from a run of this script:
#
#   cuda version= 9.0.176
#   cudnn version= 7005
#   densenet121 FP 32 avg over 100 runs: 71.4666223526001 ms
#   densenet121 FP 16 avg over 100 runs: 59.28621768951416 ms
#   vgg16 FP 32 avg over 100 runs: 87.8125810623169 ms
#   vgg16 FP 16 avg over 100 runs: 58.735389709472656 ms
#   resnet152 FP 32 avg over 100 runs: 143.52030992507935 ms
#   resnet152 FP 16 avg over 100 runs: 92.21737623214722 ms