from fastai.text import * bs=64 path = untar_data(URLs.HUMAN_NUMBERS) path.ls() def readnums(d): return [', '.join(o.strip() for o in open(path/d).readlines())] train_txt = readnums('train.txt'); train_txt[0][:80] valid_txt = readnums('valid.txt'); valid_txt[0][-80:] train = TextList(train_txt, path=path) valid = TextList(valid_txt, path=path) src = ItemLists(path=path, train=train, valid=valid).label_for_lm() data = src.databunch(bs=bs) train[0].text[:80] len(data.valid_ds[0][0].data) data.bptt, len(data.valid_dl) 13017/70/bs it = iter(data.valid_dl) x1,y1 = next(it) x2,y2 = next(it) x3,y3 = next(it) it.close() x1.numel()+x2.numel()+x3.numel() x1.shape,y1.shape x2.shape,y2.shape x1[:,0] y1[:,0] v = data.valid_ds.vocab v.textify(x1[0]) v.textify(y1[0]) v.textify(x2[0]) v.textify(x3[0]) v.textify(x1[1]) v.textify(x2[1]) v.textify(x3[1]) v.textify(x3[-1]) data.show_batch(ds_type=DatasetType.Valid) data = src.databunch(bs=bs, bptt=3) x,y = data.one_batch() x.shape,y.shape nv = len(v.itos); nv nh=64 def loss4(input,target): return F.cross_entropy(input, target[:,-1]) def acc4 (input,target): return accuracy(input, target[:,-1]) class Model0(nn.Module): def __init__(self): super().__init__() self.i_h = nn.Embedding(nv,nh) # green arrow self.h_h = nn.Linear(nh,nh) # brown arrow self.h_o = nn.Linear(nh,nv) # blue arrow self.bn = nn.BatchNorm1d(nh) def forward(self, x): h = self.bn(F.relu(self.h_h(self.i_h(x[:,0])))) if x.shape[1]>1: h = h + self.i_h(x[:,1]) h = self.bn(F.relu(self.h_h(h))) if x.shape[1]>2: h = h + self.i_h(x[:,2]) h = self.bn(F.relu(self.h_h(h))) return self.h_o(h) learn = Learner(data, Model0(), loss_func=loss4, metrics=acc4) learn.fit_one_cycle(6, 1e-4) class Model1(nn.Module): def __init__(self): super().__init__() self.i_h = nn.Embedding(nv,nh) # green arrow self.h_h = nn.Linear(nh,nh) # brown arrow self.h_o = nn.Linear(nh,nv) # blue arrow self.bn = nn.BatchNorm1d(nh) def forward(self, x): h = torch.zeros(x.shape[0], nh).to(device=x.device) for i in range(x.shape[1]): h = h + self.i_h(x[:,i]) h = self.bn(F.relu(self.h_h(h))) return self.h_o(h) learn = Learner(data, Model1(), loss_func=loss4, metrics=acc4) learn.fit_one_cycle(6, 1e-4) data = src.databunch(bs=bs, bptt=20) x,y = data.one_batch() x.shape,y.shape class Model2(nn.Module): def __init__(self): super().__init__() self.i_h = nn.Embedding(nv,nh) self.h_h = nn.Linear(nh,nh) self.h_o = nn.Linear(nh,nv) self.bn = nn.BatchNorm1d(nh) def forward(self, x): h = torch.zeros(x.shape[0], nh).to(device=x.device) res = [] for i in range(x.shape[1]): h = h + self.i_h(x[:,i]) h = F.relu(self.h_h(h)) res.append(self.h_o(self.bn(h))) return torch.stack(res, dim=1) learn = Learner(data, Model2(), metrics=accuracy) learn.fit_one_cycle(10, 1e-4, pct_start=0.1) class Model3(nn.Module): def __init__(self): super().__init__() self.i_h = nn.Embedding(nv,nh) self.h_h = nn.Linear(nh,nh) self.h_o = nn.Linear(nh,nv) self.bn = nn.BatchNorm1d(nh) self.h = torch.zeros(bs, nh).cuda() def forward(self, x): res = [] h = self.h for i in range(x.shape[1]): h = h + self.i_h(x[:,i]) h = F.relu(self.h_h(h)) res.append(self.bn(h)) self.h = h.detach() res = torch.stack(res, dim=1) res = self.h_o(res) return res learn = Learner(data, Model3(), metrics=accuracy) learn.fit_one_cycle(20, 3e-3) class Model4(nn.Module): def __init__(self): super().__init__() self.i_h = nn.Embedding(nv,nh) self.rnn = nn.RNN(nh,nh, batch_first=True) self.h_o = nn.Linear(nh,nv) self.bn = BatchNorm1dFlat(nh) self.h = torch.zeros(1, bs, nh).cuda() def forward(self, x): res,h = self.rnn(self.i_h(x), self.h) self.h = h.detach() return self.h_o(self.bn(res)) learn = Learner(data, Model4(), metrics=accuracy) learn.fit_one_cycle(20, 3e-3) class Model5(nn.Module): def __init__(self): super().__init__() self.i_h = nn.Embedding(nv,nh) self.rnn = nn.GRU(nh, nh, 2, batch_first=True) self.h_o = nn.Linear(nh,nv) self.bn = BatchNorm1dFlat(nh) self.h = torch.zeros(2, bs, nh).cuda() def forward(self, x): res,h = self.rnn(self.i_h(x), self.h) self.h = h.detach() return self.h_o(self.bn(res)) learn = Learner(data, Model5(), metrics=accuracy) learn.fit_one_cycle(10, 1e-2)