#hide
from utils import *
from fastai2.text.all import *
path = untar_data(URLs.HUMAN_NUMBERS)
#hide
Path.BASE_PATH = path
path.ls()
(#2) [Path('train.txt'),Path('valid.txt')]
lines = L()
with open(path/'train.txt') as f: lines += L(*f.readlines())
with open(path/'valid.txt') as f: lines += L(*f.readlines())
lines
(#9998) ['one \n','two \n','three \n','four \n','five \n','six \n','seven \n','eight \n','nine \n','ten \n'...]
text = ' . '.join([l.strip() for l in lines])
text[:100]
'one . two . three . four . five . six . seven . eight . nine . ten . eleven . twelve . thirteen . fo'
tokens = text.split(' ')
tokens[:10]
['one', '.', 'two', '.', 'three', '.', 'four', '.', 'five', '.']
vocab = L(*tokens).unique()
vocab
(#30) ['one','.','two','three','four','five','six','seven','eight','nine'...]
word2idx = {w:i for i,w in enumerate(vocab)}
nums = L(word2idx[i] for i in tokens)
nums
(#63095) [0,1,2,1,3,1,4,1,5,1...]
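As a quick sanity check (assuming the `tokens`, `vocab`, and `nums` built above), mapping the indices back through `vocab` reproduces the original token stream:
' '.join(vocab[i] for i in nums[:10])
# 'one . two . three . four . five .'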
L((tokens[i:i+3], tokens[i+3]) for i in range(0,len(tokens)-4,3))
(#21031) [(['one', '.', 'two'], '.'),(['.', 'three', '.'], 'four'),(['four', '.', 'five'], '.'),(['.', 'six', '.'], 'seven'),(['seven', '.', 'eight'], '.'),(['.', 'nine', '.'], 'ten'),(['ten', '.', 'eleven'], '.'),(['.', 'twelve', '.'], 'thirteen'),(['thirteen', '.', 'fourteen'], '.'),(['.', 'fifteen', '.'], 'sixteen')...]
seqs = L((tensor(nums[i:i+3]), nums[i+3]) for i in range(0,len(nums)-4,3))
seqs
(#21031) [(tensor([0, 1, 2]), 1),(tensor([1, 3, 1]), 4),(tensor([4, 1, 5]), 1),(tensor([1, 6, 1]), 7),(tensor([7, 1, 8]), 1),(tensor([1, 9, 1]), 10),(tensor([10, 1, 11]), 1),(tensor([ 1, 12, 1]), 13),(tensor([13, 1, 14]), 1),(tensor([ 1, 15, 1]), 16)...]
bs = 64
cut = int(len(seqs) * 0.8)
dls = DataLoaders.from_dsets(seqs[:cut], seqs[cut:], bs=bs, shuffle=False)
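A quick shape check, assuming the `dls` just built: every batch pairs 64 three-token inputs with 64 integer targets.
x, y = dls.one_batch()
x.shape, y.shape
# (torch.Size([64, 3]), torch.Size([64]))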
class LMModel1(Module):
    def __init__(self, vocab_sz, n_hidden):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)  # input -> hidden (embedding)
        self.h_h = nn.Linear(n_hidden, n_hidden)     # hidden -> hidden
        self.h_o = nn.Linear(n_hidden, vocab_sz)     # hidden -> output

    def forward(self, x):
        # First word: embed, then pass through the hidden layer.
        h = F.relu(self.h_h(self.i_h(x[:,0])))
        # Second and third words: add each embedding to the running
        # activations, reusing the same hidden layer every time.
        h = h + self.i_h(x[:,1])
        h = F.relu(self.h_h(h))
        h = h + self.i_h(x[:,2])
        h = F.relu(self.h_h(h))
        # Map the final hidden state to one score per vocabulary item.
        return self.h_o(h)
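Before training, it can help to poke at the untrained model (a sketch assuming the `dls` and `LMModel1` above): the output has one activation per vocabulary item for each sequence in the batch, and `argmax` picks out the model's current guess.
m = LMModel1(len(vocab), 64)
x, y = dls.one_batch()
preds = m(x)
preds.shape                       # torch.Size([64, 30]): one score per vocab word
vocab[preds[0].argmax().item()]   # the untrained model's guess for the first sequence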
learn = Learner(dls, LMModel1(len(vocab), 64), loss_func=F.cross_entropy, metrics=accuracy)
learn.fit_one_cycle(4, 1e-3)
| epoch | train_loss | valid_loss | accuracy | time |
|---|---|---|---|---|
| 0 | 1.824297 | 1.970941 | 0.467554 | 00:05 |
| 1 | 1.386973 | 1.823242 | 0.467554 | 00:05 |
| 2 | 1.417556 | 1.654497 | 0.494414 | 00:05 |
| 3 | 1.376440 | 1.650849 | 0.494414 | 00:05 |
n,counts = 0,torch.zeros(len(vocab))
for x,y in dls.valid:
n += y.shape[0]
for i in range_of(vocab): counts[i] += (y==i).long().sum()
idx = torch.argmax(counts)
idx, vocab[idx.item()], counts[idx].item()/n
(tensor(29), 'thousand', 0.15165200855716662)
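The same baseline can be computed more compactly with `torch.bincount` (a sketch assuming the `dls.valid` above). Always predicting the most common token, 'thousand', would only be right about 15% of the time, so the model is already doing far better than this trivial baseline.
ys = torch.cat([y for _, y in dls.valid])
counts = torch.bincount(ys, minlength=len(vocab))
counts.argmax().item(), counts.max().item()/len(ys)   # 29 ('thousand'), ~0.15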
class LMModel2(Module):
def __init__(self, vocab_sz, n_hidden):
self.i_h = nn.Embedding(vocab_sz, n_hidden)
self.h_h = nn.Linear(n_hidden, n_hidden)
self.h_o = nn.Linear(n_hidden,vocab_sz)
def forward(self, x):
h = 0
for i in range(3):
h = h + self.i_h(x[:,i])
h = F.relu(self.h_h(h))
return self.h_o(h)
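LMModel2 is exactly the same computation as LMModel1, just written as a loop so it works for any number of input tokens. One way to convince yourself of that (a hypothetical check, assuming both classes and the `dls` above) is to copy the weights across and compare outputs:
m1, m2 = LMModel1(len(vocab), 64), LMModel2(len(vocab), 64)
m2.load_state_dict(m1.state_dict())   # same layers, same parameter names
x, y = dls.one_batch()
assert torch.equal(m1(x), m2(x))      # identical activations, batch for batch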
learn = Learner(dls, LMModel2(len(vocab), 64), loss_func=F.cross_entropy, metrics=accuracy)
learn.fit_one_cycle(4, 1e-3)
| epoch | train_loss | valid_loss | accuracy | time |
|---|---|---|---|---|
| 0 | 1.816274 | 1.964143 | 0.460185 | 00:04 |
| 1 | 1.423805 | 1.739964 | 0.473259 | 00:05 |
| 2 | 1.430327 | 1.685172 | 0.485382 | 00:05 |
| 3 | 1.388390 | 1.657033 | 0.470406 | 00:05 |
class LMModel3(Module):
def __init__(self, vocab_sz, n_hidden):
self.i_h = nn.Embedding(vocab_sz, n_hidden)
self.h_h = nn.Linear(n_hidden, n_hidden)
self.h_o = nn.Linear(n_hidden,vocab_sz)
self.h = 0
    def forward(self, x):
        for i in range(3):
            # Unlike LMModel2, `self.h` persists between calls, so the
            # state carries over from one batch to the next.
            self.h = self.h + self.i_h(x[:,i])
            self.h = F.relu(self.h_h(self.h))
        out = self.h_o(self.h)
        # Keep the state's values but drop the gradient history, so
        # backprop only covers this batch (truncated BPTT).
        self.h = self.h.detach()
        return out

    def reset(self): self.h = 0
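LMModel3 keeps its hidden state across batches, so it also needs `reset` to zero the state before training and validation begin. The `detach` call is what makes this practical: it keeps the state's values but drops its gradient history, so backpropagation only reaches the current three tokens (truncated backpropagation through time). A minimal plain-PyTorch illustration:
h = torch.ones(1, requires_grad=True) * 2
h.requires_grad            # True: gradients would flow back through this op
h.detach().requires_grad   # False: the value survives, the history does not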
def group_chunks(ds, bs):
    m = len(ds) // bs
    new_ds = L()
    # Reorder the items so that, at each batch position, successive
    # batches read one contiguous stream of the original dataset.
    for i in range(m): new_ds += L(ds[i + m*j] for j in range(bs))
    return new_ds
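A toy illustration (assuming the `group_chunks` above): with eight items and `bs=2`, the dataset is cut into two contiguous streams that are read in parallel, one item from each stream per batch position, so each stream stays in order across successive batches.
group_chunks(L.range(8), bs=2)
# (#8) [0,4,1,5,2,6,3,7]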
cut = int(len(seqs) * 0.8)
dls = DataLoaders.from_dsets(group_chunks(seqs[:cut], bs), group_chunks(seqs[cut:], bs), bs=bs, drop_last=True, shuffle=False)
learn = Learner(dls, LMModel3(len(vocab), 64), loss_func=F.cross_entropy, metrics=accuracy, cbs=ModelReseter)
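`ModelReseter` is the fastai2 callback that calls the model's `reset` at the right moments, so the hidden state starts from zero for training and for validation. In spirit it is no more than this hypothetical minimal version (the event names here match early fastai2; the real callback ships with the library):
class SimpleReseter(Callback):
    def begin_train(self):    self.model.reset()   # zero state before training
    def begin_validate(self): self.model.reset()   # and again before validation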
learn.fit_one_cycle(10, 3e-3)
| epoch | train_loss | valid_loss | accuracy | time |
|---|---|---|---|---|
| 0 | 1.677074 | 1.827367 | 0.467548 | 00:06 |
| 1 | 1.282722 | 1.870913 | 0.388942 | 00:06 |
| 2 | 1.090705 | 1.651794 | 0.462500 | 00:05 |
| 3 | 1.005215 | 1.615990 | 0.515144 | 00:06 |
| 4 | 0.963020 | 1.605894 | 0.551202 | 00:06 |
| 5 | 0.926150 | 1.721608 | 0.543269 | 00:06 |
| 6 | 0.901529 | 1.650839 | 0.559375 | 00:05 |
| 7 | 0.829993 | 1.743913 | 0.569952 | 00:06 |
| 8 | 0.810508 | 1.746486 | 0.584135 | 00:06 |
| 9 | 0.795921 | 1.756200 | 0.582212 | 00:04 |
sl = 16
seqs = L((tensor(nums[i:i+sl]), tensor(nums[i+1:i+sl+1])) for i in range(0,len(nums)-sl-1,sl))
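Each element of `seqs` now pairs 16 input tokens with the 16 targets shifted one token to the right, so the model will predict the next word after every input word rather than only after every third one:
[L(vocab[o] for o in s) for s in seqs[0]]
# [(#16) ['one','.','two','.','three','.','four','.','five','.'...],
#  (#16) ['.','two','.','three','.','four','.','five','.','six'...]]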
cut = int(len(seqs) * 0.8)
dls = DataLoaders.from_dsets(group_chunks(seqs[:cut], bs), group_chunks(seqs[cut:], bs), bs=bs, drop_last=True, shuffle=False)
class LMModel4(Module):
def __init__(self, vocab_sz, n_hidden):
self.i_h = nn.Embedding(vocab_sz, n_hidden)
self.h_h = nn.Linear(n_hidden, n_hidden)
self.h_o = nn.Linear(n_hidden,vocab_sz)
self.h = 0
    def forward(self, x):
        outs = []
        for i in range(sl):
            self.h = self.h + self.i_h(x[:,i])
            self.h = F.relu(self.h_h(self.h))
            # Predict after every token, not just after the last one.
            outs.append(self.h_o(self.h))
        self.h = self.h.detach()
        # Shape [batch, sl, vocab_sz]: one prediction per input position.
        return torch.stack(outs, dim=1)

    def reset(self): self.h = 0
def loss_func(inp, targ): return F.cross_entropy(inp.view(-1, len(vocab)), targ.view(-1))
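LMModel4 returns activations of shape `[batch, sequence, vocab]`, but `F.cross_entropy` expects `[n, vocab]` inputs and `[n]` targets, which is why `loss_func` flattens both before comparing them. A shape check (assuming the `dls` and `LMModel4` above):
m = LMModel4(len(vocab), 64)
x, y = dls.one_batch()
out = m(x)
out.shape, out.view(-1, len(vocab)).shape, y.view(-1).shape
# (torch.Size([64, 16, 30]), torch.Size([1024, 30]), torch.Size([1024]))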
learn = Learner(dls, LMModel4(len(vocab), 64), loss_func=loss_func, metrics=accuracy, cbs=ModelReseter)
learn.fit_one_cycle(15, 3e-3)
| epoch | train_loss | valid_loss | accuracy | time |
|---|---|---|---|---|
| 0 | 3.285931 | 3.072032 | 0.212565 | 00:02 |
| 1 | 2.330371 | 1.969522 | 0.425781 | 00:02 |
| 2 | 1.742317 | 1.841378 | 0.441488 | 00:02 |
| 3 | 1.470120 | 1.810856 | 0.494303 | 00:02 |
| 4 | 1.298810 | 1.823129 | 0.492839 | 00:02 |
| 5 | 1.176840 | 1.755435 | 0.509033 | 00:02 |
| 6 | 1.070433 | 1.689250 | 0.517497 | 00:02 |
| 7 | 0.972999 | 1.867314 | 0.513021 | 00:02 |
| 8 | 0.896505 | 1.716296 | 0.582682 | 00:02 |
| 9 | 0.835817 | 1.673266 | 0.592285 | 00:02 |
| 10 | 0.782597 | 1.707047 | 0.580322 | 00:02 |
| 11 | 0.744230 | 1.719031 | 0.581299 | 00:02 |
| 12 | 0.710533 | 1.790540 | 0.593262 | 00:02 |
| 13 | 0.690307 | 1.801058 | 0.587565 | 00:02 |
| 14 | 0.678195 | 1.765376 | 0.600179 | 00:02 |