from fastai.text import * # Quick access to NLP functionality
An example of creating a language model and then transferring it to a classifier.
path = untar_data(URLs.IMDB_SAMPLE)
path
PosixPath('/home/sgugger/.fastai/data/imdb_sample')
Open and view the independent and dependent variables:
df = pd.read_csv(path/'texts.csv')
df.head()
| | label | text | is_valid |
|---|---|---|---|
| 0 | negative | Un-bleeping-believable! Meg Ryan doesn't even ... | False |
| 1 | positive | This is a extremely well-made film. The acting... | False |
| 2 | negative | Every once in a long while a movie will come a... | False |
| 3 | positive | Name just says it all. I watched this movie wi... | False |
| 4 | negative | This movie succeeds at being one of the most u... | False |
Create a DataBunch for the language model and another for the classifier:
data_lm = TextLMDataBunch.from_csv(path, 'texts.csv')
data_clas = TextClasDataBunch.from_csv(path, 'texts.csv', vocab=data_lm.train_ds.vocab, bs=42)  # reuse the LM vocab so token ids match
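To sanity-check the tokenization and labels, we can peek at a processed batch from each DataBunch (show_batch is part of the fastai v1 API):
data_lm.show_batch()    # tokenized text ready for language modeling
data_clas.show_batch()  # (text, label) pairs for the classifier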
We'll fine-tune the language model. fast.ai has a pre-trained English model available that we can download; we just have to specify it like this:
moms = (0.8,0.7)  # momentum range for the one-cycle schedule
learn = language_model_learner(data_lm, AWD_LSTM)
learn.unfreeze()
learn.fit_one_cycle(4, slice(1e-2), moms=moms)
| epoch | train_loss | valid_loss | accuracy | time |
|---|---|---|---|---|
| 0 | 4.427422 | 3.872353 | 0.290179 | 00:04 |
| 1 | 4.153738 | 3.806826 | 0.294167 | 00:04 |
| 2 | 3.835191 | 3.787578 | 0.295491 | 00:04 |
| 3 | 3.566909 | 3.791415 | 0.296696 | 00:04 |
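The fine-tuned language model can already generate text, which makes for a quick sanity check; LanguageLearner.predict in fastai v1 takes a prompt and a number of words to generate (both chosen arbitrarily here):
learn.predict("This movie is about", n_words=10)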
Save our language model's encoder (the part the classifier will reuse):
learn.save_encoder('enc')
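This writes the encoder weights to the learner's models directory; assuming the default path and model_dir, a quick check that the file exists:
(path/'models'/'enc.pth').exists()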
Fine-tune it to create a classifier:
learn = text_classifier_learner(data_clas, AWD_LSTM)
learn.load_encoder('enc')  # start from the fine-tuned language model's encoder
learn.fit_one_cycle(4, moms=moms)
| epoch | train_loss | valid_loss | accuracy | time |
|---|---|---|---|---|
| 0 | 0.676608 | 0.588255 | 0.791045 | 00:05 |
| 1 | 0.640127 | 0.512341 | 0.796020 | 00:05 |
| 2 | 0.583452 | 0.452867 | 0.796020 | 00:05 |
| 3 | 0.550518 | 0.450967 | 0.786070 | 00:05 |
learn.save('stage1-clas')
learn.unfreeze()
learn.fit_one_cycle(8, slice(1e-5,1e-3), moms=moms)  # discriminative learning rates: smaller for early layers, larger for the head
| epoch | train_loss | valid_loss | accuracy | time |
|---|---|---|---|---|
| 0 | 0.502358 | 0.430561 | 0.800995 | 00:08 |
| 1 | 0.475305 | 0.442821 | 0.796020 | 00:08 |
| 2 | 0.468599 | 0.427160 | 0.805970 | 00:07 |
| 3 | 0.462368 | 0.384489 | 0.845771 | 00:08 |
| 4 | 0.462626 | 0.379667 | 0.845771 | 00:07 |
| 5 | 0.444405 | 0.380510 | 0.835821 | 00:07 |
| 6 | 0.422357 | 0.372341 | 0.860696 | 00:08 |
| 7 | 0.416024 | 0.380486 | 0.830846 | 00:07 |
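With training done, the classifier can score new text; Learner.predict returns the predicted category, its index, and the class probabilities (the review below is just an illustration):
learn.predict("I really loved that movie, it was awesome!")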
learn.save('stage2-clas')
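To see that the two saved checkpoints really contain different weights, reload stage1-clas and inspect the parameters of the last RNN layer: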
learn.load('stage1-clas');
list(learn.model[0].module.rnns[-1].parameters())
[Parameter containing:
tensor([[ 0.0739, 0.0123, 0.0579, ..., 0.0617, 0.0304, 0.0275],
[-0.0487, 0.0562, 0.0039, ..., 0.0523, -0.0193, 0.0294],
[-0.0179, -0.1158, 0.1973, ..., 0.0256, -0.0063, -0.0337],
...,
[ 0.0111, -0.0447, -0.0007, ..., -0.0460, -0.0016, 0.0070],
[-0.0272, 0.0378, 0.0377, ..., -0.0205, 0.1363, -0.0199],
[-0.0088, -0.0115, -0.0832, ..., -0.0684, 0.1311, -0.0668]],
device='cuda:0', requires_grad=True), Parameter containing:
tensor([[-0.0664, -0.1465, -0.0776, ..., 0.1152, 0.0886, 0.0717],
[ 0.0177, 0.1248, -0.0452, ..., -0.0159, -0.0884, -0.0310],
[ 0.0828, -0.0289, -0.0932, ..., 0.1441, 0.1289, 0.0946],
...,
[-0.1123, -0.0756, 0.3082, ..., -0.0644, -0.0201, 0.0431],
[ 0.0530, 0.0738, 0.0781, ..., 0.0096, 0.2213, -0.0149],
[ 0.2115, -0.0221, 0.0563, ..., -0.2186, 0.0302, -0.0570]],
device='cuda:0', requires_grad=True), Parameter containing:
tensor([0.2707, 0.1091, 0.1264, ..., 0.2405, 0.1249, 0.3700], device='cuda:0',
requires_grad=True), Parameter containing:
tensor([0.2707, 0.1091, 0.1264, ..., 0.2405, 0.1249, 0.3700], device='cuda:0',
requires_grad=True)]
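Now load the stage2-clas checkpoint and look at the same layer; the values have changed: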
learn.load('stage2-clas');
list(learn.model[0].module.rnns[-1].named_parameters())
[('weight_hh_l0_raw', Parameter containing:
tensor([[ 7.0162e-02, 1.2104e-02, 5.8028e-02, ..., 6.0046e-02,
3.0242e-02, 2.7227e-02],
[-4.6466e-02, 5.5549e-02, 5.8688e-03, ..., 5.3150e-02,
-1.5318e-02, 2.5991e-02],
[-1.2340e-02, -1.1872e-01, 1.9423e-01, ..., 2.8058e-02,
-1.0008e-02, -3.6459e-02],
...,
[ 1.4048e-02, -4.4230e-02, -1.3452e-03, ..., -4.7389e-02,
-2.4187e-05, 5.1805e-03],
[-2.0523e-02, 3.7637e-02, 4.2203e-02, ..., -2.0026e-02,
1.4004e-01, -1.9072e-02],
[-8.9586e-03, -6.4311e-03, -8.2364e-02, ..., -6.6814e-02,
1.3448e-01, -6.9005e-02]], device='cuda:0', requires_grad=True)),
('module.weight_ih_l0', Parameter containing:
tensor([[-0.0669, -0.1461, -0.0792, ..., 0.1177, 0.0882, 0.0660],
[ 0.0178, 0.1236, -0.0439, ..., -0.0163, -0.0862, -0.0382],
[ 0.0770, -0.0259, -0.0918, ..., 0.1413, 0.1248, 0.0883],
...,
[-0.1126, -0.0728, 0.3100, ..., -0.0681, -0.0215, 0.0429],
[ 0.0536, 0.0708, 0.0750, ..., 0.0100, 0.2192, -0.0167],
[ 0.2099, -0.0273, 0.0531, ..., -0.2179, 0.0290, -0.0598]],
device='cuda:0', requires_grad=True)),
('module.bias_ih_l0', Parameter containing:
tensor([0.2622, 0.0981, 0.1163, ..., 0.2334, 0.1223, 0.3612], device='cuda:0',
requires_grad=True)),
('module.bias_hh_l0', Parameter containing:
tensor([0.2622, 0.0981, 0.1163, ..., 0.2334, 0.1223, 0.3612], device='cuda:0',
requires_grad=True))]
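The raw listings are hard to compare by eye; here is a minimal sketch that checks the two checkpoints differ programmatically (assuming the same model layout as above; torch comes in with the fastai import):
learn.load('stage1-clas')
w1 = next(learn.model[0].module.rnns[-1].parameters()).detach().clone()  # snapshot a stage-1 weight
learn.load('stage2-clas')
w2 = next(learn.model[0].module.rnns[-1].parameters()).detach().clone()  # same weight after stage 2
torch.allclose(w1, w2)  # False: the second round of fine-tuning moved the weights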