# ULMFiT on IMDB, part 1: explore the sample data, fine-tune a language
# model on the full corpus, then build the classifier data and learner that
# reuse the language model's vocabulary.
#
# NOTE: bare expressions such as `path.ls()` or `df.head()` are notebook
# display cells; they are kept so the notebook output is unchanged, but they
# print nothing when this runs as a plain script.

# Notebook-only setup: enable inline matplotlib output when running under
# IPython. A plain `python` run has no `get_ipython` builtin and skips this.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

from fastai2.text.all import *
from nbdev.showdoc import *

# --- Small IMDB sample: a quick look at the data ---------------------------
path = untar_data(URLs.IMDB_SAMPLE)
path.ls()

df = pd.read_csv(path/'texts.csv')
df.head()
df['text'][1]

# Language-model dataloaders straight from the dataframe; the train/valid
# split comes from the `is_valid` column.
dbunch_lm = TextDataLoaders.from_df(
    df,
    text_col='text',
    label_col='label',
    path=path,
    is_lm=True,
    valid_col='is_valid',
)
dbunch_lm.train_ds[0]
dbunch_lm.vocab[:20]
dbunch_lm.show_batch()

# Language-model dataloaders built again through the DataBlock API, this
# time with a random split instead of the `is_valid` column.
imdb_lm = DataBlock(
    blocks=(TextBlock.from_df('text', is_lm=True),),
    get_x=ColReader('text'),
    splitter=RandomSplitter(),
)
dbunch_lm = imdb_lm.dataloaders(df)

# --- Full IMDB dataset: language-model fine-tuning -------------------------
bs = 128
path = untar_data(URLs.IMDB)
path.ls()
(path/'train').ls()

# The language model reads text from all three folders ('train', 'test' and
# 'unsup') and holds out a random 10% for validation.
imdb_lm = DataBlock(
    blocks=(TextBlock.from_folder(path, is_lm=True),),
    get_items=partial(get_text_files, folders=['train', 'test', 'unsup']),
    splitter=RandomSplitter(0.1),
)
dbunch_lm = imdb_lm.dataloaders(path, path=path, bs=bs, seq_len=80)
dbunch_lm.show_batch()
len(dbunch_lm.vocab)

learn = language_model_learner(
    dbunch_lm,
    AWD_LSTM,
    drop_mult=0.3,
    metrics=[accuracy, Perplexity()],
).to_fp16()
learn.lr_find()
learn.recorder.plot_lr_find(skip_end=15)

# Stage 1: one epoch before unfreezing (presumably only the head is
# trainable at this point -- the checkpoint name suggests so; confirm
# against the fastai docs).
learn.fit_one_cycle(1, 2e-2, moms=(0.8, 0.7, 0.8))
learn.save('fit_head')
learn.load('fit_head');  # trailing `;` suppresses notebook cell output

# Stage 2: unfreeze and train for ten epochs at a lower learning rate.
learn.unfreeze()
learn.fit_one_cycle(10, 2e-3, moms=(0.8, 0.7, 0.8))
learn.save('fine_tuned')
learn.load('fine_tuned');

# --- Sample a few continuations from the fine-tuned language model ---------
TEXT = "I liked this movie because"
N_WORDS = 40
N_SENTENCES = 2
print("\n".join(learn.predict(TEXT, N_WORDS, temperature=0.75)
                for _ in range(N_SENTENCES)))

# Save the encoder so the classifier learner can load it later.
learn.save_encoder('fine_tuned_enc')


# --- Classifier data and learner -------------------------------------------
def read_tokenized_file(f):
    """Return the contents of `f` split on single spaces, wrapped in an `L`.

    `f` must expose a `.read()` method returning text (presumably a `Path`
    with fastcore's patched `read` -- confirm against the installed version).
    """
    return L(f.read().split(' '))


# The classifier reuses the language model's vocabulary. Labels come from
# the parent folder name, and the 'test' folder is the validation set.
imdb_clas = DataBlock(
    blocks=(TextBlock.from_folder(path, vocab=dbunch_lm.vocab), CategoryBlock),
    get_x=read_tokenized_file,
    get_y=parent_label,
    get_items=partial(get_text_files, folders=['train', 'test']),
    splitter=GrandparentSplitter(valid_name='test'),
)
dbunch_clas = imdb_clas.dataloaders(path, path=path, bs=bs, seq_len=80)
dbunch_clas.show_batch()

learn = text_classifier_learner(
    dbunch_clas,
    AWD_LSTM,
    drop_mult=0.5,
    metrics=accuracy,
).to_fp16()
# ULMFiT on IMDB, part 2: fine-tune the text classifier with gradual
# unfreezing. Expects `learn` to be the text classifier learner created
# earlier in this file.

# Momentum schedule passed to every `fit_one_cycle` call below.
MOMS = (0.8, 0.7, 0.8)
# Ratio between the highest and lowest learning rate in each `slice` below.
LR_DIV = 2.6 ** 4

# Start from the encoder saved after language-model fine-tuning.
learn.load_encoder('fine_tuned_enc')
learn.lr_find()

# Stage 1: train with the learner in its initial (not yet unfrozen) state.
learn.fit_one_cycle(1, 2e-2, moms=MOMS)
learn.save('first')
learn.load('first');  # trailing `;` suppresses notebook cell output

# Stage 2: freeze up to the last two parameter groups.
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(1e-2 / LR_DIV, 1e-2), moms=MOMS)
learn.save('second')
learn.load('second');

# Stage 3: freeze up to the last three parameter groups.
learn.freeze_to(-3)
learn.fit_one_cycle(1, slice(5e-3 / LR_DIV, 5e-3), moms=MOMS)
learn.save('third')
learn.load('third');

# Stage 4: unfreeze everything and train for two more epochs.
learn.unfreeze()
learn.fit_one_cycle(2, slice(1e-3 / LR_DIV, 1e-3), moms=MOMS)

# Sanity check on a hand-written review (displayed as notebook output).
learn.predict("I really loved that movie , it was awesome !")