# Script export of a documentation notebook for the fastai text data API.
# Each group of statements below corresponds to one notebook cell. Bare
# expressions (e.g. `path`, `texts`) are kept as-is: in a notebook they display
# the value of the cell, in a plain script they are harmless no-ops.

# The star-imports are kept in their original order (including the repeated
# nbdoc import) because later star-imports may rebind names from earlier ones.
from fastai.gen_doc.nbdoc import *
from fastai.text import *
from fastai.gen_doc.nbdoc import *
from fastai import *

# --- Data bunch classes ------------------------------------------------------
show_doc(TextLMDataBunch, title_level=3, doc_string=False)
show_doc(TextLMDataBunch.show_batch)
show_doc(TextClasDataBunch, title_level=3, doc_string=False)
show_doc(TextClasDataBunch.show_batch)
show_doc(TextDataBunch, title_level=3, doc_string=False)

# --- TextDataBunch factory methods and persistence ---------------------------
show_doc(TextDataBunch.from_folder, doc_string=False)
show_doc(TextDataBunch.from_csv, doc_string=False)
show_doc(TextDataBunch.from_df, doc_string=False)
show_doc(TextDataBunch.from_tokens, doc_string=False)
show_doc(TextDataBunch.from_ids, doc_string=False)
show_doc(TextDataBunch.load)
show_doc(TextDataBunch.save)

# --- Example: build data bunches from the IMDB sample CSV --------------------
path = untar_data(URLs.IMDB_SAMPLE)
path
pd.read_csv(path/'texts.csv').head()
data_lm = TextLMDataBunch.from_csv(Path(path), 'texts.csv')
data_clas = TextClasDataBunch.from_csv(Path(path), 'texts.csv')

# --- Text item and TextList --------------------------------------------------
show_doc(Text, doc_string=False, title_level=3)
show_doc(Text.show_xys)
show_doc(Text.show_xyzs)
show_doc(TextList, title_level=3)
show_doc(TextList.label_for_lm)
show_doc(TextList.from_folder)

# --- Processors --------------------------------------------------------------
show_doc(OpenFileProcessor, title_level=3)
show_doc(open_text)
show_doc(TokenizeProcessor, title_level=3)
show_doc(NumericalizeProcessor, title_level=3, doc_string=False)

# --- Example: inspect a language-model batch ---------------------------------
path = untar_data(URLs.IMDB_SAMPLE)
data = TextLMDataBunch.from_csv(path, 'texts.csv')
x, y = next(iter(data.train_dl))
# Take the [:20, :10] corner of the first batch and move it to the CPU.
example = x[:20, :10].cpu()
# Convert each row of ids back to text with the training vocab, split on
# single spaces, and tabulate the pieces in a DataFrame for display.
texts = pd.DataFrame(
    [data.train_ds.vocab.textify(token_ids).split(' ') for token_ids in example]
)
texts

# Print the size of five consecutive training batches.
# NOTE(review): the collapsed export does not show whether `print` belonged to
# the loop body; it is placed inside so every fetched batch is reported.
iter_dl = iter(data.train_dl)
for _ in range(5):
    x, y = next(iter_dl)
    print(x.size())

# --- Language model loader ---------------------------------------------------
show_doc(LanguageModelLoader, doc_string=False)
show_doc(LanguageModelLoader.batchify, doc_string=False)
show_doc(LanguageModelLoader.get_batch)

# --- Example: inspect a classifier batch -------------------------------------
path = untar_data(URLs.IMDB_SAMPLE)
data = TextClasDataBunch.from_csv(path, 'texts.csv')
iter_dl = iter(data.train_dl)
_ = next(iter_dl)  # discard the first batch; the second one is displayed
x, y = next(iter_dl)
x[:20, -10:]

# --- Samplers and collation --------------------------------------------------
show_doc(SortSampler, doc_string=False)
show_doc(SortishSampler, doc_string=False)
show_doc(pad_collate, doc_string=False)

# --- Remaining show_doc cells ------------------------------------------------
# `Text` and `TextList.from_folder` are shown a second time here; the repeats
# are kept so the generated output is unchanged.
show_doc(TextLMDataBunch.create)
show_doc(TextClasDataBunch.create)
show_doc(TextList.new)
show_doc(TextList.get)
show_doc(TokenizeProcessor.process_one)
show_doc(TokenizeProcessor.process)
show_doc(OpenFileProcessor.process_one)
show_doc(NumericalizeProcessor.process)
show_doc(NumericalizeProcessor.process_one)
show_doc(Text)
show_doc(TextList.from_folder)