# all_slow
#hide
from fastai2.data.all import *
from nbdev.showdoc import show_doc

from fastai2.vision.all import *

# ---------------------------------------------------------------------------
# Building a DataBlock step by step on the PETS dataset
# ---------------------------------------------------------------------------
path = untar_data(URLs.PETS)
fnames = get_image_files(path/"images")

# Step 1: a DataBlock with no arguments, fed the list of file names directly.
dblock = DataBlock()
dsets = dblock.datasets(fnames)
dsets.train[0]

# Step 2: let the DataBlock collect the items itself from a folder.
dblock = DataBlock(get_items=get_image_files)
dsets = dblock.datasets(path/"images")
dsets.train[0]


def label_func(fname):
    """Return "cat" if the first character of `fname.name` is uppercase, else "dog"."""
    return "cat" if fname.name[0].isupper() else "dog"


# Step 3: add a labelling function for the target.
dblock = DataBlock(get_items=get_image_files,
                   get_y=label_func)
dsets = dblock.datasets(path/"images")
dsets.train[0]

# Step 4: declare the input/target types through `blocks`.
dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),
                   get_items=get_image_files,
                   get_y=label_func)
dsets = dblock.datasets(path/"images")
dsets.train[0]
dsets.vocab

# Step 5: add an explicit splitter.
dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),
                   get_items=get_image_files,
                   get_y=label_func,
                   splitter=RandomSplitter())
dsets = dblock.datasets(path/"images")
dsets.train[0]

# Step 6: add an item transform and build DataLoaders instead of Datasets.
dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),
                   get_items=get_image_files,
                   get_y=label_func,
                   splitter=RandomSplitter(),
                   item_tfms=Resize(224))
dls = dblock.dataloaders(path/"images")
dls.show_batch()

# Same definition as step 6, repeated as the final form of the block.
dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),
                   get_items=get_image_files,
                   get_y=label_func,
                   splitter=RandomSplitter(),
                   item_tfms=Resize(224))

# ---------------------------------------------------------------------------
# Image classification
# ---------------------------------------------------------------------------
from fastai2.vision.all import *

# MNIST_TINY: labels come from the parent folder name, the split from the
# grandparent folder name.
mnist = DataBlock(blocks=(ImageBlock(cls=PILImageBW), CategoryBlock),
                  get_items=get_image_files,
                  splitter=GrandparentSplitter(),
                  get_y=parent_label)
dls = mnist.dataloaders(untar_data(URLs.MNIST_TINY))
dls.show_batch(max_n=9, figsize=(4,4))
mnist.summary(untar_data(URLs.MNIST_TINY))

# PETS: the label is extracted from the file name with a regex.
# The dot before "jpg" is escaped so that it only matches a literal ".".
pets = DataBlock(blocks=(ImageBlock, CategoryBlock),
                 get_items=get_image_files,
                 splitter=RandomSplitter(),
                 get_y=Pipeline([attrgetter("name"),
                                 RegexLabeller(pat=r'^(.*)_\d+\.jpg$')]),
                 item_tfms=Resize(128),
                 batch_tfms=aug_transforms())
dls = pets.dataloaders(untar_data(URLs.PETS)/"images")
dls.show_batch(max_n=9)

# ---------------------------------------------------------------------------
# Multi-label classification on PASCAL_2007, driven by train.csv
# ---------------------------------------------------------------------------
pascal_source = untar_data(URLs.PASCAL_2007)
df = pd.read_csv(pascal_source/"train.csv")
df.head()

# Variant 1: read columns by position with ColReader.
pascal = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),
                   splitter=ColSplitter(),
                   get_x=ColReader(0, pref=pascal_source/"train"),
                   get_y=ColReader(1, label_delim=' '),
                   item_tfms=Resize(224),
                   batch_tfms=aug_transforms())
dls = pascal.dataloaders(df)
dls.show_batch()

# Variant 2: the same getters written as lambdas indexing the row by position.
pascal = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),
                   splitter=ColSplitter(),
                   get_x=lambda x:pascal_source/"train"/f'{x[0]}',
                   get_y=lambda x:x[1].split(' '),
                   item_tfms=Resize(224),
                   batch_tfms=aug_transforms())
dls = pascal.dataloaders(df)
dls.show_batch()

# Variant 3: lambdas accessing the row by attribute (`fname`, `labels`).
pascal = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),
                   splitter=ColSplitter(),
                   get_x=lambda o:f'{pascal_source}/train/'+o.fname,
                   get_y=lambda o:o.labels.split(),
                   item_tfms=Resize(224),
                   batch_tfms=aug_transforms())
dls = pascal.dataloaders(df)
dls.show_batch()


def _pascal_items(x):
    """Return a `(paths, labels)` pair built from the `fname` and `labels` attributes of `x`.

    NOTE(review): the use of `.str.split()` suggests `x` is the whole
    DataFrame rather than a single row -- confirm against `from_columns`.
    """
    return (
        f'{pascal_source}/train/'+x.fname, x.labels.str.split())


# Variant 4: column-wise item getter plus an explicit list of validation indices.
valid_idx = df[df['is_valid']].index.values

pascal = DataBlock.from_columns(blocks=(ImageBlock, MultiCategoryBlock),
                                get_items=_pascal_items,
                                splitter=IndexSplitter(valid_idx),
                                item_tfms=Resize(224),
                                batch_tfms=aug_transforms())
dls = pascal.dataloaders(df)
dls.show_batch()

# ---------------------------------------------------------------------------
# Segmentation on CAMVID_TINY
# ---------------------------------------------------------------------------
path = untar_data(URLs.CAMVID_TINY)

# The mask for image `<stem><suffix>` lives at `labels/<stem>_P<suffix>`;
# the class codes are read from codes.txt.
camvid = DataBlock(blocks=(ImageBlock,
                           MaskBlock(codes=np.loadtxt(path/'codes.txt', dtype=str))),
                   get_items=get_image_files,
                   splitter=RandomSplitter(),
                   get_y=lambda o: path/'labels'/f'{o.stem}_P{o.suffix}',
                   batch_tfms=aug_transforms())
dls = camvid.dataloaders(path/"images")
dls.show_batch()

# ---------------------------------------------------------------------------
# Points on BIWI_SAMPLE
# ---------------------------------------------------------------------------
biwi_source = untar_data(URLs.BIWI_SAMPLE)
# Mapping from file name to its center point, loaded from centers.pkl.
fn2ctr = (biwi_source/'centers.pkl').load()

# The stored point is flipped along dimension 0 before being used as target.
biwi = DataBlock(blocks=(ImageBlock, PointBlock),
                 get_items=get_image_files,
                 splitter=RandomSplitter(),
                 get_y=lambda o:fn2ctr[o.name].flip(0),
                 batch_tfms=aug_transforms())
dls = biwi.dataloaders(biwi_source)
dls.show_batch(max_n=9)

# ---------------------------------------------------------------------------
# Bounding boxes on COCO_TINY (annotations)
# ---------------------------------------------------------------------------
coco_source = untar_data(URLs.COCO_TINY)
images, lbl_bbox = get_annotations(coco_source/'train.json')
# Map each image file name to its annotation entry.
img2bbox = dict(zip(images, lbl_bbox))
# ---------------------------------------------------------------------------
# Bounding boxes on COCO_TINY (DataBlock)
# ---------------------------------------------------------------------------
# `coco_source` and `img2bbox` are defined earlier in the file.
# Two getters are passed to `get_y`: element 0 and element 1 of the annotation
# entry for the image; `n_inp=1` is passed alongside the three blocks.
coco = DataBlock(blocks=(ImageBlock, BBoxBlock, BBoxLblBlock),
                 get_items=get_image_files,
                 splitter=RandomSplitter(),
                 get_y=[lambda o: img2bbox[o.name][0],
                        lambda o: img2bbox[o.name][1]],
                 item_tfms=Resize(128),
                 batch_tfms=aug_transforms(),
                 n_inp=1)
dls = coco.dataloaders(coco_source)
dls.show_batch(max_n=9)

# ---------------------------------------------------------------------------
# Text on IMDB_SAMPLE
# ---------------------------------------------------------------------------
from fastai2.text.all import *

path = untar_data(URLs.IMDB_SAMPLE)
df = pd.read_csv(path/'texts.csv')
df.head()

# Language model: a single text block created with `is_lm=True`.
imdb_lm = DataBlock(blocks=TextBlock.from_df('text', is_lm=True),
                    get_x=ColReader('text'),
                    splitter=ColSplitter())
dls = imdb_lm.dataloaders(df, bs=64, seq_len=72)
dls.show_batch(max_n=6)

# Classifier: reuses the vocab of the language-model DataLoaders built above.
imdb_clas = DataBlock(blocks=(TextBlock.from_df('text', seq_len=72, vocab=dls.vocab),
                              CategoryBlock),
                      get_x=ColReader('text'),
                      get_y=ColReader('label'),
                      splitter=ColSplitter())
dls = imdb_clas.dataloaders(df, bs=64)
dls.show_batch()

# ---------------------------------------------------------------------------
# Tabular on ADULT_SAMPLE (uses TabularPandas rather than DataBlock)
# ---------------------------------------------------------------------------
from fastai2.tabular.core import *

adult_source = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(adult_source/'adult.csv')
df.head()

cat_names = ['workclass', 'education', 'marital-status', 'occupation',
             'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
procs = [Categorify, FillMissing, Normalize]
# Random train/validation split over the row indices of `df`.
splits = RandomSplitter()(range_of(df))

to = TabularPandas(df, procs, cat_names, cont_names,
                   y_names="salary",
                   splits=splits,
                   y_block=CategoryBlock)
dls = to.dataloaders()
dls.show_batch()