from fastai.vision import *

# Set `folder`/`file` to one of the following pairs at a time, then run the
# download cell below; repeat for each class.
folder = 'black'
file = 'urls_black.csv'

folder = 'teddys'
file = 'urls_teddys.csv'

folder = 'grizzly'
file = 'urls_grizzly.csv'

path = Path('data/bears')
dest = path/folder
dest.mkdir(parents=True, exist_ok=True)
path.ls()

classes = ['teddys','grizzly','black']
download_images(path/file, dest, max_pics=200)

# If you have problems downloading, try with `max_workers=0` to see exceptions:
download_images(path/file, dest, max_pics=20, max_workers=0)

# Delete any images that cannot be opened, and resize the rest to at most 500px.
for c in classes:
    print(c)
    verify_images(path/c, delete=True, max_size=500)
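# A quick sanity check (not in the original notebook): count how many verified
# images each class folder ended up with, so a failed download is easy to spot.
# `Path.ls()` is fastai's convenience method returning the folder's contents.
for c in classes:
    print(c, len((path/c).ls()))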

np.random.seed(42)
data = ImageDataBunch.from_folder(path, train=".", valid_pct=0.2,
                                  ds_tfms=get_transforms(), size=224,
                                  num_workers=4).normalize(imagenet_stats)

# If you already cleaned your data, run this cell instead of the one before:
# np.random.seed(42)
# data = ImageDataBunch.from_csv(path, folder=".", valid_pct=0.2, csv_labels='cleaned.csv',
#                                ds_tfms=get_transforms(), size=224,
#                                num_workers=4).normalize(imagenet_stats)

data.classes
data.show_batch(rows=3, figsize=(7,8))
data.classes, data.c, len(data.train_ds), len(data.valid_ds)

learn = cnn_learner(data, models.resnet34, metrics=error_rate)
learn.fit_one_cycle(4)
learn.save('stage-1')

learn.unfreeze()
learn.lr_find()
learn.recorder.plot()
learn.fit_one_cycle(2, max_lr=slice(3e-5,3e-4))
learn.save('stage-2')
learn.load('stage-2');

interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()
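# When the confusion matrix is cluttered, `most_confused` gives the same
# information as a sorted list of (actual, predicted, count) triples; the
# min_val threshold below is an illustrative choice, not from the notebook.
interp.most_confused(min_val=2)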

from fastai.widgets import *

db = (ImageList.from_folder(path)
               .no_split()
               .label_from_folder()
               .transform(get_transforms(), size=224)
               .databunch()
     )

# If you already cleaned your data using indexes from `from_toplosses`,
# run this cell instead of the one before to proceed with removing duplicates.
# Otherwise all the results of the previous step would be overwritten by
# the new run of `ImageCleaner`. Uncomment the code below to run it:
# db = (ImageList.from_csv(path, 'cleaned.csv', folder='.')
#                .no_split()
#                .label_from_df()
#                .transform(get_transforms(), size=224)
#                .databunch()
#      )

learn_cln = cnn_learner(db, models.resnet34, metrics=error_rate)
learn_cln.load('stage-2');

# Relabel or delete the top-loss images in a widget, then remove duplicates.
ds, idxs = DatasetFormatter().from_toplosses(learn_cln)
ImageCleaner(ds, idxs, path)

ds, idxs = DatasetFormatter().from_similars(learn_cln)
ImageCleaner(ds, idxs, path, duplicates=True)
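# ImageCleaner does not delete anything on disk; it records your decisions in
# path/'cleaned.csv'. A quick, hedged check of how many images survived
# (assumes pandas is available as `pd`, which fastai v1's star import provides):
df = pd.read_csv(path/'cleaned.csv')
len(df)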

learn.export()

# Inference runs fine on CPU:
defaults.device = torch.device('cpu')

img = open_image(path/'black'/'00000021.jpg')
img

learn = load_learner(path)
pred_class,pred_idx,outputs = learn.predict(img)
pred_class
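# `outputs` holds one probability per class. A small sketch (assuming the
# exported learner keeps its class list in `learn.data.classes`, as fastai v1
# does) pairing each class name with its predicted probability:
dict(zip(learn.data.classes, map(float, outputs)))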

# Learning rate (LR) too high: a deliberately huge max_lr makes the loss blow up.
learn = cnn_learner(data, models.resnet34, metrics=error_rate)
learn.fit_one_cycle(1, max_lr=0.5)

# Learning rate (LR) too low: the loss decreases, but very slowly.
learn = cnn_learner(data, models.resnet34, metrics=error_rate)
learn.fit_one_cycle(5, max_lr=1e-5)
learn.recorder.plot_losses()

# Too few epochs (training from scratch, so one epoch is clearly not enough):
learn = cnn_learner(data, models.resnet34, metrics=error_rate, pretrained=False)
learn.fit_one_cycle(1)

# Too many epochs: keep only 10% of the data for training, turn off
# augmentation, dropout (ps=0) and weight decay (wd=0), then train long
# enough to overfit.
np.random.seed(42)
data = ImageDataBunch.from_folder(path, train=".", valid_pct=0.9, bs=32,
                                  ds_tfms=get_transforms(do_flip=False, max_rotate=0,
                                                         max_zoom=1, max_lighting=0,
                                                         max_warp=0),
                                  size=224, num_workers=4).normalize(imagenet_stats)
learn = cnn_learner(data, models.resnet50, metrics=error_rate, ps=0, wd=0)
learn.unfreeze()
learn.fit_one_cycle(40, slice(1e-6,1e-4))
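# A hedged follow-up (same recorder call as used earlier): with the settings
# above, plot_losses should show training loss still falling while validation
# loss flattens or rises, the classic overfitting signature.
learn.recorder.plot_losses()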