# Example script: build a tabular DataBunch from the ADULT_SAMPLE dataset and
# fit a small tabular learner on it.
#
# NOTE(review): star imports are kept exactly as in the original; the names
# used below (untar_data, URLs, pd, FillMissing, Categorify, TabularDataBunch,
# to_np, get_tabular_learner, accuracy) are expected to come from them.
from fastai.gen_doc.nbdoc import *
from fastai.tabular.models import *
from fastai import *
from fastai.tabular import *

# Resolve the dataset location for URLs.ADULT_SAMPLE.
# Presumably this downloads/extracts the archive on first use - confirm
# against the fastai version in use.
path = untar_data(URLs.ADULT_SAMPLE)
path  # bare expression: only has a visible effect as notebook cell output

# Load the raw table; `path` supports the `/` operator for joining.
df = pd.read_csv(path/'adult.csv')
df.head()  # bare expression: only has a visible effect as notebook cell output

# Preprocessing transforms handed to the DataBunch factory below.
tfms = [FillMissing, Categorify]

# Hold out the last 2000 rows for validation; everything before them is used
# for training. `.copy()` gives each split its own frame instead of a slice
# of `df`.
train_df = df[:-2000].copy()
valid_df = df[-2000:].copy()

# Target column and the columns to be treated as categorical.
dep_var = '>=50k'
cat_names = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country',
]

data = TabularDataBunch.from_df(
    path,
    train_df,
    valid_df,
    dep_var,
    tfms=tfms,
    cat_names=cat_names,
)
# The comment must end on this line: when the script is collapsed onto a
# single line it swallows every statement that follows it.
print(data.train_ds.cont_names)  # `cont_names` defaults to: set(df)-set(cat_names)-{dep_var}

# Pull one batch from the training loader and show the first five entries of
# each part (categorical inputs, continuous inputs, targets).
(cat_x, cont_x), y = next(iter(data.train_dl))
for o in (cat_x, cont_x, y):
    print(to_np(o[:5]))

# Build the learner with hidden layers of 200 and 100 units and an explicit
# embedding size of 10 for 'native-country'.
# NOTE(review): presumably the other categorical columns fall back to the
# library's default embedding sizes - confirm.
learn = get_tabular_learner(
    data,
    layers=[200, 100],
    emb_szs={'native-country': 10},
    metrics=accuracy,
)

# NOTE(review): presumably one epoch with a maximum learning rate of 1e-2 -
# confirm against the fit_one_cycle signature of the installed fastai.
learn.fit_one_cycle(1, 1e-2)