from fastai2.tabular.all import *
Tabular data should be in a pandas `DataFrame`; here we load the Adult sample dataset from its CSV file.
# Fetch the Adult sample dataset and load its CSV into a DataFrame.
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')

# Column roles: the target column, the categorical inputs and the continuous inputs.
dep_var = 'salary'
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']

# Preprocessing steps handed to TabularPandas.
procs = [Categorify, FillMissing, Normalize]

# Row positions 800-999 are the indices given to IndexSplitter
# (per the fastai docs these form the validation split -- confirm against the installed version).
splits = IndexSplitter(list(range(800,1000)))(range_of(df))

# Pass dep_var instead of repeating the literal so the target column is named in one place.
to = TabularPandas(df, procs, cat_names, cont_names, y_names=dep_var, splits=splits)
dls = to.dataloaders(bs=64)
dls.show_batch()
| | workclass | education | marital-status | occupation | relationship | race | age_na | fnlwgt_na | education-num_na | age | fnlwgt | education-num | salary |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Private | Bachelors | Never-married | Machine-op-inspct | Not-in-family | Asian-Pac-Islander | False | False | False | 27.0 | 104457.001298 | 13.0 | <50k |
| 1 | Self-emp-not-inc | HS-grad | Never-married | Farming-fishing | Own-child | White | False | False | False | 20.0 | 306709.997905 | 9.0 | <50k |
| 2 | Private | Bachelors | Married-civ-spouse | Prof-specialty | Husband | White | False | False | False | 40.0 | 209547.000700 | 13.0 | >=50k |
| 3 | Private | Bachelors | Never-married | Prof-specialty | Not-in-family | White | False | False | False | 26.0 | 184120.000065 | 13.0 | <50k |
| 4 | Private | HS-grad | Married-civ-spouse | Adm-clerical | Husband | White | False | False | False | 38.0 | 248886.000709 | 9.0 | <50k |
| 5 | Private | HS-grad | Never-married | Machine-op-inspct | Not-in-family | Asian-Pac-Islander | False | False | False | 28.0 | 149769.001037 | 9.0 | <50k |
| 6 | Private | Bachelors | Married-civ-spouse | Exec-managerial | Wife | White | False | False | False | 40.0 | 225659.999761 | 13.0 | >=50k |
| 7 | Private | Some-college | Married-civ-spouse | Craft-repair | Husband | Asian-Pac-Islander | False | False | False | 27.0 | 100668.997583 | 10.0 | >=50k |
| 8 | Private | Masters | Married-civ-spouse | Exec-managerial | Husband | White | False | False | False | 46.0 | 55720.003421 | 14.0 | >=50k |
| 9 | ? | Assoc-acdm | Married-civ-spouse | ? | Wife | White | False | False | False | 35.0 | 144172.001567 | 12.0 | <50k |
# Build a tabular learner on the dataloaders with layers=[200, 100],
# reporting accuracy as the metric.
learn = tabular_learner(
    dls,
    layers=[200, 100],
    metrics=accuracy,
)
# Fit for a single epoch with a learning rate of 1e-2.
learn.fit(n_epoch=1, lr=1e-2)
| epoch | train_loss | valid_loss | accuracy | time |
|---|---|---|---|---|
| 0 | 0.372055 | 0.369126 | 0.840000 | 00:10 |
# Take the first row of the raw DataFrame (a pandas Series) and
# ask the learner for a prediction on that single record.
row = df.iloc[0]
learn.predict(row)