Tabular models

In [ ]:

from fastai2.tabular.all import *

Tabular data should be in a Pandas DataFrame.

In [ ]:

# Fetch the Adult sample dataset and read its CSV file into a DataFrame.
path = untar_data(URLs.ADULT_SAMPLE)
csv_path = path/'adult.csv'
df = pd.read_csv(csv_path)

In [ ]:

# Name of the target column.
dep_var = 'salary'

# Columns handled as categorical variables.
cat_names = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
]

# Columns handled as continuous variables.
cont_names = [
    'age',
    'fnlwgt',
    'education-num',
]

# Preprocessing steps handed to TabularPandas, in this order.
procs = [
    Categorify,
    FillMissing,
    Normalize,
]

In [ ]:

# Legacy snippet using the older `TabularList` API; kept commented out for reference only.
#test = TabularList.from_df(df.iloc[800:1000].copy(), path=path, cat_names=cat_names, cont_names=cont_names)

In [ ]:

# Rows 800..999 are passed to IndexSplitter as the held-out indices; the
# splitter is then applied to the full index range of `df`.
valid_idx = list(range(800, 1000))
splitter = IndexSplitter(valid_idx)
splits = splitter(range_of(df))

In [ ]:

#splits = (L(splits[0], use_list=True), L(splits[1], use_list=True))

In [ ]:

# Build the TabularPandas object from the DataFrame, the preprocessing steps,
# the categorical/continuous column lists and the row splits.
# The target is passed through `dep_var` (set alongside the column lists)
# instead of repeating the 'salary' literal, so the target name is defined in
# exactly one place.
to = TabularPandas(df, procs, cat_names, cont_names, y_names=dep_var, splits=splits)

In [ ]:

# Wrap the processed data in DataLoaders with a batch size of 64.
batch_size = 64
dls = to.dataloaders(bs=batch_size)

In [ ]:

# Display a sample of one batch as a table to sanity-check the data pipeline.
# The rendered table includes the `*_na` indicator columns next to the inputs.
dls.show_batch()

	workclass	education	marital-status	occupation	relationship	race	age_na	fnlwgt_na	education-num_na	age	fnlwgt	education-num	salary
0	Private	Bachelors	Never-married	Machine-op-inspct	Not-in-family	Asian-Pac-Islander	False	False	False	27.0	104457.001298	13.0	<50k
1	Self-emp-not-inc	HS-grad	Never-married	Farming-fishing	Own-child	White	False	False	False	20.0	306709.997905	9.0	<50k
2	Private	Bachelors	Married-civ-spouse	Prof-specialty	Husband	White	False	False	False	40.0	209547.000700	13.0	>=50k
3	Private	Bachelors	Never-married	Prof-specialty	Not-in-family	White	False	False	False	26.0	184120.000065	13.0	<50k
4	Private	HS-grad	Married-civ-spouse	Adm-clerical	Husband	White	False	False	False	38.0	248886.000709	9.0	<50k
5	Private	HS-grad	Never-married	Machine-op-inspct	Not-in-family	Asian-Pac-Islander	False	False	False	28.0	149769.001037	9.0	<50k
6	Private	Bachelors	Married-civ-spouse	Exec-managerial	Wife	White	False	False	False	40.0	225659.999761	13.0	>=50k
7	Private	Some-college	Married-civ-spouse	Craft-repair	Husband	Asian-Pac-Islander	False	False	False	27.0	100668.997583	10.0	>=50k
8	Private	Masters	Married-civ-spouse	Exec-managerial	Husband	White	False	False	False	46.0	55720.003421	14.0	>=50k
9	?	Assoc-acdm	Married-civ-spouse	?	Wife	White	False	False	False	35.0	144172.001567	12.0	<50k

In [ ]:

# Create the tabular learner on the DataLoaders, passing [200, 100] as the
# `layers` setting and tracking accuracy as the metric.
hidden_sizes = [200, 100]
learn = tabular_learner(dls, layers=hidden_sizes, metrics=accuracy)

In [ ]:

# Train for a single epoch (the log below shows only epoch 0).
# NOTE(review): the second positional argument, 1e-2, is presumably the
# learning rate -- confirm against Learner.fit.
learn.fit(1, 1e-2)

epoch	train_loss	valid_loss	accuracy	time
0	0.372055	0.369126	0.840000	00:10

Inference (TODO)

In [ ]:

# Take the first row of `df` as a single example to run inference on.
row = df.iloc[0]

In [ ]:

# Run the trained learner on the single example row.
learn.predict(row)

In [ ]: