This module contains the basic functions used by the other modules of the fastai library (it is split from torch_core, which contains the functions that require pytorch). Its documentation can safely be skipped on a first read, unless you want to know what a given function does.
from fastai.gen_doc.nbdoc import *
from fastai.core import *
default_cpus = min(16, num_cpus())
show_doc(has_arg)
Examples with two other fastai.core functions, checking whether each accepts an argument with the given name:
has_arg(download_url,'url')
True
has_arg(index_row,'x')
False
has_arg(index_row,'a')
True
show_doc(ifnone)
param,alt_param = None,5
ifnone(param,alt_param)
5
param,alt_param = None,[1,2,3]
ifnone(param,alt_param)
[1, 2, 3]
show_doc(is1d)
two_d_array = np.arange(12).reshape(6,2)
print( two_d_array )
print( is1d(two_d_array) )
[[ 0  1]
 [ 2  3]
 [ 4  5]
 [ 6  7]
 [ 8  9]
 [10 11]]
False
is1d(two_d_array.flatten())
True
show_doc(is_listy)
is_listy[source]
is_listy(x:Any) → bool
Check if x is a Collection; a tuple or list qualifies.
some_data = [1,2,3]
is_listy(some_data)
True
some_data = (1,2,3)
is_listy(some_data)
True
some_data = 1024
print( is_listy(some_data) )
False
print( is_listy( [some_data] ) )
True
some_data = dict([('a',1),('b',2),('c',3)])
print( some_data )
print( some_data.keys() )
{'a': 1, 'b': 2, 'c': 3}
dict_keys(['a', 'b', 'c'])
print( is_listy(some_data) )
print( is_listy(some_data.keys()) )
False
False
print( is_listy(list(some_data.keys())) )
True
show_doc(is_tuple)
is_tuple[source]
is_tuple(x:Any) → bool
Check if x is a tuple.
print( is_tuple( [1,2,3] ) )
False
print( is_tuple( (1,2,3) ) )
True
show_doc(arange_of)
arange_of([5,6,7])
array([0, 1, 2])
type(arange_of([5,6,7]))
numpy.ndarray
show_doc(array)
array[source]
array(a,dtype:type=None,**kwargs) → ndarray
Same as np.array but also handles generators. kwargs are passed to np.array with dtype.
array([1,2,3])
array([1, 2, 3])
Note that the generator is not reset after items are consumed from it, so the array call below has 5 fewer entries than it would if we had started from the beginning of the generator.
def data_gen():
i = 100.01
while i<200:
yield i
i += 1.
ex_data_gen = data_gen()
for _ in range(5):
print(next(ex_data_gen))
100.01
101.01
102.01
103.01
104.01
array(ex_data_gen)
array([105.01, 106.01, 107.01, 108.01, ..., 196.01, 197.01, 198.01, 199.01])
ex_data_gen_int = data_gen()
array(ex_data_gen_int,dtype=int) #Cast output to int array
array([100, 101, 102, 103, ..., 196, 197, 198, 199])
show_doc(arrays_split)
arrays_split[source]
arrays_split(mask:ndarray,*arrs:NPArrayableList) → SplitArrayList
Given arrs is [a,b,...] and a mask index, return [(a[mask],a[~mask]), (b[mask],b[~mask]), ...].
data_a = np.arange(15)
data_b = np.arange(15)[::-1]
mask_a = (data_a > 10)
print(data_a)
print(data_b)
print(mask_a)
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
[14 13 12 11 10  9  8  7  6  5  4  3  2  1  0]
[False False False False False False False False False False False  True  True  True  True]
arrays_split(mask_a,data_a)
[(array([11, 12, 13, 14]),), (array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),)]
np.vstack([data_a,data_b]).transpose().shape
(15, 2)
arrays_split(mask_a,np.vstack([data_a,data_b]).transpose()) #must match on dimension 0
[(array([[11, 3],
[12, 2],
[13, 1],
[14, 0]]),), (array([[ 0, 14],
[ 1, 13],
[ 2, 12],
[ 3, 11],
[ 4, 10],
[ 5, 9],
[ 6, 8],
[ 7, 7],
[ 8, 6],
[ 9, 5],
[10, 4]]),)]
show_doc(chunks)
You can transform a Collection into an Iterable of n-sized chunks by calling chunks:
data = [0,1,2,3,4,5,6,7,8,9]
for chunk in chunks(data, 2):
print(chunk)
[0, 1]
[2, 3]
[4, 5]
[6, 7]
[8, 9]
for chunk in chunks(data, 3):
print(chunk)
[0, 1, 2]
[3, 4, 5]
[6, 7, 8]
[9]
show_doc(df_names_to_idx)
df_names_to_idx[source]
df_names_to_idx(names:IntsOrStrs,df:DataFrame)
Return the column indexes of names in df.
ex_df = pd.DataFrame.from_dict({"a":[1,1,1],"b":[2,2,2]})
print(ex_df)
   a  b
0  1  2
1  1  2
2  1  2
df_names_to_idx('b',ex_df)
[1]
show_doc(extract_kwargs)
extract_kwargs[source]
extract_kwargs(names:StrList,kwargs:KWArgs)
Extract the keys in names from the kwargs.
key_word_args = {"a":2,"some_list":[1,2,3],"param":'mean'}
key_word_args
{'a': 2, 'some_list': [1, 2, 3], 'param': 'mean'}
(extracted_val,remainder) = extract_kwargs(['param'],key_word_args)
print( extracted_val,remainder )
{'param': 'mean'} {'a': 2, 'some_list': [1, 2, 3]}
show_doc(idx_dict)
idx_dict(['a','b','c'])
{'a': 0, 'b': 1, 'c': 2}
show_doc(index_row)
index_row[source]
index_row(a:Union[Collection[T_co],DataFrame,Series],idxs:Collection[int]) → Any
Return the slice of a corresponding to idxs.
data = [0,1,2,3,4,5,6,7,8,9]
index_row(data,4)
4
index_row(pd.Series(data),7)
7
data_df = pd.DataFrame([data[::-1],data]).transpose()
data_df
|   | 0 | 1 |
|---|---|---|
| 0 | 9 | 0 |
| 1 | 8 | 1 |
| 2 | 7 | 2 |
| 3 | 6 | 3 |
| 4 | 5 | 4 |
| 5 | 4 | 5 |
| 6 | 3 | 6 |
| 7 | 2 | 7 |
| 8 | 1 | 8 |
| 9 | 0 | 9 |
index_row(data_df,7)
0    2
1    7
Name: 7, dtype: int64
show_doc(listify)
listify[source]
listify(p:OptListOrItem=None,q:OptListOrItem=None)
Make p listy and the same length as q.
to_match = np.arange(12)
listify('a',to_match)
['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
listify('a',5)
['a', 'a', 'a', 'a', 'a']
listify(77.1,3)
[77.1, 77.1, 77.1]
listify( (1,2,3) )
[1, 2, 3]
listify((1,2,3),('a','b','c'))
[1, 2, 3]
show_doc(random_split)
random_split[source]
random_split(valid_pct:float,*arrs:NPArrayableList) → SplitArrayList
Randomly split arrs with valid_pct ratio. Good for creating a validation set.
Splitting is done here with random.uniform(), so you may not get the exact split percentage for small data sets.
data = np.arange(20).reshape(10,2)
data.tolist()
[[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13], [14, 15], [16, 17], [18, 19]]
random_split(0.20,data.tolist())
[(array([[ 0, 1],
[ 2, 3],
[ 6, 7],
[ 8, 9],
[10, 11],
[16, 17],
[18, 19]]),), (array([[ 4, 5],
[12, 13],
[14, 15]]),)]
random_split(0.20,pd.DataFrame(data))
[(array([[ 0, 1],
[ 2, 3],
[ 4, 5],
[ 6, 7],
[ 8, 9],
[10, 11],
[12, 13],
[14, 15],
[16, 17],
[18, 19]]),), (array([], shape=(0, 2), dtype=int64),)]
show_doc(range_of)
range_of([5,4,3])
[0, 1, 2]
range_of(np.arange(10)[::-1])
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
show_doc(series2cat)
series2cat[source]
series2cat(df:DataFrame,*col_names)
Categorifies the columns col_names in df.
data_df = pd.DataFrame.from_dict({"a":[1,1,1,2,2,2],"b":['f','e','f','g','g','g']})
data_df
|   | a | b |
|---|---|---|
| 0 | 1 | f |
| 1 | 1 | e |
| 2 | 1 | f |
| 3 | 2 | g |
| 4 | 2 | g |
| 5 | 2 | g |
data_df['b']
0    f
1    e
2    f
3    g
4    g
5    g
Name: b, dtype: object
series2cat(data_df,'b')
data_df['b']
0    f
1    e
2    f
3    g
4    g
5    g
Name: b, dtype: category
Categories (3, object): [e < f < g]
series2cat(data_df,'a')
data_df['a']
0    1
1    1
2    1
3    2
4    2
5    2
Name: a, dtype: category
Categories (2, int64): [1 < 2]
show_doc(split_kwargs_by_func)
split_kwargs_by_func[source]
split_kwargs_by_func(kwargs,func)
Split kwargs between those expected by func and the others.
key_word_args = {'url':'http://fast.ai','dest':'./','new_var':[1,2,3],'testvalue':42}
split_kwargs_by_func(key_word_args,download_url)
({'url': 'http://fast.ai', 'dest': './'},
{'new_var': [1, 2, 3], 'testvalue': 42})
show_doc(to_int)
to_int(3.1415)
3
data = [1.2,3.4,7.25]
to_int(data)
[1, 3, 7]
show_doc(uniqueify)
uniqueify( pd.Series(data=['a','a','b','b','f','g']) )
['a', 'b', 'f', 'g']
show_doc(download_url)
download_url[source]
download_url(url:str,dest:str,overwrite:bool=False,pbar:ProgressBar=None,show_progress=True,chunk_size=1048576,timeout=4,retries=5)
Download url to dest unless the file already exists and overwrite is False.
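A quick sketch of typical use (the URL and destination below are placeholders, not real files):
download_url('http://example.com/some_file.tgz', 'data/some_file.tgz')
download_url('http://example.com/some_file.tgz', 'data/some_file.tgz', overwrite=True)  # re-download even if dest exists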
show_doc(find_classes)
find_classes[source]
find_classes(folder:Path) → FilePathList
List of label subdirectories in imagenet-style folder.
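For example, assuming a hypothetical imagenet-style layout with one subdirectory per label:
# data/train/cat/..., data/train/dog/... (hypothetical layout)
find_classes(Path('data/train'))  # -> [PosixPath('data/train/cat'), PosixPath('data/train/dog')]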
show_doc(join_path)
join_path[source]
join_path(fname:PathOrStr,path:PathOrStr='.') → Path
Return Path(path)/Path(fname), path defaults to current dir.
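For example (output shown for a POSIX system):
join_path('file.txt', 'data')
PosixPath('data/file.txt')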
show_doc(join_paths)
join_paths[source]
join_paths(fnames:FilePathList,path:PathOrStr='.') → FilePathList
Join path to every file name in fnames.
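For example:
join_paths(['a.txt','b.txt'], 'data')
[PosixPath('data/a.txt'), PosixPath('data/b.txt')]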
show_doc(loadtxt_str)
loadtxt_str[source]
loadtxt_str(path:PathOrStr) → ndarray
Return ndarray of str of lines of text from path.
show_doc(save_texts)
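save_texts writes a collection of texts to a file, one item per line. A round-trip sketch with loadtxt_str, assuming the save_texts(fname, texts) argument order of fastai v1 (tmp.txt is a scratch file created here):
save_texts('tmp.txt', ['hello', 'world'])
loadtxt_str('tmp.txt')
array(['hello', 'world'], dtype='<U5')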
show_doc(num_cpus)
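num_cpus is used above to set default_cpus; its return value is machine-dependent:
num_cpus()  # e.g. 8 on an 8-core machine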
show_doc(parallel)
parallel[source]
parallel(func,arr:Collection[T_co],max_workers:int=None)
Call func on every element of arr in parallel using max_workers.
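A small sketch; process_item below is a hypothetical worker. Note that in fastai v1 parallel calls func with both the element and its index:
def process_item(item, index): return item * 2  # called as func(element, index)
parallel(process_item, [1,2,3,4], max_workers=2)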
show_doc(partition)
partition[source]
partition(a:Collection[T_co],sz:int) → List[Collection[T_co]]
Split iterable a into equal parts of size sz.
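For example, the last part holds the remainder when len(a) is not a multiple of sz:
partition([1,2,3,4,5], 2)
[[1, 2], [3, 4], [5]]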
show_doc(partition_by_cores)
partition_by_cores[source]
partition_by_cores(a:Collection[T_co],n_cpus:int) → List[Collection[T_co]]
Split the data in a equally among n_cpus cores.
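A sketch; the exact part sizes depend on how the size per core is rounded:
partition_by_cores(list(range(10)), 3)  # -> [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]] with fastai v1's rounding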
show_doc(ItemBase, title_level=3)
All items used in fastai should subclass this. Must have a data field that will be used when collating in mini-batches.
show_doc(ItemBase.apply_tfms)
show_doc(ItemBase.show)
The default behavior is to set the string representation of this object as the title of ax.
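A minimal sketch of a custom item (MyItem is hypothetical; only the data field is required for collation):
class MyItem(ItemBase):
    def __init__(self, data): self.data = data  # `data` is what gets collated into mini-batches
    def __str__(self): return f'MyItem({self.data})'  # used by `show` as the default ax title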
show_doc(Category, title_level=3)
show_doc(EmptyLabel, title_level=3)
show_doc(MultiCategory, title_level=3)
Create a MultiCategory with an obj that is a collection of labels. data corresponds to the one-hot encoded labels and raw is a list of the associated strings.
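A sketch for a hypothetical 4-class problem; treat the positional order data, obj, raw as an assumption:
mc = MultiCategory(np.array([1.,0.,1.,0.]), ['cat','bird'], ['cat','bird'])  # classes 0 and 2 active
mc.data  # one-hot encoded labels; mc.raw holds the label strings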
show_doc(FloatItem)
show_doc(camel2snake)
camel2snake('DeviceDataLoader')
'device_data_loader'
show_doc(even_mults)
even_mults[source]
even_mults(start:float,stop:float,n:int) → ndarray
Build log-stepped array from start to stop in n steps.
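For example, stepping a learning rate geometrically (each entry here is 10x the previous):
even_mults(1e-5, 1e-3, 3)  # -> array([1.e-05, 1.e-04, 1.e-03])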
show_doc(func_args)
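func_args returns the argument names of a function; a sketch, assuming the fastai v1 behavior of reading them from func.__code__:
func_args(ifnone)  # -> ('a', 'b')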
show_doc(noop)
noop[source]
noop(x)
Return x.
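noop is handy as a default transform or callback that leaves its input unchanged:
noop(7)
7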
show_doc(one_hot)
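A sketch, assuming one_hot(x, c) encodes the indices in x over c classes as in fastai v1:
one_hot([0, 2], 5)  # -> array([1., 0., 1., 0., 0.], dtype=float32)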
show_doc(show_some)
show_some[source]
show_some(items:Collection[T_co],n_max:int=5,sep:str=',')
Return the representation of the first n_max elements in items.
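For example (in fastai v1 a trailing '...' marks that items was truncated at n_max):
show_some([1,2,3,4,5,6,7])  # -> '1,2,3,4,5...'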
show_doc(subplots)
subplots[source]
subplots(rows:int,cols:int,imgsize:int=4,figsize:Optional[Tuple[int,int]]=None,title=None,**kwargs)
Like plt.subplots, but with a consistent axs shape; kwargs are passed to fig.suptitle along with title.
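A sketch, assuming subplots returns the axes as an array (so they keep a consistent 2-D shape even for a single row or column):
axs = subplots(2, 3, imgsize=3, title='My grid')  # 2 rows x 3 cols; title passed to fig.suptitle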
show_doc(text2html_table)
text2html_table[source]
text2html_table(items:Tokens,widths:Collection[int]) → str
Put the texts in items in an HTML table; widths are the widths of the columns in %.
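A sketch with a two-column table, giving each column half the width; the returned string can be rendered in a notebook with IPython.display.HTML:
items = [['text', 'label'], ['hello world', 'positive']]
text2html_table(items, [50, 50])  # widths in % per column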