#!/usr/bin/env python # coding: utf-8 # ## Basic core # This module contains all the basic functions we need in other modules of the fastai library (split with [`torch_core`](/torch_core.html#torch_core) that contains the ones requiring pytorch). Its documentation can easily be skipped at a first read, unless you want to know what a given function does. # In[1]: from fastai.gen_doc.nbdoc import * from fastai.core import * # ## Global constants # `default_cpus = min(16, num_cpus())`
# ## Check functions # In[2]: show_doc(has_arg) # Examples for two [`fastai.core`](/core.html#core) functions. Docstring shown before calling [`has_arg`](/core.html#has_arg) for reference # # In[ ]: has_arg(download_url,'url') # In[ ]: has_arg(index_row,'x') # In[ ]: has_arg(index_row,'a') # In[3]: show_doc(ifnone) # In[ ]: param,alt_param = None,5 ifnone(param,alt_param) # In[ ]: param,alt_param = None,[1,2,3] ifnone(param,alt_param) # In[4]: show_doc(is1d) # In[ ]: two_d_array = np.arange(12).reshape(6,2) print( two_d_array ) print( is1d(two_d_array) ) # In[ ]: is1d(two_d_array.flatten()) # In[5]: show_doc(is_listy) # Check if `x` is a `Collection`. `Tuple` or `List` qualify # In[ ]: some_data = [1,2,3] is_listy(some_data) # In[ ]: some_data = (1,2,3) is_listy(some_data) # In[ ]: some_data = 1024 print( is_listy(some_data) ) # In[ ]: print( is_listy( [some_data] ) ) # In[ ]: some_data = dict([('a',1),('b',2),('c',3)]) print( some_data ) print( some_data.keys() ) # In[ ]: print( is_listy(some_data) ) print( is_listy(some_data.keys()) ) # In[ ]: print( is_listy(list(some_data.keys())) ) # In[6]: show_doc(is_tuple) # Check if `x` is a `tuple`. # In[ ]: print( is_tuple( [1,2,3] ) ) # In[ ]: print( is_tuple( (1,2,3) ) ) # ## Collection related functions # In[7]: show_doc(arange_of) # In[ ]: arange_of([5,6,7]) # In[ ]: type(arange_of([5,6,7])) # In[8]: show_doc(array) # In[ ]: array([1,2,3]) # Note that after we call the generator, we do not reset. So the [`array`](/core.html#array) call has 5 less entries than it would if we ran from the start of the generator. # In[ ]: def data_gen(): i = 100.01 while i<200: yield i i += 1. 
# Demo cells: consume 5 entries from the generator defined in the previous
# cell, then `arrays_split` / `chunks` / `df_names_to_idx` / `extract_kwargs` /
# `idx_dict` / `index_row` examples.

ex_data_gen = data_gen()
for _ in range(5):
    print(next(ex_data_gen))

# In[ ]:

array(ex_data_gen)

# In[ ]:

ex_data_gen_int = data_gen()
array(ex_data_gen_int,dtype=int) #Cast output to int array

# In[9]:

show_doc(arrays_split)

# In[ ]:

data_a = np.arange(15)
data_b = np.arange(15)[::-1]
mask_a = (data_a > 10)
print(data_a)
print(data_b)
print(mask_a)

# In[ ]:

arrays_split(mask_a,data_a)

# In[ ]:

np.vstack([data_a,data_b]).transpose().shape

# In[ ]:

arrays_split(mask_a,np.vstack([data_a,data_b]).transpose()) #must match on dimension 0

# In[10]:

show_doc(chunks)

# You can transform a `Collection` into an `Iterable` of 'n' sized chunks by calling [`chunks`](/core.html#chunks):

# In[ ]:

data = [0,1,2,3,4,5,6,7,8,9]
for chunk in chunks(data, 2):
    print(chunk)

# In[ ]:

for chunk in chunks(data, 3):
    print(chunk)

# In[11]:

show_doc(df_names_to_idx)

# In[ ]:

ex_df = pd.DataFrame.from_dict({"a":[1,1,1],"b":[2,2,2]})
print(ex_df)

# In[ ]:

df_names_to_idx('b',ex_df)

# In[12]:

show_doc(extract_kwargs)

# In[ ]:

key_word_args = {"a":2,"some_list":[1,2,3],"param":'mean'}
key_word_args

# In[ ]:

(extracted_val,remainder) = extract_kwargs(['param'],key_word_args)
print( extracted_val,remainder )

# In[13]:

show_doc(idx_dict)

# In[ ]:

idx_dict(['a','b','c'])

# In[14]:

show_doc(index_row)

# `a` is basically something you can index into like a dataframe, an array or a list.
# Demo cells for `listify` / `random_split` / `range_of` / `series2cat` /
# `split_kwargs_by_func` / `to_int` / `uniqueify`, the `PrePostInitMeta`
# metaclass, file-management / multiprocessing helpers, the data block API
# items, and misc utilities through `noop`.

# In[ ]:

data = [0,1,2,3,4,5,6,7,8,9]
index_row(data,4)

# In[ ]:

index_row(pd.Series(data),7)

# In[ ]:

data_df = pd.DataFrame([data[::-1],data]).transpose()
data_df

# In[ ]:

index_row(data_df,7)

# In[15]:

show_doc(listify)

# In[ ]:

to_match = np.arange(12)
listify('a',to_match)

# In[ ]:

listify('a',5)

# In[ ]:

listify(77.1,3)

# In[ ]:

listify( (1,2,3) )

# In[ ]:

listify((1,2,3),('a','b','c'))

# In[16]:

show_doc(random_split)

# Splitting is done here with `random.uniform()` so you may not get the exact split percentage for small data sets

# In[ ]:

data = np.arange(20).reshape(10,2)
data.tolist()

# In[ ]:

random_split(0.20,data.tolist())

# In[ ]:

random_split(0.20,pd.DataFrame(data))

# In[17]:

show_doc(range_of)

# In[ ]:

range_of([5,4,3])

# In[ ]:

range_of(np.arange(10)[::-1])

# In[18]:

show_doc(series2cat)

# In[ ]:

data_df = pd.DataFrame.from_dict({"a":[1,1,1,2,2,2],"b":['f','e','f','g','g','g']})
data_df

# In[ ]:

data_df['b']

# In[ ]:

series2cat(data_df,'b')
data_df['b']

# In[ ]:

series2cat(data_df,'a')
data_df['a']

# In[19]:

show_doc(split_kwargs_by_func)

# In[ ]:

key_word_args = {'url':'http://fast.ai','dest':'./','new_var':[1,2,3],'testvalue':42}
split_kwargs_by_func(key_word_args,download_url)

# In[20]:

show_doc(to_int)

# In[ ]:

to_int(3.1415)

# In[ ]:

data = [1.2,3.4,7.25]
to_int(data)

# In[21]:

show_doc(uniqueify)

# In[ ]:

uniqueify( pd.Series(data=['a','a','b','b','f','g']) )

# ## Metaclasses

# In[22]:

show_doc(PrePostInitMeta)

# In[ ]:

# Minimal class exercising PrePostInitMeta's hook order: the asserts show
# `a` going 0 (__pre_init__) -> 1 (__init__) -> 2 (__post_init__).
class _T(metaclass=PrePostInitMeta):
    def __pre_init__(self): self.a = 0; assert self.a==0
    def __init__(self): self.a += 1; assert self.a==1
    def __post_init__(self): self.a += 1; assert self.a==2

t = _T()
t.a

# ## Files management and downloads

# In[23]:

show_doc(download_url)

# In[24]:

show_doc(find_classes)

# In[25]:

show_doc(join_path)

# In[26]:

show_doc(join_paths)

# In[27]:

show_doc(loadtxt_str)

# In[28]:

show_doc(save_texts)

# ## Multiprocessing

# In[29]:

show_doc(num_cpus)

# In[30]:

show_doc(parallel)

# `func` must accept both the value and index of each `arr` element.

# In[ ]:

# Two-argument callback as required by `parallel` (value first, index second).
def my_func(value, index):
    print("Index: {}, Value: {}".format(index, value))

my_array = [i*2 for i in range(5)]

parallel(my_func, my_array, max_workers=3)

# In[31]:

show_doc(partition)

# In[32]:

show_doc(partition_by_cores)

# ## Data block API

# In[33]:

show_doc(ItemBase, title_level=3)

# All items used in fastai should subclass this. Must have a [`data`](/tabular.data.html#tabular.data) field that will be used when collating in mini-batches.

# In[34]:

show_doc(ItemBase.apply_tfms)

# In[35]:

show_doc(ItemBase.show)

# The default behavior is to set the string representation of this object as title of `ax`.

# In[36]:

show_doc(Category, title_level=3)

# Create a [`Category`](/core.html#Category) with an `obj` of index [`data`](/tabular.data.html#tabular.data) in a certain classes list.

# In[37]:

show_doc(EmptyLabel, title_level=3)

# In[38]:

show_doc(MultiCategory, title_level=3)

# Create a [`MultiCategory`](/core.html#MultiCategory) with an `obj` that is a collection of labels. [`data`](/tabular.data.html#tabular.data) corresponds to the one-hot encoded labels and `raw` is a list of associated string.

# In[39]:

show_doc(FloatItem)

# ## Others

# In[40]:

show_doc(camel2snake)

# In[ ]:

camel2snake('DeviceDataLoader')

# In[41]:

show_doc(even_mults)

# In linear scales each element is equidistant from its neighbors:

# In[ ]:

# from 1 to 10 in 5 steps
t = np.linspace(1, 10, 5)
t

# In[ ]:

for i in range(len(t) - 1):
    print(t[i+1] - t[i])

# In logarithmic scales, each element is a multiple of the previous entry:

# In[ ]:

t = even_mults(1, 10, 5)
t

# In[ ]:

# notice how each number is a multiple of its predecessor
for i in range(len(t) - 1):
    print(t[i+1] / t[i])

# In[42]:

show_doc(func_args)

# In[ ]:

func_args(download_url)

# Additionally, [`func_args`](/core.html#func_args) can be used with functions that do not belong to the fastai library

# In[ ]:

func_args(np.linspace)

# In[43]:

show_doc(noop)

# Return `x`.
# In[ ]: # object is returned as-is noop([1,2,3]) # In[44]: show_doc(one_hot) # One-hot encoding is a standard machine learning technique. Assume we are dealing with a 10-class classification problem and we are supplied a list of labels: # In[ ]: y = [1, 4, 4, 5, 7, 9, 2, 4, 0] # In[ ]: jekyll_note("""y is zero-indexed, therefore its first element (1) belongs to class 2, its second element (4) to class 5 and so on.""") # In[ ]: len(y) # y can equivalently be expressed as a matrix of 9 rows and 10 columns, where each row represents one element of the original y. # In[ ]: for label in y: print(one_hot(label, 10)) # In[45]: show_doc(show_some) # In[ ]: # select 3 elements from a list some_data = show_some([10, 20, 30, 40, 50], 3) some_data # In[ ]: type(some_data) # In[ ]: # the separator can be changed some_data = show_some([10, 20, 30, 40, 50], 3, sep = '---') some_data # In[ ]: some_data[:-3] # [`show_some`](/core.html#show_some) can take as input any class with \_\_len\_\_ and \_\_getitem\_\_ # In[ ]: class Any(object): def __init__(self, data): self.data = data def __len__(self): return len(self.data) def __getitem__(self,i): return self.data[i] some_other_data = Any('nice') show_some(some_other_data, 2) # In[46]: show_doc(subplots) # In[47]: show_doc(text2html_table) # ## Undocumented Methods - Methods moved below this line will intentionally be hidden # ## New Methods - Please document or move to the undocumented section # In[48]: show_doc(is_dict) #