#!/usr/bin/env python # coding: utf-8 # ## Basic core # This module contains all the basic functions we need in other modules of the fastai library (split with [`torch_core`](/torch_core.html#torch_core) that contains the ones requiring pytorch). Its documentation can easily be skipped at a first read, unless you want to know what a given function does. # In[1]: from fastai.gen_doc.nbdoc import * from fastai.core import * # ## Global constants # `default_cpus = min(16, num_cpus())`
# ## Check functions # In[2]: show_doc(has_arg) # Examples for two [`fastai.core`](/core.html#core) functions. Docstring shown before calling [`has_arg`](/core.html#has_arg) for reference # # In[ ]: has_arg(download_url,'url') # In[ ]: has_arg(index_row,'x') # In[ ]: has_arg(index_row,'a') # In[3]: show_doc(ifnone) # In[ ]: param,alt_param = None,5 ifnone(param,alt_param) # In[ ]: param,alt_param = None,[1,2,3] ifnone(param,alt_param) # In[4]: show_doc(is1d) # In[ ]: two_d_array = np.arange(12).reshape(6,2) print( two_d_array ) print( is1d(two_d_array) ) # In[ ]: is1d(two_d_array.flatten()) # In[5]: show_doc(is_listy) # Check if `x` is a `Collection`. `Tuple` or `List` qualify # In[ ]: some_data = [1,2,3] is_listy(some_data) # In[ ]: some_data = (1,2,3) is_listy(some_data) # In[ ]: some_data = 1024 print( is_listy(some_data) ) # In[ ]: print( is_listy( [some_data] ) ) # In[ ]: some_data = dict([('a',1),('b',2),('c',3)]) print( some_data ) print( some_data.keys() ) # In[ ]: print( is_listy(some_data) ) print( is_listy(some_data.keys()) ) # In[ ]: print( is_listy(list(some_data.keys())) ) # In[6]: show_doc(is_tuple) # Check if `x` is a `tuple`. # In[ ]: print( is_tuple( [1,2,3] ) ) # In[ ]: print( is_tuple( (1,2,3) ) ) # ## Collection related functions # In[7]: show_doc(arange_of) # In[ ]: arange_of([5,6,7]) # In[ ]: type(arange_of([5,6,7])) # In[8]: show_doc(array) # In[ ]: array([1,2,3]) # Note that after we call the generator, we do not reset. So the [`array`](/core.html#array) call has 5 less entries than it would if we ran from the start of the generator. # In[ ]: def data_gen(): i = 100.01 while i<200: yield i i += 1. 
# Demo cells: consume 5 entries from the generator defined in the previous
# cell, then `arrays_split` / `chunks` / `df_names_to_idx` / `extract_kwargs` /
# `idx_dict` / `index_row` examples.

ex_data_gen = data_gen()
for _ in range(5):
    print(next(ex_data_gen))

# In[ ]:

array(ex_data_gen)

# In[ ]:

ex_data_gen_int = data_gen()
array(ex_data_gen_int,dtype=int) #Cast output to int array

# In[9]:

show_doc(arrays_split)

# In[ ]:

data_a = np.arange(15)
data_b = np.arange(15)[::-1]
mask_a = (data_a > 10)
print(data_a)
print(data_b)
print(mask_a)

# In[ ]:

arrays_split(mask_a,data_a)

# In[ ]:

np.vstack([data_a,data_b]).transpose().shape

# In[ ]:

arrays_split(mask_a,np.vstack([data_a,data_b]).transpose()) #must match on dimension 0

# In[10]:

show_doc(chunks)

# You can transform a `Collection` into an `Iterable` of 'n' sized chunks by calling [`chunks`](/core.html#chunks):

# In[ ]:

data = [0,1,2,3,4,5,6,7,8,9]
for chunk in chunks(data, 2):
    print(chunk)

# In[ ]:

for chunk in chunks(data, 3):
    print(chunk)

# In[11]:

show_doc(df_names_to_idx)

# In[ ]:

ex_df = pd.DataFrame.from_dict({"a":[1,1,1],"b":[2,2,2]})
print(ex_df)

# In[ ]:

df_names_to_idx('b',ex_df)

# In[12]:

show_doc(extract_kwargs)

# In[ ]:

key_word_args = {"a":2,"some_list":[1,2,3],"param":'mean'}
key_word_args

# In[ ]:

(extracted_val,remainder) = extract_kwargs(['param'],key_word_args)
print( extracted_val,remainder )

# In[13]:

show_doc(idx_dict)

# In[ ]:

idx_dict(['a','b','c'])

# In[14]:

show_doc(index_row)

# `a` is basically something you can index into like a dataframe, an array or a list.
# Demo cells for `listify` / `random_split` / `range_of` / `series2cat` /
# `split_kwargs_by_func` / `to_int` / `uniqueify`, the `PrePostInitMeta`
# metaclass, file-management / multiprocessing helpers, the data block API
# items, and misc utilities through `noop`.

# In[ ]:

data = [0,1,2,3,4,5,6,7,8,9]
index_row(data,4)

# In[ ]:

index_row(pd.Series(data),7)

# In[ ]:

data_df = pd.DataFrame([data[::-1],data]).transpose()
data_df

# In[ ]:

index_row(data_df,7)

# In[15]:

show_doc(listify)

# In[ ]:

to_match = np.arange(12)
listify('a',to_match)

# In[ ]:

listify('a',5)

# In[ ]:

listify(77.1,3)

# In[ ]:

listify( (1,2,3) )

# In[ ]:

listify((1,2,3),('a','b','c'))

# In[16]:

show_doc(random_split)

# Splitting is done here with `random.uniform()` so you may not get the exact split percentage for small data sets

# In[ ]:

data = np.arange(20).reshape(10,2)
data.tolist()

# In[ ]:

random_split(0.20,data.tolist())

# In[ ]:

random_split(0.20,pd.DataFrame(data))

# In[17]:

show_doc(range_of)

# In[ ]:

range_of([5,4,3])

# In[ ]:

range_of(np.arange(10)[::-1])

# In[18]:

show_doc(series2cat)

# In[ ]:

data_df = pd.DataFrame.from_dict({"a":[1,1,1,2,2,2],"b":['f','e','f','g','g','g']})
data_df

# In[ ]:

data_df['b']

# In[ ]:

series2cat(data_df,'b')
data_df['b']

# In[ ]:

series2cat(data_df,'a')
data_df['a']

# In[19]:

show_doc(split_kwargs_by_func)

# In[ ]:

key_word_args = {'url':'http://fast.ai','dest':'./','new_var':[1,2,3],'testvalue':42}
split_kwargs_by_func(key_word_args,download_url)

# In[20]:

show_doc(to_int)

# In[ ]:

to_int(3.1415)

# In[ ]:

data = [1.2,3.4,7.25]
to_int(data)

# In[21]:

show_doc(uniqueify)

# In[ ]:

uniqueify( pd.Series(data=['a','a','b','b','f','g']) )

# ## Metaclasses

# In[22]:

show_doc(PrePostInitMeta)

# In[ ]:

# Minimal class exercising PrePostInitMeta's hook order: the asserts show
# `a` going 0 (__pre_init__) -> 1 (__init__) -> 2 (__post_init__).
class _T(metaclass=PrePostInitMeta):
    def __pre_init__(self): self.a = 0; assert self.a==0
    def __init__(self): self.a += 1; assert self.a==1
    def __post_init__(self): self.a += 1; assert self.a==2

t = _T()
t.a

# ## Files management and downloads

# In[23]:

show_doc(download_url)

# In[24]:

show_doc(find_classes)

# In[25]:

show_doc(join_path)

# In[26]:

show_doc(join_paths)

# In[27]:

show_doc(loadtxt_str)

# In[28]:

show_doc(save_texts)

# ## Multiprocessing

# In[29]:

show_doc(num_cpus)

# In[30]:

show_doc(parallel)

# `func` must accept both the value and index of each `arr` element.

# In[ ]:

# Two-argument callback as required by `parallel` (value first, index second).
def my_func(value, index):
    print("Index: {}, Value: {}".format(index, value))

my_array = [i*2 for i in range(5)]

parallel(my_func, my_array, max_workers=3)

# In[31]:

show_doc(partition)

# In[32]:

show_doc(partition_by_cores)

# ## Data block API

# In[33]:

show_doc(ItemBase, title_level=3)

# All items used in fastai should subclass this. Must have a [`data`](/tabular.data.html#tabular.data) field that will be used when collating in mini-batches.

# In[34]:

show_doc(ItemBase.apply_tfms)

# In[35]:

show_doc(ItemBase.show)

# The default behavior is to set the string representation of this object as title of `ax`.

# In[36]:

show_doc(Category, title_level=3)

# Create a [`Category`](/core.html#Category) with an `obj` of index [`data`](/tabular.data.html#tabular.data) in a certain classes list.

# In[37]:

show_doc(EmptyLabel, title_level=3)

# In[38]:

show_doc(MultiCategory, title_level=3)

# Create a [`MultiCategory`](/core.html#MultiCategory) with an `obj` that is a collection of labels. [`data`](/tabular.data.html#tabular.data) corresponds to the one-hot encoded labels and `raw` is a list of associated string.

# In[39]:

show_doc(FloatItem)

# ## Others

# In[40]:

show_doc(camel2snake)

# In[ ]:

camel2snake('DeviceDataLoader')

# In[41]:

show_doc(even_mults)

# In linear scales each element is equidistant from its neighbors:

# In[ ]:

# from 1 to 10 in 5 steps
t = np.linspace(1, 10, 5)
t

# In[ ]:

for i in range(len(t) - 1):
    print(t[i+1] - t[i])

# In logarithmic scales, each element is a multiple of the previous entry:

# In[ ]:

t = even_mults(1, 10, 5)
t

# In[ ]:

# notice how each number is a multiple of its predecessor
for i in range(len(t) - 1):
    print(t[i+1] / t[i])

# In[42]:

show_doc(func_args)

# In[ ]:

func_args(download_url)

# Additionally, [`func_args`](/core.html#func_args) can be used with functions that do not belong to the fastai library

# In[ ]:

func_args(np.linspace)

# In[43]:

show_doc(noop)

# Return `x`.
# In[ ]: # object is returned as-is noop([1,2,3]) # In[44]: show_doc(one_hot) # One-hot encoding is a standard machine learning technique. Assume we are dealing with a 10-class classification problem and we are supplied a list of labels: # In[ ]: y = [1, 4, 4, 5, 7, 9, 2, 4, 0] # In[ ]: jekyll_note("""y is zero-indexed, therefore its first element (1) belongs to class 2, its second element (4) to class 5 and so on.""") # In[ ]: len(y) # y can equivalently be expressed as a matrix of 9 rows and 10 columns, where each row represents one element of the original y. # In[ ]: for label in y: print(one_hot(label, 10)) # In[45]: show_doc(show_some) # In[ ]: # select 3 elements from a list some_data = show_some([10, 20, 30, 40, 50], 3) some_data # In[ ]: type(some_data) # In[ ]: # the separator can be changed some_data = show_some([10, 20, 30, 40, 50], 3, sep = '---') some_data # In[ ]: some_data[:-3] # [`show_some`](/core.html#show_some) can take as input any class with \_\_len\_\_ and \_\_getitem\_\_ # In[ ]: class Any(object): def __init__(self, data): self.data = data def __len__(self): return len(self.data) def __getitem__(self,i): return self.data[i] some_other_data = Any('nice') show_some(some_other_data, 2) # In[46]: show_doc(subplots) # In[47]: show_doc(text2html_table) # ## Undocumented Methods - Methods moved below this line will intentionally be hidden # ## New Methods - Please document or move to the undocumented section # In[48]: show_doc(is_dict) #