#!/usr/bin/env python
# coding: utf-8

# ### Downloads and resizes imagenet

# 1. Create spot instance
# 2. Mount EFS
# 3. Download imagenet from kaggle and untar
# 4. Resize images to 80, 160, 320, 375

# In[1]:


# Notebook setup: load the autoreload extension, switch it to mode 2, and
# select the inline matplotlib backend.
for _magic, _arg in (
    ('reload_ext', 'autoreload'),
    ('autoreload', '2'),
    ('matplotlib', 'inline'),
):
    get_ipython().run_line_magic(_magic, _arg)


# In[2]:


from aws_setup import *


# #### Define parameters

# In[3]:


# Base name: the VPC is looked up by it, and the EFS tag and instance name
# further down are derived from it.
vpc_name = 'fast-ai'


# #### Get Existing VPC by tag name

# In[4]:


vpc = get_vpc(vpc_name)
vpc  # echo the VPC in the notebook output


# #### Create EFS (if you haven't already)

# In[6]:


# Tag under which the file system is created.
efs_tag = f'{vpc_name}-efs'


# In[ ]:


# NOTE(review): this call is unconditional; skip the cell if the EFS already
# exists, unless create_efs is known to be idempotent.
efs = create_efs(
    efs_tag,
    vpc,
    performance_mode='maxIO',
)


# #### Request Spot instance

# In[7]:


instance_name = f'{vpc_name}-instance'
# A high-compute instance type is recommended because the image resizing
# done later on is multi-threaded.
instance_type = 'c5.4xlarge'


# In[8]:


# Bid three times the current spot price quoted for the chosen instance type.
spot_price = get_spot_prices()[instance_type]
tripled_price = float(spot_price) * 3
bid_price = "%.4f" % tripled_price
print(f'Spot price: {spot_price}, Bid price: {bid_price}')


# In[9]:


spec_builder = LaunchSpecs(vpc, instance_type=instance_type)
launch_specs = spec_builder.build()


# In[10]:


# Override the volume size on the first block device mapping.
first_ebs = launch_specs['BlockDeviceMappings'][0]['Ebs']
first_ebs['VolumeSize'] = 1000


# In[11]:


launch_specs  # echo the final launch specification in the notebook output


# In[12]:


instance = create_spot_instance(
    instance_name,
    launch_specs,
    spot_price=bid_price,
)
instance  # echo the instance in the notebook output


# In[ ]:


# Alternative kept for reference (presumably fetches an existing instance by
# name instead of requesting a new one - confirm against aws_setup):
# instance = get_instance(instance_name); instance
get_ssh_command(instance)


# ### SSH

# In[35]:


# Connection handle passed to every run_command / upload_file call below.
client = connect_to_instance(instance)


# #### Mount EFS

# In[16]:


# Look the file system up by the same tag it was created under (efs_tag),
# rather than a hard-coded copy of the name, so that changing vpc_name keeps
# creation and lookup in sync.
efs_addr = get_efs_address(efs_tag); efs_addr


# In[17]:


# -p makes the cell safe to re-run: no error if the directory already exists.
_ = run_command(client, 'mkdir -p ~/efs_mount')


# In[18]:


# Mount the EFS root over NFS 4.1 with 1 MiB read/write sizes and a hard mount
# (timeo=600, retrans=2) at ~/efs_mount.
efs_mount_cmd = f'sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 {efs_addr}:/ ~/efs_mount'
_ = run_command(client, efs_mount_cmd)


# In[34]:


# Sanity check: list the mount point. It uses the same ~/efs_mount path as the
# commands above, so it does not depend on the remote working directory.
_ = run_command(client, 'ls ~/efs_mount')


# ## Tmux

# In[37]:


# Session named 'sess' on the remote host; the long-running scripts below are
# launched through it (presumably so they survive an SSH disconnect - confirm
# against TmuxSession in aws_setup).
tsess = TmuxSession(client, 'sess')


# ### Download dataset from kaggle

# In[19]:


# -p makes the cell safe to re-run: no error if ~/.kaggle already exists.
_ = run_command(client, 'mkdir -p ~/.kaggle')


# In[21]:


# Copy the local Kaggle API credentials to the same location on the instance.
kaggle_file = Path.home()/'.kaggle/kaggle.json'
upload_file(client, str(kaggle_file), '.kaggle/kaggle.json')
# The file holds an API key: restrict it to the owner, as the Kaggle CLI
# recommends (it warns when the key is readable by other users).
_ = run_command(client, 'chmod 600 ~/.kaggle/kaggle.json')


# In[32]:


download_kaggle_file = Path.cwd()/'upload_scripts/download_kaggle_imagenet.sh'
upload_file(client, str(download_kaggle_file), 'download_kaggle_imagenet.sh')


# In[33]:


# Start the download inside the tmux session.
tsess.run_cmd('bash download_kaggle_imagenet.sh')


# ### Upload image resize

# In[40]:


# Upload the two resize helpers to the instance:
#   resize_images.py       - used by imagenet_formatting.sh for multithreaded
#                            resizing; its methods are taken from fast.ai's
#                            dataset.py
#   imagenet_formatting.sh - creates sizes 80, 160, 320, 375 and stores the
#                            files in EFS
for local_rel, remote_name in (
    ('upload_scripts/resize_images.py', 'resize_images.py'),
    ('upload_scripts/imagenet_formatting.sh', 'imagenet_formatting.sh'),
):
    upload_path = Path.cwd()/local_rel
    upload_file(client, str(upload_path), remote_name)


# In[ ]:


# Start the resize/formatting script inside the tmux session.
tsess.run_cmd('bash imagenet_formatting.sh')