# default_exp core

# export
from fastprogress.fastprogress import progress_bar
from fastcore.imports import *
from fastcore.basics import *
from fastcore.foundation import *
from fastcore.net import *
from fastcore.xtras import untar_dir
import hashlib,shutil
from pprint import pformat

#hide
from nbdev.showdoc import show_doc
import tempfile,fastdownload

# Local directory the demo download below is written into
dest = Path('tmp')
# Sample archive used by every example in this file
url = 'https://s3.amazonaws.com/fast-ai-sample/mnist_tiny.tgz'

#hide
# Start from a clean slate: drop any `dest` tree left by a previous run (a missing directory is not an error)
shutil.rmtree(dest, ignore_errors=True)
# NOTE(review): presumably makes displayed paths relative to the home directory -- confirm against fastcore's `Path.BASE_PATH`
Path.BASE_PATH = Path.home()

#export
def download_url(url, dest=None):
    "Save `url` to `dest` with `urlsave`, reporting progress on a progress bar"
    bar = progress_bar([])
    def _report(count=1, bsize=1, tsize=None):
        # Called by `urlsave` as its `reporthook`; `tsize` becomes the bar's total
        done = count*bsize
        bar.total = tsize
        bar.update(done)
    return urlsave(url, dest, reporthook=_report)

# `dest` was removed above, so create it again before downloading into it
dest.mkdir()
fpath = download_url(url, dest)
# Display the value returned by `download_url`
fpath

# export
def path_stats(fpath):
    "Return `(size, md5)` for `fpath`: its size in bytes and the md5 hex digest of at most its first 1MB"
    nbytes = os.path.getsize(fpath)
    # Only the leading 1MB is hashed, to keep this fast for large files
    with open(fpath, "rb") as fh: head = fh.read(1 << 20)
    digest = hashlib.md5(head).hexdigest()
    return nbytes,digest

# Size and partial md5 of the file downloaded above
path_stats(fpath)

#export
def checks_module(module):
    "Path of the `download_checks.py` next to `module`'s file, or `{}` when `module` is falsy"
    if module:
        pkg_dir = Path(module.__file__).parent
        return pkg_dir/'download_checks.py'
    return {}

# Checks file located inside the installed `fastdownload` package
mod = checks_module(fastdownload)
mod

#export
def read_checks(fmod):
    "Evaluated contents of the checks file `fmod`; `{}` if `fmod` is falsy, missing, or blank"
    # `checks_module` returns `{}` when no module was given, so `fmod` may not be a path at all
    if not fmod or not fmod.exists(): return {}
    txt = fmod.read_text().strip()
    # NOTE: `eval` executes whatever the file contains -- only point this at a trusted `download_checks.py`
    return eval(txt) if txt else {}

#export
def check(fmod, url, fpath):
    "`True` if no stats are stored for `url` in `fmod`, or if the stored stats equal `path_stats(fpath)`"
    expected = read_checks(fmod).get(url)
    # Nothing recorded for this url: there is nothing to compare against
    if not expected: return True
    return path_stats(fpath)==expected

# export
def update_checks(fpath, url, fmod):
    "Record the size and hash of `fpath` under `url` in the checks file `fmod`, keeping its other entries"
    # Existing entries are read first, then the one for `url` is added or replaced
    merged = {**read_checks(fmod), url: path_stats(fpath)}
    fmod.write_text(pformat(merged))

# Begin with no checks file, record the stats of the downloaded file, then read them back
if mod.exists(): mod.unlink()
update_checks(fpath, url, mod)
read_checks(mod)

#export
def download_and_check(url, fpath, fmod, force):
    "Return `fpath` if it exists, passes `check` and not `force`; otherwise download `url` to it and verify the result"
    if not force and fpath.exists():
        # An existing file that still matches the stored stats is reused as-is
        if check(fmod, url, fpath): return fpath
        print("Downloading a new version of this dataset...")
    res = download_url(url, fpath)
    # The fresh download must match the stored stats too (when any are stored)
    if check(fmod, url, fpath): return res
    raise Exception("Downloaded file is corrupt or not latest version")

# export
class FastDownload:
    "Download archives to a configured archive folder, verify them, and extract them to a configured data folder"
    def __init__(self, cfg=None, base='~/.fastdownload', archive=None, data=None, module=None):
        "Use `cfg` if given, else a `Config` for `config.ini` under `base`; `archive`/`data` override its entries"
        root = Path(base).expanduser().absolute()
        # Passed to `Config` as its `create` values
        defaults = dict(data=data or 'data', archive=archive or 'archive')
        if cfg is None: cfg = Config(root, 'config.ini', create=defaults)
        self.cfg = cfg
        self.module = checks_module(module)
        # Explicitly passed locations are written into the config, whichever config is in use
        for key,val in (('data',data), ('archive',archive)):
            if val is not None: self.cfg[key] = val

    def arch_path(self):
        "Path to archives"
        return self.cfg.path('archive')

    def data_path(self, extract_key='data'):
        "Path to extracted data, looked up in the config under `extract_key`"
        return self.cfg.path(extract_key)

    def check(self, url, fpath):
        "`True` if no stats are stored for `url`, or if the stored stats equal `path_stats(fpath)`"
        expected = read_checks(self.module).get(url)
        if not expected: return True
        return path_stats(fpath)==expected

    def download(self, url, force=False):
        "Download `url` into the archive path; an existing file that passes the check is reused unless `force`"
        arch_dir = self.arch_path()
        arch_dir.mkdir(exist_ok=True, parents=True)
        target = urldest(url, arch_dir)
        return download_and_check(url, target, self.module, force)

    def rm(self, url, rm_arch=True, rm_data=True, extract_key='data'):
        "Delete downloaded archive and extracted data for `url`"
        archive_file = urldest(url, self.arch_path())
        if rm_arch: archive_file.delete()
        if not rm_data: return
        data_root = self.data_path(extract_key)
        # The extracted folder is named after the archive stem with any '.tar' suffix removed
        extracted_name = remove_suffix(archive_file.stem, '.tar')
        (data_root/extracted_name).delete()

    def update(self, url):
        "Store the hash and size of the archive for `url` in `download_checks.py`"
        archive_file = urldest(url, self.arch_path())
        update_checks(archive_file, url, self.module)

    def extract(self, url, extract_key='data', force=False):
        "Extract archive already downloaded from `url`, overwriting existing if `force`"
        archive_file = urldest(url, self.arch_path())
        if not archive_file.exists(): raise Exception(f'{archive_file} does not exist')
        out_dir = self.data_path(extract_key)
        out_dir.mkdir(exist_ok=True, parents=True)
        return untar_dir(archive_file, out_dir, rename=True, overwrite=force)

    def get(self, url, extract_key='data', force=False):
        "Download and extract `url`, overwriting existing if `force`"
        self.download(url, force=force)
        return self.extract(url, extract_key=extract_key, force=force)

# Downloader whose checks file is resolved from the `fastdownload` package via `checks_module`
d = FastDownload(module=fastdownload)
d.module

# Config file backing `d.cfg`
d.cfg.config_file

print(d.cfg.config_file.read_text())

show_doc(FastDownload.download)

# Remove any existing checks file first, so `read_checks` finds no stored stats for this download
if d.module.exists(): d.module.unlink()
arch = d.download(url)
arch

show_doc(FastDownload.update)

# Record the size and hash of the archive just downloaded, then show what was stored
d.update(url)
eval(d.module.read_text())

# Second call: the archive exists and `force` is False, so it is checked against the stored stats
d.download(url)

show_doc(FastDownload.extract)

extr = d.extract(url, force=True)
extr

# NOTE(review): `ls` is presumably fastcore's `Path.ls` listing the extracted folder -- confirm
extr.ls()

# Add another location to the config and extract into it by passing its key
d.cfg['model_path'] = 'models'
d.extract(url, extract_key='model_path')

show_doc(FastDownload.rm)

# Remove the archive and the data extracted under the default 'data' key, then show whether either still exists
d.rm(url)
extr.exists(),arch.exists()

show_doc(FastDownload.get)

# `get` downloads and then extracts in one call
res = d.get(url)
res,extr.exists()

res = d.get(url, extract_key='model_path')
res,res.exists()

#hide
# NOTE(review): presumably regenerates the library modules from the notebooks' export cells -- confirm against nbdev docs
from nbdev.export import notebook2script
notebook2script()