from fastcore.all import *
from execnb.nbio import *
from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter
# Scratch example: parse a small HTML snippet containing an <img> tag and inspect it.
# NOTE(review): no parser is named here, so the resulting tree depends on which parser bs4 selects — confirm.
b = BeautifulSoup('hi <img alt="Natural" caption="artificial" id="neuron" src="images/chapter7.png" width="500"/>')
im = b.img  # the first <img> tag found in the parsed snippet
im['alt']  # look up the tag's `alt` attribute (result is discarded when run as a script)
'Natural'  # bare string expression with no effect; looks like pasted notebook output for the line above
def img2md(im):
    """Build a `{key="value" ...}` attribute block from an image tag.

    `im` is anything with a dict-style `.get`. Only three attributes are read,
    in this order:

    - `width` is copied as `width`
    - `id` becomes `id`, prefixed with `fig-` and with `_` turned into `-`
    - `alt` is copied as `fig-alt`

    Missing or empty attributes are skipped. Returns `''` when none of the
    three is present.

    NOTE(review): `src` and `caption` are never emitted, so a caller that
    replaces the tag with this string keeps no image path — confirm intended.
    """
    attrs = {}
    for src_key, out_key in (('width', 'width'), ('id', 'id'), ('alt', 'fig-alt')):
        val = im.get(src_key, None)
        if not val:
            continue
        if src_key == 'id':
            val = 'fig-' + re.sub('_', '-', val)
        attrs[out_key] = val
    if not attrs:
        return ''
    return '{' + ' '.join(f'{k}="{v}"' for k, v in attrs.items()) + '}'
class UnsortedAttributes(HTMLFormatter):
    """`HTMLFormatter` whose `attributes` hook yields a tag's attributes in the order `tag.attrs` holds them."""

    def attributes(self, tag):
        """Yield each `(name, value)` pair of `tag.attrs`, in stored order."""
        for pair in tag.attrs.items():
            yield pair
def enc(t):
    """Return the encoded contents of `t` as a UTF-8 decoded `str`, formatted with `UnsortedAttributes`."""
    raw = t.encode_contents(formatter=UnsortedAttributes())
    return raw.decode('utf-8')
def nb_images(nb):
    """Replace HTML `<img>` tags in the markdown cells of `nb` with attribute blocks, in place.

    Every markdown cell whose source contains `<img ` is parsed with
    BeautifulSoup. Each `<img>` tag in it (not just the first) is replaced by
    the text `img2md` returns for that tag, and the re-serialized result is
    stored back in the cell's `source`. All other cells are left untouched.

    Returns `None`.
    """
    for c in nb.cells:
        o = c.source
        if c.cell_type != 'markdown' or '<img ' not in o:
            continue
        # NOTE(review): no parser is named, so bs4 picks one; the wrapper
        # handling below does not assume the tree has a <body>.
        b = BeautifulSoup(o)
        ims = b.find_all('img')
        # The substring test can match text the parser does not turn into a
        # tag (e.g. inside an HTML comment); leave such cells as they are.
        if not ims:
            continue
        for im in ims:
            im.replace_with(img2md(im))
        root = b.body
        if root is None:
            root = b
        # Some parsers wrap a bare fragment in <html><body><p>...</p>. Strip the
        # <p> only when it is the sole significant child, so that sibling
        # content after a first paragraph is never dropped.
        kids = [k for k in root.contents if not (isinstance(k, str) and not k.strip())]
        if len(kids) == 1 and getattr(kids[0], 'name', None) == 'p':
            root = kids[0]
        c.source = enc(root)
def fix_sec(o):
    """Turn match `o` into an `@sec-` reference: group 1 with `_` replaced by `-`."""
    slug = o.group(1).replace('_', '-')
    return '@sec-' + slug
def fix_fig(o):
    """Turn match `o` into an `@fig-` reference: group 1 with `_` replaced by `-`."""
    slug = o.group(1).replace('_', '-')
    return '@fig-' + slug
def nb_xrefs(nb):
    """Rewrite `<<name>>` cross-references in the markdown cells of `nb`, in place.

    In every markdown cell whose source contains `<<`:

    - `<<chapter_NAME>>` is rewritten through `fix_sec`
    - any remaining `<<NAME>>` is rewritten through `fix_fig`

    Other cells are left untouched. Returns `None`.
    """
    for cell in nb.cells:
        text = cell.source
        if cell.cell_type != 'markdown' or '<<' not in text:
            continue
        # Chapter references go first: the generic pattern would match them too.
        for pattern, repl in ((r'<<chapter_(\w+)>>', fix_sec), (r'<<(\w+)>>', fix_fig)):
            text = re.sub(pattern, repl, text)
        cell.source = text
def fix_lab(o):
    """Turn match `o` into a `#| label: fig-` line: group 1 with `_` replaced by `-`."""
    slug = o.group(1).replace('_', '-')
    return '#| label: fig-' + slug
def nb_comput(nb):
    """Rewrite `#id`/`#caption`/`#alt` comment lines in the code cells of `nb`, in place.

    Only code cells whose source contains `#id` are touched. In those cells,
    lines starting with:

    - `#id NAME` are rewritten through `fix_lab`
    - `#caption TEXT` become `#| fig-cap: "TEXT"`
    - `#alt TEXT` become `#| fig-alt: "TEXT"`

    Returns `None`.
    """
    for cell in nb.cells:
        src = cell.source
        if cell.cell_type != 'code' or '#id' not in src:
            continue
        rewrites = (
            (r'^#id +(\w+)', fix_lab),
            (r'^#caption +(.+)', r'#| fig-cap: "\1"'),
            (r'^#alt +(.+)', r'#| fig-alt: "\1"'),
        )
        # MULTILINE lets `^` anchor at the start of every line of the cell.
        for pattern, repl in rewrites:
            src = re.sub(pattern, repl, src, flags=re.MULTILINE)
        cell.source = src
def fix_nb(path):
    """Read the notebook at `path`, run `nb_images`, `nb_xrefs` and `nb_comput` on it in that order, and write it back to `path`."""
    nb = read_nb(path)
    for step in (nb_images, nb_xrefs, nb_comput):
        step(nb)
    write_nb(nb, path)
# fix_nb('../mnist_basics.ipynb')
# for path in Path('..').ls(file_exts=['.ipynb']): fix_nb(path)