media
#default_exp media
Convert HTML and Images to MDX
Make allowances for plots and dataframes in MDX
#export
from nbconvert.preprocessors import Preprocessor
from fastcore.xtras import Path
from html.parser import HTMLParser
#hide
from nbdoc.test_utils import run_preprocessor
#export
class HTMLdf(HTMLParser):
    """Detect whether an HTML fragment looks like a rendered dataframe.

    A fragment matches when text inside a ``<style scoped>`` element
    mentions ``.dataframe``. Use `HTMLdf.search(html)` for a one-shot check.
    """
    df = False      # flipped to True once a match has been seen
    scoped = False  # True only while inside a `<style scoped>` element

    def handle_starttag(self, tag, attrs):
        # Only `<style>` tags carrying a `scoped` attribute are of interest.
        if tag != 'style':
            return
        if any(name == 'scoped' for name, _ in attrs):
            self.scoped = True

    def handle_data(self, data):
        # Text only counts while we are inside a scoped stylesheet.
        if self.scoped and '.dataframe' in data:
            self.df = True

    def handle_endtag(self, tag):
        # Leaving any `<style>` element ends the scoped region.
        if tag == 'style':
            self.scoped = False

    @classmethod
    def search(cls, x):
        "Return True if the HTML string `x` contains a scoped `.dataframe` stylesheet."
        finder = cls()
        finder.feed(x)
        return finder.df
# Fixture: an HTML fragment whose scoped stylesheet mentions `.dataframe`.
_test_html = """<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>"""
# The scoped `.dataframe` stylesheet is detected ...
assert HTMLdf.search(_test_html)
# ... while markup without any stylesheet is not.
assert not HTMLdf.search('<div></div>')
#export
class HTMLEscape(Preprocessor):
    """
    Place HTML in a codeblock and surround it with a <HTMLOutputBlock> component.

    For every code-cell output that carries `text/html`, this preprocessor:

    * sets `cell.metadata.html_output = True`,
    * sets `cell.metadata.html_center` to False when `HTMLdf.search` finds a
      dataframe in the HTML and True otherwise,
    * replaces the HTML with the same (stripped) HTML inside a fenced
      ```html code block.

    NOTE(review): the <HTMLOutputBlock> wrapper itself is not emitted here;
    presumably the export template reads the metadata flags above -- confirm.
    """
    def preprocess_cell(self, cell, resources, index):
        # Only code cells have outputs to rewrite.
        if cell.cell_type != 'code':
            return cell, resources
        for o in cell.outputs:
            html = (o.get('data') or {}).get('text/html')
            if not html:
                continue
            cell.metadata.html_output = True
            # Center everything except dataframes.
            cell.metadata.html_center = not HTMLdf.search(html)
            o['data']['text/html'] = '```html\n' + html.strip() + '\n```'
        return cell, resources
By default, HTML is incompatible with MDX. We place the HTML in a code block and wrap it with a custom component so that the static site generator can render it.
# Run HTMLEscape over the pandas test notebook and display the converted result.
c, _ = run_preprocessor([HTMLEscape], 'test_files/pandas.ipynb', display_results=True)
# The output is wrapped in <HTMLOutputBlock> and, being a dataframe, must not mention `center`.
assert '<HTMLOutputBlock' in c and '</HTMLOutputBlock>' in c and 'center' not in c
# The raw HTML must sit inside a fenced ```html block.
assert '```html\n<div>' in c and '</div>\n```' in c
```python
import pandas as pd
pd.read_csv('https://github.com/outerbounds/.data/raw/main/hospital_readmission.csv').head().iloc[:, :15]
```
<HTMLOutputBlock >
```html
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>time_in_hospital</th>
<th>num_lab_procedures</th>
<th>num_procedures</th>
<th>num_medications</th>
<th>number_outpatient</th>
<th>number_emergency</th>
<th>number_inpatient</th>
<th>number_diagnoses</th>
<th>race_Caucasian</th>
<th>race_AfricanAmerican</th>
<th>gender_Female</th>
<th>age_[70-80)</th>
<th>age_[60-70)</th>
<th>age_[50-60)</th>
<th>age_[80-90)</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>14</td>
<td>41</td>
<td>0</td>
<td>11</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>6</td>
<td>True</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>True</td>
<td>False</td>
</tr>
<tr>
<th>1</th>
<td>2</td>
<td>30</td>
<td>0</td>
<td>12</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>9</td>
<td>True</td>
<td>False</td>
<td>True</td>
<td>False</td>
<td>False</td>
<td>True</td>
<td>False</td>
</tr>
<tr>
<th>2</th>
<td>5</td>
<td>66</td>
<td>0</td>
<td>22</td>
<td>1</td>
<td>0</td>
<td>2</td>
<td>9</td>
<td>True</td>
<td>False</td>
<td>True</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>True</td>
</tr>
<tr>
<th>3</th>
<td>3</td>
<td>63</td>
<td>0</td>
<td>8</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>8</td>
<td>True</td>
<td>False</td>
<td>True</td>
<td>False</td>
<td>False</td>
<td>True</td>
<td>False</td>
</tr>
<tr>
<th>4</th>
<td>5</td>
<td>40</td>
<td>0</td>
<td>6</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>9</td>
<td>True</td>
<td>False</td>
<td>True</td>
<td>False</td>
<td>False</td>
<td>False</td>
<td>True</td>
</tr>
</tbody>
</table>
</div>
```
</HTMLOutputBlock>
#hide
# Run HTMLEscape over the altair test notebook.
c, _ = run_preprocessor([HTMLEscape], 'test_files/altair.ipynb')
# Expect `center` in the result -- presumably because this HTML output is not
# detected as a dataframe, so HTMLEscape sets `html_center` to True.
assert 'center' in c
#export
class ImageSave(Preprocessor):
    """Saves images stored as bytes in notebooks to disk.

    Reads the notebook `name` and `path` from `resources['metadata']` and the
    extracted files from `resources['outputs']` (a mapping of file name to
    bytes). Each file is written to `<path>/_<name>_files/<file name>`, and
    `resources['fmap']` records the mapping from the original file name to
    that relative location. Nothing is written, and `fmap` is not set, when
    the notebook name or the outputs are missing.
    """
    def preprocess(self, nb, resources):
        meta = resources.get('metadata', {})
        nb_name = meta.get('name')
        nb_path = meta.get('path')
        outfiles = resources.get('outputs')
        if nb_name and outfiles:
            fmap = resources['fmap'] = {}
            for k, v in outfiles.items():
                rel = f'_{nb_name}_files/{k}'
                dest = Path(nb_path)/rel
                # `parents=True` so that a missing notebook directory or a
                # nested output name does not raise FileNotFoundError.
                dest.parent.mkdir(parents=True, exist_ok=True)
                dest.write_bytes(v)
                fmap[f'{k}'] = rel
        return nb, resources
class ImagePath(Preprocessor):
    """Changes the image path to the location where `ImageSave` saved the files.

    Looks up every entry of each output's `metadata['filenames']` in
    `resources['fmap']` and substitutes the mapped path; names without a
    mapping are left as they are. Cells pass through untouched when no
    `fmap` is available.
    """
    def preprocess_cell(self, cell, resources, index):
        saved = resources.get('fmap')
        if not saved:
            return cell, resources
        for output in cell.get('outputs', []):
            filenames = output.get('metadata', {}).get('filenames', {})
            # Snapshot the items so values can be reassigned while looping.
            for mime, original in list(filenames.items()):
                filenames[mime] = saved.get(original, original)
        return cell, resources
`ImageSave` and `ImagePath` must be used together: the first extracts the images from the notebook and saves them to disk, and the second rewrites the image paths to point at the saved files. This is necessary for compatibility with plotting libraries such as matplotlib.
# Run ImageSave + ImagePath over the matplotlib test notebook and display the result.
c, _ = run_preprocessor([ImageSave, ImagePath], 'test_files/matplotlib.ipynb', display_results=True)
# `assert '' in c` is always true; instead check that the image link was
# rewritten to the `_<notebook name>_files/` directory that ImageSave writes to.
assert '_matplotlib_files/' in c
```python
from matplotlib import pyplot as plt
plt.plot(range(20), range(20))
plt.plot(range(10), range(10))
```
<CodeOutputBlock lang="python">
[<matplotlib.lines.Line2D at 0x7f9d6922faf0>]

</CodeOutputBlock>
# Run ImageSave + ImagePath over the altair_jpeg test notebook.
c, _ = run_preprocessor([ImageSave, ImagePath], 'test_files/altair_jpeg.ipynb')
# The image link must point into the `_altair_jpeg_files/` directory.
assert '![svg](_altair_jpeg_files/output_0_0.svg' in c