Source code for sqltrack.notebook
from __future__ import annotations
import hashlib
import inspect
import math
from datetime import datetime
from datetime import timedelta
from functools import partial
from pathlib import Path
from typing import Sequence
from typing import Union
import humanize
from IPython.display import HTML
from IPython.display import display
import pandas as pd
# Names exported by ``from sqltrack.notebook import *``.
# ``format_bool`` and ``format_string`` are public, documented formatters
# defined in this module, so they are exported alongside the others.
__all__ = [
    "init_notebook_mode",
    "textcolor",
    "format_marked",
    "format_status",
    "format_tags",
    "format_bool",
    "format_string",
    "format_datetime",
    "format_datetime_relative",
    "format_timedelta",
    "format_percentage",
    "format_float",
    "format_dataframe",
]
def _isnan(v):
try:
return math.isnan(v)
except TypeError:
return False
def _not_a_value(v):
    """
    Return a truthy result if ``v`` should be treated as missing.

    A value counts as missing if it is None, a NaN (see ``_isnan``),
    or reported as null by :py:func:`pandas.isnull`.
    """
    if v is None:
        return True
    if _isnan(v):
        return True
    # last resort: let pandas decide (covers e.g. NaT)
    return pd.isnull(v)
# absolute path of the "css" directory that sits next to this module
CSS_DIR = Path(__file__).parent.joinpath("css").absolute()
def init_notebook_mode():
    """
    Add the sqltrack stylesheet to the notebook.

    Reads ``notebook.css`` from ``CSS_DIR`` and displays its content
    wrapped in a ``<style>`` element.
    """
    # Decode explicitly so the result does not depend on the locale's
    # default encoding.
    # NOTE(review): assumes the bundled stylesheet is UTF-8 (plain ASCII
    # is a subset) -- confirm.
    css = (CSS_DIR / "notebook.css").read_text(encoding="utf-8")
    display(HTML("<style>\n" + css + "\n</style>"))
# Default palette for ``textcolor``: one color name per letter A-Z.
# ``format_tags`` emits the chosen name as a CSS class on the tag bubble.
COLORS = [
    "Amethyst", "Blue", "Caramel", "Damson", "Ebony",
    "Forest", "Green", "Honeydew", "Iron", "Jade",
    "Khaki", "Lime", "Mallow", "Navy", "Orpiment",
    "Pink", "Quagmire", "Red", "Sky", "Turquoise",
    "Uranium", "Violet", "Wine", "Xanthin", "Yellow",
    "Zinnia",
]
def textcolor(text: str, colors: Sequence[str] = None):
    """
    Pick a color name for ``text``, determined by a hash of the text.

    The same text always maps to the same entry of ``colors``.

    Parameters:
        text: string to pick a color for
        colors: sequence of color names to choose from;
            defaults to ``COLORS`` if None
    """
    palette = COLORS if colors is None else colors
    # a one-byte BLAKE2b digest gives a stable value in [0, 255]
    digest = hashlib.blake2b(text.encode('utf-8'), digest_size=1).digest()
    return palette[digest[0] % len(palette)]
def format_marked(is_marked: bool):
    """
    Return a gold star if ``is_marked`` is truthy, else an empty string.

    The star is preceded by a span of class ``invisible`` containing
    the text "marked".
    """
    if not is_marked:
        return ''
    hidden = '<span class="invisible">marked</span>'
    star = '<span title="marked" class="marked">⭐</span>'
    return hidden + star
def format_status(status: str):
    """
    Return an icon for the given status.

    The status text is emitted in a span of class ``invisible`` and is
    used as both the title and a CSS class of the (empty) icon span.
    """
    hidden = f'<span class="invisible">{status}</span>'
    icon = f'<span title="{status}" class="status {status}"></span>'
    return hidden + icon
def format_tags(tags: dict):
    """
    Return tag bubbles for the given tags.

    Each tag becomes a span whose CSS class includes the color chosen
    by :py:func:`textcolor`. The ``"marked"`` tag is skipped. The
    passed-in mapping is never modified.

    Parameters:
        tags: mapping whose keys are the tag names;
            None or an empty mapping yields an empty string
    """
    # Filter instead of ``tags.pop("marked")``: the dict may be the very
    # object stored in a caller's DataFrame cell, and popping would
    # silently remove the tag from the caller's data.
    return " ".join(
        f'<span title="{t}" class="tag {textcolor(t)}">{t}</span>'
        for t in (tags or {})
        if t != "marked"
    )
def format_bool(b: bool, na_rep="--") -> str:
    """
    Return bool value as a centered symbol.

    Parameters:
        b: bool to format
        na_rep: replacement string for missing values
            (None, NaN, ``pd.NA``)
    """
    # Check for missing values before truth-testing: NaN is truthy and
    # would render as "true", and ``bool(pd.NA)`` raises TypeError.
    if b is None or pd.isna(b):
        rep = na_rep
    elif b:
        rep = "⏺"
    else:
        rep = "⭘"
    return f'<span title="{b}" class="center">{rep}</span>'
def format_string(s: str, ellipsis="left", na_rep="--") -> str:
    """
    Return string wrapped to display long text with ellipsis.

    Parameters:
        s: string to wrap
        ellipsis: if "left" (default), ellipsis is placed on the
            left and the end is displayed fully;
            if any other truthy value (e.g. True), ellipsis is placed
            on the right and the beginning of the string is displayed;
            if False, the string is returned as-is
        na_rep: replacement string for missing values
            (None, NaN, ``pd.NA``)
    """
    # Only scalar "missing" markers are checked here; a generic
    # ``pd.isnull`` call would return an array for list-like cells.
    missing = (
        s is None
        or s is pd.NA
        or (isinstance(s, float) and math.isnan(s))
    )
    rep = na_rep if missing else s
    if ellipsis == "left":
        return f'<span class="ellipsis-left"><bdi title="{s}">{rep}</bdi></span>'
    if ellipsis:
        return f'<span class="ellipsis-right" title="{s}">{rep}</span>'
    # return the replacement, not the raw value, so that missing values
    # honour ``na_rep`` in the unwrapped case as well
    return rep
def _localize(dt: datetime):
try:
# unlike native datetime objects, pandas timestamps simply
# drop the timezone when astimezone is called with tz=None,
# so we try to convert to native datetime first...
dt = dt.to_pydatetime(warn=False)
except TypeError:
pass
return dt.astimezone().replace(tzinfo=None)
def format_datetime(dt: datetime, sep=" ", timespec="seconds", na_rep="--") -> str:
    """
    Return datetime, converted to local time, in ISO format.

    The result also carries the full ``str()`` of the localized
    datetime in a span of class ``invisible``.

    Parameters:
        dt: datetime to format
        sep: date and time separator
        timespec: precision of the time part, one of 'auto', 'hours',
            'minutes', 'seconds', 'milliseconds' and 'microseconds'
        na_rep: replacement string for NaN values
    """
    if _not_a_value(dt):
        return na_rep
    local = _localize(dt)
    iso = local.isoformat(sep=sep, timespec=timespec)
    hidden = f'<span class="invisible">{local}</span>'
    return hidden + f'<span>{iso}</span>'
def format_datetime_relative(dt: datetime, sep=" ", timespec="seconds", na_rep="--") -> str:
    """
    Return time since given datetime in human-readable form.

    The ISO representation of the localized datetime is used as the
    tooltip and is also emitted in a span of class ``invisible``.

    Parameters:
        dt: datetime to format
        sep: date and time separator of the ISO representation
        timespec: precision of the time part of the ISO representation,
            one of 'auto', 'hours', 'minutes', 'seconds',
            'milliseconds' and 'microseconds'
        na_rep: replacement string for NaN values
    """
    if _not_a_value(dt):
        return na_rep
    local = _localize(dt)
    human = humanize.naturaltime(local)
    iso = local.isoformat(sep=sep, timespec=timespec)
    hidden = f'<span class="invisible">{iso}</span>'
    visible = f'<span title="{iso}">{human}</span>'
    return hidden + visible
def format_timedelta(td: timedelta, na_rep="--") -> str:
    """
    Return a timedelta in human-readable form.

    The duration rounded to whole seconds is used as the tooltip and
    is also emitted in a span of class ``invisible``.

    Parameters:
        td: timedelta to format
        na_rep: replacement string for NaN values
    """
    if _not_a_value(td):
        return na_rep
    total = td.total_seconds()
    # second guard: the value itself passed the check above, but its
    # total number of seconds may still be NaN
    if math.isnan(total):
        return na_rep
    human = humanize.naturaldelta(td)
    exact = str(timedelta(seconds=round(total)))
    hidden = f'<span class="invisible">{exact}</span>'
    visible = f'<span title="{exact}">{human}</span>'
    return hidden + visible
def format_percentage(v, mul=100, spec=".1f", na_rep="--") -> str:
    """
    Returns a percentage value with bar in background.

    The unscaled input value is also emitted in a span of class
    ``invisible``.

    Parameters:
        v: percentage value to format
        mul: multiplicative factor for display;
            defaults to 100 for float values in [0,1]
        spec: format spec; default :python:`".1f"`
        na_rep: replacement string for NaN values
    """
    if _not_a_value(v):
        return na_rep
    sortvalue = v
    v *= mul
    pct = int(round(v))
    if v < 50:
        # short bar: indent the text to just past the end of the bar
        classes = "bar left"
        prop = f"text-indent: {pct+5}%"
    else:
        # long bar: pad by the unfilled remainder of the bar. After
        # scaling, pct is a percentage of the full width, so the
        # remainder is 100 - pct whatever ``mul`` is.
        classes = "bar"
        prop = f"padding-right: {100-pct+5}%"
    return f'<span class="invisible">{sortvalue}</span>' \
        + f'<span class="{classes}" style="background: linear-gradient' \
        + f'(to right, var(--barcolor) {pct}%, transparent 0%); ' \
        + f'{prop}">{v:{spec}}%</span>'
def format_float(v: float, spec=".2f", na_rep="--") -> str:
    """
    Format a float value.

    The unformatted value is kept as the tooltip of the returned span.

    Parameters:
        v: float value to format
        spec: format spec; default :python:`".2f"`
        na_rep: replacement string for NaN values
    """
    rep = na_rep if _not_a_value(v) else f"{v:{spec}}"
    return f"<span title={v}>{rep}</span>"
# Default formatters by column name; ``format_dataframe`` looks names up
# case-insensitively. Several aliases share the same formatter.
DEFAULT_MAPPING = {
    **dict.fromkeys((" ", "m", "marked"), format_marked),
    **dict.fromkeys(("s", "status"), format_status),
    "tags": format_tags,
    "progress": format_percentage,
}
def _is_datetime(v):
return pd.api.types.is_datetime64_any_dtype(v)
def _is_timedelta(v):
return pd.api.types.is_timedelta64_dtype(v)
def _is_float(v):
return pd.api.types.is_float_dtype(v)
def _is_bool(v):
return pd.api.types.is_bool_dtype(v)
def _is_string(v):
return pd.api.types.is_string_dtype(v)
def _partial_function(func, **kwargs):
parameters = inspect.signature(func).parameters
mapped_kwargs = {k: v for k, v in kwargs.items() if k in parameters}
return partial(func, **mapped_kwargs)
def _normalize(v):
try:
return v.lower()
except AttributeError:
return v
def format_dataframe(
    df: pd.DataFrame,
    formatting: Union[dict, None] = None,
    na_rep="--",
    relative_datetimes: bool = True,
    string_ellipsis: Union[str, bool] = "left",
) -> pd.DataFrame:
    """
    Returns a copy of the given Pandas DataFrame with
    formatting applied.

    By default the following functions are applied to
    the following columns (case-insensitive):

    * ``" "``: :py:func:`format_marked`
    * ``"m"``: :py:func:`format_marked`
    * ``"marked"``: :py:func:`format_marked`
    * ``"s"``: :py:func:`format_status`
    * ``"status"``: :py:func:`format_status`
    * ``"tags"``: :py:func:`format_tags`
    * ``"progress"``: :py:func:`format_percentage`

    If the column name is not found in the formatting
    function dictionary, then the formatting function
    is selected based on dtype, checked in this order:

    * Any :py:class:`datetime`-like: :py:func:`format_datetime_relative`
      or :py:func:`format_datetime` if ``relative_datetimes`` is False
    * Any :py:class:`timedelta`-like: :py:func:`format_timedelta`
    * Any :py:class:`float`-like: :py:func:`format_float`
    * Any :py:class:`bool`-like: :py:func:`format_bool`
    * Any :py:class:`str`-like: :py:func:`format_string` with the given
      ``string_ellipsis`` parameter

    Columns that match none of the above are left as they are.

    Parameters:
        df: DataFrame to format; it is not modified
        formatting: overwrite the default format functions
            for named columns; names are case-insensitive
        na_rep: replacement string for NaN values; passed to every
            formatting function that has a ``na_rep`` parameter and
            used to fill any NaN values that remain
        relative_datetimes: if True (default), use
            :py:func:`format_datetime_relative` for columns with
            datetime-like dtype, else :py:func:`format_datetime`
        string_ellipsis: passed as ``ellipsis`` parameter to
            :py:func:`format_string` for columns with str-like dtype,
            and to named formatting functions that have an
            ``ellipsis`` parameter
    """
    # Merge with dict unpacking in a display: the keyword form
    # ``dict(DEFAULT_MAPPING, **formatting)`` raises TypeError for
    # non-string keys such as integer column labels.
    merged = {**DEFAULT_MAPPING, **(formatting or {})}
    by_name = {
        _normalize(name): _partial_function(
            func,
            na_rep=na_rep,
            ellipsis=string_ellipsis,
        )
        for name, func in merged.items()
    }

    # build the dtype-based formatters once so we don't have to
    # do this for every column; order matters, first match wins
    datetime_formatter = format_datetime
    if relative_datetimes:
        datetime_formatter = format_datetime_relative
    by_dtype = (
        (_is_datetime, partial(datetime_formatter, na_rep=na_rep)),
        (_is_timedelta, partial(format_timedelta, na_rep=na_rep)),
        (_is_float, partial(format_float, na_rep=na_rep)),
        (_is_bool, partial(format_bool, na_rep=na_rep)),
        (
            _is_string,
            partial(format_string, na_rep=na_rep, ellipsis=string_ellipsis),
        ),
    )

    # use a shallow copy of the DataFrame so we can replace columns
    df = df.copy(deep=False)

    # apply formatting to columns; a formatter registered by name
    # takes precedence over the dtype-based choice
    for c in df.columns:
        col = df[c]
        func = by_name.get(_normalize(c))
        if not func:
            func = next(
                (fmt for matches, fmt in by_dtype if matches(col)),
                None,
            )
        if func:
            df[c] = col.apply(func)

    # replace any remaining NaN values
    return df.fillna(na_rep)