# Source code for sqltrack.notebook

from __future__ import annotations

import hashlib
import html
import inspect
import math
from datetime import datetime
from datetime import timedelta
from functools import partial
from pathlib import Path
from typing import Sequence
from typing import Union

import humanize
from IPython.display import HTML
from IPython.display import display

import pandas as pd

# Names exported by ``from sqltrack.notebook import *``.
# ``format_bool`` and ``format_string`` are listed as well: both are
# public (no leading underscore) formatters with docstrings, just like
# the other ``format_*`` functions.
__all__ = [
    "init_notebook_mode",
    "textcolor",
    "format_marked",
    "format_status",
    "format_tags",
    "format_bool",
    "format_string",
    "format_datetime",
    "format_datetime_relative",
    "format_timedelta",
    "format_percentage",
    "format_float",
    "format_dataframe",
]


def _isnan(v):
    try:
        return math.isnan(v)
    except TypeError:
        return False


def _not_a_value(v):
    return v is None or _isnan(v) or pd.isnull(v)


# Absolute path of the "css" directory located next to this module.
CSS_DIR = Path(__file__).parent.joinpath("css").absolute()


def init_notebook_mode():
    """
    Add the sqltrack stylesheet to the notebook.

    Reads ``notebook.css`` from ``CSS_DIR`` and displays its content
    wrapped in a ``<style>`` element.
    """
    path = CSS_DIR / "notebook.css"
    # Decode explicitly instead of relying on the locale's default
    # encoding, so the result is the same on every platform.
    # NOTE(review): assumes the stylesheet is stored as UTF-8 (or ASCII).
    css = path.read_text(encoding="utf-8")
    display(HTML("<style>\n" + css + "\n</style>"))
# Default palette used by textcolor(): one color name per letter A-Z.
COLORS = [
    "Amethyst",
    "Blue",
    "Caramel",
    "Damson",
    "Ebony",
    "Forest",
    "Green",
    "Honeydew",
    "Iron",
    "Jade",
    "Khaki",
    "Lime",
    "Mallow",
    "Navy",
    "Orpiment",
    "Pink",
    "Quagmire",
    "Red",
    "Sky",
    "Turquoise",
    "Uranium",
    "Violet",
    "Wine",
    "Xanthin",
    "Yellow",
    "Zinnia",
]
def textcolor(text: str, colors: Union[Sequence[str], None] = None):
    """
    Return a color name for the given text, based on its hash value.

    The same text always maps to the same entry of ``colors``.

    Parameters:
        text: text to pick a color for
        colors: color names to choose from; defaults to ``COLORS``
            if None

    Raises:
        ValueError: if ``colors`` is empty
    """
    if colors is None:
        colors = COLORS
    if len(colors) == 0:
        # would otherwise surface as an opaque ZeroDivisionError below
        raise ValueError("colors must contain at least one color name")
    # one-byte digest: 256 possible hash values mapped onto the palette
    digest = hashlib.blake2b(text.encode('utf-8'), digest_size=1).digest()
    return colors[int.from_bytes(digest, byteorder='big') % len(colors)]
def format_marked(is_marked: bool):
    """
    Return a gold star if ``is_marked`` is truthy, else an empty string.

    The star is preceded by an invisible span containing the
    text "marked".
    """
    if not is_marked:
        return ''
    hidden = '<span class="invisible">marked</span>'
    star = '<span title="marked" class="marked">⭐</span>'
    return hidden + star
def format_status(status: str):
    """
    Return an icon for the given status.

    The status is used as hidden text, as the tooltip, and as
    an additional CSS class of the icon span.
    """
    hidden = f'<span class="invisible">{status}</span>'
    icon = f'<span title="{status}" class="status {status}"></span>'
    return hidden + icon
def format_tags(tags: dict):
    """
    Return tag bubbles for the given tags.

    The ``"marked"`` tag is skipped. The given ``tags`` object is
    not modified.

    Parameters:
        tags: tags to format; iterating it must yield the tag names;
            None or empty yields an empty string
    """
    if not tags:
        return ""
    bubbles = []
    for t in tags:
        # skip instead of popping: the dict belongs to the caller
        # (e.g. it is shared with the original DataFrame)
        if t == "marked":
            continue
        # tag names are free text, so escape them for use in HTML
        label = html.escape(t, quote=True)
        bubbles.append(
            f'<span title="{label}" class="tag {textcolor(t)}">{label}</span>'
        )
    return " ".join(bubbles)
def format_bool(b: bool, na_rep="--") -> str:
    """
    Return bool value.

    Truthy values are shown as a filled circle, falsy values as an
    empty circle. The tooltip holds the string form of ``b``.

    Parameters:
        b: bool to format
        na_rep: replacement string for missing values
            (None, NaN, ``pandas.NA``)
    """
    # pd.NA cannot be truth-tested and NaN is truthy,
    # so missing values must be detected before the elif below
    if b is None or pd.isna(b):
        rep = na_rep
    elif b:
        rep = "⏺"
    else:
        rep = "⭘"
    return f'<span title="{b}" class="center">{rep}</span>'
def format_string(s: str, ellipsis="left", na_rep="--") -> str:
    """
    Return string wrapped to display long text with ellipsis.

    When wrapping, the text is HTML-escaped so that quotes or
    angle brackets in ``s`` cannot break the generated markup.

    Parameters:
        s: string to wrap
        ellipsis: if "left" (default), ellipsis is placed on the left
            and the end is displayed fully;
            if True, ellipsis is placed on the right
            and the beginning of the string is displayed;
            if False, the string is returned as-is
        na_rep: replacement string for None values
    """
    if not ellipsis:
        # no wrapping requested: hand the value back untouched,
        # but still honor na_rep for missing values
        return na_rep if s is None else s
    title = html.escape(str(s), quote=True)
    # na_rep is supplied by the caller and inserted verbatim
    text = na_rep if s is None else html.escape(str(s), quote=False)
    if ellipsis == "left":
        return f'<span class="ellipsis-left"><bdi title="{title}">{text}</bdi></span>'
    return f'<span class="ellipsis-right" title="{title}">{text}</span>'
def _localize(dt: datetime): try: # unlike native datetime objects, pandas timestamps simply # drop the timezone when astimezone is called with tz=None, # so we try to convert to native datetime first... dt = dt.to_pydatetime(warn=False) except TypeError: pass return dt.astimezone().replace(tzinfo=None)
def format_datetime(dt: datetime, sep=" ", timespec="seconds", na_rep="--") -> str:
    """
    Return datetime in ISO format.

    The value is converted to local time first. An invisible span
    with the default string form of the local datetime precedes
    the ISO text.

    Parameters:
        dt: datetime to format
        sep: date and time separator
        timespec: precision of the time part, one of
            'auto', 'hours', 'minutes', 'seconds',
            'milliseconds' and 'microseconds'
        na_rep: replacement string for NaN values
    """
    if _not_a_value(dt):
        return na_rep
    local = _localize(dt)
    iso = local.isoformat(sep=sep, timespec=timespec)
    hidden = f'<span class="invisible">{local}</span>'
    return hidden + f'<span>{iso}</span>'
def format_datetime_relative(dt: datetime, sep=" ", timespec="seconds", na_rep="--") -> str:
    """
    Return time since given datetime in human-readable form.

    The ISO representation of the (local) datetime is used as
    invisible text and as tooltip of the human-readable text.

    Parameters:
        dt: datetime to format
        sep: date and time separator of the ISO representation
        timespec: precision of the time part of the ISO
            representation, one of 'auto', 'hours', 'minutes',
            'seconds', 'milliseconds' and 'microseconds'
        na_rep: replacement string for NaN values
    """
    if _not_a_value(dt):
        return na_rep
    local = _localize(dt)
    age = humanize.naturaltime(local)
    stamp = local.isoformat(sep=sep, timespec=timespec)
    hidden = f'<span class="invisible">{stamp}</span>'
    return hidden + f'<span title="{stamp}">{age}</span>'
def format_timedelta(td: timedelta, na_rep="--") -> str:
    """
    Return a timedelta in human-readable form.

    The duration rounded to whole seconds is used as invisible
    text and as tooltip of the human-readable text.

    Parameters:
        td: timedelta to format
        na_rep: replacement string for NaN values
    """
    if _not_a_value(td):
        return na_rep
    total = td.total_seconds()
    # a timedelta may still report NaN seconds
    if math.isnan(total):
        return na_rep
    spoken = humanize.naturaldelta(td)
    exact = str(timedelta(seconds=round(total)))
    hidden = f'<span class="invisible">{exact}</span>'
    return hidden + f'<span title="{exact}">{spoken}</span>'
def format_percentage(v, mul=100, spec=".1f", na_rep="--") -> str:
    """
    Returns a percentage value with bar in background.

    The unscaled value is emitted in an invisible span before the bar.

    Parameters:
        v: percentage value to format
        mul: multiplicative factor for display;
            defaults to 100 for float values in [0,1]
        spec: format spec; default :python:`".1f"`
        na_rep: replacement string for NaN values
    """
    if _not_a_value(v):
        return na_rep
    sortvalue = v
    v = v * mul
    pct = int(round(v))
    if v < 50:
        # short bar: place the text to the right of the bar
        classes = "bar left"
        prop = f"text-indent: {pct+5}%"
    else:
        # long bar: keep the text inside the bar; the remaining width is
        # measured against the full 100% scale, independent of ``mul``
        classes = "bar"
        prop = f"padding-right: {100-pct+5}%"
    return f'<span class="invisible">{sortvalue}</span>' \
        + f'<span class="{classes}" style="background: linear-gradient' \
        + f'(to right, var(--barcolor) {pct}%, transparent 0%); ' \
        + f'{prop}">{v:{spec}}%</span>'
def format_float(v: float, spec=".2f", na_rep="--") -> str:
    """
    Format a float value.

    The unformatted value is available as tooltip.

    Parameters:
        v: float value to format
        spec: format spec; default :python:`".2f"`
        na_rep: replacement string for NaN values
    """
    rep = na_rep if _not_a_value(v) else f"{v:{spec}}"
    # the attribute value is quoted, since the string form of a
    # missing value (e.g. "<NA>") is not valid as unquoted attribute
    return f'<span title="{v}">{rep}</span>'
# Format functions applied by default to columns with these names.
DEFAULT_MAPPING = {
    " ": format_marked,
    "m": format_marked,
    "marked": format_marked,
    "s": format_status,
    "status": format_status,
    "tags": format_tags,
    "progress": format_percentage,
}


def _is_datetime(v):
    """Return True if ``v`` has a datetime64 dtype (naive or tz-aware)."""
    return pd.api.types.is_datetime64_any_dtype(v)


def _is_timedelta(v):
    """Return True if ``v`` has a timedelta64 dtype."""
    return pd.api.types.is_timedelta64_dtype(v)


def _is_float(v):
    """Return True if ``v`` has a float dtype."""
    return pd.api.types.is_float_dtype(v)


def _is_bool(v):
    """Return True if ``v`` has a bool dtype."""
    return pd.api.types.is_bool_dtype(v)


def _is_string(v):
    """Return True if ``v`` has a string dtype."""
    return pd.api.types.is_string_dtype(v)


def _partial_function(func, **kwargs):
    """
    Bind those ``kwargs`` to ``func`` that match one of its parameter names.

    If the signature of ``func`` cannot be inspected,
    ``func`` is returned unchanged.
    """
    try:
        parameters = inspect.signature(func).parameters
    except (TypeError, ValueError):
        # TypeError: object type not supported by inspect.signature
        # ValueError: no signature available (e.g. some builtins)
        return func
    mapped_kwargs = {k: v for k, v in kwargs.items() if k in parameters}
    return partial(func, **mapped_kwargs)


def _normalize(v):
    """Return ``v`` in lower case, or unchanged if it has no ``lower`` method."""
    try:
        return v.lower()
    except AttributeError:
        return v
def format_dataframe(
    df: pd.DataFrame,
    formatting: Union[dict, None] = None,
    na_rep="--",
    relative_datetimes: bool = True,
    string_ellipsis: Union[str, bool] = "left",
) -> pd.DataFrame:
    """
    Returns a copy of the given Pandas DataFrame with formatting applied.

    By default the following functions are applied to the following
    columns (case-insensitive):

    * ``" "``: :py:func:`format_marked`
    * ``"m"``: :py:func:`format_marked`
    * ``"marked"``: :py:func:`format_marked`
    * ``"s"``: :py:func:`format_status`
    * ``"status"``: :py:func:`format_status`
    * ``"tags"``: :py:func:`format_tags`
    * ``"progress"``: :py:func:`format_percentage`

    If the column name is not found in the formatting function
    dictionary, then the formatting function is selected based on
    dtype; the first match in this order is used:

    * Any :py:class:`datetime`-like: :py:func:`format_datetime_relative`
      or :py:func:`format_datetime` if ``relative_datetimes`` is False
    * Any :py:class:`timedelta`-like: :py:func:`format_timedelta`
    * Any :py:class:`float`-like: :py:func:`format_float`
    * Any :py:class:`bool`-like: :py:func:`format_bool`
    * Any :py:class:`str`-like: :py:func:`format_string` with the
      given ``string_ellipsis`` parameter

    Columns matching none of the above are not formatted.
    Finally, any remaining missing values are replaced by ``na_rep``.

    Parameters:
        df: DataFrame to format; a shallow copy is made,
            so its columns are not replaced
        formatting: overwrite the default format functions for
            named columns; names are case-insensitive
        na_rep: replacement string for missing values; also passed
            to named format functions that have a ``na_rep`` parameter
        relative_datetimes: if True (default), use
            :py:func:`format_datetime_relative` for columns with
            datetime-like dtype, else :py:func:`format_datetime`
        string_ellipsis: passed as ``ellipsis`` parameter to
            :py:func:`format_string` for columns with str-like dtype,
            and to named format functions that have an ``ellipsis``
            parameter

    Returns:
        The formatted copy of ``df``.
    """
    formatting = dict(DEFAULT_MAPPING, **(formatting or {}))
    formatting = {
        _normalize(name): _partial_function(
            func,
            na_rep=na_rep,
            ellipsis=string_ellipsis,
        )
        for name, func in formatting.items()
    }

    # use a shallow copy of the DataFrame so we can replace columns
    df = df.copy(deep=False)

    # prepare the dtype-based formatters once, in lookup order,
    # so we don't have to do this for every column
    datetime_func = format_datetime_relative if relative_datetimes else format_datetime
    dtype_formatters = [
        (_is_datetime, partial(datetime_func, na_rep=na_rep)),
        (_is_timedelta, partial(format_timedelta, na_rep=na_rep)),
        (_is_float, partial(format_float, na_rep=na_rep)),
        (_is_bool, partial(format_bool, na_rep=na_rep)),
        (_is_string, partial(format_string, na_rep=na_rep, ellipsis=string_ellipsis)),
    ]

    # apply formatting to columns
    for c in df.columns:
        col = df[c]
        # a formatter registered for the column name takes precedence
        func = formatting.get(_normalize(c))
        if not func:
            func = next(
                (fmt for has_dtype, fmt in dtype_formatters if has_dtype(col)),
                None,
            )
        if func:
            df[c] = col.apply(func)

    # replace any remaining NaN values
    return df.fillna(na_rep)