Source code for ehrapy.plot._missingno

from __future__ import annotations

from typing import TYPE_CHECKING

import ehrdata as ed
import missingno as msno

from ehrapy._compat import function_2D_only, use_ehrdata

if TYPE_CHECKING:
    from anndata import AnnData
    from ehrdata import EHRData


@function_2D_only()
@use_ehrdata(deprecated_after="1.0.0")
def missing_values_matrix(
    edata: EHRData | AnnData,
    *,
    filter: str | None = None,
    max_cols: int = 0,
    max_percentage: float = 0,
    sort: str | None = None,
    figsize: tuple = (25, 10),
    width_ratios: tuple = (15, 1),
    color: tuple = (0.25, 0.25, 0.25),
    fontsize: float = 16,
    labels: bool = True,
    label_rotation: float = 45,
    sparkline: bool = True,
    categoricals: bool = False,
    layer: str | None = None,
):  # pragma: no cover
    """Draw a nullity matrix for the given data object.

    The data object is converted to a pandas DataFrame and handed to
    `missingno.matrix`.

    Args:
        edata: Central data object.
        filter: The filter to apply to the matrix. Should be one of "top", "bottom", or None.
        max_cols: The max number of columns from the data object to include.
        max_percentage: The max percentage fill of the columns from the data object.
        sort: The row sort order to apply. Can be "ascending", "descending", or None.
        figsize: The size of the figure to display.
        width_ratios: The ratio of the width of the matrix to the width of the sparkline.
        color: The color of the filled columns.
        fontsize: The figure's font size.
        labels: Whether or not to display the column names.
        label_rotation: What angle to rotate the text labels to.
        sparkline: Whether or not to display the sparkline.
        categoricals: Whether to include "ehrapycat" columns to the plot.
        layer: The layer to use.

    Returns:
        The plot axis.

    Examples:
        >>> import ehrdata as ed
        >>> import ehrapy as ep
        >>> edata = ed.dt.mimic_2()
        >>> ep.pl.missing_values_matrix(edata, filter="bottom", max_cols=15, max_percentage=0.999)

    Preview:
        .. image:: /_static/docstring_previews/missingno_matrix.png
    """
    frame = ed.io.to_pandas(edata, layer=layer)

    if not categoricals:
        # Keep only columns whose name does not carry the "ehrapycat" prefix.
        kept_columns = [name for name in frame if not name.startswith("ehrapycat")]
        frame = frame[kept_columns]

    return msno.matrix(
        frame,
        filter,
        max_cols,
        max_percentage,
        sort,
        figsize,
        width_ratios,
        color,
        fontsize,
        labels,
        label_rotation,
        sparkline,
    )
@function_2D_only()
@use_ehrdata(deprecated_after="1.0.0")
def missing_values_barplot(
    edata: EHRData | AnnData,
    *,
    log: bool = False,
    filter: str | None = None,
    max_cols: int = 0,
    max_percentage: float = 0,
    sort: str | None = None,
    figsize: tuple | None = None,
    color: str = "dimgray",
    fontsize: float = 16,
    labels: bool | None = None,
    label_rotation: float = 45,
    orientation: str | None = None,
    categoricals: bool = False,
    layer: str | None = None,
):  # pragma: no cover
    """A bar chart visualization of the nullity of the given data object.

    The data object is converted to a pandas DataFrame and handed to
    `missingno.bar`.

    Args:
        edata: Central data object.
        log: Whether to display a logarithmic plot.
        filter: The filter to apply to the barplot. Should be one of "top", "bottom", or None.
        max_cols: The max number of columns from the data object to include.
        max_percentage: The max percentage fill of the columns from the data object.
        sort: The row sort order to apply. Can be "ascending", "descending", or None.
        figsize: The size of the figure to display.
        color: The color of the filled columns.
        fontsize: The figure's font size.
        labels: Whether to display the column names.
        label_rotation: What angle to rotate the text labels to.
        orientation: The way the bar plot is oriented.
        categoricals: Whether to include "ehrapycat" columns to the plot.
        layer: The layer to use.

    Returns:
        The plot axis.

    Examples:
        >>> import ehrdata as ed
        >>> import ehrapy as ep
        >>> edata = ed.dt.mimic_2()
        >>> ep.pl.missing_values_barplot(edata, filter="bottom", max_cols=15, max_percentage=0.999)

    Preview:
        .. image:: /_static/docstring_previews/missingno_barplot.png
    """
    df = ed.io.to_pandas(edata, layer=layer)

    if not categoricals:
        # Keep only columns whose name does not carry the "ehrapycat" prefix.
        df = df[[col for col in df if not col.startswith("ehrapycat")]]

    # Every option is passed by keyword: `msno.bar` takes an `ax` parameter
    # between `sort` and `orientation`, so a positional `orientation` would be
    # bound to `ax` instead of controlling the bar orientation.
    return msno.bar(
        df,
        figsize=figsize,
        fontsize=fontsize,
        labels=labels,
        label_rotation=label_rotation,
        log=log,
        color=color,
        filter=filter,
        n=max_cols,
        p=max_percentage,
        sort=sort,
        orientation=orientation,
    )
@function_2D_only()
@use_ehrdata(deprecated_after="1.0.0")
def missing_values_heatmap(
    edata: EHRData | AnnData,
    *,
    filter: str | None = None,
    max_cols: int = 0,
    max_percentage: float = 0,
    sort: str | None = None,
    figsize: tuple = (20, 12),
    fontsize: float = 16,
    labels: bool = True,
    label_rotation: float = 45,
    cmap: str = "RdBu",
    vmin: int = -1,
    vmax: int = 1,
    cbar: bool = True,
    categoricals: bool = False,
    layer: str | None = None,
):  # pragma: no cover
    """Draw a `seaborn` heatmap of the nullity correlation in the given data object.

    The data object is converted to a pandas DataFrame and handed to
    `missingno.heatmap`.

    Note that this visualization has no special support for large datasets.
    For those, try the dendrogram instead.

    Args:
        edata: Central data object.
        filter: The filter to apply to the heatmap. Should be one of "top", "bottom", or None.
        max_cols: The max number of columns from the data object to include.
        max_percentage: The max percentage fill of the columns from the data object.
        sort: The row sort order to apply. Can be "ascending", "descending", or None.
        figsize: The size of the figure to display.
        fontsize: The figure's font size.
        labels: Whether or not to display the column names.
        label_rotation: What angle to rotate the text labels to.
        cmap: What `matplotlib` colormap to use.
        vmin: The normalized colormap threshold.
        vmax: The normalized colormap threshold.
        cbar: Whether to draw a colorbar.
        categoricals: Whether to include "ehrapycat" columns to the plot.
        layer: The layer to use.

    Returns:
        The plot axis.

    Examples:
        >>> import ehrdata as ed
        >>> import ehrapy as ep
        >>> edata = ed.dt.mimic_2()
        >>> ep.pl.missing_values_heatmap(edata, filter="bottom", max_cols=15, max_percentage=0.999)

    Preview:
        .. image:: /_static/docstring_previews/missingno_heatmap.png
    """
    frame = ed.io.to_pandas(edata, layer=layer)

    if not categoricals:
        # Keep only columns whose name does not carry the "ehrapycat" prefix.
        kept_columns = [name for name in frame if not name.startswith("ehrapycat")]
        frame = frame[kept_columns]

    return msno.heatmap(
        frame,
        filter,
        max_cols,
        max_percentage,
        sort,
        figsize,
        fontsize,
        labels,
        label_rotation,
        cmap,
        vmin,
        vmax,
        cbar,
    )
@function_2D_only()
@use_ehrdata(deprecated_after="1.0.0")
def missing_values_dendrogram(
    edata: EHRData | AnnData,
    *,
    method: str = "average",
    filter: str | None = None,
    max_cols: int = 0,
    max_percentage: float = 0,
    orientation: str | None = None,
    figsize: tuple | None = None,
    fontsize: float = 16,
    label_rotation: float = 45,
    categoricals: bool = False,
    layer: str | None = None,
):
    """Fit a `scipy` hierarchical clustering and show the result as a `scipy` dendrogram.

    The data object is converted to a pandas DataFrame and handed to
    `missingno.dendrogram`.

    The default vertical display will fit up to 50 columns. If more than 50
    columns are specified and orientation is left unspecified the dendrogram
    will automatically swap to a horizontal display to fit the additional
    variables.

    Args:
        edata: Central data object.
        method: The distance measure being used for clustering. This parameter is passed to `scipy.hierarchy`.
        filter: The filter to apply to the dendrogram. Should be one of "top", "bottom", or None.
        max_cols: The max number of columns from the data object to include.
        max_percentage: The max percentage fill of the columns from the data object.
        orientation: The way the dendrogram is oriented.
        figsize: The size of the figure to display.
        fontsize: The figure's font size.
        label_rotation: What angle to rotate the text labels to.
        categoricals: Whether to include "ehrapycat" columns to the plot.
        layer: The layer to use.

    Returns:
        The plot axis.

    Examples:
        >>> import ehrdata as ed
        >>> import ehrapy as ep
        >>> edata = ed.dt.mimic_2()
        >>> ep.pl.missing_values_dendrogram(edata, filter="bottom", max_cols=15, max_percentage=0.999)

    Preview:
        .. image:: /_static/docstring_previews/missingno_dendrogram.png
    """
    frame = ed.io.to_pandas(edata, layer=layer)

    if not categoricals:
        # Keep only columns whose name does not carry the "ehrapycat" prefix.
        kept_columns = [name for name in frame if not name.startswith("ehrapycat")]
        frame = frame[kept_columns]

    return msno.dendrogram(
        frame,
        method,
        filter,
        max_cols,
        max_percentage,
        orientation,
        figsize,
        fontsize,
        label_rotation,
    )