# Source code for elastipy.plot.aggregation_plot_pd

from typing import Union, Sequence, Tuple, Optional, Any

from ..aggregation import Aggregation
from .backend import get_backend


class PandasPlotWrapper:
    """
    This is a short-hand accessor to the
    `pandas.DataFrame.plot <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.plot.html>`__
    interface.

    The documented parameters below will be passed to
    :link:`Aggregation.to_pandas`. All other parameters
    are passed to the respective functions of the pandas
    interface.

    .. CODE::

        s = Search()
        s.agg_terms("idx", field="a").execute().plot(
            to_index="idx",
            kind="bar",
        )

    :param index: ``bool`` or ``str``
        Sets a specific column as the index of the DataFrame.

            - If ``False`` no explicit index is set.
            - If ``True`` the root aggregation's keys will be the index.
            - If ``str`` explicitly set a certain column as the DataFrame index.

        .. NOTE::

            The column is kept in the DataFrame. If you want to set a
            column as index and remove it from the columns, use ``to_index``.

    :param to_index: ``bool`` or ``str``
        Same as ``index`` but the column is removed from the DataFrame.

            - If ``False`` no explicit index is set.
            - If ``True`` the root aggregation's keys will be the index.
            - If ``str`` explicitly set a certain column as the DataFrame index.

    :param include: ``str or list of str``
        Can be one or more (OR-combined) wildcard patterns.
        If used, any column that does not fit a pattern is removed.

    :param exclude: ``str or list of str``
        Can be one or more (OR-combined) wildcard patterns.
        If used, any column that fits a pattern is removed.

    :param flat: ``bool``, ``str`` or ``sequence of str``
        Can be one or more aggregation names that should be *flattened out*,
        meaning that each key of the aggregation creates a new column
        instead of a new row. If ``True``, all bucket aggregations are
        *flattened*.

        Only supported for bucket aggregations!

        .. NOTE::
            Currently not supported for the root aggregation!

    :param dtype:
        Numpy data type to force. Only a single dtype is allowed. If None, infer.

    :param default:
        This value will be used wherever a value is undefined.

    :return: :link:`matplotlib.axes.Axes` or numpy.ndarray of them
        If the backend is not the default matplotlib one, the return value
        will be the object returned by the backend.
    """

    def __init__(self, agg: "Aggregation"):
        """
        :param agg: The aggregation whose ``to_pandas`` / ``df_matrix``
            results are plotted.
        """
        self._agg = agg

    def __call__(
            self,
            *args,
            index: Union[bool, str] = False,
            to_index: Union[bool, str] = False,
            include: Optional[Union[str, Sequence[str]]] = None,
            exclude: Optional[Union[str, Sequence[str]]] = None,
            flat: Union[bool, str, Sequence[str]] = False,
            dtype=None,
            default=None,
            **kwargs,
    ):
        """
        Convert the aggregation to a DataFrame and call ``DataFrame.plot(...)``.

        The named parameters are consumed by the DataFrame conversion
        (see the class documentation), ``args`` and ``kwargs`` are
        forwarded to ``DataFrame.plot``.
        """
        return self._pd_plot(
            None,
            *args,
            index=index,
            to_index=to_index,
            include=include,
            exclude=exclude,
            flat=flat,
            dtype=dtype,
            default=default,
            **kwargs,
        )

    def _pd_plot(
            self,
            _function,
            *args,
            index: Union[bool, str] = False,
            to_index: Union[bool, str] = False,
            include: Optional[Union[str, Sequence[str]]] = None,
            exclude: Optional[Union[str, Sequence[str]]] = None,
            flat: Union[bool, str, Sequence[str]] = False,
            dtype=None,
            default=None,
            **kwargs,
    ):
        """
        Shared implementation of all pandas based plot methods.

        :param _function: ``str`` or ``None``
            Name of the attribute of ``DataFrame.plot`` to call
            (e.g. ``"line"``). Any falsy value calls ``DataFrame.plot``
            itself.

        The remaining named parameters are passed to the ``to_pandas``
        method of the wrapped aggregation, ``args`` and ``kwargs`` go to
        the plot function.

        :return: Whatever the called plot function returns.
        """
        df = self._agg.to_pandas(
            index=index,
            to_index=to_index,
            include=include,
            exclude=exclude,
            flat=flat,
            dtype=dtype,
            default=default,
        )
        plot_function = getattr(df.plot, _function) if _function else df.plot
        return plot_function(*args, **kwargs)

    def line(self, x=None, y=None, **kwargs):
        """
        Plot Series or DataFrame as lines.

        The DataFrame conversion parameters (``index``, ``to_index``, ...)
        can be passed as keyword arguments.

        See :link:`pandas.DataFrame.plot.line`
        """
        return self._pd_plot(
            "line", x=x, y=y, **kwargs,
        )

    def bar(self, x=None, y=None, **kwargs):
        """
        Vertical bar plot.

        The DataFrame conversion parameters (``index``, ``to_index``, ...)
        can be passed as keyword arguments.

        See :link:`pandas.DataFrame.plot.bar`
        """
        return self._pd_plot(
            "bar", x=x, y=y, **kwargs,
        )

    def barh(self, x=None, y=None, **kwargs):
        """
        Horizontal bar plot.

        The DataFrame conversion parameters (``index``, ``to_index``, ...)
        can be passed as keyword arguments.

        See :link:`pandas.DataFrame.plot.barh`
        """
        return self._pd_plot(
            "barh", x=x, y=y, **kwargs,
        )

    def box(self, by=None, **kwargs):
        """
        Make a box plot of the DataFrame columns.

        The DataFrame conversion parameters (``index``, ``to_index``, ...)
        can be passed as keyword arguments.

        See :link:`pandas.DataFrame.plot.box`
        """
        return self._pd_plot(
            "box", by=by, **kwargs,
        )

    def hist(self, by=None, bins=10, **kwargs):
        """
        Draw one histogram of the DataFrame's columns.

        The DataFrame conversion parameters (``index``, ``to_index``, ...)
        can be passed as keyword arguments.

        See :link:`pandas.DataFrame.plot.hist`
        """
        return self._pd_plot(
            "hist", by=by, bins=bins, **kwargs,
        )

    def kde(self, bw_method=None, ind=None, **kwargs):
        """
        Generate Kernel Density Estimate plot using Gaussian kernels.

        The DataFrame conversion parameters (``index``, ``to_index``, ...)
        can be passed as keyword arguments.

        See :link:`pandas.DataFrame.plot.kde`
        """
        return self._pd_plot(
            "kde", bw_method=bw_method, ind=ind, **kwargs,
        )

    def area(self, x=None, y=None, **kwargs):
        """
        Draw a stacked area plot.

        The DataFrame conversion parameters (``index``, ``to_index``, ...)
        can be passed as keyword arguments.

        See :link:`pandas.DataFrame.plot.area`
        """
        return self._pd_plot(
            "area", x=x, y=y, **kwargs,
        )

    def pie(self, **kwargs):
        """
        Generate a pie plot.

        The DataFrame conversion parameters (``index``, ``to_index``, ...)
        can be passed as keyword arguments.

        See :link:`pandas.DataFrame.plot.pie`
        """
        return self._pd_plot(
            "pie", **kwargs,
        )

    def scatter(self, x, y, s=None, c=None, **kwargs):
        """
        Create a scatter plot with varying marker point size and color.

        The DataFrame conversion parameters (``index``, ``to_index``, ...)
        can be passed as keyword arguments.

        See :link:`pandas.DataFrame.plot.scatter`
        """
        return self._pd_plot(
            "scatter", x=x, y=y, s=s, c=c, **kwargs,
        )

    def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs):
        """
        Generate a hexagonal binning plot.

        The DataFrame conversion parameters (``index``, ``to_index``, ...)
        can be passed as keyword arguments.

        See :link:`pandas.DataFrame.plot.hexbin`
        """
        return self._pd_plot(
            "hexbin", x=x, y=y, C=C, reduce_C_function=reduce_C_function, gridsize=gridsize, **kwargs,
        )

    def heatmap(
            self,
            sort: Optional[Union[bool, str, int, Sequence[Union[str, int]]]] = None,
            default: Optional[Any] = None,
            replace=None,
            include: Optional[Union[str, Sequence[str]]] = None,
            exclude: Optional[Union[str, Sequence[str]]] = None,
            transpose: bool = False,
            figsize: Optional[Tuple[Union[int, float], Union[int, float]]] = None,
            **kwargs,
    ):
        """
        Plots a heatmap using the data from :link:`Aggregation.df_matrix`.

        Pandas' default plotting backend is matplotlib. In this case the
        `seaborn.heatmap <http://seaborn.pydata.org/generated/seaborn.heatmap.html>`__
        function is used and the ``seaborn`` package must be installed along with
        ``pandas`` and ``matplotlib``.

        The `plotly backend <https://plotly.com/python/pandas-backend/>`__
        is also supported in which case the
        `plotly.express.imshow <https://plotly.com/python/imshow/>`__
        function is used.

        The documented parameters below are passed to
        :link:`Aggregation.df_matrix`, generating a
        `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`__.
        All other parameters are passed to the heatmap function.

        In matplotlib-mode, the ``figsize`` parameter will
        create a new Axes before calling
        `seaborn.heatmap <http://seaborn.pydata.org/generated/seaborn.heatmap.html>`__.
        For plotly it's ignored.

        **Labels** can be defined in **plotly** with the ``labels`` parameter,
        e.g. ``labels={"x": "date", "y": "temperature", "color": "date.doc_count"}``.
        If ``labels`` or any of the keys are not defined they will be set to the
        name of each aggregation. ``color`` will either be ``<bucket-agg-name>.doc_count``
        or ``<metric-name>`` (or pipeline). A ``labels`` dictionary passed by
        the caller is copied and never modified.

        :param sort:
            Can sort one or several keys/axes.

                - ``True`` sorts all keys ascending
                - ``"-"`` sorts all keys descending
                - The **name of an aggregation** sorts its keys ascending.
                  A "-" prefix sorts descending.
                - An **integer** defines the aggregation by index.
                  Negative integers sort descending.
                - A **sequence** of strings or integers can sort multiple keys

            For example, ``agg.to_matrix(sort=("color", "-shape", -4))`` would
            sort the ``color`` keys ascending, the ``shape`` keys descending and the
            4th aggregation *-whatever that is-* descending.

        :param default:
            If not None any None-value will be replaced by this value

        :param include: ``str | seq[str]``
            One or more wildcard patterns that include matching keys.
            All other keys are removed from the output.

        :param exclude: ``str | seq[str]``
            One or more wildcard patterns that exclude matching keys.

        :param replace:
            ``str, regex, list, dict, Series, int, float, or None``

            If not None, the :link:`pandas.DataFrame.replace` function will be
            called with this parameter as the ``to_replace`` parameter.

        :param transpose: ``bool``
            Transposes the matrix, e.g. exchanges X and Y axis.

        :param figsize: ``tuple of ints or floats``
            Optional tuple to change the size of the plot when the plotting
            backend is ``matplotlib``.
            ``int`` values will be passed to :link:`matplotlib.axes.Axes` unchanged.
            A ``float`` value defines the size in terms of the number of
            keys per axis and is converted to int with ``int(len(keys) * value)``

        :param kwargs: Passed to the heatmap function of the active backend
            (``seaborn.heatmap`` or ``plotly.express.imshow``).
        :return: The object returned by the heatmap function, e.g. the
            :class:`matplotlib.axes.Axes` with the heatmap in matplotlib-mode.
        """
        from .heatmap_ import heatmap
        from ..aggregation.visitor import Visitor

        df = self._agg.df_matrix(
            sort=sort,
            default=default,
            include=include,
            exclude=exclude,
        )
        if replace is not None:
            # Rebind instead of replacing in place so the frame handed out
            # by df_matrix() is left untouched.
            df = df.replace(to_replace=replace)
        if transpose:
            df = df.transpose()

        # set plotly labels
        if get_backend() == "plotly":
            # Work on copies: neither the caller's ``labels`` dict nor the
            # list returned by the visitor must be modified.
            labels = dict(kwargs.get("labels") or {})
            key_names = list(Visitor(self._agg).key_names())

            if self._agg.is_bucket():
                color_name = f"{self._agg.name}.doc_count"
            else:
                color_name = self._agg.name

            # The first key is the matrix' row axis (y), the second one the
            # column axis (x). With less than two keys there is nothing to
            # derive the axis labels from, so they are left to the backend.
            if len(key_names) >= 2:
                y_name, x_name = key_names[0], key_names[1]
                if transpose:
                    x_name, y_name = y_name, x_name
                labels.setdefault("x", x_name)
                labels.setdefault("y", y_name)
            labels.setdefault("color", color_name)

            kwargs["labels"] = labels

        return heatmap(df, figsize=figsize, **kwargs)