# Source code for pyinfer.inference_report

import multiprocessing as mp
import signal
import statistics
import warnings
from contextlib import contextmanager
from time import perf_counter, perf_counter_ns, sleep, time
from typing import Any, Callable, List, Union

import psutil
from tabulate import tabulate

from .errors import (
    MatplotlibNotInstalledError,
    MeasurementIntervalNotSetError,
    ModelIsNotCallableError,
    NamesNotEqualsModelsLengthError,
)


def quantiles(data, *, n=4, method="exclusive"):
    """
    Backport of the ``quantiles`` function from the ``statistics`` package
    (present in python >= 3.8), as ``quantiles()`` isn't available for python 3.7.x.

    Args:
        data: Iterable of numeric data points; it is sorted internally.

        n (int, optional): Number of intervals to cut the data into. ``n - 1``
            cut points are returned. Defaults to 4 (quartiles).

        method (str, optional): Either "exclusive" or "inclusive". Defaults to "exclusive".

    Returns:
        list: The ``n - 1`` interpolated cut points, in ascending order.

    Raises:
        statistics.StatisticsError: If `n` is less than 1 or `data` holds fewer
            than two points.
        ValueError: If `method` is neither "inclusive" nor "exclusive".
    """
    if n < 1:
        raise statistics.StatisticsError("n must be at least 1")
    data = sorted(data)
    ld = len(data)
    if ld < 2:
        raise statistics.StatisticsError("must have at least two data points")
    if method == "inclusive":
        m = ld - 1
        result = []
        for i in range(1, n):
            j = i * m // n
            delta = i * m - j * n
            interpolated = (data[j] * (n - delta) + data[j + 1] * delta) / n
            result.append(interpolated)
        return result
    if method == "exclusive":
        m = ld + 1
        result = []
        for i in range(1, n):
            j = i * m // n  # rescale i to m/n
            j = min(max(j, 1), ld - 1)  # clamp to 1 .. ld-1
            delta = i * m - j * n  # exact integer math
            interpolated = (data[j - 1] * (n - delta) + data[j] * delta) / n
            result.append(interpolated)
        return result
    raise ValueError(f"Unknown method: {method!r}")


class InferenceReport:
    """A model agnostic report of inference related stats for any callable model.

    The model is called repeatedly with the same `inputs`, either until
    `n_seconds` have elapsed or for `n_iterations` calls. Every call is timed
    and the timings are summarised by `run`.
    """

    def __init__(
        self,
        model: Callable,
        inputs: Any,
        n_seconds: Union[int, float, None] = None,
        n_iterations: int = None,
        exit_on_inputs_exhausted: bool = False,
        infer_failure_point: Union[int, float, None] = None,
        model_name: str = None,
        drop_stats: List[str] = None,
    ):
        """
        Args:
            model (Callable): The callable method or function for the model.

            inputs (Any): The input(s) parameters the model receives.

            n_seconds (Union[int, float, None], optional): Number of seconds to run model inferences.
                If this is `None` it is expected that `n_iterations` will be set. Defaults to None.

            n_iterations (int, optional): Number of iterations to run model inferences for.
                If this is `None` it is expected that `n_seconds` will be set. Defaults to None.

            exit_on_inputs_exhausted (bool, optional): If inputs are a iterable of inputs exit
                on completion. This feature is not yet implemented. Defaults to False.

            infer_failure_point (Union[int, float, None], optional): Time in seconds (int or float)
                at which an inference is to be considered a failure in the reporting stats.
                Defaults to None.

            model_name (str, optional): The name to give to the model for the report.
                Defaults to None.

            drop_stats (List[str], optional): List of keys to drop from the report.
                Defaults to None.

        Raises:
            ModelIsNotCallableError: Will raise if the model provided is not callable.
            MeasurementIntervalNotSetError: Will raise if neither `n_seconds` or
                `n_iterations` are set.
        """
        if not callable(model):
            raise ModelIsNotCallableError(
                "The model provided is not callable. Please provide a model that has a method call."
            )
        self.model = model
        self.inputs = inputs
        self.exit_on_inputs_exhausted = exit_on_inputs_exhausted
        self.infer_failure_point = infer_failure_point
        self.drop_stats = drop_stats

        # Durations (in seconds) of every inference from the most recent run.
        self.runs = []

        self.model_name = str(model_name) if model_name else "Model"

        if not n_iterations and not n_seconds:
            s = "You have not specified either `n_seconds` or `n_iterations`."
            s += " Please specify a valid measurement interval."
            raise MeasurementIntervalNotSetError(s)

        if n_iterations and n_seconds:
            s = f"You have set both `n_seconds={n_seconds}` and `n_iterations={n_iterations}` "
            s += f"only one can be specified per instance. Defaulting measurement interval to `seconds={n_seconds}``"

            warnings.warn(s)
            # The time based interval wins when both are supplied.
            n_iterations = None

        self.n_seconds = n_seconds
        self.n_iterations = n_iterations

        # Set to True by the timeout signal handler to stop a timed run.
        self.terminated = False

    @contextmanager
    def _timeout(self, duration: Union[int, float]):
        """
        Creates signal to terminate execution once `duration` seconds have passed.

        While the context is active a SIGALRM handler is installed that sets
        `self.terminated` to True. The flag is cleared on entry so the same
        instance can be timed more than once. On exit the timer is cancelled and
        the previously installed handler is put back, even if the body raised.

        Args:
            duration (Union[int, float]): Seconds until `self.terminated` is set.
        """

        def timeout_handler(signum, frame):
            self.terminated = True

        self.terminated = False
        previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
        # `setitimer` accepts fractional seconds, `signal.alarm` only takes ints.
        signal.setitimer(signal.ITIMER_REAL, duration)
        try:
            yield
        finally:
            signal.setitimer(signal.ITIMER_REAL, 0)
            # `signal.signal` returns None when the old handler was not installed
            # from Python; that value cannot be passed back to it.
            if previous_handler is not None:
                signal.signal(signal.SIGALRM, previous_handler)

    def _time_inference(self) -> float:
        "Calls `self.model(self.inputs)` once and returns how long it took in seconds"
        start = perf_counter_ns()
        self.model(self.inputs)
        # Subtract the integer nanosecond readings before scaling to seconds.
        return (perf_counter_ns() - start) * 1e-9

    def _collect_runs(self) -> List[float]:
        """
        Runs inferences for the configured measurement interval.

        Returns:
            List[float]: Duration in seconds of every inference, in call order.
        """
        runs: List[float] = []
        if self.n_seconds:
            with self._timeout(self.n_seconds):
                while not self.terminated:
                    runs.append(self._time_inference())
        else:
            iterations = 0
            while iterations < self.n_iterations:
                runs.append(self._time_inference())
                iterations += 1
        return runs

    def run(self, print_report: bool = True) -> dict:
        """
        Runs the inference report for `self.model` with input(s) `self.inputs`

        Args:
            print_report (bool, optional): If true a table representation of the report will be
                printed to console. Defaults to True.

        Returns:
            dict: A dictionary containing all the report stats created during the run.
        """
        runs = self._collect_runs()

        # A run counts as failed when it took at least `infer_failure_point` seconds.
        if self.infer_failure_point:
            failed = sum(1 for run in runs if run >= self.infer_failure_point)
        else:
            failed = 0
        completed = len(runs) - failed
        total_time_taken = sum(runs)

        self.runs = runs
        results_dict = self._make_results_dict(
            completed, failed, self.runs, total_time_taken
        )

        if print_report:
            self.report(results_dict)
        return results_dict

    def _make_results_dict(
        self,
        completed: int,
        failed: int,
        runs: List[float],
        total_time_taken: float,
    ) -> dict:
        """
        Creates dict of all the stats using info collected from run.

        Args:
            completed (int): Number of inferences counted as successful.
            failed (int): Number of inferences counted as failed.
            runs (List[float]): Duration of every inference in seconds.
            total_time_taken (float): Sum of all inference durations in seconds.

        Returns:
            dict: Report stats keyed by column header. "Infer(p/sec)" is None when
                `total_time_taken` is zero.
        """
        total = completed + failed
        if total_time_taken:
            infer_per_sec = round(total / total_time_taken, 2)
        else:
            # No measurable time elapsed, so a rate cannot be computed.
            infer_per_sec = None
        return {
            "Model": self.model_name,
            "Success": completed,
            "Fail": failed,
            "Took": total_time_taken,
            "Infer(p/sec)": infer_per_sec,
            "MaxRun(ms)": self._max_run(runs),
            "MinRun(ms)": self._min_run(runs),
            "Std(ms)": self._stdev(runs),
            "Mean(ms)": self._mean_run(runs),
            "Median(ms)": self._median_run(runs),
            "IQR(ms)": self._iqr(runs),
            "Cores(L)": psutil.cpu_count(logical=True),
            "Cores(P)": psutil.cpu_count(logical=False),
        }

    @staticmethod
    def _cpu_monitor(target_function):
        """
        Monitors cpu usage of a `target_function`. Not currently in use
        As psutil requires root access when running a worker process.

        Args:
            target_function: Callable started in a separate process.

        Returns:
            list: `cpu_percent()` samples taken roughly every 0.01 seconds
                while the worker process was alive.
        """
        worker_process = mp.Process(target=target_function)
        worker_process.start()
        p = psutil.Process(worker_process.pid)

        cpu_usage_pc = []
        while worker_process.is_alive():
            cpu_usage_pc.append(p.cpu_percent())
            sleep(0.01)

        worker_process.join()
        return cpu_usage_pc

    def report(self, results_dict: dict):
        """
        Prints a report to console based on the values
        found in `results_dict`

        Keys listed in `self.drop_stats` are removed from `results_dict` in place
        before printing.

        Args:
            results_dict (dict): Dictionary containing compiled stats from a run.
        """
        if self.drop_stats:
            for drop_key in self.drop_stats:
                results_dict.pop(drop_key, None)
        print(
            tabulate(
                [results_dict.values()],
                headers=results_dict.keys(),
                tablefmt="fancy_grid",
                numalign="right",
            )
        )

    def plot(self, show: bool = True, save_location: str = None):
        """
        Creates a simple plot of `self.runs`. Plots run number
        on the x-axis and run time in milliseconds on the y-axis.

        Args:
            show (bool, optional): Whether to show the plot after calling method. Defaults to True.

            save_location (str, optional): Location to save plot at. If None the plot will not
                be saved. Defaults to None.

        Raises:
            MatplotlibNotInstalledError: Raise if matplotlib is not installed in python environment.
            ValueError: Raise if the runs have not yet been calculated but `plot` is called.
        """
        try:
            import matplotlib.pyplot as plt
        except ImportError as err:
            s = "InferenceReport.plot() requires matplotlib to be installed."
            s += ' To use this method please install by running: pip install "pyinfer[plotting]" '
            raise MatplotlibNotInstalledError(s) from err

        if not self.runs:
            raise ValueError(
                "self.runs is not yet set, please run the report before plotting."
            )

        t = list(range(len(self.runs)))
        ms_runs = [(run * 1000) for run in self.runs]
        fig, ax = plt.subplots()
        ax.plot(t, ms_runs, marker="o")
        ax.set(xlabel="run", ylabel="run time (ms)")
        ax.grid()

        # Save before showing: saving after `plt.show()` returns can write an
        # empty image once the window has been closed.
        if save_location:
            fig.savefig(save_location)

        if show:
            plt.show()

    def _max_run(self, runs: list) -> Union[float, None]:
        "Returns max run in milliseconds from `runs`, or None if `runs` is empty"
        return max(runs) * 1000 if runs else None

    def _min_run(self, runs: list) -> Union[float, None]:
        "Returns min run in milliseconds from `runs`, or None if `runs` is empty"
        return min(runs) * 1000 if runs else None

    def _stdev(self, runs: list) -> Union[float, None]:
        "Returns standard deviation in milliseconds from `runs`, or None with fewer than two runs"
        if len(runs) >= 2:
            return statistics.stdev(runs) * 1000
        return None

    def _iqr(self, runs: list) -> Union[float, None]:
        "Returns interquartile range in milliseconds from `runs`, or None with fewer than two runs"
        if len(runs) >= 2:
            quartiles = quantiles(runs, n=4)
            return (quartiles[2] - quartiles[0]) * 1000
        return None

    def _mean_run(self, runs: list) -> Union[float, None]:
        "Returns mean run time in milliseconds from `runs`, or None if `runs` is empty"
        return statistics.mean(runs) * 1000 if runs else None

    def _median_run(self, runs: list) -> Union[float, None]:
        "Returns median run time in milliseconds from `runs`, or None if `runs` is empty"
        return statistics.median(runs) * 1000 if runs else None


class MultiInferenceReport:
    """A model agnostic report of inference related stats for any list of callable models.

    Each model is measured in turn with its own `InferenceReport`, and the
    per-model results are combined into one table.
    """

    def __init__(
        self,
        models: List[Callable],
        inputs: List[Any],
        n_seconds: Union[int, float, None] = None,
        n_iterations: int = None,
        exit_on_inputs_exhausted: bool = False,
        infer_failure_point: Union[int, float, None] = None,
        model_names: List[str] = None,
        drop_stats: List[str] = None,
    ):
        """
        Args:
            models (List[Callable]): A list of the callable methods or functions for the models.

            inputs (List[Any]): The input(s) parameters each of the models receives. If only one
                input is given then it is assumed each model takes the same shape/type of input and
                that input will be passed to each model.

            n_seconds (Union[int, float, None], optional): Number of seconds to run model inferences.
                If this is `None` it is expected that `n_iterations` will be set. Defaults to None.

            n_iterations (int, optional): Number of iterations to run model inferences for.
                If this is `None` it is expected that `n_seconds` will be set. Defaults to None.

            exit_on_inputs_exhausted (bool, optional): If inputs are a iterable of inputs exit
                on completion. This feature is not yet implemented. Defaults to False.

            infer_failure_point (Union[int, float, None], optional): Time in seconds (int or float)
                at which an inference is to be considered a failure in the reporting stats.
                Defaults to None.

            model_names (List[str], optional): The names to give to the models for the report. Must
                be the same length as number of models provided. Defaults to None.

            drop_stats (List[str], optional): List of keys to drop from the report.
                Defaults to None.

        Raises:
            ModelIsNotCallableError: Will raise if the model provided is not callable.

            NamesNotEqualsModelsLengthError: Will raise if the number of models names
                does not match the number of model callables provided.

            MeasurementIntervalNotSetError: Will raise if neither `n_seconds` or
                `n_iterations` are set.
        """

        for i, model in enumerate(models):
            if not callable(model):
                raise ModelIsNotCallableError(
                    f"The model at index {i} is not callable. Please provide a model that has a method call."
                )

        self.models = models
        # A non-list `inputs` is treated as one input shared by every model.
        self.inputs = inputs if isinstance(inputs, list) else [inputs]

        # Whenever the counts differ, the first input is reused for all models.
        if len(self.inputs) != len(self.models):
            self.inputs = [self.inputs[0]] * len(self.models)

        self.exit_on_inputs_exhausted = exit_on_inputs_exhausted
        self.infer_failure_point = infer_failure_point
        # One list of run durations (seconds) per model, filled by `run`.
        self.models_runs = []
        self.drop_stats = drop_stats

        if model_names:
            if len(model_names) != len(self.models):
                s = f"Length of model_names is {len(model_names)}, does not equal number of models provided {len(self.models)}. "
                s += "Please ensure that these lengths are equal if you want to set custom model names. "
                s += "Otherwise you can leave model_names as None."
                raise NamesNotEqualsModelsLengthError(s)
            self.model_names = model_names
        else:
            self.model_names = ["Model " + str(i) for i, _ in enumerate(self.models)]

        if not n_iterations and not n_seconds:
            s = "You have not specified either `n_seconds` or `n_iterations`."
            s += " Please specify a valid measurement interval."
            raise MeasurementIntervalNotSetError(s)

        if n_iterations and n_seconds:
            s = f"You have set both `n_seconds={n_seconds}` and `n_iterations={n_iterations}` "
            s += f"only one can be specified per instance. Defaulting measurement interval to `seconds={n_seconds}``"

            warnings.warn(s)
            # The time based interval wins when both are supplied.
            n_iterations = None

        self.n_seconds = n_seconds
        self.n_iterations = n_iterations

        self.terminated = False

    def run(self, print_report: bool = True) -> List[dict]:
        """
        Runs the multi inference report for `self.models` with input(s) `self.inputs`

        Args:
            print_report (bool, optional): If true a table representation of the report will be
                printed to console. Defaults to True.

        Returns:
            List[dict]: A list of dictionaries containing all the report stats created during the run
                for each model callable.
        """
        # Start from a clean slate so repeated calls do not pile up old runs,
        # which would leave `plot` with more series than model names.
        self.models_runs = []
        results = []
        for i, (model, _input) in enumerate(zip(self.models, self.inputs)):
            report = InferenceReport(
                model=model,
                inputs=_input,
                n_seconds=self.n_seconds,
                n_iterations=self.n_iterations,
                infer_failure_point=self.infer_failure_point,
                model_name=self.model_names[i],
                drop_stats=self.drop_stats,
            )
            # The combined table is printed below, not once per model.
            results.append(report.run(print_report=False))
            self.models_runs.append(report.runs)

        if print_report:
            self.report(results)

        return results

    def _apply_drop_stats(self, results_list: List[dict]) -> None:
        "Removes every key listed in `self.drop_stats` from each dict in `results_list`, in place"
        if not self.drop_stats:
            return
        for results_dict in results_list:
            for drop_key in self.drop_stats:
                results_dict.pop(drop_key, None)

    def report(self, results_list: List[dict]):
        """
        Prints a report to console based on the values
        found in `results_list`

        Keys listed in `self.drop_stats` are removed from every dictionary in
        `results_list` in place before printing. Nothing is printed when
        `results_list` is empty.

        Args:
            results_list (List[dict]): A list of dictionaries containing compiled stats from the runs.
        """
        self._apply_drop_stats(results_list)
        if not results_list:
            return
        print(
            tabulate(
                [results_dict.values() for results_dict in results_list],
                headers=results_list[0].keys(),
                tablefmt="fancy_grid",
                numalign="right",
            )
        )

    def plot(self, show: bool = True, save_location: str = None):
        """
        Creates a simple plot of `self.models_runs`. For each run it
        plots run number on the x-axis and run time in milliseconds on the y-axis.

        Args:
            show (bool, optional): Whether to show the plot after calling method. Defaults to True.

            save_location (str, optional): Location to save plot at. If None the plot will not
                be saved. Defaults to None.

        Raises:
            MatplotlibNotInstalledError: Raise if matplotlib is not installed in python environment.
            ValueError: Raise if the model_runs have not yet been calculated but `plot` is called.
        """
        try:
            import matplotlib.pyplot as plt
        except ImportError as err:
            s = "MultiInferenceReport.plot() requires matplotlib to be installed."
            s += ' To use this method please install by running: pip install "pyinfer[plotting]" '
            raise MatplotlibNotInstalledError(s) from err

        if not self.models_runs:
            raise ValueError(
                "self.models_runs is not yet set, please run the report before plotting."
            )

        fig, ax = plt.subplots()
        ax.set(xlabel="run", ylabel="run time (ms)")
        ax.grid()
        for i, runs in enumerate(self.models_runs):
            t = list(range(len(runs)))
            ms_runs = [(run * 1000) for run in runs]
            ax.plot(t, ms_runs, marker="o", label=self.model_names[i])
        # Build the legend once, after every series has been added.
        ax.legend()

        if save_location:
            fig.savefig(save_location)

        if show:
            plt.show()