Initial configuration commit

2023-10-24 22:54:55 -04:00 · 2023-10-24 22:54:55 -04:00 · 31c8abea59
commit 31c8abea59
266 changed files with 780274 additions and 0 deletions
--- a/typings/seaborn/_statistics.pyi
+++ b/typings/seaborn/_statistics.pyi
@@ -0,0 +1,132 @@
+"""
+This type stub file was generated by pyright.
+"""
+
+"""Statistical transformations for visualization.
+
+This module is currently private, but is being written to eventually form part
+of the public API.
+
+The classes should behave roughly in the style of scikit-learn.
+
+- All data-independent parameters should be passed to the class constructor.
+- Each class should implement a default transformation that is exposed through
+  __call__. These are currently written for vector arguments, but I think
+  consuming a whole `plot_data` DataFrame and returning it with transformed
+  variables would make more sense.
+- Some classes have data-dependent preprocessing that should be cached and used
+  multiple times (think defining histogram bins off all data and then counting
+  observations within each bin multiple times per data subsets). These currently
+  have unique names, but it would be good to have a common name. Not quite
+  `fit`, but something similar.
+- Alternatively, the transform interface could take some information about grouping
+  variables and do a groupby internally.
+- Some classes should define alternate transforms that might make the most sense
+  with a different function. For example, KDE usually evaluates the distribution
+  on a regular grid, but it would be useful for it to transform at the actual
+  datapoints. Then again, this could be controlled by a parameter at the time of
+  class instantiation.
+
+"""
+class KDE:
+    """Univariate and bivariate kernel density estimator."""
+    def __init__(self, *, bw_method=..., bw_adjust=..., gridsize=..., cut=..., clip=..., cumulative=...) -> None:
+        """Initialize the estimator with its parameters.
+
+        Parameters
+        ----------
+        bw_method : string, scalar, or callable, optional
+            Method for determining the smoothing bandwidth to use; passed to
+            :class:`scipy.stats.gaussian_kde`.
+        bw_adjust : number, optional
+            Factor that multiplicatively scales the value chosen using
+            ``bw_method``. Increasing will make the curve smoother. See Notes.
+        gridsize : int, optional
+            Number of points on each dimension of the evaluation grid.
+        cut : number, optional
+            Factor, multiplied by the smoothing bandwidth, that determines how
+            far the evaluation grid extends past the extreme datapoints. When
+            set to 0, truncate the curve at the data limits.
+        clip : pair of numbers or None, or a pair of such pairs
+            Do not evaluate the density outside of these limits.
+        cumulative : bool, optional
+            If True, estimate a cumulative distribution function.
+
+        """
+        ...
+    
+    def define_support(self, x1, x2=..., weights=..., cache=...): # -> NDArray[floating[Any]] | tuple[NDArray[floating[Any]], NDArray[floating[Any]]]:
+        """Create the evaluation grid for a given data set."""
+        ...
+    
+    def __call__(self, x1, x2=..., weights=...): # -> tuple[NDArray[Unknown] | Unknown, NDArray[floating[Any]] | tuple[NDArray[floating[Any]], NDArray[floating[Any]]]] | tuple[NDArray[float64] | Unknown, NDArray[floating[Any]] | tuple[NDArray[floating[Any]], NDArray[floating[Any]]]]:
+        """Fit and evaluate on univariate or bivariate data."""
+        ...
+    
+
+
+class Histogram:
+    """Univariate and bivariate histogram estimator."""
+    def __init__(self, stat=..., bins=..., binwidth=..., binrange=..., discrete=..., cumulative=...) -> None:
+        """Initialize the estimator with its parameters.
+
+        Parameters
+        ----------
+        stat : {"count", "frequency", "density", "probability"}
+            Aggregate statistic to compute in each bin.
+
+            - ``count`` shows the number of observations
+            - ``frequency`` shows the number of observations divided by the bin width
+            - ``density`` normalizes counts so that the area of the histogram is 1
+            - ``probability`` normalizes counts so that the sum of the bar heights is 1
+
+        bins : str, number, vector, or a pair of such values
+            Generic bin parameter that can be the name of a reference rule,
+            the number of bins, or the breaks of the bins.
+            Passed to :func:`numpy.histogram_bin_edges`.
+        binwidth : number or pair of numbers
+            Width of each bin, overrides ``bins`` but can be used with
+            ``binrange``.
+        binrange : pair of numbers or a pair of pairs
+            Lowest and highest value for bin edges; can be used either
+            with ``bins`` or ``binwidth``. Defaults to data extremes.
+        discrete : bool or pair of bools
+            If True, set ``binwidth`` and ``binrange`` such that bin
+            edges cover integer values in the dataset.
+        cumulative : bool
+            If True, return the cumulative statistic.
+
+        """
+        ...
+    
+    def define_bin_edges(self, x1, x2=..., weights=..., cache=...): # -> NDArray[Any] | tuple[Unknown, ...]:
+        """Given data, return the edges of the histogram bins."""
+        ...
+    
+    def __call__(self, x1, x2=..., weights=...): # -> tuple[Any | ndarray[Any, Any] | NDArray[float64] | NDArray[Any], Unknown | NDArray[Any] | tuple[Unknown, ...]] | tuple[Unknown, Unknown | NDArray[Any] | tuple[Unknown, ...]]:
+        """Count the occurrences in each bin, maybe normalize."""
+        ...
+    
+
+
+class ECDF:
+    """Univariate empirical cumulative distribution estimator."""
+    def __init__(self, stat=..., complementary=...) -> None:
+        """Initialize the class with its parameters.
+
+        Parameters
+        ----------
+        stat : {"proportion", "count"}
+            Distribution statistic to compute.
+        complementary : bool
+            If True, use the complementary CDF (1 - CDF)
+
+        """
+        ...
+    
+    def __call__(self, x1, x2=..., weights=...): # -> tuple[Any, Any]:
+        """Return proportion or count of observations below each sorted datapoint."""
+        ...
+    
+
+