# Source code for nnsa.stats.paired

import os
import warnings

import pandas as pd
import numpy as np
import scipy.stats


# Names exported by ``from <this module> import *``.
__all__ = [
    'wilcoxon',
]


def wilcoxon(x, y=None, fill_outside_table=np.nan, **kwargs):
    """
    Calculate the Wilcoxon signed-rank test, using a lookup table for the p-value of small samples.

    This function is deprecated, as SciPy has implemented a 'mode' parameter that allows for exact
    p value computation. Calling it emits a DeprecationWarning and then computes the result as
    described below.

    The scipy.stats.wilcoxon() function is used to compute the test statistic. For len(x) > 20,
    the p-value returned by scipy.stats.wilcoxon() is returned as well. For len(x) <= 20, the
    p-value is replaced by a value looked up in a table of critical values: the lowest tabulated
    p-value for which the statistic is equal to or lower than the critical value.

    Args:
        x (np.ndarray): see scipy.stats.wilcoxon().
        y (np.ndarray, optional): see scipy.stats.wilcoxon().
        fill_outside_table (float, optional): p-value to return if the test statistic does not fall in the
            range of the lookup table (for computing the p-value if len(x) <= 20).
            Defaults to np.nan.
        **kwargs (optional): keyword arguments for scipy.stats.wilcoxon().

    Returns:
        statistic (float): see scipy.stats.wilcoxon(). np.nan if all differences are zero.
        pvalue (float): see scipy.stats.wilcoxon(). If len(x) <= 20, this is the lowest p-value in the
            lookup table for which the statistic is equal or lower. np.nan if all differences are zero.

    Raises:
        NotImplementedError: if len(x) <= 20 and `alternative` is given and is not 'two-sided'.

    Warns:
        DeprecationWarning: always; use scipy.stats.wilcoxon() directly instead.

    Examples:
        Create random data with different means.
        >>> np.random.seed(65)
        >>> n = 20
        >>> x = np.random.normal(2, 1, n)
        >>> y = np.random.normal(2.5, 1, n)

        Compute Wilcoxon p-value using scipy (uses the normal equation).
        >>> scipy.stats.wilcoxon(x, y, mode='approx')[1]
        0.020633435105949553

        Compute the lowest Wilcoxon p-value in the look up table for which the statistic is equal or lower.
        It should be similar to the wilcoxon computed above using the normal equation with n = 20.
        >>> wilcoxon(x, y)[1]
        0.025
    """
    # Warn instead of raising, so that existing callers keep getting a result.
    # stacklevel=2 attributes the warning to the caller's line.
    warnings.warn(
        "This function is deprecated. Use the scipy.stats.wilcoxon with mode='auto' instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    # If all differences are zero, return np.nan (otherwise, scipy will raise an error).
    # Inputs are coerced to arrays so that plain sequences can be subtracted too.
    d = np.asarray(x) - np.asarray(y) if y is not None else np.asarray(x)
    if np.all(d == 0):
        return np.nan, np.nan

    # Compute statistic and p-value.
    statistic, pvalue = scipy.stats.wilcoxon(x, y, **kwargs)

    # NOTE(review): the table row is selected by the raw sample size. scipy's default
    # zero_method discards zero differences, so the effective n may be smaller - confirm.
    n = len(x)
    if n > 20:
        return statistic, pvalue

    # For small samples, a lookup table with critical values for the test statistic is used
    # instead of the p-value reported by scipy.
    if kwargs.get('alternative', 'two-sided') != 'two-sided':
        raise NotImplementedError('Look-up Wilcoxon p-value not implemented for `alternative` == "{}"'
                                  .format(kwargs['alternative']))

    # Filepath to Excel file with critical values.
    # (copied from http://www.real-statistics.com/statistics-tables/wilcoxon-signed-ranks-table/)
    file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        'wilcoxon_signed_ranks_critical_values.xlsx')
    df = pd.read_excel(file)
    df.set_index('n', inplace=True)

    # Row for this sample size; the remaining column labels are presumably the tabulated
    # p-values (TODO confirm against the spreadsheet).
    critical_values = df.loc[n, :].dropna().sort_index()

    # Keep the entries for which the statistic is significantly small.
    significant = critical_values[statistic <= critical_values]
    if significant.empty:
        # No p-value match in table (p-value is higher than the ones in the table).
        pvalue = fill_outside_table
    else:
        # Label of the smallest qualifying critical value, i.e. the smallest p-value.
        pvalue = significant.idxmin()

    return statistic, pvalue