import os
import warnings
import pandas as pd
import numpy as np
import scipy.stats
# Public API of this module: only the (deprecated) Wilcoxon helper is exported.
__all__ = ['wilcoxon']
def wilcoxon(x, y=None, fill_outside_table=np.nan, **kwargs):
    """
    Calculate the Wilcoxon signed-rank test (deprecated).

    This function is deprecated, as SciPy has implemented a 'mode' parameter that allows for exact p value
    computation. Calling it emits a DeprecationWarning and then computes the result as before.

    The scipy.stats.wilcoxon() function is used to compute the test statistic. For the p-value, the scipy function
    uses a normal approximation, which is only valid for larger sample sizes (see docs scipy.stats.wilcoxon()).
    Therefore, for len(x) > 20, the scipy.stats.wilcoxon() function is also used to compute the p-value.
    For len(x) <= 20, a lookup table is used to determine the lowest p-value for which the statistic is equal or
    lower.

    Args:
        x (np.ndarray): see scipy.stats.wilcoxon().
        y (np.ndarray, optional): see scipy.stats.wilcoxon().
        fill_outside_table (float, optional): p-value to return if the test statistic does not fall in the range of
            the lookup table (for computing the p-value if len(x) <= 20).
            Defaults to np.nan.
        **kwargs (optional): keyword arguments for scipy.stats.wilcoxon().

    Returns:
        statistic (float): see scipy.stats.wilcoxon(). np.nan if all differences are zero.
        pvalue (float): see scipy.stats.wilcoxon(). If len(x) <= 20, this is the lowest p-value for which the
            statistic is equal or lower. np.nan if all differences are zero.

    Raises:
        NotImplementedError: if len(x) <= 20 and `alternative` is passed with a value other than 'two-sided'.

    Warns:
        DeprecationWarning: always, because this function is deprecated.

    Examples:
        Create random data with different means.

        >>> np.random.seed(65)
        >>> n = 20
        >>> x = np.random.normal(2, 1, n)
        >>> y = np.random.normal(2.5, 1, n)

        Compute Wilcoxon p-value using scipy (uses the normal equation).

        >>> scipy.stats.wilcoxon(x, y, mode='approx')[1]
        0.020633435105949553

        Compute the lowest Wilcoxon p-value in the look up table for which the statistic is equal or lower. It should
        be similar to the wilcoxon computed above using the normal equation with n = 20.

        >>> wilcoxon(x, y)[1]
        0.025
    """
    # Warn instead of raising: raising made the whole function body unreachable.
    # stacklevel=2 attributes the warning to the caller's line rather than this one.
    warnings.warn("This function is deprecated. Use the scipy.stats.wilcoxon with mode='auto' instead.",
                  DeprecationWarning, stacklevel=2)

    # If all differences are zero, return np.nan (otherwise, scipy will raise an error).
    # Coerce to arrays first so that plain sequences (e.g. lists) can be subtracted.
    d = np.asarray(x) - np.asarray(y) if y is not None else np.asarray(x)
    if np.all(d == 0):
        return np.nan, np.nan

    # Compute statistic and p-value.
    statistic, pvalue = scipy.stats.wilcoxon(x, y, **kwargs)

    # NOTE(review): the table row is selected by len(x); scipy's zero-difference handling may reduce the
    # effective sample size -- confirm that len(x) is the intended lookup key.
    n = len(x)
    if n <= 20:
        # The scipy package uses the normal equation, which requires that n > 20 (see scipy.stats.wilcoxon() docs).
        # For lower values, a lookup table with critical values for the test statistic may be used.
        if 'alternative' in kwargs and kwargs['alternative'] != 'two-sided':
            raise NotImplementedError('Look-up Wilcoxon p-value not implemented for `alternative` == "{}"'
                                      .format(kwargs['alternative']))

        # Filepath to Excel file with critical values (stored next to this module).
        # (copied from http://www.real-statistics.com/statistics-tables/wilcoxon-signed-ranks-table/)
        table_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  'wilcoxon_signed_ranks_critical_values.xlsx')
        table = pd.read_excel(table_path).set_index('n')

        # Find the p-values for which the statistic is significantly small:
        # keep the table columns whose critical value is at least the observed statistic.
        critical_values = table.loc[n, :].dropna().sort_index()
        significant = critical_values[statistic <= critical_values]

        if len(significant) == 0:
            # No p-value match in table (p-value is higher than the ones in the table).
            pvalue = fill_outside_table
        else:
            # Select the column label (p-value) belonging to the smallest remaining critical value.
            pvalue = significant.idxmin()

    return statistic, pvalue