Source code for mousestyles.mww
from __future__ import (absolute_import, division,
print_function, unicode_literals)
import itertools
import numpy as np
from scipy.stats import mannwhitneyu
import matplotlib.pyplot as plt
from mousestyles.data import distances_bymouse, distances_bystrain
def get_pvalues(m):
    """
    Compute pairwise two-sided Mann-Whitney U p-values between samples.

    The Mann-Whitney U test is a test for assessing whether two independent
    samples come from the same distribution. The null hypothesis for this
    test is that the two groups have the same distribution, while the
    alternative hypothesis is that one group has larger (or smaller) values
    than the other.

    Null hypothesis $H_0$: $P(X>Y)=P(Y>X)$.
    Alternative $H_1$: not $H_0$.

    The Mann-Whitney U test is similar to the Wilcoxon test, but can be used
    to compare multiple samples that aren't necessarily paired.

    Parameters
    ----------
    m: list of numpy arrays
        Sampled distributions.

    Returns
    -------
    cor: 2 dimensional array of pvalues.
        cor[i,j] is the p-value of the MWW test between the samples i and j.
        The matrix is symmetric and every entry lies in [0, 1]. An empty
        input list yields an array of shape (0, 0).

    Notes
    -----
    A p-value < 0.05 means that there is strong evidence to reject the null
    hypothesis.

    The two-sided alternative is requested explicitly from
    ``scipy.stats.mannwhitneyu`` (the ``alternative`` keyword requires
    SciPy >= 0.17). Doubling the default p-value is only correct on SciPy
    versions whose default is one-sided; on recent versions the default is
    already two-sided and doubling would produce values above 1.

    References
    ----------
    1. Mann-Whitney U test:
       http://tqmp.org/RegularArticles/vol04-1/p013/p013.pdf
    2. Non parametric tests
       http://www.mit.edu/~6.s085/notes/lecture5.pdf

    Examples
    --------
    >>> cor = get_pvalues([np.array([1, 2, 3]), np.array([1, 1, 2])])
    """
    n = len(m)
    cor = np.empty([n, n])
    # The two-sided test does not depend on the order of its two samples,
    # so each unordered pair (diagonal included) is evaluated once and
    # mirrored across the diagonal.
    for a, b in itertools.combinations_with_replacement(range(n), 2):
        pvalue = mannwhitneyu(m[a], m[b], alternative='two-sided')[1]
        cor[a, b] = pvalue
        cor[b, a] = pvalue
    return cor
def MWW_mice(strain, step=50, verbose=False):
    """
    Compare distributions of distances among mice of the same strain,
    using the p-values of the Mann-Whitney U test.

    Distance samples are requested from ``distances_bymouse`` for mouse
    indices 0, 1, 2, ... until an empty sample comes back. That empty
    sample only marks the end of the strain and is left out of the
    comparison.

    Parameters
    ----------
    strain: integer
        Number of the strain.
    step: float
        Time interval length used to compute distances. Default is 50.
        See data.distances_bymouse for more information (including the
        unit of this value).
    verbose: boolean
        If True, print a progress message after each sample is fetched.

    Returns
    -------
    cor: pvalues of the Mann-Whitney U test for each couple of distances
        samples among mice of the corresponding strain, as returned by
        ``get_pvalues``.

    Examples
    --------
    >>> cor = MWW_mice(0)
    """
    samples = []
    mouse = 0
    while True:
        current = distances_bymouse(strain, mouse, step=step)
        samples.append(current)
        mouse += 1
        if verbose:
            print('mouse %s done.' % mouse)
        if current.size == 0:
            # Empty sample: no more mice to fetch for this strain.
            break
    # Drop the trailing empty sample before running the tests.
    return get_pvalues(samples[:-1])
def MWW_allmice(step=50, verbose=False):
    """
    Aggregate MWW_mice data for all available strains of mice.

    Strains are processed in order 0, 1, 2, ... and the scan stops at the
    first strain for which ``MWW_mice`` returns an empty p-value matrix.
    NOTE(review): this presumes ``MWW_mice`` yields an empty matrix for a
    strain number that does not exist -- confirm against the data layer.

    Parameters
    ----------
    step: float
        Time interval length used to compute distances. Default is 50.
        It is forwarded to ``MWW_mice``.
        See data.distances_bymouse for more information.
    verbose: boolean
        If True, print a progress message after each strain. Per-mouse
        messages from ``MWW_mice`` stay disabled.

    Returns
    -------
    mww_values: list
        MWW_mice outputs for each strain.
        mww_values[i] corresponds to the ith strain.

    Examples
    --------
    >>> mww_values = MWW_allmice()
    """
    mww_values = []
    strain = 0
    while True:
        # Forward ``step`` so the caller's choice is actually honoured.
        mww = MWW_mice(strain, step=step, verbose=False)
        if mww.size == 0:
            # Empty matrix: no mice for this strain, so the scan is over.
            break
        mww_values.append(mww)
        if verbose:
            print('strain %s done.' % strain)
        strain += 1
    return mww_values
def MWW_strains(step=50, verbose=False):
    """
    Compare distributions of distances among strains. Proceed as if
    the mice in each strain are i.i.d. samples, and compare the p-values
    of the Mann-Whitney U test.

    Distance samples are requested from ``distances_bystrain`` for strain
    numbers 0, 1, 2, ... until an empty sample comes back. That empty
    sample only marks the end and is left out of the comparison.

    Parameters
    ----------
    step: float
        Time interval length used to compute distances. Default is 50.
        See data.distances_bymouse for more information.
    verbose: boolean
        If True, print a progress message after each sample is fetched.

    Returns
    -------
    cor: pvalues of the Mann-Whitney U test for each couple of distances
        samples among strains of mice, as returned by ``get_pvalues``.

    Examples
    --------
    >>> cor = MWW_strains()
    """
    samples = []
    for strain in itertools.count():
        current = distances_bystrain(strain, step=step)
        samples.append(current)
        if verbose:
            print('strain %s done.' % strain)
        if current.size == 0:
            # Empty sample: no more strains to fetch.
            break
    # Drop the trailing empty sample before running the tests.
    return get_pvalues(samples[:-1])
def plot_cor(data):
    """
    Plot the p-values output by the Mann-Whitney U test using
    a correlation matrix representation.

    Parameters
    ----------
    data: 2 dimensional array
        Matrix of p-values, e.g. the output of MWW_strains or MWW_mice.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.

    Notes
    -----
    Calling this function changes the global Matplotlib style.

    Examples
    --------
    >>> strains = MWW_strains()
    >>> plot_cor(strains)
    """
    # Matplotlib 3.6 renamed the bundled seaborn styles to
    # 'seaborn-v0_8-*' and later releases dropped the old names, so pick
    # whichever spelling this installation provides.
    for style in ('seaborn-notebook', 'seaborn-v0_8-notebook'):
        if style in plt.style.available:
            plt.style.use(style)
            break
    n_rows, n_cols = data.shape
    fig, ax = plt.subplots()
    ax.pcolor(data, cmap=plt.cm.Blues)
    # pcolor draws columns along x and rows along y; ticks are shifted by
    # 0.5 so they sit at the centre of each cell.
    ax.set_xticks(np.arange(n_cols) + 0.5, minor=False)
    ax.set_yticks(np.arange(n_rows) + 0.5, minor=False)
    ax.set_xticklabels([str(i) for i in range(n_cols)], minor=False)
    ax.set_yticklabels([str(i) for i in range(n_rows)], minor=False)
    plt.ylabel('Strains')
    plt.xlabel('Strains')
    plt.show()
def plot_cor_multi(mww_values):
    """
    Plot several p-value matrices stacked in one figure, one subplot per
    strain, drawn the same way as plot_cor.

    Parameters
    ----------
    mww_values: list of 2 dimensional arrays
        MWW_allmice output; mww_values[i] is the p-value matrix of the
        ith strain. Must contain at least one matrix.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.

    Notes
    -----
    Calling this function changes the global Matplotlib style.

    Examples
    --------
    >>> allmice = MWW_allmice()
    >>> plot_cor_multi(allmice)
    """
    # Matplotlib 3.6 renamed the bundled seaborn styles to
    # 'seaborn-v0_8-*' and later releases dropped the old names, so pick
    # whichever spelling this installation provides.
    for style in ('seaborn-notebook', 'seaborn-v0_8-notebook'):
        if style in plt.style.available:
            plt.style.use(style)
            break
    nb_plots = len(mww_values)
    # squeeze=False always returns a 2-D array of axes, so a single strain
    # is handled like any other count instead of failing on indexing.
    fig, axes = plt.subplots(nb_plots, sharex=True, squeeze=False)
    fig.subplots_adjust(hspace=.4)
    for i, mww in enumerate(mww_values):
        ax = axes[i, 0]
        n_rows, n_cols = mww.shape
        ax.pcolor(mww, cmap=plt.cm.Blues)
        # Fix the tick positions first (cell centres), then label them.
        ax.set_xticks(np.arange(n_cols) + 0.5, minor=False)
        ax.set_yticks(np.arange(n_rows) + 0.5, minor=False)
        ax.set_xticklabels([str(k) for k in range(n_cols)], minor=False)
        ax.set_yticklabels([str(k) for k in range(n_rows)], minor=False)
        ax.set_ylabel('Mouse')
        ax.set_title('strain %s' % i)
    # Labels the x axis of the current axes only.
    plt.xlabel('Mouse')
    plt.show()