Source code for mousestyles.mww

from __future__ import (absolute_import, division,
                        print_function, unicode_literals)

import itertools

import numpy as np
from scipy.stats import mannwhitneyu
import matplotlib.pyplot as plt

from mousestyles.data import distances_bymouse, distances_bystrain


def get_pvalues(m):
    """
    Compute pairwise two-sided Mann-Whitney U test p-values.

    The Mann-Whitney U test assesses whether two independent samples come
    from the same distribution. The null hypothesis is that the two groups
    have the same distribution, while the alternative hypothesis is that
    one group has larger (or smaller) values than the other.

    Null hypothesis $H_0$: $P(X>Y)=P(Y>X)$.
    Alternative $H_1$: not $H_0$.

    The Mann-Whitney U test is similar to the Wilcoxon test, but can be
    used to compare multiple samples that aren't necessarily paired.

    Parameters
    ----------
    m: list of numpy arrays
        Sampled distributions.

    Returns
    -------
    cor: 2 dimensional array of pvalues.
        cor[i, j] is the p-value of the two-sided MWW test between the
        samples i and j. The matrix is symmetric and every entry lies in
        [0, 1]. An empty input yields an array of shape (0, 0).

    Notes
    -----
    A p-value < 0.05 means that there is strong evidence to reject the
    null hypothesis.

    The two-sided alternative is requested from SciPy explicitly rather
    than doubling the returned p-value: whether ``mannwhitneyu`` returns a
    one-sided or a two-sided p-value by default depends on the SciPy
    version, and doubling a two-sided p-value produces values above 1.

    References
    ----------
    1. Mann-Whitney U test:
       http://tqmp.org/RegularArticles/vol04-1/p013/p013.pdf
    2. Non parametric tests
       http://www.mit.edu/~6.s085/notes/lecture5.pdf

    Examples
    --------
    >>> cor = get_pvalues([np.array([1, 2, 3]), np.array([1, 1, 2])])
    """
    n = len(m)
    cor = np.empty([n, n])
    # The two-sided test is symmetric in its two samples, so each unordered
    # pair (diagonal included) is tested once and mirrored across the
    # diagonal instead of running the test n * n times.
    for a, b in itertools.combinations_with_replacement(range(n), 2):
        pvalue = mannwhitneyu(m[a], m[b], alternative='two-sided')[1]
        cor[a, b] = cor[b, a] = pvalue
    return cor
def MWW_mice(strain, step=50, verbose=False):
    """
    Compare distributions of distances among mice of the same strain,
    using the p-values of the Mann-Whitney U test.

    Mice are queried one after the other, starting at index 0, until
    ``distances_bymouse`` returns an empty array; that empty result marks
    the end of the strain and is not included in the comparison.

    Parameters
    ----------
    strain: integer
        Number of the strain.
    step: float
        Time interval length used to compute distances. Default is 50.
        Passed through to data.distances_bymouse; see that function for
        more information.
    verbose: boolean
        If True, print a progress message after each query.

    Returns
    -------
    cor: pvalues of the Mann-Whitney U test for each couple of distances
        samples among mice of the corresponding strain.

    Examples
    --------
    >>> cor = MWW_mice(0)
    """
    samples = []
    for mouse in itertools.count():
        sample = distances_bymouse(strain, mouse, step=step)
        samples.append(sample)
        if verbose:
            # Progress is reported as a running count (index + 1).
            print('mouse %s done.' % (mouse + 1))
        if sample.size == 0:
            break
    # The last entry is the empty end-of-strain marker; leave it out.
    return get_pvalues(samples[:-1])
def MWW_allmice(step=50, verbose=False):
    """
    Aggregate MWW_mice data for all available strains of mice.

    Strains are queried one after the other, starting at index 0, until
    MWW_mice returns an empty matrix; that empty result marks the end of
    the available strains and is not included in the output.

    Parameters
    ----------
    step: float
        Time interval length used to compute distances. Default is 50.
        Forwarded to MWW_mice (and from there to data.distances_bymouse;
        see that function for more information).
    verbose: boolean
        If True, print a progress message after each strain is queried.
        Per-mouse messages from MWW_mice stay disabled.

    Returns
    -------
    mww_values: list of MWW_mice outputs, one per strain.
        mww_values[i] corresponds to the ith strain.

    Examples
    --------
    >>> mww_values = MWW_allmice()
    """
    mww_values = []
    for strain in itertools.count():
        # `step` must be forwarded explicitly; otherwise MWW_mice silently
        # falls back to its own default and the caller's value is ignored.
        mww = MWW_mice(strain, step=step, verbose=False)
        mww_values.append(mww)
        if verbose:
            print('strain %s done.' % strain)
        if mww.size == 0:
            break
    # The last entry is the empty end-of-data marker; leave it out.
    return mww_values[:-1]
def MWW_strains(step=50, verbose=False):
    """
    Compare distributions of distances among strains.

    Proceed as if the mice in each strain are i.i.d. samples, and compare
    the p-values of the Mann-Whitney U test.

    Strains are queried one after the other, starting at index 0, until
    ``distances_bystrain`` returns an empty array; that empty result marks
    the end of the data and is not included in the comparison.

    Parameters
    ----------
    step: float
        Time interval length used to compute distances. Default is 50.
        Passed through to data.distances_bystrain; see that function for
        more information.
    verbose: boolean
        If True, print a progress message after each query.

    Returns
    -------
    cor: pvalues of the Mann-Whitney U test for each couple of distances
        samples among strains of mice.

    Examples
    --------
    >>> cor = MWW_strains()
    """
    samples = []
    for strain in itertools.count():
        sample = distances_bystrain(strain, step=step)
        samples.append(sample)
        if verbose:
            print('strain %s done.' % strain)
        if sample.size == 0:
            break
    # The last entry is the empty end-of-data marker; leave it out.
    return get_pvalues(samples[:-1])
def plot_cor(data):
    """
    Plot the p-values output by the Mann-Whitney U test using a
    correlation matrix representation.

    Parameters
    ----------
    data: 2 dimensional array of p-values
        For instance the output of MWW_strains, or a single element of
        the MWW_allmice output.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.

    Examples
    --------
    >>> strains = MWW_strains()
    >>> plot_cor(strains)
    """
    data = np.asarray(data)
    # The seaborn style sheets were renamed in newer Matplotlib releases;
    # use whichever name this installation provides and keep the current
    # style if neither exists, rather than failing before plotting.
    for style in ('seaborn-v0_8-notebook', 'seaborn-notebook'):
        if style in plt.style.available:
            plt.style.use(style)
            break
    n_rows, n_cols = data.shape
    fig, ax = plt.subplots()
    ax.pcolor(data, cmap=plt.cm.Blues)
    # pcolor draws columns along x and rows along y; the +0.5 offset puts
    # each tick in the middle of its cell.
    ax.set_xticks(np.arange(n_cols) + 0.5, minor=False)
    ax.set_yticks(np.arange(n_rows) + 0.5, minor=False)
    ax.set_xticklabels(list(range(n_cols)), minor=False)
    ax.set_yticklabels(list(range(n_rows)), minor=False)
    ax.set_ylabel('Strains')
    ax.set_xlabel('Strains')
    plt.show()
def plot_cor_multi(mww_values):
    """
    Vectorized version of plot_cor. Plot several correlation matrices
    stacked in one figure, one subplot per strain.

    Parameters
    ----------
    mww_values: MWW_allmice output
        Sequence of 2 dimensional arrays of p-values; mww_values[i] is
        drawn in the ith subplot and titled 'strain i'. Nothing is drawn
        if the sequence is empty.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.

    Examples
    --------
    >>> allmice = MWW_allmice()
    >>> plot_cor_multi(allmice)
    """
    nb_plots = len(mww_values)
    if nb_plots == 0:
        # A figure cannot be created with zero subplots.
        return
    # The seaborn style sheets were renamed in newer Matplotlib releases;
    # use whichever name this installation provides and keep the current
    # style if neither exists, rather than failing before plotting.
    for style in ('seaborn-v0_8-notebook', 'seaborn-notebook'):
        if style in plt.style.available:
            plt.style.use(style)
            break
    # squeeze=False always returns a 2D array of axes, so a single strain
    # is handled the same way as several.
    fig, axes = plt.subplots(nb_plots, sharex=True, squeeze=False)
    fig.subplots_adjust(hspace=.4)
    for i, mww in enumerate(mww_values):
        ax = axes[i, 0]
        mww = np.asarray(mww)
        n_rows, n_cols = mww.shape
        ax.pcolor(mww, cmap=plt.cm.Blues)
        # Fix the tick positions before labelling them so that each label
        # is attached to the centre of its cell.
        ax.set_xticks(np.arange(n_cols) + 0.5, minor=False)
        ax.set_yticks(np.arange(n_rows) + 0.5, minor=False)
        ax.set_xticklabels(list(range(n_cols)), minor=False)
        ax.set_yticklabels(list(range(n_rows)), minor=False)
        ax.set_ylabel('Mouse')
        ax.set_title('strain %s' % i)
    # Only the bottom subplot carries the x label.
    axes[-1, 0].set_xlabel('Mouse')
    plt.show()