Source code for enigmatoolbox.cross_disorder.cross_disorder

import numpy as np
from sklearn.decomposition import PCA

from enigmatoolbox.datasets.base import load_summary_stats


def _as_list(value):
    """Wrap a lone string in a list so it is not iterated character by character."""
    return [value] if isinstance(value, str) else value


def _is_selected(field, measure, ignore, include):
    """Return True when a summary-statistics field name passes every name filter."""
    # An empty `include` means "no restriction".
    if include and not any(inc in field for inc in include):
        return False
    if any(ig in field for ig in ignore):
        return False
    return any(meas in field for meas in measure)


def _stack_rows(base, extra):
    """Append the rows of `extra` below `base`, keeping the result 2-D."""
    extra = np.atleast_2d(np.asarray(extra))
    if base.size == 0:
        # Nothing was selected from the summary statistics; the additional
        # data is the whole matrix.
        return extra
    # axis=0 is essential: np.append/np.concatenate without an axis flatten
    # both operands into a single 1-D vector.
    return np.concatenate([base, extra], axis=0)


def cross_disorder_effect(disorder='all_disorder', measure=None,
                          additional_data_cortex=None, additional_name_cortex=None,
                          additional_data_subcortex=None, additional_name_subcortex=None,
                          ignore=None, include=None, method='pca'):
    """Cross-disorder effect (authors: @boyongpark, @saratheriver)

    Collects case-control maps from the summary statistics of several
    disorders, separately for cortical and subcortical fields, and summarises
    what they share through either a PCA or a correlation matrix.

    Parameters
    ----------
    disorder : list, optional
        Any combination of disorder names. Default is all available
        disorders, except 'adhd'. Options are: {'22q', 'adhd', 'asd',
        'bipolar', 'depression', 'epilepsy', 'ocd', 'schizophrenia'}.
        A single name may also be given as a string.
    measure : list, optional
        Any combination of measure names. Default is
        {'CortThick', 'CortSurf', 'SubVol'}.
    additional_data_cortex : ndarray, optional
        Additional cortical ENIGMA-type data (n, 68). Must also provide
        'additional_name_cortex'.
    additional_name_cortex : list, optional
        Names for the additional cortical ENIGMA-type data. Must also provide
        'additional_data_cortex'.
    additional_data_subcortex : ndarray, optional
        Additional subcortical ENIGMA-type data (n, 16). Must also provide
        'additional_name_subcortex'.
    additional_name_subcortex : list, optional
        Names for the additional subcortical ENIGMA-type data. Must also
        provide 'additional_data_subcortex'.
    ignore : list, optional
        Ignore summary statistics with these expressions. Default is ('mega')
        as it contains NaNs.
    include : list, optional
        Include only summary statistics with these expressions. Default is
        empty, i.e., include everything.
    method : string, optional
        Analysis method {'pca', 'correlation'}. Default is 'pca'.

    Returns
    -------
    components : dict
        Principal components of shared effects in descending order in terms
        of component variance. Only if method is 'pca'.
    variance : dict
        Variance of components. Only if method is 'pca'.
    correlation_matrix : dict
        Correlation matrices for every pair of shared effect maps. Only if
        method is 'correlation'.
    names : dict
        Names of disorder and case-control effect maps included in analysis.

    The return value is the tuple ``(components, variance, names)`` for
    'pca' and ``(correlation_matrix, names)`` for 'correlation'. Every dict
    has the keys 'cortex' and 'subcortex'; a structure for which no map was
    selected keeps an empty list as its value.

    Raises
    ------
    ValueError
        If `method` is neither 'pca' nor 'correlation'.
    """
    # Fail before any data is loaded instead of silently returning None.
    if method not in ('pca', 'correlation'):
        raise ValueError(
            "method must be 'pca' or 'correlation', got {!r}".format(method))

    measure = ['CortThick', 'CortSurf', 'SubVol'] if measure is None else _as_list(measure)
    ignore = ['mega'] if ignore is None else _as_list(ignore)
    include = [] if include is None else _as_list(include)

    # Value comparison: identity (`is`) against a string literal is
    # implementation-dependent.
    if isinstance(disorder, str) and disorder == 'all_disorder':
        disorder = ['22q', 'asd', 'bipolar', 'depression', 'epilepsy', 'ocd',
                    'schizophrenia']
    else:
        disorder = _as_list(disorder)

    # Substring that marks a summary-statistics field as belonging to a structure.
    structure_tags = (('cortex', 'Cort'), ('subcortex', 'Sub'))

    mat_d = {'cortex': [], 'subcortex': []}
    names = {'cortex': [], 'subcortex': []}

    for disorder_name in disorder:
        # Load summary statistics
        sum_stats = load_summary_stats(disorder_name)

        # Loop through structure fields (case-control options)
        for field in list(sum_stats.keys()):
            if not _is_selected(field, measure, ignore, include):
                continue
            for structure, tag in structure_tags:
                if tag in field:
                    # Column index 2 of each table is used as the map
                    # (presumably the effect-size column -- confirm against
                    # the summary-statistics layout).
                    mat_d[structure].append(sum_stats[field].iloc[:, 2])
                    names[structure].append(disorder_name + ': ' + field)

    for structure in mat_d:
        mat_d[structure] = np.asarray(mat_d[structure])

    # If additional data and name
    additional = {
        'cortex': (additional_data_cortex, additional_name_cortex),
        'subcortex': (additional_data_subcortex, additional_name_subcortex),
    }
    for structure, (extra_data, extra_names) in additional.items():
        if extra_data is not None and extra_names is not None:
            mat_d[structure] = _stack_rows(mat_d[structure], extra_data)
            names[structure] = np.concatenate([
                np.asarray(names[structure], dtype=str),
                np.atleast_1d(np.asarray(extra_names, dtype=str)),
            ])

    if method == 'pca':
        components = {'cortex': [], 'subcortex': []}
        variance = {'cortex': [], 'subcortex': []}
        for structure, maps in mat_d.items():
            if maps.size == 0:
                # No map selected for this structure: keep the empty placeholder.
                continue
            pca = PCA()
            # Transposed so that the maps are the columns handed to PCA.
            components[structure] = pca.fit_transform(np.transpose(maps))
            variance[structure] = pca.explained_variance_ratio_
        return components, variance, names

    correlation_matrix = {'cortex': [], 'subcortex': []}
    for structure, maps in mat_d.items():
        if maps.size == 0:
            continue
        correlation_matrix[structure] = np.corrcoef(maps)
    return correlation_matrix, names