2. Checkerboard plots
[64]:
# assume already have the flux snapshots at desired cycle
import os
import pandas as pd
import ast
import seaborn as sns
import matplotlib.pyplot as plt
from scarcc.preparation.find_directory import find_directory
from scarcc.data_analysis import get_fs_change
from scarcc.data_analysis import (get_row_color_legend, relabel_clustermap, generate_row_colors, assign_plot_total_E_wide,
get_fs_kde_plot, plot_kde, scatter_xycol_A, scatter_xycol_B)
from scarcc.data_analysis import convert_po_col
# data_directory = find_directory('Data', os.path.abspath(''))
# flux_path = os.path.join(data_directory, 'Enormalized_carbon.xlsx')
flux_analysis = pd.read_excel(flux_path, sheet_name='flux', index_col=0)
# carbon_allocation_E_wide = pd.read_csv(os.path.join(data_directory, 'carbon_allocation_E_wide_m1.csv'), index_col=0) # equivalent to below
carbon_allocation_E_wide = pd.read_excel(flux_path, sheet_name='normalized_carbon', index_col=0).loc[:,'total_carbon_BIOMASS_Ec_iML1515_core_75p37M':]
fs_change = pd.read_excel(flux_path, sheet_name='difference_in_normalized_carbon', index_col=0)
carbon_allocation_E_wide.query('XG=="DG"').head()
[64]:
| total_carbon_BIOMASS_Ec_iML1515_core_75p37M | total_carbon_EX_bulk_ac_e | total_carbon_EX_co2_e | total_carbon_EX_for_e | total_carbon_EX_gal_e | total_carbon_EX_glyclt_e | total_carbon_EX_lcts_e | total_carbon_EX_met__L_e | total_carbon_Waste | percent_BIOMASS_Ec_iML1515_core_75p37M | ... | percent_EX_gal_e | percent_EX_glyclt_e | percent_EX_lcts_e | percent_EX_met__L_e | percent_Waste | XG | Drug_comb_effect_coc | Drug_comb_effect_Emono | Drug_comb_effect_Smono | BM_consortia_frac_binned | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Gene_inhibition | |||||||||||||||||||||
| dadX.aroA | -0.827687 | -2.523712 | -1.795225 | NaN | NaN | NaN | 7.948830 | 0.016210 | 2.802206 | -0.104127 | ... | NaN | NaN | 1 | 0.002039 | -0.352531 | DG | Antagonistic | Antagonistic | Antagonistic | E |
| dadX.eno | -0.008114 | -4.534300 | -4.977270 | NaN | NaN | NaN | 33.683051 | 0.000158 | 24.163367 | -0.000241 | ... | NaN | NaN | 1 | 0.000005 | -0.717375 | DG | Additive | Antagonistic | Additive | No growth |
| dadX.pheA | -0.759820 | -2.501663 | -1.736529 | NaN | NaN | NaN | 7.491709 | 0.014881 | 2.493697 | -0.101422 | ... | NaN | NaN | 1 | 0.001986 | -0.332861 | DG | Antagonistic | Antagonistic | Antagonistic | E |
| dadX.acnB | -0.561175 | -2.249162 | -1.427180 | NaN | NaN | NaN | 6.142324 | 0.010991 | 1.904807 | -0.091362 | ... | NaN | NaN | 1 | 0.001789 | -0.310112 | DG | Additive | Antagonistic | Additive | E |
| aroA.dapF | -0.701962 | -2.231143 | -1.893485 | NaN | NaN | NaN | 7.046120 | 0.013748 | 2.219530 | -0.099624 | ... | NaN | NaN | 1 | 0.001951 | -0.315000 | DG | Antagonistic | Antagonistic | Antagonistic | E |
5 rows × 23 columns
[19]:
scatter_xycol_A(flux_analysis, x_col='EX_bulk_ac_e', y_col='BIOMASS_iRR1083_metals')
[20]:
[21]:
# gr_path = os.path.join(data_directory, 'gr_DG_m1_normalized.csv')
# gr_df = pd.read_csv(gr_path, index_col=0)
# from scarcc.data_analysis import get_fs_change
# fs_change = get_fs_change(gr_path=gr_path,carbon_allocation_E_wide=carbon_allocation_E_wide)
[65]:
sns.set_context('talk', font_scale=1)
x_col = 'EX_bulk_ac_e' # other options: 'Waste', 'BIOMASS_Ec_iML1515_core_75p37M'
y_col = 'EX_co2_e'
scatter_xycol_B(fs_change, x_col, y_col)
read fs_change from excel or derive from E_wide
[66]:
fs_change = pd.read_excel(os.path.join(data_directory, 'Enormalized_carbon.xlsx'), sheet_name='difference_in_normalized_carbon', index_col=0) # same as below
# gr_path = os.path.join(data_directory, 'gr_DG_checkerboard_normalized.csv')
# fs_change = get_fs_change(gr_path=gr_path,carbon_allocation_E_wide=carbon_allocation_E_wide)
Graph C Kde plot
[67]:
fs_plot = get_fs_kde_plot(fs_change)
plot_kde(fs_plot, col_prefix='percent_')
print('Unique columns: ', fs_plot.query("reaction.str.contains('percent')").reaction.unique())
Index(['Drug Combination Effect', 'reaction', 'Normalized Carbon Flux'], dtype='object')
Unique columns: ['percent_BIOMASS_Ec_iML1515_core_75p37M' 'percent_EX_bulk_ac_e'
'percent_EX_co2_e' 'percent_Waste']
2.1. Heatmaps
[68]:
single_pathway_df = pd.read_csv(os.path.join(data_directory, 'single_pathway.csv'), index_col=0)
single_pathway_df['Pathway_list'] = single_pathway_df.Pathway_list.apply(ast.literal_eval)
[69]:
def get_without_carbon_gcomb(l=None, want_carbon_gcomb=False):
carbon_pwy = ['Glycolysis/Gluconeogenesis', 'Citric Acid Cycle', 'Alternate Carbon Metabolism', 'Pentose Phosphate Pathway']
carbon_gcomb = list(set(single_pathway_df.explode('Pathway_list').query('Pathway_list in @carbon_pwy & XG=="DG"').index))
result_l = [ele for ele in l if ele not in carbon_gcomb] if not want_carbon_gcomb else carbon_gcomb
if 'Normal' not in result_l:
result_l.append('Normal')
return result_l
[70]:
p_cols = ['percent_BIOMASS_Ec_iML1515_core_75p37M', 'percent_EX_bulk_ac_e', 'percent_EX_co2_e', 'percent_Waste']
response_cols = ['Drug_comb_effect_coc', 'Drug_comb_effect_Emono', 'Drug_comb_effect_Smono', 'BM_consortia_frac_binned']
p_cols_w_effect = p_cols + response_cols
fs_sub_change= (fs_change
.set_index('Nth_gene', append=True)
.loc[:,p_cols_w_effect]).dropna(axis=0, how='any')
# if 'rm_spec_DG_list' in globals():
# fs_sub_change = fs_sub_change.query('Gene_inhibition in @rm_spec_DG_list')
Normal_row = pd.DataFrame(0, columns = fs_sub_change.columns, index = ['Normal'])
Normal_row.loc[:,'Drug_comb_effect_coc':'BM_consortia_frac_binned']=None
Normal_row['Nth_gene'] = 'Normal'
Normal_row.index.name='Gene_inhibition'
Normal_row.set_index('Nth_gene', append=True, inplace=True)
Normal_row
fs_sub_change = pd.concat([fs_sub_change, Normal_row])
[71]:
def get_response_agree():
return ['Normal','mrdA.mrcA', 'serC.dapD', 'serC.dapB', 'mrdA.aroA', 'mrdA.pheA', 'aroA.serC', 'aroA.dapB', 'aroA.guaB', 'dapA.aroA', 'dadX.guaB', 'aroA.argD', 'acnB.gltA', 'dadX.rffG', 'dadX.aroA', 'aroA.dapF', 'dadX.pyrD', 'dadX.pheA', 'acnB.thrB', 'dapF.pheA', 'dapF.acnB', 'dadX.acnB', 'tktA.gnd', 'dadX.mrdA', 'folA.guaB', 'pyrE.rffG', 'rffG.pyrD', 'dadX.mrcA', 'murA.pyrE', 'thrC.thrB', 'pyrE.pyrD', 'aroA.pheA', 'folC.guaB', 'dapF.dapD', 'folA.rffG', 'murA.pyrD', 'murA.mrcA', 'folP.guaB', 'trpA.aroA', 'folP.folA', 'folC.folA', 'dadX.eno', 'dadX.gapA', 'folP.folC']
response_agree = get_response_agree()
response_agree_without_carbon_gcomb = get_without_carbon_gcomb(response_agree)
sns.set_context('talk', font_scale=0.8)
fig = plt.figure()
clustered = sns.clustermap(fs_sub_change.loc[response_agree_without_carbon_gcomb,p_cols]
, row_colors=generate_row_colors(fs_sub_change)
# , standard_scale=1
, center=0
# , z_score=1
, yticklabels=True
, figsize=(10, 14)
, vmax=0.45
, cmap='coolwarm')
relabel_clustermap(clustered, x_label_middle_str=f'{len(response_agree_without_carbon_gcomb)-1} Agreed responses w/o energy pathway')
# exclude count of Normal
<Figure size 640x480 with 0 Axes>
[41]:
carbon_gcomb = get_without_carbon_gcomb(want_carbon_gcomb=True)
clustered = sns.clustermap(fs_sub_change.loc[carbon_gcomb,p_cols]
, row_colors=generate_row_colors(fs_sub_change, color=[ '#4622fc','#9d22fc', '#da52d5', 'grey'])
# , standard_scale=1
, center=0
# , z_score=1
, yticklabels=True
, cmap='coolwarm')
relabel_clustermap(clustered, x_label_middle_str='Drug Combinations Involved Carbon Pathway')
[ ]:
[36]:
def get_disagree_response(additive_threshold=0.05):
mono_response = pd.concat([convert_po_col(gr_df['po_diff_E0_monoculture']), convert_po_col(gr_df['po_diff_S0_monoculture'])], axis=1)
response_df = gr_df.filter(regex='po_diff(?!.*rescaled)').apply(lambda x: convert_po_col(x, additive_threshold=additive_threshold), axis=0)
finer_disagree_Smc = response_df[response_df['po_diff_E0_coculture'] != response_df['po_diff_S0_monoculture']]
disagreement_response = list(finer_disagree_Smc.index)
disagreement_response.remove('Normal')
return disagreement_response
disagreement_response = get_disagree_response()
disagreement_response
[36]:
['mrdA.mrcA',
'serC.dapD',
'serC.dapB',
'mrdA.aroA',
'mrdA.pheA',
'aroA.serC',
'murA.mrdA',
'dapF.acnB',
'acnB.purT',
'acnB.ackA',
'tktA.gnd',
'dadX.mrdA',
'dapA.dapF',
'gltA.gltD']
[44]:
total_carbon_cols = [ele.replace('percent', 'plot_total_carbon') for ele in p_cols]
sub_disagreement_response_wnormal = disagreement_response.copy()
sub_disagreement_response_wnormal = [ele for ele in sub_disagreement_response_wnormal if ele not in carbon_gcomb]
sub_disagreement_response_wnormal.extend(['Normal'])
plot_cluster_disagree = assign_plot_total_E_wide(carbon_allocation_E_wide,log_waste=False).loc[sub_disagreement_response_wnormal]
row_colors = generate_row_colors(plot_cluster_disagree, color=[ '#4622fc','#9d22fc', '#da52d5', 'grey'])
clustered = sns.clustermap(plot_cluster_disagree[total_carbon_cols] # flux magnitude
,row_colors=row_colors
# , z_score=1
, standard_scale=1
# , center=0
, cmap='Blues')
relabel_clustermap(clustered, )
clustered.ax_heatmap.set_ylabel('Double Drug Inhibition', weight='bold', fontsize=12)
clustered.ax_heatmap.set_xlabel(f'''Carbon Fluxes \n(E. coli, Double Drug Inhibition)''', weight='bold')
[44]:
Text(0.5, -416.1666666666666, 'Carbon Fluxes \n(E. coli, Double Drug Inhibition)')
[38]:
gene_columns_used = {'Response_agree_wo_carbon_pathway': response_agree_without_carbon_gcomb, #
'carbon_gcomb': carbon_gcomb,
'Disagreem=ent_response_wo_carbon_pathway': sub_disagreement_response_wnormal}
[57]:
from collections import defaultdict
gene_columns_used = defaultdict(dict)
gene_columns_used['scatter plot A'] = {'gene_list': 'All',
'sheet': 'flux',
'columns': ['EX_bulk_ac_e', 'BIOMASS_iRR1083_metals']}
gene_columns_used['scatter plot B'] = {'gene_list': 'All',
'sheet': 'difference_in_normalized_carbon',
'columns': ['EX_bulk_ac_e', 'EX_co2_e']}
gene_columns_used['KDE plot C'] = {'gene_list': 'All',
'sheet': 'difference_in_normalized_carbon',
'columns': ['percent_BIOMASS_Ec_iML1515_core_75p37M', 'percent_EX_bulk_ac_e',
'percent_EX_co2_e', 'percent_Waste']}
gene_columns_used[f'{len(response_agree_without_carbon_gcomb)-1} Agreed responses w/o energy pathway'] = {'gene_list': response_agree_without_carbon_gcomb,
'sheet': 'difference_in_normalized_carbon',
'columns': p_cols
}
gene_columns_used['Drug Combinations Involved Carbon Pathway'] = {'gene_list': carbon_gcomb,
'sheet': 'difference_in_normalized_carbon',
'columns': p_cols}
gene_columns_used['Carbon Fluxes'] = {'gene_list': sub_disagreement_response_wnormal,
'sheet': 'normalized_carbon',
'columns': total_carbon_cols}
[62]:
pd.DataFrame(gene_columns_used).to_csv(os.path.join(data_directory, 'gene_columns_used.csv'))