Visualize PMP Modes of Variability results: CMIP6

In this document, you will visualize PMP’s Modes of Variability results for CMIP6, to inter-compare performace. You will create following plots.

Portrait plot
Parallel coordinate plot

Written by Jiwoo Lee (LLNL/PCMDI)

Last update: Nov 2024

Contents

Read data from JSON files

1.1 Download PMP output JSON files for CMIP models
1.2 Normalize each column by its median for portrait plot

Portrait Plot
Parallel Coordinate Plot

1. Read data from JSON files

Input data for portrait plot is expected as a set a (stacked or list of) 2-d numpy array(s) with list of strings for x and y axes labels.

1.1 Download PMP output JSON files for CMIP models

[1]:

import glob
import json
import os
import pandas as pd
import numpy as np

from pcmdi_metrics.graphics import download_archived_results
from pcmdi_metrics.utils import sort_human

PMP output files downloadable from the PMP results archive.

[2]:

modes = ['NAM', 'NAO', 'NPGO', 'NPO', 'PDO', 'PNA', 'SAM']

json_dir = './json_files'

mip = "cmip6"
exp = "historical"
data_version = "v20220825"

"""
mip = "cmip5"
exp = "historical"
data_version = "v20210119"
"""

"""
mip = "cmip3"
exp = "20cm3"
data_version = "v20210119"
"""

stat = "rms"
#stat = "rmsc"
#stat = "stdv_pc_ratio_to_obs"

[3]:

if stat == "rms":
    stat_name = "RMSE"
elif stat == "rmsc":
    stat_name = "Centered RMSE"
elif stat == "stdv_pc_ratio_to_obs":
    stat_name = "Amplitude ratio to OBS"

Provide directory path and filename in the PMP results archive.

[4]:

for mode in modes:
    if mode in ['PDO', 'NPGO']:
        ref = "HadISSTv1.1"
    else:
        ref = "NOAA-CIRES_20CR"

    if mode in ['NPO', 'NPGO']:
        eof = "EOF2"
    else:
        eof = "EOF1"

    path = os.path.join("metrics_results/variability_modes/"+mip+"/"+exp+"/"+data_version,
                        mode, ref,
                        "_".join(["var", "mode", mode, eof, "stat", mip, exp, "mo_atm_allModels_allRuns_1900-2005.json"]))
    download_archived_results(path, json_dir)

Check JSON files

[5]:

json_list = sorted(glob.glob(os.path.join(json_dir, 'var_mode_*' + mip + '*' + '.json')))
for json_file in json_list:
    print(json_file.split('/')[-1])

var_mode_NAM_EOF1_stat_cmip6_historical_mo_atm_allModels_allRuns_1900-2005.json
var_mode_NAO_EOF1_stat_cmip6_historical_mo_atm_allModels_allRuns_1900-2005.json
var_mode_NPGO_EOF2_stat_cmip6_historical_mo_atm_allModels_allRuns_1900-2005.json
var_mode_NPO_EOF2_stat_cmip6_historical_mo_atm_allModels_allRuns_1900-2005.json
var_mode_PDO_EOF1_stat_cmip6_historical_mo_atm_allModels_allRuns_1900-2005.json
var_mode_PNA_EOF1_stat_cmip6_historical_mo_atm_allModels_allRuns_1900-2005.json
var_mode_SAM_EOF1_stat_cmip6_historical_mo_atm_allModels_allRuns_1900-2005.json

[6]:

cmip_result_dict = dict()
modes = list()

for json_file in json_list:
    mode = json_file.split('/')[-1].split('_')[2]
    modes.append(mode)
    with open(json_file) as fj:
        dict_temp = json.load(fj)['RESULTS']
        cmip_result_dict[mode] = dict_temp

[ ]:

def dict_to_df(cmip_result_dict):
    models = sorted(list(cmip_result_dict['NAM'].keys()))

    df = pd.DataFrame()
    df['model'] = models
    df['num_runs'] = np.nan

    mode_season_list = list()

    modes = ['SAM', 'NAM', 'NAO', 'NPO', 'PNA', 'NPGO', 'PDO']
    for mode in modes:
        if mode in ['PDO', 'NPGO']:
            seasons = ['monthly']
        else:
            seasons = ['DJF', 'MAM', 'JJA', 'SON']

        for season in seasons:
            df[mode+"_"+season] = np.nan
            mode_season_list.append(mode+"_"+season)
            for index, model in enumerate(models):
                if model in list(cmip_result_dict[mode].keys()):
                    runs = sort_human(list(cmip_result_dict[mode][model].keys()))
                    stat_run_list = list()
                    for run in runs:
                        stat_run = cmip_result_dict[mode][model][run]['defaultReference'][mode][season]['cbf'][stat]
                        stat_run_list.append(stat_run)
                    stat_model = np.average(np.array(stat_run_list))
                    num_runs = len(runs)
                    df.at[index, mode+"_"+season] = stat_model
                    if np.isnan(df.at[index, 'num_runs']):
                        df.at[index, 'num_runs'] = num_runs
                else:
                    stat_model = np.nan
                    num_runs = 0
    return df, mode_season_list

[8]:

df, mode_season_list = dict_to_df(cmip_result_dict)
df

[8]:

	model	num_runs	SAM_DJF	SAM_MAM	SAM_JJA	SAM_SON	NAM_DJF	NAM_MAM	NAM_JJA	NAM_SON	...	NPO_DJF	NPO_MAM	NPO_JJA	NPO_SON	PNA_DJF	PNA_MAM	PNA_JJA	PNA_SON	NPGO_monthly	PDO_monthly
0	ACCESS-CM2	5.0	0.574046	0.380609	0.544973	0.391616	0.571041	0.459388	0.240580	0.342555	...	0.732129	0.460701	0.288707	0.306164	0.650744	0.569345	0.286248	0.256308	0.136352	0.122066
1	ACCESS-ESM1-5	40.0	0.602834	0.414287	0.608346	0.610213	0.506217	0.412721	0.264908	0.354789	...	0.579083	0.350249	0.269973	0.241847	0.476215	0.388328	0.269333	0.287768	0.126786	0.124546
2	AWI-CM-1-1-MR	5.0	0.508376	0.366660	0.796009	0.917559	0.587488	0.392637	0.228123	0.323303	...	0.682399	0.413841	0.224981	0.406585	0.714271	0.464686	0.276964	0.328163	0.112087	0.136091
3	AWI-ESM-1-1-LR	1.0	0.544485	0.398040	0.634991	1.008028	0.802982	0.475390	0.257938	0.404807	...	1.065016	0.574711	0.268740	0.307862	1.031246	0.445994	0.295466	0.302287	0.113582	0.194669
4	BCC-CSM2-MR	3.0	0.465351	0.420144	0.624178	0.645676	0.718848	0.728025	0.403785	0.701120	...	0.763210	0.769850	0.500015	0.591631	0.790271	0.886669	0.483820	0.563353	NaN	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
60	NorESM2-MM	3.0	0.486581	0.399562	0.656545	0.633588	0.548388	0.588263	0.309797	0.411143	...	0.307830	0.447543	0.384719	0.251864	0.633810	0.720310	0.375183	0.338101	0.105681	0.186747
61	SAM0-UNICON	1.0	0.511490	0.412573	0.708228	0.679179	0.619961	0.641307	0.233174	0.375586	...	0.582020	0.343711	0.288251	0.473160	0.684519	0.619769	0.267318	0.387104	0.137859	0.133782
62	TaiESM1	2.0	0.469439	0.392923	0.598709	0.523395	0.669217	0.616860	0.276326	0.450474	...	0.339072	0.337362	0.336802	0.414093	0.580974	0.631163	0.306005	0.524750	0.137300	0.136674
63	UKESM1-0-LL	19.0	0.586528	0.352492	0.513565	0.476506	0.520315	0.506951	0.227039	0.312856	...	0.673702	0.556370	0.251275	0.250827	0.597751	0.644911	0.240213	0.237588	0.146209	0.145564
64	UKESM1-1-LL	1.0	0.565858	0.309672	0.426744	0.498278	0.541460	0.355031	0.265088	0.262266	...	0.482643	0.409192	0.281857	0.265768	0.480849	0.355581	0.251933	0.164412	NaN	NaN

65 rows × 24 columns

[9]:

df_combined = df

[10]:

model_labels = [m + ' (' + str(int(r)) + ')' for m, r in zip(df_combined["model"].to_list(), df_combined["num_runs"].to_list())]

Prepare input for portrait plot plotting function

[11]:

landscape = True
#landscape = False

data = dict()

if landscape:
    data = df_combined[mode_season_list].to_numpy().T
else:
    data = df_combined[mode_season_list].to_numpy()

[12]:

models = df_combined.index.values.tolist()

print('data.shape:', data.shape)
print('len(mode_season_list): ', len(mode_season_list))
print('len(models): ', len(models))

data.shape: (22, 65)
len(mode_season_list):  22
len(models):  65

[13]:

if landscape:
    yaxis_labels = mode_season_list
    xaxis_labels = model_labels
else:
    xaxis_labels = mode_season_list
    yaxis_labels = model_labels

1.2 Normalize each column by its median for portrait plot

Use normalize_by_median function.

Parameters

data: 2d numpy array
axis: 0 (normalize each column) or 1 (normalize each row), default=0

Return

data_nor: 2d numpy array

[14]:

if landscape:
    axis = 1
    figsize = (40, 10)
else:
    axis = 0
    figsize = (18, 25)

[15]:

from pcmdi_metrics.graphics import normalize_by_median

if stat not in ["stdv_pc_ratio_to_obs"]:
    data_nor = normalize_by_median(data, axis=axis)
    cmap_bounds = [-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5]
    vertical_center = "median"
    cmap = 'RdYlBu_r'
else:
    data_nor = data
    cmap_bounds = [0.5, 0.7, 0.9, 1.1, 1.3, 1.5]
    cmap_bounds = [r/10 for r in range(5, 16, 1)]
    vertical_center = 1
    cmap = 'jet'

[16]:

data_nor.shape

[16]:

(22, 65)

2. Portrait Plot

Use Matplotlib-based PMP Visualization Function. Detailed description for the functions parameters and returns can be found in the API documentation.

[17]:

from pcmdi_metrics.graphics import portrait_plot

Portrait Plot

[18]:

fig, ax, cbar = portrait_plot(data_nor,
                              xaxis_labels=xaxis_labels,
                              yaxis_labels=yaxis_labels,
                              cbar_label=stat_name,
                              box_as_square=True,
                              vrange=(-0.5, 0.5),
                              figsize=figsize,
                              cmap=cmap,
                              cmap_bounds=cmap_bounds,
                              cbar_kw={"extend": "both"},
                              missing_color='white',
                              legend_box_xy=(1.11, 1.21),
                              legend_box_size=4,
                              legend_lw=1,
                              legend_fontsize=15,
                              logo_rect = [0.67, 1, 0.15, 0.15],
                             )
ax.set_xticklabels(xaxis_labels, rotation=90, va='center', ha="left")

# Add title
ax.set_title("Variability Modes: "+stat_name, fontsize=30, pad=30)

[18]:

Text(0.5, 1.0, 'Variability Modes: RMSE')

../_images/examples_variability_modes_plots_all-stats_27_1.png

[19]:

# Save figure as an image file
fig.savefig('_'.join(['variability_modes_portrait_plot', mip, exp, stat])+'.png', facecolor='w', bbox_inches='tight')

3. Parallel Coordinate Plot

[20]:

data = df_combined[mode_season_list].to_numpy()
model_names = model_labels
metric_names = mode_season_list
model_highlights = None
print('data.shape:', data.shape)
print('len(metric_names): ', len(metric_names))
print('len(model_names): ', len(model_names))

data.shape: (65, 22)
len(metric_names):  22
len(model_names):  65

Use parallel coordinate plot function of PMP

Detailed description for the functions parameters and returns can be found in the API documentation.

[21]:

from pcmdi_metrics.graphics import parallel_coordinate_plot

3.1 Show all models

[22]:

fig, ax = parallel_coordinate_plot(data, metric_names, model_names,
                                   title='Variability Modes: '+stat_name,
                                   figsize=(21, 7),
                                   colormap='tab20',
                                   show_boxplot=False,
                                   show_violin=True,
                                   violin_colors=("lightgrey", "pink"),
                                   xtick_labelsize=10,
                                   logo_rect=[0.8, 0.8, 0.15, 0.15],
                                   comparing_models=model_highlights,
                                   vertical_center=vertical_center,
                                   vertical_center_line=True
                                  )

ax.set_xticklabels(metric_names, rotation=30, va='top', ha="right")

[22]:

[Text(0, 0, 'SAM_DJF'),
 Text(1, 0, 'SAM_MAM'),
 Text(2, 0, 'SAM_JJA'),
 Text(3, 0, 'SAM_SON'),
 Text(4, 0, 'NAM_DJF'),
 Text(5, 0, 'NAM_MAM'),
 Text(6, 0, 'NAM_JJA'),
 Text(7, 0, 'NAM_SON'),
 Text(8, 0, 'NAO_DJF'),
 Text(9, 0, 'NAO_MAM'),
 Text(10, 0, 'NAO_JJA'),
 Text(11, 0, 'NAO_SON'),
 Text(12, 0, 'NPO_DJF'),
 Text(13, 0, 'NPO_MAM'),
 Text(14, 0, 'NPO_JJA'),
 Text(15, 0, 'NPO_SON'),
 Text(16, 0, 'PNA_DJF'),
 Text(17, 0, 'PNA_MAM'),
 Text(18, 0, 'PNA_JJA'),
 Text(19, 0, 'PNA_SON'),
 Text(20, 0, 'NPGO_monthly'),
 Text(21, 0, 'PDO_monthly')]

../_images/examples_variability_modes_plots_all-stats_34_1.png

[23]:

# Save figure as an image file
fig.savefig('_'.join(['variability_modes_parallel_coordinate_plot', mip, exp, stat])+'.png', facecolor='w', bbox_inches='tight')

3.2 Highlight specific models

[24]:

model_highlights = ['E3SM-1-0 (5)', 'E3SM-1-1 (1)']

fig, ax = parallel_coordinate_plot(data, metric_names, model_names,
                                   models_to_highlight=model_highlights,
                                   models_to_highlight_colors=['blue', 'red'],
                                   title='Variability Modes: '+stat_name,
                                   figsize=(21, 7),
                                   colormap='tab20',
                                   show_boxplot=False,
                                   show_violin=True,
                                   violin_colors=("lightgrey", "pink"),
                                   xtick_labelsize=10,
                                   logo_rect=[0.8, 0.8, 0.15, 0.15],
                                   comparing_models=model_highlights,
                                   vertical_center=vertical_center,
                                   vertical_center_line=True
                                  )

ax.set_xticklabels(metric_names, rotation=30, va='top', ha="right")

[24]:

[Text(0, 0, 'SAM_DJF'),
 Text(1, 0, 'SAM_MAM'),
 Text(2, 0, 'SAM_JJA'),
 Text(3, 0, 'SAM_SON'),
 Text(4, 0, 'NAM_DJF'),
 Text(5, 0, 'NAM_MAM'),
 Text(6, 0, 'NAM_JJA'),
 Text(7, 0, 'NAM_SON'),
 Text(8, 0, 'NAO_DJF'),
 Text(9, 0, 'NAO_MAM'),
 Text(10, 0, 'NAO_JJA'),
 Text(11, 0, 'NAO_SON'),
 Text(12, 0, 'NPO_DJF'),
 Text(13, 0, 'NPO_MAM'),
 Text(14, 0, 'NPO_JJA'),
 Text(15, 0, 'NPO_SON'),
 Text(16, 0, 'PNA_DJF'),
 Text(17, 0, 'PNA_MAM'),
 Text(18, 0, 'PNA_JJA'),
 Text(19, 0, 'PNA_SON'),
 Text(20, 0, 'NPGO_monthly'),
 Text(21, 0, 'PDO_monthly')]

../_images/examples_variability_modes_plots_all-stats_37_1.png