From ec8269fd89b316490d1fe4ba2425660e93c5d16d Mon Sep 17 00:00:00 2001 From: Jessica Knezha Date: Wed, 15 Jan 2025 15:53:35 -0700 Subject: [PATCH 01/44] add todos --- .../core_scripts/gsistats_timeseries.py | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index 2706a3b..2c57aa4 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -35,7 +35,7 @@ ) CONFIG_FILE = 'agu_full.mplstyle' -friendly_names_dict={"scout_run_v1":"NOAA Scout Run", "NASA_GEOSIT_GSISTATS":"NASA GEOS-IT", "std_GSIstage_1":"STD", "bias_post_corr_GSIstage_1":"Bias"} +friendly_names_dict={"scout_run_v1":"NOAA Scout Run", "NASA_GEOSIT_GSISTATS":"NASA GEOS-IT", "std_GSIstage_1":"STD", "bias_post_corr_GSIstage_1":"Bias"} # could this be done by string matching for the std/bias etc part? we could have a basic friendly dict for that def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, select_sat_name=True, @@ -50,7 +50,7 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, ], sat_name = 'NOAA 15', start_date = '1979-01-01 00:00:00', - stop_date = '2001-06-01 00:00:00'): + stop_date = '2001-06-01 00:00:00'): #default start and stop date should be 1979 - 2025? 
#TODO """modify the above input variables to configure and generate time series data for various GSI related statistics @@ -84,7 +84,7 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, timeseries_data.plot() plt.suptitle(experiment_name) #plt.show() - metric_string = array_metric_type.split('%')[1] #this won't always work if you give a specific sensor value + metric_string = array_metric_type.split('%')[1] #this won't always work if you give a specific sensor value #TODO plt.savefig(os.path.join( 'results', f'gsi{metric_string}{experiment_name}.png'), @@ -93,7 +93,7 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, elif make_line_plot: stat_label = 'bias_post_corr_GSIstage_1' - #stat_label = 'std_GSIstage_1' + #stat_label = 'std_GSIstage_1' #TODO sensor_label = 'n15_amsua' y_min = -0.5 y_max = 0.6 @@ -121,7 +121,7 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, 'amsua_nobs_used_%' ], sat_name = 'NOAA 15', - channel_indices = [4, 5, 6, 7], #this is the specific location in the array, not based on the channel name that needs to be expanded + channel_indices = [4, 5, 6, 7], #this is the specific location in the array, not based on the channel name that needs to be expanded #TODO start_date = '1999-01-01 00:00:00', stop_date = '2001-06-01 00:00:00'): """modify the above input variables to configure and generate time series @@ -135,7 +135,7 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, CONFIG_PATH, CONFIG_FILE ) - if make_plot or make_line_plot: + if make_line_plot: plt.style.use(style_file) plt.rcParams['font.size'] = 20 @@ -179,11 +179,11 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, # stat_label = 'amsua_bias_post_corr_GSIstage_1' # sensor_label = 'n15_amsua' - plot_experiment_comparison(experiment_timeseries, experiment_list, ".", "5, 6, 7, 8", ['#E4002B', '#003087'], 0) + plot_experiment_comparison(experiment_timeseries, 
experiment_list, ".", "5, 6, 7, 8", ['#E4002B', '#003087'], 0) #TODO remove the hard coded stuff - #plot_experiment_comparison_multi_stat(experiment_timeseries, experiment_list, ".", "8", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) + #plot_experiment_comparison_multi_stat(experiment_timeseries, experiment_list, ".", "8", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways - #plot_experiment_comparison_by_channel(experiment_timeseries, experiment_list, ".", channel_indices) + #plot_experiment_comparison_by_channel(experiment_timeseries, experiment_list, ".", channel_indices) #TODO make accessible via code pathways else: timeseries_data.print_init_time() @@ -330,6 +330,12 @@ def build(self, all_channel_max=True, all_channel_mean=False, by_channel=False): #print(gsi_stage, stat_name, sensor_label, self.sensorlabel_dict[sensor_label]) + def get_channel_indices_from_names(self, channel_names): #TODO: current function + channel_indices_names = ['index', 'name'] #store this and then we can pull from it (we need to be able to pull the name back out for plotting the values) + self.channel_indices = dict() + + + def flatten(self): self.unique_stat_list = extract_unique_stats( set(self.data_frame['metric_name'])) @@ -396,7 +402,7 @@ def flatten(self): #print(gsi_stage, stat_name, sensor_label, self.sensorlabel_dict[sensor_label]) - #right now this function just selects by the channel indices but it should be expanded to use channel names and then applied to the value indices + #right now this function just selects by the channel indices but it should be expanded to use channel names and then applied to the value indices #TODO Channel stuff def flatten_by_channel(self, channel_indices): if channel_indices is None: self.flatten() #do a basic full flatten instead @@ -428,8 
+434,10 @@ def flatten_by_channel(self, channel_indices): row.time_valid.day, row.time_valid.year,) + row.array_index_values #has the channel names to index + #flatten to only include given channels - value = np.nansum([np.nan if row.value[i] is None else row.value[i] for i in channel_indices if i < len(row.value)]) + value = np.nansum([np.nan if row.value[i] is None else row.value[i] for i in channel_indices if i < len(row.value)])#TODO - this is the line that needs to change # Check if stat_label exists in timestamp_dict if stat_label not in self.timestamp_dict: @@ -931,7 +939,7 @@ def plot_experiment_comparison_by_channel(timeseries_dict, experiment_list, outp def main(): - run() + run_line_plot() if __name__=='__main__': main() From a634d49c20d807b41bced3d6e453d0044c23f0bc Mon Sep 17 00:00:00 2001 From: Jessica Knezha Date: Wed, 15 Jan 2025 15:57:34 -0700 Subject: [PATCH 02/44] update the gitignore for .png --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index f597916..aa61e6e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,4 @@ __pycache__ .DS_Store .vscode/ -.png +*.png From 956757bcce6c2cc22bf7b4725190594d40f9434b Mon Sep 17 00:00:00 2001 From: Jessica Knezha Date: Fri, 24 Jan 2025 16:42:19 -0700 Subject: [PATCH 03/44] channels need to be string and ints, need to fix that... 
--- .../core_scripts/gsistats_timeseries.py | 127 +++++++++--------- 1 file changed, 67 insertions(+), 60 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index 2c57aa4..262071f 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -49,8 +49,9 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, #'amsua_use_%' ], sat_name = 'NOAA 15', + channel_list = ['5','6','7'], start_date = '1979-01-01 00:00:00', - stop_date = '2001-06-01 00:00:00'): #default start and stop date should be 1979 - 2025? #TODO + stop_date = '2024-06-01 00:00:00'): #default start and stop date should be 1979 - 2025? #TODO """modify the above input variables to configure and generate time series data for various GSI related statistics @@ -76,9 +77,9 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, array_metric_types=array_metric_type, select_sat_name=select_sat_name, sat_name=sat_name) - timeseries_data.build(all_channel_max=False, # set max or mean - all_channel_mean=False, - by_channel=True) # other False + # timeseries_data.build(all_channel_max=False, # set max or mean #TODO: uncomment this, it should be under make_plot most likely + # all_channel_mean=False, + # by_channel=True) # other False #TODO remove these hardcodes if make_plot: timeseries_data.plot() @@ -97,7 +98,8 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, sensor_label = 'n15_amsua' y_min = -0.5 y_max = 0.6 - timeseries_data.plot_line_plot(stat_label=stat_label, sensor_label=sensor_label, experiment_name=experiment_name, y_min=y_min, y_max=y_max) + timeseries_data.flatten_by_channel(channel_list) + timeseries_data.plot_line_plot(stat_label=stat_label, sensor_label=sensor_label, experiment_name=experiment_name, channels_to_plot=channel_list, y_min=y_min, y_max=y_max) 
metric_string = array_metric_type.split('%')[0] #again not expandable plt.savefig(os.path.join( #'results', @@ -121,7 +123,7 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, 'amsua_nobs_used_%' ], sat_name = 'NOAA 15', - channel_indices = [4, 5, 6, 7], #this is the specific location in the array, not based on the channel name that needs to be expanded #TODO + channel_list = [5, 6, 7, 8], start_date = '1999-01-01 00:00:00', stop_date = '2001-06-01 00:00:00'): """modify the above input variables to configure and generate time series @@ -157,7 +159,7 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, sat_name=sat_name) # Flatten data for the selected channels - timeseries_data.flatten_by_channel(channel_indices=channel_indices) + timeseries_data.flatten_by_channel(channel_list=channel_list) # Store the timeseries data by experiment name and array metric type experiment_timeseries[experiment_name][array_metric_type] = timeseries_data @@ -171,7 +173,7 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, array_metric_types=array_metric_type, select_sat_name=select_sat_name, sat_name=sat_name) - timeseries_data.flatten_by_channel(channel_indices=channel_indices) + timeseries_data.flatten_by_channel(channel_list=channel_list) #timeseries_data.build(by_channel=True) experiment_timeseries[experiment_name] = timeseries_data @@ -330,11 +332,6 @@ def build(self, all_channel_max=True, all_channel_mean=False, by_channel=False): #print(gsi_stage, stat_name, sensor_label, self.sensorlabel_dict[sensor_label]) - def get_channel_indices_from_names(self, channel_names): #TODO: current function - channel_indices_names = ['index', 'name'] #store this and then we can pull from it (we need to be able to pull the name back out for plotting the values) - self.channel_indices = dict() - - def flatten(self): self.unique_stat_list = extract_unique_stats( @@ -402,12 +399,8 @@ def flatten(self): #print(gsi_stage, stat_name, 
sensor_label, self.sensorlabel_dict[sensor_label]) - #right now this function just selects by the channel indices but it should be expanded to use channel names and then applied to the value indices #TODO Channel stuff - def flatten_by_channel(self, channel_indices): - if channel_indices is None: - self.flatten() #do a basic full flatten instead - return + def flatten_by_channel(self, channel_list): self.unique_stat_list = extract_unique_stats( set(self.data_frame['metric_name'])) @@ -415,9 +408,9 @@ def flatten_by_channel(self, channel_indices): self.timelabel_dict = dict() self.value_dict = dict() - self.sensorlabel_list = list() self.statlabel_list = list() + self.channel_list = list() yval = 0 for row in self.data_frame.itertuples(): metric_name_parts = row.metric_name.split('_') @@ -434,45 +427,58 @@ def flatten_by_channel(self, channel_indices): row.time_valid.day, row.time_valid.year,) - row.array_index_values #has the channel names to index - #flatten to only include given channels - value = np.nansum([np.nan if row.value[i] is None else row.value[i] for i in channel_indices if i < len(row.value)])#TODO - this is the line that needs to change + for i, channel in enumerate(row.array_index_values): + if channel_list is not None and channel not in channel_list: #TODO: check this functionality + continue - # Check if stat_label exists in timestamp_dict - if stat_label not in self.timestamp_dict: - self.timestamp_dict[stat_label] = {} # Create the first level dictionary for stat_label + #value = np.nansum([np.nan if row.value[i] is None else row.value[i] for i in channel_indices if i < len(row.value)])#TODO - this is the line that needs to change + value = np.nan if row.value[i] is None else row.value[i] - # Check if sensor_label exists under stat_label in timestamp_dict - if sensor_label not in self.timestamp_dict[stat_label]: - self.timestamp_dict[stat_label][sensor_label] = [] # Create an empty list for sensor_label + # Check if stat_label exists in 
timestamp_dict + if stat_label not in self.timestamp_dict: + self.timestamp_dict[stat_label] = {} # Create the first level dictionary for stat_label - # Check if stat_label exists in timelabel_dict - if stat_label not in self.timelabel_dict: - self.timelabel_dict[stat_label] = {} # Create the first level dictionary for stat_label + # Check if sensor_label exists under stat_label in timestamp_dict + if sensor_label not in self.timestamp_dict[stat_label]: + self.timestamp_dict[stat_label][sensor_label] = [] # Create an empty list for sensor_label - # Check if sensor_label exists under stat_label in timelabel_dict - if sensor_label not in self.timelabel_dict[stat_label]: - self.timelabel_dict[stat_label][sensor_label] = [] # Create an empty list for sensor_label - - # Check if stat_label exists in timelabel_dict - if stat_label not in self.value_dict: - self.value_dict[stat_label] = {} # Create the first level dictionary for stat_label + #Check if channel + if channel not in self.timestamp_dict[stat_label][sensor_label]: + self.timestamp_dict[stat_label][sensor_label][channel] = [] - # Check if sensor_label exists under stat_label in timelabel_dict - if sensor_label not in self.value_dict[stat_label]: - self.value_dict[stat_label][sensor_label] = [] # Create an empty list for sensor_label + # Check if stat_label exists in timelabel_dict + if stat_label not in self.timelabel_dict: + self.timelabel_dict[stat_label] = {} # Create the first level dictionary for stat_label - #print(gsi_stage, stat_name, sensor_label, time_label, value) - self.timestamp_dict[stat_label][sensor_label].append(timestamp) - self.timelabel_dict[stat_label][sensor_label].append(time_label) - self.value_dict[stat_label][sensor_label].append(value) - - if not sensor_label in self.sensorlabel_list: - self.sensorlabel_list.append(sensor_label) - - if not stat_label in self.statlabel_list: - self.statlabel_list.append(stat_label) + # Check if sensor_label exists under stat_label in timelabel_dict + if 
sensor_label not in self.timelabel_dict[stat_label]: + self.timelabel_dict[stat_label][sensor_label] = [] # Create an empty list for sensor_label + + # Check if stat_label exists in timelabel_dict + if stat_label not in self.value_dict: + self.value_dict[stat_label] = {} # Create the first level dictionary for stat_label + + # Check if sensor_label exists under stat_label in timelabel_dict + if sensor_label not in self.value_dict[stat_label]: + self.value_dict[stat_label][sensor_label] = [] # Create an empty list for sensor_label + + if channel not in self.value_dict[stat_label][sensor_label]: + self.value_dict[stat_label][sensor_label][channel] = [] + + #print(gsi_stage, stat_name, sensor_label, time_label, value) + self.timestamp_dict[stat_label][sensor_label][channel].append(timestamp) + self.timelabel_dict[stat_label][sensor_label][channel].append(time_label) + self.value_dict[stat_label][sensor_label][channel].append(value) + + if not sensor_label in self.sensorlabel_list: + self.sensorlabel_list.append(sensor_label) + + if not stat_label in self.statlabel_list: + self.statlabel_list.append(stat_label) + + if not channel in self.channel_list: + self.channel_list.append(channel) def plot(self, all_channel_mean=False, all_channel_max=True): """demonstrate how to plot metrics stored in a backened SQL database @@ -564,7 +570,7 @@ def print_init_time(self): print("GSIStatsTimeSeries object init date and time: ", f"{self.init_datetime}") - def plot_line_plot(self, stat_label, sensor_label, experiment_name, channels_to_plot=[4, 5, 6, 7], y_min=None, y_max=None): + def plot_line_plot(self, stat_label, sensor_label, experiment_name, channels_to_plot, y_min=None, y_max=None): """ Plot time series for specified stat_label, sensor_label, and channels. 
@@ -584,16 +590,16 @@ def plot_line_plot(self, stat_label, sensor_label, experiment_name, channels_to_ for channel in channels_to_plot: try: # Extract the values for the specified stat_label and sensor_label - channel_values = self.value_dict[stat_label][sensor_label] + channel_values = self.value_dict[stat_label][sensor_label][channel] # Extract the corresponding timestamps - timestamps = self.timestamp_dict[stat_label][sensor_label] + timestamps = self.timestamp_dict[stat_label][sensor_label][channel] # Ensure the channel index is valid and plot the values if len(channel_values) > channel: - plt.plot(timestamps, [v[channel] for v in channel_values], label=f'Channel {channel + 1}', alpha=0.7) + plt.plot(timestamps, channel_values, label=f'Channel {channel}', alpha=0.7) else: - print(f"Channel {channel + 1} not found for {stat_label}, {sensor_label}") + print(f"Channel {channel} not found for {stat_label}, {sensor_label}") except KeyError as e: print(f"Missing data for {stat_label}, {sensor_label}: {e}") @@ -736,7 +742,7 @@ def plot_experiment_comparison(timeseries_dict, experiment_list, output_dir, cha # Safely access the nested dictionary for time_valid and value time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, []) value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, []) - + #TODO: update to handle channels? # Check if data exists for the stat_label and sensor_label if time_valid and value: # Plot the data for this experiment @@ -823,7 +829,7 @@ def plot_experiment_comparison_multi_stat(timeseries_dict, experiment_list, outp # Safely access the nested dictionary for time_valid and value time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, []) value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, []) - + #TODO: update to handle channels? 
# Check if data exists for the stat_label and sensor_label if time_valid and value: # Plot the data for this experiment and stat_label with custom color @@ -904,7 +910,7 @@ def plot_experiment_comparison_by_channel(timeseries_dict, experiment_list, outp if timeseries_obj: # Safely access the nested dictionary for time_valid and value time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, []) - value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, [])[channel] + value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, [])[channel] #TODO: does this work with new structure? # Check if data exists for the stat_label and sensor_label if time_valid and value: @@ -939,7 +945,8 @@ def plot_experiment_comparison_by_channel(timeseries_dict, experiment_list, outp def main(): - run_line_plot() + run() + #run_line_plot() if __name__=='__main__': main() From 8720c259c0c2c0084f338de6d7901513afe3b3d0 Mon Sep 17 00:00:00 2001 From: Jessica Knezha Date: Tue, 28 Jan 2025 11:55:07 -0700 Subject: [PATCH 04/44] channel line plot works --- .../core_scripts/gsistats_timeseries.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index 262071f..60aa816 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -99,7 +99,7 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, y_min = -0.5 y_max = 0.6 timeseries_data.flatten_by_channel(channel_list) - timeseries_data.plot_line_plot(stat_label=stat_label, sensor_label=sensor_label, experiment_name=experiment_name, channels_to_plot=channel_list, y_min=y_min, y_max=y_max) + timeseries_data.plot_line_plot_by_channel(stat_label=stat_label, sensor_label=sensor_label, experiment_name=experiment_name, channels_to_plot=channel_list, y_min=y_min, 
y_max=y_max) metric_string = array_metric_type.split('%')[0] #again not expandable plt.savefig(os.path.join( #'results', @@ -441,11 +441,11 @@ def flatten_by_channel(self, channel_list): # Check if sensor_label exists under stat_label in timestamp_dict if sensor_label not in self.timestamp_dict[stat_label]: - self.timestamp_dict[stat_label][sensor_label] = [] # Create an empty list for sensor_label + self.timestamp_dict[stat_label][sensor_label] = {} # Second level dictionary #Check if channel if channel not in self.timestamp_dict[stat_label][sensor_label]: - self.timestamp_dict[stat_label][sensor_label][channel] = [] + self.timestamp_dict[stat_label][sensor_label][channel] = [] #Empty list for channel level values # Check if stat_label exists in timelabel_dict if stat_label not in self.timelabel_dict: @@ -453,7 +453,10 @@ def flatten_by_channel(self, channel_list): # Check if sensor_label exists under stat_label in timelabel_dict if sensor_label not in self.timelabel_dict[stat_label]: - self.timelabel_dict[stat_label][sensor_label] = [] # Create an empty list for sensor_label + self.timelabel_dict[stat_label][sensor_label] = {} # Second level dictionary + + if channel not in self.timelabel_dict[stat_label][sensor_label]: + self.timelabel_dict[stat_label][sensor_label][channel] = [] #Empty list for channel level values # Check if stat_label exists in timelabel_dict if stat_label not in self.value_dict: @@ -461,10 +464,10 @@ def flatten_by_channel(self, channel_list): # Check if sensor_label exists under stat_label in timelabel_dict if sensor_label not in self.value_dict[stat_label]: - self.value_dict[stat_label][sensor_label] = [] # Create an empty list for sensor_label + self.value_dict[stat_label][sensor_label] = {} # Second level dictionary if channel not in self.value_dict[stat_label][sensor_label]: - self.value_dict[stat_label][sensor_label][channel] = [] + self.value_dict[stat_label][sensor_label][channel] = [] # Empty list for channel level values 
#print(gsi_stage, stat_name, sensor_label, time_label, value) self.timestamp_dict[stat_label][sensor_label][channel].append(timestamp) @@ -570,7 +573,7 @@ def print_init_time(self): print("GSIStatsTimeSeries object init date and time: ", f"{self.init_datetime}") - def plot_line_plot(self, stat_label, sensor_label, experiment_name, channels_to_plot, y_min=None, y_max=None): + def plot_line_plot_by_channel(self, stat_label, sensor_label, experiment_name, channels_to_plot, y_min=None, y_max=None): """ Plot time series for specified stat_label, sensor_label, and channels. @@ -595,8 +598,8 @@ def plot_line_plot(self, stat_label, sensor_label, experiment_name, channels_to_ # Extract the corresponding timestamps timestamps = self.timestamp_dict[stat_label][sensor_label][channel] - # Ensure the channel index is valid and plot the values - if len(channel_values) > channel: + # Ensure the channel is valid and plot the values + if channel_values is not None: plt.plot(timestamps, channel_values, label=f'Channel {channel}', alpha=0.7) else: print(f"Channel {channel} not found for {stat_label}, {sensor_label}") From ad135efe37df5ffd0a9f9435820badb742f480c8 Mon Sep 17 00:00:00 2001 From: Jessica Knezha Date: Mon, 3 Feb 2025 10:56:45 -0700 Subject: [PATCH 05/44] per channel works --- .../core_scripts/gsistats_timeseries.py | 166 ++++++++++++++++-- 1 file changed, 148 insertions(+), 18 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index 60aa816..cf54dde 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -123,9 +123,9 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, 'amsua_nobs_used_%' ], sat_name = 'NOAA 15', - channel_list = [5, 6, 7, 8], + channel_list = ['5', '6', '7', '8'], start_date = '1999-01-01 00:00:00', - stop_date = '2001-06-01 00:00:00'): + stop_date = '2024-12-01 00:00:00'): 
"""modify the above input variables to configure and generate time series data for various GSI related statistics @@ -181,7 +181,9 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, # stat_label = 'amsua_bias_post_corr_GSIstage_1' # sensor_label = 'n15_amsua' - plot_experiment_comparison(experiment_timeseries, experiment_list, ".", "5, 6, 7, 8", ['#E4002B', '#003087'], 0) #TODO remove the hard coded stuff + #plot_experiment_comparison(experiment_timeseries, experiment_list, ".", channel_list, ['#E4002B', '#003087'], 0) #TODO remove the hard coded stuff + + plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, ".", ['#E4002B', '#003087'], 0) #plot_experiment_comparison_multi_stat(experiment_timeseries, experiment_list, ".", "8", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways @@ -736,6 +738,29 @@ def plot_experiment_comparison(timeseries_dict, experiment_list, output_dir, cha for sensor_label in sensorlabel_list: plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each stat-sensor combination + # # Loop through each experiment in the experiment list + # for i, experiment_name in enumerate(experiment_list): + # # Access the corresponding GSIStatsTimeSeries object from the dictionary + # timeseries_obj = timeseries_dict.get(experiment_name) + + # if timeseries_obj: + # # Safely access the nested dictionary for time_valid and value + # time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, []) + # value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, []) + # #TODO: update to handle channels? 
+ # # Check if data exists for the stat_label and sensor_label + # if time_valid and value: + # # Plot the data for this experiment + # color = expt_colors[i] if expt_colors else None + # experiment_label = experiment_name + # if experiment_name in friendly_names_dict: + # experiment_label = friendly_names_dict[experiment_name] + # plt.plot(time_valid, value, label=experiment_label, alpha=0.6, color=color) #set plot for line or bar for bar or scatter for scatter + # else: + # print(f"No data for {stat_label}, {sensor_label} in experiment: {experiment_name}") + # else: + # print(f"No data for experiment: {experiment_name}") + # Loop through each experiment in the experiment list for i, experiment_name in enumerate(experiment_list): # Access the corresponding GSIStatsTimeSeries object from the dictionary @@ -743,19 +768,19 @@ def plot_experiment_comparison(timeseries_dict, experiment_list, output_dir, cha if timeseries_obj: # Safely access the nested dictionary for time_valid and value - time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, []) - value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, []) - #TODO: update to handle channels? 
- # Check if data exists for the stat_label and sensor_label - if time_valid and value: - # Plot the data for this experiment - color = expt_colors[i] if expt_colors else None - experiment_label = experiment_name - if experiment_name in friendly_names_dict: - experiment_label = friendly_names_dict[experiment_name] - plt.plot(time_valid, value, label=experiment_label, alpha=0.6, color=color) #set plot for line or bar for bar or scatter for scatter - else: - print(f"No data for {stat_label}, {sensor_label} in experiment: {experiment_name}") + time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, {}) + for channel, timestamps in time_valid.items(): + values = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) + # Check if data exists for the stat_label and sensor_label + if timestamps and values: + # Plot the data for this experiment + color = expt_colors[i] if expt_colors else None + experiment_label = experiment_name + if experiment_name in friendly_names_dict: + experiment_label = friendly_names_dict[experiment_name] + plt.plot(timestamps, values, label=f"{experiment_label} - Ch {channel}", alpha=0.6, color=color) #set plot for line or bar for bar or scatter for scatter + else: + print(f"No data for {stat_label}, {sensor_label} in experiment: {experiment_name}") else: print(f"No data for experiment: {experiment_name}") @@ -786,6 +811,111 @@ def plot_experiment_comparison(timeseries_dict, experiment_list, output_dir, cha print(f"Plot saved: {plot_filepath}") + +def plot_experiment_comparison_per_channel(timeseries_dict, experiment_list, output_dir, expt_colors=None, y_min=None, y_max=None): + """ + Plot time series for multiple experiments for multiple stat and sensor combination, and save each plot. + + Parameters: + - timeseries_dict: Dictionary where keys are experiment names and values are GSIStatsTimeSeries objects. + - experiment_list: List of experiment names to plot. 
+ - output_dir: Directory where plots will be saved. + """ + + # Get statlabel_list and sensorlabel_list from one of the GSIStatsTimeSeries objects + if not timeseries_dict: + print("Error: timeseries_dict is empty.") + return + + # Extract the statlabel_list and sensorlabel_list from the first object in timeseries_dict + first_timeseries_obj = list(timeseries_dict.values())[0] + statlabel_list = first_timeseries_obj.statlabel_list + sensorlabel_list = first_timeseries_obj.sensorlabel_list + channel_list = first_timeseries_obj.channel_list + + + # Loop through each stat_label in the statlabel_list + for stat_label in statlabel_list: + # Loop through each sensor_label in the sensorlabel_list + for sensor_label in sensorlabel_list: + for channel_label in channel_list: + plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each stat-sensor-channel combination + + # # Loop through each experiment in the experiment list + # for i, experiment_name in enumerate(experiment_list): + # # Access the corresponding GSIStatsTimeSeries object from the dictionary + # timeseries_obj = timeseries_dict.get(experiment_name) + + # if timeseries_obj: + # # Safely access the nested dictionary for time_valid and value + # time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, []) + # value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, []) + # #TODO: update to handle channels? 
+ # # Check if data exists for the stat_label and sensor_label + # if time_valid and value: + # # Plot the data for this experiment + # color = expt_colors[i] if expt_colors else None + # experiment_label = experiment_name + # if experiment_name in friendly_names_dict: + # experiment_label = friendly_names_dict[experiment_name] + # plt.plot(time_valid, value, label=experiment_label, alpha=0.6, color=color) #set plot for line or bar for bar or scatter for scatter + # else: + # print(f"No data for {stat_label}, {sensor_label} in experiment: {experiment_name}") + # else: + # print(f"No data for experiment: {experiment_name}") + + # Loop through each experiment in the experiment list + for i, experiment_name in enumerate(experiment_list): + # Access the corresponding GSIStatsTimeSeries object from the dictionary + timeseries_obj = timeseries_dict.get(experiment_name) + + if timeseries_obj: + # Safely access the nested dictionary for time_valid and value + timestamps = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, {}).get(channel_label, []) + values = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, {}).get(channel_label, []) + # Check if data exists for the stat_label and sensor_label + if timestamps and values: + time_range = (max(timestamps) - min(timestamps)).total_seconds() + new_width = min(max(time_range / 50000, 12), 25) + fig = plt.gcf() + width, height = fig.get_size_inches() + if new_width > width: + fig.set_size_inches(new_width, height) + # Plot the data for this experiment + color = expt_colors[i] if expt_colors else None + experiment_label = experiment_name + if experiment_name in friendly_names_dict: + experiment_label = friendly_names_dict[experiment_name] + plt.plot(timestamps, values, label=f"{experiment_label} - Ch {channel_label}", alpha=0.6, color=color) #set plot for line or bar for bar or scatter for scatter + else: + print(f"No data for {stat_label}, {sensor_label} in experiment: {experiment_name}") + else: + 
print(f"No data for experiment: {experiment_name}") + + # Set y-axis limits if specified + if y_min is not None or y_max is not None: + plt.ylim(y_min, y_max) + + # Add labels and title for the plot + plt.xlabel('Time Valid', fontsize=18) + plt.ylabel(f'{stat_label}', fontsize=18) + plt.title(f'Comparison of {stat_label} and {sensor_label} across Experiments for Channel {channel_label}', fontsize=18) + plt.legend(fontsize=20) + + # Rotate x-axis labels for readability + plt.xticks(rotation=45, fontsize=20) + plt.yticks(fontsize=20) + + # Save the plot to the specified output directory + plot_filename = f'{stat_label}_{sensor_label}_{channel_label}_comparison.png' + plot_filepath = os.path.join(output_dir, plot_filename) + plt.savefig(plot_filepath) + + # Close the plot after saving + plt.close() + + print(f"Plot saved: {plot_filepath}") + def plot_experiment_comparison_multi_stat(timeseries_dict, experiment_list, output_dir, channel_list, stat_pair, array_metrics_list, line_colors=None, y_min=None, y_max=None): """ Plot time series for multiple experiments for each stat and sensor combination, and save each plot. 
@@ -948,8 +1078,8 @@ def plot_experiment_comparison_by_channel(timeseries_dict, experiment_list, outp def main(): - run() - #run_line_plot() + #run() + run_line_plot() if __name__=='__main__': main() From 106021fcdfa39446b66e6b282f71437247bafe95 Mon Sep 17 00:00:00 2001 From: Jessica Knezha Date: Mon, 3 Feb 2025 14:24:09 -0700 Subject: [PATCH 06/44] multi stat works --- .../core_scripts/gsistats_timeseries.py | 232 ++++++++++++++---- 1 file changed, 178 insertions(+), 54 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index cf54dde..eae0d60 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -111,19 +111,19 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, # separate to be able to plot experiments on the same graphic / flattens data for now def run_line_plot(make_line_plot=True, select_array_metric_types=True, - select_sat_name=True, multi_stat=False, + select_sat_name=True, multi_stat=True, experiment_list=[#'scout_run_v1', 'NASA_GEOSIT_GSISTATS', 'scout_run_v1' #'scout_runs_gsi3dvar_1979stream' ], - array_metrics_list=[#'amsua_std_%', - #'amsua_bias_post_corr_GSIstage_%', + array_metrics_list=['amsua_std_%', + 'amsua_bias_post_corr_GSIstage_%', #'%_variance_%', - 'amsua_nobs_used_%' + #'amsua_nobs_used_%' ], sat_name = 'NOAA 15', - channel_list = ['5', '6', '7', '8'], + channel_list = ['3', '6', '8', '10'], start_date = '1999-01-01 00:00:00', stop_date = '2024-12-01 00:00:00'): """modify the above input variables to configure and generate time series @@ -183,11 +183,14 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, #plot_experiment_comparison(experiment_timeseries, experiment_list, ".", channel_list, ['#E4002B', '#003087'], 0) #TODO remove the hard coded stuff - plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, 
".", ['#E4002B', '#003087'], 0) + #plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, ".", ['#E4002B', '#003087'], 0) #plot_experiment_comparison_multi_stat(experiment_timeseries, experiment_list, ".", "8", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways - #plot_experiment_comparison_by_channel(experiment_timeseries, experiment_list, ".", channel_indices) #TODO make accessible via code pathways + #plot_experiment_comparison_multi_stat_all_channel(experiment_timeseries, experiment_list, ".", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways + + plot_experiment_comparison_multi_stat_per_channel(experiment_timeseries, experiment_list, ".", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways + else: timeseries_data.print_init_time() @@ -1005,76 +1008,197 @@ def plot_experiment_comparison_multi_stat(timeseries_dict, experiment_list, outp print(f"Plot saved: {plot_filepath}") +def plot_experiment_comparison_multi_stat_all_channel(timeseries_dict, experiment_list, output_dir, stat_pair, array_metrics_list, line_colors=None, y_min=None, y_max=None): + """ + Plot time series for multiple experiments for each stat and sensor combination, and save each plot. + Parameters: + - timeseries_dict: Dictionary where keys are experiment names and values are nested dictionaries of array metric types. + - experiment_list: List of experiment names to plot. + - output_dir: Directory where plots will be saved. + - channel_list: List of channel indices to plot. + - stat_pair: List containing two stat labels to compare (e.g., ['std_GSIstage_1', 'bias_post_corr_GSIstage_1']). 
+ - array_metrics_list: List of array metric types corresponding to the stat labels. + - line_colors: List of colors for each line (optional). If None, default colors will be used. + """ -#in progress, not being used yet successfully -def plot_experiment_comparison_by_channel(timeseries_dict, experiment_list, output_dir, channels_to_plot): + # Get statlabel_list and sensorlabel_list from one of the GSIStatsTimeSeries objects + if not timeseries_dict: + print("Error: timeseries_dict is empty.") + return + + # Extract the statlabel_list and sensorlabel_list from the first object in timeseries_dict + first_timeseries_obj = list(timeseries_dict.values())[0].values() + sensorlabel_list = list(first_timeseries_obj)[0].sensorlabel_list + channel_list = list(first_timeseries_obj)[0].channel_list + + # Set default line colors if none are provided + # if line_colors is None: + # line_colors = plt.cm.get_cmap('tab10', len(experiment_list)) + + # Loop through each sensor_label in the sensorlabel_list + for sensor_label in sensorlabel_list: + plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each sensor combination + + for channel in channel_list: + # Loop through each experiment in the experiment list + for i, experiment_name in enumerate(experiment_list): + # Access the corresponding dictionary for the experiment + experiment_data = timeseries_dict.get(experiment_name) + + if experiment_data: + # Loop through both stat labels in the stat_pair + for j, stat_label in enumerate(stat_pair): + array_metric_type = array_metrics_list[j] # Map stat_label to array_metric_type + timeseries_obj = experiment_data.get(array_metric_type) + + if timeseries_obj: + # Safely access the nested dictionary for time_valid and value + time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) + value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) + # Check if data exists for the stat_label and 
sensor_label + if time_valid and value: + time_range = (max(time_valid) - min(time_valid)).total_seconds() + new_width = min(max(time_range / 50000, 12), 25) + fig = plt.gcf() + width, height = fig.get_size_inches() + if new_width > width: + fig.set_size_inches(new_width, height) + # Plot the data for this experiment and stat_label with custom color + color = line_colors[i][j] if line_colors else None + experiment_label = experiment_name + stat_friendly = stat_label + if experiment_name in friendly_names_dict: + experiment_label = friendly_names_dict[experiment_name] + if stat_label in friendly_names_dict: + stat_friendly = friendly_names_dict[stat_label] + plt.plot(time_valid, value, label=f'{experiment_label} - {stat_friendly} - Ch {channel}', color=color, alpha=0.6) + else: + print(f"No data for {stat_label}, {sensor_label}, {channel} in experiment: {experiment_name}") + else: + print(f"No data for array metric type {array_metric_type} in experiment: {experiment_name}") + else: + print(f"No data for experiment: {experiment_name}") + + # Set y-axis limits if specified + if y_min is not None or y_max is not None: + plt.ylim(y_min, y_max) + + # Add labels and title for the plot + plt.xlabel('Time Valid', fontsize=18) + plt.ylabel(f'Statistic Values', fontsize=18) + plt.title(f'Comparison of NOAA and NASA Experiments for Channel {channel_list}', fontsize=18) + plt.legend(fontsize=20) + + #Rotate x-axis labels for readability + plt.xticks(rotation=45, fontsize=20) + plt.yticks(fontsize=20) + + # Save the plot to the specified output directory + plot_filename = f'{sensor_label}_comparison_{stat_pair[0]}_{stat_pair[1]}.png' + plot_filepath = os.path.join(output_dir, plot_filename) + plt.savefig(plot_filepath) + + # Close the plot after saving + plt.close() + + print(f"Plot saved: {plot_filepath}") + +def plot_experiment_comparison_multi_stat_per_channel(timeseries_dict, experiment_list, output_dir, stat_pair, array_metrics_list, line_colors=None, y_min=None, 
y_max=None): """ Plot time series for multiple experiments for each stat and sensor combination, and save each plot. - + Parameters: - - timeseries_dict: Dictionary where keys are experiment names and values are GSIStatsTimeSeries objects. + - timeseries_dict: Dictionary where keys are experiment names and values are nested dictionaries of array metric types. - experiment_list: List of experiment names to plot. - output_dir: Directory where plots will be saved. + - channel_list: List of channel indices to plot. + - stat_pair: List containing two stat labels to compare (e.g., ['std_GSIstage_1', 'bias_post_corr_GSIstage_1']). + - array_metrics_list: List of array metric types corresponding to the stat labels. + - line_colors: List of colors for each line (optional). If None, default colors will be used. """ - + # Get statlabel_list and sensorlabel_list from one of the GSIStatsTimeSeries objects if not timeseries_dict: print("Error: timeseries_dict is empty.") return - - # Extract the statlabel_list and sensorlabel_list from the first object in timeseries_dict - first_timeseries_obj = list(timeseries_dict.values())[0] - statlabel_list = first_timeseries_obj.statlabel_list - sensorlabel_list = first_timeseries_obj.sensorlabel_list - for channel in channels_to_plot: - # Loop through each stat_label in the statlabel_list - for stat_label in statlabel_list: - # Loop through each sensor_label in the sensorlabel_list - for sensor_label in sensorlabel_list: - plt.figure(figsize=(12, 8), dpi=300) # Create a new figure for each stat-sensor combination + # Extract the statlabel_list and sensorlabel_list from the first object in timeseries_dict + first_timeseries_obj = list(timeseries_dict.values())[0].values() + sensorlabel_list = list(first_timeseries_obj)[0].sensorlabel_list + channel_list = list(first_timeseries_obj)[0].channel_list - # Loop through each experiment in the experiment list - for experiment_name in experiment_list: - # Access the corresponding 
GSIStatsTimeSeries object from the dictionary - timeseries_obj = timeseries_dict.get(experiment_name) + # Set default line colors if none are provided + # if line_colors is None: + # line_colors = plt.cm.get_cmap('tab10', len(experiment_list)) - if timeseries_obj: - # Safely access the nested dictionary for time_valid and value - time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, []) - value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, [])[channel] #TODO: does this work with new structure? + for channel in channel_list: + # Loop through each sensor_label in the sensorlabel_list + for sensor_label in sensorlabel_list: + plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each sensor combination - # Check if data exists for the stat_label and sensor_label - if time_valid and value: - # Plot the data for this experiment - plt.plot(time_valid, value, label=experiment_name, alpha=0.7) #set plot for line or bar for bar or scatter for scatter + # Loop through each experiment in the experiment list + for i, experiment_name in enumerate(experiment_list): + # Access the corresponding dictionary for the experiment + experiment_data = timeseries_dict.get(experiment_name) + + if experiment_data: + # Loop through both stat labels in the stat_pair + for j, stat_label in enumerate(stat_pair): + array_metric_type = array_metrics_list[j] # Map stat_label to array_metric_type + timeseries_obj = experiment_data.get(array_metric_type) + + if timeseries_obj: + # Safely access the nested dictionary for time_valid and value + time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) + value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) + # Check if data exists for the stat_label and sensor_label + if time_valid and value: + time_range = (max(time_valid) - min(time_valid)).total_seconds() + new_width = min(max(time_range / 50000, 12), 25) + fig 
= plt.gcf() + width, height = fig.get_size_inches() + if new_width > width: + fig.set_size_inches(new_width, height) + # Plot the data for this experiment and stat_label with custom color + color = line_colors[i][j] if line_colors else None + experiment_label = experiment_name + stat_friendly = stat_label + if experiment_name in friendly_names_dict: + experiment_label = friendly_names_dict[experiment_name] + if stat_label in friendly_names_dict: + stat_friendly = friendly_names_dict[stat_label] + plt.plot(time_valid, value, label=f'{experiment_label} - {stat_friendly}', color=color, alpha=0.6) + else: + print(f"No data for {stat_label}, {sensor_label}, {channel} in experiment: {experiment_name}") else: - print(f"No data for {stat_label}, {sensor_label} in experiment: {experiment_name}") - else: - print(f"No data for experiment: {experiment_name}") + print(f"No data for array metric type {array_metric_type} in experiment: {experiment_name}") + else: + print(f"No data for experiment: {experiment_name}") - # Add labels and title for the plot - plt.xlabel('Time Valid', fontsize=24) - plt.ylabel(f'{stat_label}', fontsize=24) - plt.title(f'Comparison of {stat_label} and {sensor_label} across Experiments for Channel {channel + 1}') - plt.legend() + # Set y-axis limits if specified + if y_min is not None or y_max is not None: + plt.ylim(y_min, y_max) - # Rotate x-axis labels for readability - # Rotate x-axis labels for readability - plt.xticks(rotation=45, fontsize=24) - plt.yticks(fontsize=24) + # Add labels and title for the plot + plt.xlabel('Time Valid', fontsize=18) + plt.ylabel(f'Statistic Values', fontsize=18) + plt.title(f'Comparison of NOAA and NASA Experiments for Channel {channel}', fontsize=18) + plt.legend(fontsize=20) - # Save the plot to the specified output directory - plot_filename = f'{stat_label}_{sensor_label}_comparison.svg' - plot_filepath = os.path.join(output_dir, plot_filename) - plt.savefig(plot_filepath) + #Rotate x-axis labels for readability + 
plt.xticks(rotation=45, fontsize=20) + plt.yticks(fontsize=20) - # Close the plot after saving - plt.close() + # Save the plot to the specified output directory + plot_filename = f'{sensor_label}_ch{channel}_comparison_{stat_pair[0]}_{stat_pair[1]}.png' + plot_filepath = os.path.join(output_dir, plot_filename) + plt.savefig(plot_filepath) - print(f"Plot saved: {plot_filepath}") + # Close the plot after saving + plt.close() + print(f"Plot saved: {plot_filepath}") def main(): From 0e554afa32876ba5581c3d640440407c76103ae0 Mon Sep 17 00:00:00 2001 From: Jessica Knezha Date: Mon, 3 Feb 2025 14:38:23 -0700 Subject: [PATCH 07/44] comparison works --- .../core_scripts/gsistats_timeseries.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index eae0d60..27c7a66 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -111,16 +111,16 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, # separate to be able to plot experiments on the same graphic / flattens data for now def run_line_plot(make_line_plot=True, select_array_metric_types=True, - select_sat_name=True, multi_stat=True, + select_sat_name=True, multi_stat=False, experiment_list=[#'scout_run_v1', 'NASA_GEOSIT_GSISTATS', 'scout_run_v1' #'scout_runs_gsi3dvar_1979stream' ], - array_metrics_list=['amsua_std_%', - 'amsua_bias_post_corr_GSIstage_%', + array_metrics_list=[#'amsua_std_%', + #'amsua_bias_post_corr_GSIstage_%', #'%_variance_%', - #'amsua_nobs_used_%' + 'amsua_nobs_used_%' ], sat_name = 'NOAA 15', channel_list = ['3', '6', '8', '10'], @@ -181,14 +181,19 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, # stat_label = 'amsua_bias_post_corr_GSIstage_1' # sensor_label = 'n15_amsua' - #plot_experiment_comparison(experiment_timeseries, 
experiment_list, ".", channel_list, ['#E4002B', '#003087'], 0) #TODO remove the hard coded stuff + #all channels on one plot, single stat + plot_experiment_comparison(experiment_timeseries, experiment_list, ".", channel_list, ['#E4002B', '#003087'], 0) #TODO remove the hard coded stuff + #Each channel on it's own plot, single stat #plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, ".", ['#E4002B', '#003087'], 0) + #TODO: this is not channel based, should be using flatten? #plot_experiment_comparison_multi_stat(experiment_timeseries, experiment_list, ".", "8", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways + #Multi stat, all channels on the same plot #plot_experiment_comparison_multi_stat_all_channel(experiment_timeseries, experiment_list, ".", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways + #Multi stat, each channel on their own plot plot_experiment_comparison_multi_stat_per_channel(experiment_timeseries, experiment_list, ".", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways else: From fa34de308985e9e81eca5827ce4e95bc7eddd303 Mon Sep 17 00:00:00 2001 From: Jessica Knezha Date: Mon, 3 Feb 2025 14:41:40 -0700 Subject: [PATCH 08/44] function detail comments --- src/score_plotting/core_scripts/gsistats_timeseries.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index 27c7a66..57b1ee5 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -719,7 +719,7 @@ def 
make_line_plot_multi_expt(timeseries_dict, experiment_list): plt.tight_layout() plt.show() - +#This function plots all channels on the same plot, but each stat and sensor combo gets its own plot def plot_experiment_comparison(timeseries_dict, experiment_list, output_dir, channel_list, expt_colors=None, y_min=None, y_max=None): """ Plot time series for multiple experiments for multiple stat and sensor combination, and save each plot. @@ -819,7 +819,7 @@ def plot_experiment_comparison(timeseries_dict, experiment_list, output_dir, cha print(f"Plot saved: {plot_filepath}") - +#This function plots each channel, stat, sensor combo on its own plot def plot_experiment_comparison_per_channel(timeseries_dict, experiment_list, output_dir, expt_colors=None, y_min=None, y_max=None): """ Plot time series for multiple experiments for multiple stat and sensor combination, and save each plot. @@ -924,6 +924,7 @@ def plot_experiment_comparison_per_channel(timeseries_dict, experiment_list, out print(f"Plot saved: {plot_filepath}") +#This function plots stat and sensor combos on the same plot without regard to separate channels (expects channels averaged and a list of which channels are included for title) def plot_experiment_comparison_multi_stat(timeseries_dict, experiment_list, output_dir, channel_list, stat_pair, array_metrics_list, line_colors=None, y_min=None, y_max=None): """ Plot time series for multiple experiments for each stat and sensor combination, and save each plot. @@ -1013,6 +1014,7 @@ def plot_experiment_comparison_multi_stat(timeseries_dict, experiment_list, outp print(f"Plot saved: {plot_filepath}") +#This function plots stat, sensor, and channels all on the same plot def plot_experiment_comparison_multi_stat_all_channel(timeseries_dict, experiment_list, output_dir, stat_pair, array_metrics_list, line_colors=None, y_min=None, y_max=None): """ Plot time series for multiple experiments for each stat and sensor combination, and save each plot. 
@@ -1109,6 +1111,7 @@ def plot_experiment_comparison_multi_stat_all_channel(timeseries_dict, experimen print(f"Plot saved: {plot_filepath}") +#This function plots stat and sensor combos on the same plot but each channel receives its own plot def plot_experiment_comparison_multi_stat_per_channel(timeseries_dict, experiment_list, output_dir, stat_pair, array_metrics_list, line_colors=None, y_min=None, y_max=None): """ Plot time series for multiple experiments for each stat and sensor combination, and save each plot. From 10d0eb860ad0dea75d1ccc607b4d0d4a10c1c0ad Mon Sep 17 00:00:00 2001 From: Jessica Knezha Date: Wed, 5 Feb 2025 17:07:07 -0700 Subject: [PATCH 09/44] updates including code tree path --- .../core_scripts/gsistats_timeseries.py | 44 ++++++++++++++----- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index 57b1ee5..c8a0022 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -111,19 +111,21 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, # separate to be able to plot experiments on the same graphic / flattens data for now def run_line_plot(make_line_plot=True, select_array_metric_types=True, - select_sat_name=True, multi_stat=False, + select_sat_name=True, multi_stat=False, per_channel=True, experiment_list=[#'scout_run_v1', 'NASA_GEOSIT_GSISTATS', - 'scout_run_v1' + 'scout_run_v1', #171 + 'replay_observer_diagnostic_v1' #'scout_runs_gsi3dvar_1979stream' ], + experiment_id_list=[185, 171, 175], array_metrics_list=[#'amsua_std_%', #'amsua_bias_post_corr_GSIstage_%', #'%_variance_%', 'amsua_nobs_used_%' ], sat_name = 'NOAA 15', - channel_list = ['3', '6', '8', '10'], + channel_list = None, start_date = '1999-01-01 00:00:00', stop_date = '2024-12-01 00:00:00'): """modify the above input variables to configure and generate 
time series @@ -147,7 +149,8 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, if multi_stat: experiment_timeseries = dict() - for experiment_name in experiment_list: + #for experiment_name in experiment_list: + for i, experiment_name in enumerate(experiment_list): experiment_timeseries[experiment_name] = dict() # Create a dictionary for each experiment for array_metric_type in array_metrics_list: timeseries_data = GSIStatsTimeSeries( @@ -156,7 +159,8 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, select_array_metric_types=select_array_metric_types, array_metric_types=array_metric_type, select_sat_name=select_sat_name, - sat_name=sat_name) + sat_name=sat_name, + experiment_id=experiment_id_list[i]) # Flatten data for the selected channels timeseries_data.flatten_by_channel(channel_list=channel_list) @@ -180,12 +184,22 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, if make_line_plot: # stat_label = 'amsua_bias_post_corr_GSIstage_1' # sensor_label = 'n15_amsua' + if per_channel: + if multi_stat: + plot_experiment_comparison_multi_stat_per_channel(experiment_timeseries, experiment_list, ".", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#E4002B', '#f2901f'], ['#003087', '#0085CA'], ['#46990f', '#c1e67c']]) #TODO make accessible via code pathways #['#003087', '#0085CA'], colors for scout run + else: + plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, ".", ['#E4002B', '#003087', '#46990f'], 0) + else: + if multi_stat: + plot_experiment_comparison_multi_stat_all_channel(experiment_timeseries, experiment_list, ".", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways + else: + plot_experiment_comparison(experiment_timeseries, experiment_list, ".", channel_list, ['#E4002B', '#003087'], 0) #TODO remove the hard coded stuff #all 
channels on one plot, single stat - plot_experiment_comparison(experiment_timeseries, experiment_list, ".", channel_list, ['#E4002B', '#003087'], 0) #TODO remove the hard coded stuff + #plot_experiment_comparison(experiment_timeseries, experiment_list, ".", channel_list, ['#E4002B', '#003087'], 0) #TODO remove the hard coded stuff - #Each channel on it's own plot, single stat - #plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, ".", ['#E4002B', '#003087'], 0) + #Each channel on it's own plot, single stat -- requires not the multi stat processing for how it's written right now + #plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, ".", ['#E4002B', '#003087', '#46990f'], 0) #TODO: this is not channel based, should be using flatten? #plot_experiment_comparison_multi_stat(experiment_timeseries, experiment_list, ".", "8", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways @@ -194,7 +208,7 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, #plot_experiment_comparison_multi_stat_all_channel(experiment_timeseries, experiment_list, ".", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways #Multi stat, each channel on their own plot - plot_experiment_comparison_multi_stat_per_channel(experiment_timeseries, experiment_list, ".", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways + #plot_experiment_comparison_multi_stat_per_channel(experiment_timeseries, experiment_list, ".", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#E4002B', '#f2901f'], ['#003087', '#0085CA'], ['#46990f', '#c1e67c']]) #TODO make accessible via code 
pathways #['#003087', '#0085CA'], colors for scout run else: timeseries_data.print_init_time() @@ -206,7 +220,8 @@ def __init__(self, start_date, stop_date, select_array_metric_types = True, array_metric_types='%', select_sat_name = False, - sat_name = None + sat_name = None, + experiment_id=None, ): """Download metrics data for given experiment name """ @@ -216,6 +231,7 @@ def __init__(self, start_date, stop_date, self.array_metric_types = array_metric_types self.select_sat_name = select_sat_name self.sat_name = sat_name + self.experiment_id = experiment_id self.get_data_frame(start_date, stop_date) @@ -257,6 +273,14 @@ def get_data_frame(self, start_date, stop_date): request_dict['params']['filters']['sat_meta'] = { 'sat_name': {'exact': self.sat_name} } + + if self.experiment_id is not None: + request_dict['params']['filters']['experiment'] = { + 'experiment_name': + {'exact': + self.experiment_name}, + 'id': self.experiment_id + } db_action_response = score_db_base.handle_request(request_dict) self.data_frame = db_action_response.details['records'] From c181d0afef117098275253cfbf4356649b8cf4c4 Mon Sep 17 00:00:00 2001 From: Jessica Knezha Date: Tue, 11 Feb 2025 10:21:50 -0700 Subject: [PATCH 10/44] everything specified through the top level function --- .../core_scripts/gsistats_timeseries.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index c8a0022..8a06add 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -127,7 +127,14 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, sat_name = 'NOAA 15', channel_list = None, start_date = '1999-01-01 00:00:00', - stop_date = '2024-12-01 00:00:00'): + stop_date = '2024-12-01 00:00:00', + color_list = [['#E4002B', '#f2901f'], + ['#003087', '#0085CA'], + ['#46990f', 
'#c1e67c']], + stat_pair = ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], + y_min = None, + y_max = None, + output_directory="."): """modify the above input variables to configure and generate time series data for various GSI related statistics @@ -182,18 +189,16 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, experiment_timeseries[experiment_name] = timeseries_data if make_line_plot: - # stat_label = 'amsua_bias_post_corr_GSIstage_1' - # sensor_label = 'n15_amsua' if per_channel: if multi_stat: - plot_experiment_comparison_multi_stat_per_channel(experiment_timeseries, experiment_list, ".", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#E4002B', '#f2901f'], ['#003087', '#0085CA'], ['#46990f', '#c1e67c']]) #TODO make accessible via code pathways #['#003087', '#0085CA'], colors for scout run + plot_experiment_comparison_multi_stat_per_channel(experiment_timeseries, experiment_list, output_directory, stat_pair, array_metrics_list, color_list, y_min, y_max) else: - plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, ".", ['#E4002B', '#003087', '#46990f'], 0) + plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, output_directory, color_list, y_min, y_max) else: if multi_stat: - plot_experiment_comparison_multi_stat_all_channel(experiment_timeseries, experiment_list, ".", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways + plot_experiment_comparison_multi_stat_all_channel(experiment_timeseries, experiment_list, output_directory, stat_pair, array_metrics_list, color_list, y_min, y_max) else: - plot_experiment_comparison(experiment_timeseries, experiment_list, ".", channel_list, ['#E4002B', '#003087'], 0) #TODO remove the hard coded stuff + plot_experiment_comparison(experiment_timeseries, experiment_list, output_directory, channel_list, color_list, 
y_min, y_max) #all channels on one plot, single stat #plot_experiment_comparison(experiment_timeseries, experiment_list, ".", channel_list, ['#E4002B', '#003087'], 0) #TODO remove the hard coded stuff From 2106b03d56d6cc489b307a1d7129237828c09837 Mon Sep 17 00:00:00 2001 From: Jessica Knezha Date: Tue, 11 Feb 2025 13:15:45 -0700 Subject: [PATCH 11/44] updated changes --- .../core_scripts/gsistats_timeseries.py | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index 8a06add..f91b4bc 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -111,18 +111,17 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, # separate to be able to plot experiments on the same graphic / flattens data for now def run_line_plot(make_line_plot=True, select_array_metric_types=True, - select_sat_name=True, multi_stat=False, per_channel=True, + select_sat_name=True, multi_stat=True, per_channel=True, experiment_list=[#'scout_run_v1', 'NASA_GEOSIT_GSISTATS', 'scout_run_v1', #171 'replay_observer_diagnostic_v1' #'scout_runs_gsi3dvar_1979stream' ], - experiment_id_list=[185, 171, 175], - array_metrics_list=[#'amsua_std_%', - #'amsua_bias_post_corr_GSIstage_%', + array_metrics_list=['amsua_std_%', + 'amsua_bias_post_corr_GSIstage_%', #'%_variance_%', - 'amsua_nobs_used_%' + #'amsua_nobs_used_%' ], sat_name = 'NOAA 15', channel_list = None, @@ -157,7 +156,7 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, experiment_timeseries = dict() #for experiment_name in experiment_list: - for i, experiment_name in enumerate(experiment_list): + for experiment_name in experiment_list: experiment_timeseries[experiment_name] = dict() # Create a dictionary for each experiment for array_metric_type in array_metrics_list: timeseries_data = 
GSIStatsTimeSeries( @@ -166,8 +165,7 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, select_array_metric_types=select_array_metric_types, array_metric_types=array_metric_type, select_sat_name=select_sat_name, - sat_name=sat_name, - experiment_id=experiment_id_list[i]) + sat_name=sat_name) # Flatten data for the selected channels timeseries_data.flatten_by_channel(channel_list=channel_list) @@ -278,14 +276,6 @@ def get_data_frame(self, start_date, stop_date): request_dict['params']['filters']['sat_meta'] = { 'sat_name': {'exact': self.sat_name} } - - if self.experiment_id is not None: - request_dict['params']['filters']['experiment'] = { - 'experiment_name': - {'exact': - self.experiment_name}, - 'id': self.experiment_id - } db_action_response = score_db_base.handle_request(request_dict) self.data_frame = db_action_response.details['records'] From e6d98a9ddbf8e7552f39ee7971a7d44238cb2acb Mon Sep 17 00:00:00 2001 From: Jessica Knezha Date: Tue, 11 Feb 2025 13:18:30 -0700 Subject: [PATCH 12/44] clean up --- .../core_scripts/gsistats_timeseries.py | 25 ++++--------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index f91b4bc..e78d4a0 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -109,18 +109,15 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, else: timeseries_data.print_init_time() -# separate to be able to plot experiments on the same graphic / flattens data for now def run_line_plot(make_line_plot=True, select_array_metric_types=True, select_sat_name=True, multi_stat=True, per_channel=True, - experiment_list=[#'scout_run_v1', + experiment_list=[ 'NASA_GEOSIT_GSISTATS', 'scout_run_v1', #171 'replay_observer_diagnostic_v1' - #'scout_runs_gsi3dvar_1979stream' ], array_metrics_list=['amsua_std_%', 
'amsua_bias_post_corr_GSIstage_%', - #'%_variance_%', #'amsua_nobs_used_%' ], sat_name = 'NOAA 15', @@ -183,36 +180,24 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, select_sat_name=select_sat_name, sat_name=sat_name) timeseries_data.flatten_by_channel(channel_list=channel_list) - #timeseries_data.build(by_channel=True) experiment_timeseries[experiment_name] = timeseries_data if make_line_plot: if per_channel: if multi_stat: + #mutli stat, each channel on their own plot plot_experiment_comparison_multi_stat_per_channel(experiment_timeseries, experiment_list, output_directory, stat_pair, array_metrics_list, color_list, y_min, y_max) else: + #singular stat, each channel on their own plot plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, output_directory, color_list, y_min, y_max) else: if multi_stat: + #multi stat, all channels same plot plot_experiment_comparison_multi_stat_all_channel(experiment_timeseries, experiment_list, output_directory, stat_pair, array_metrics_list, color_list, y_min, y_max) else: + #single stat, all channels same plot plot_experiment_comparison(experiment_timeseries, experiment_list, output_directory, channel_list, color_list, y_min, y_max) - #all channels on one plot, single stat - #plot_experiment_comparison(experiment_timeseries, experiment_list, ".", channel_list, ['#E4002B', '#003087'], 0) #TODO remove the hard coded stuff - - #Each channel on it's own plot, single stat -- requires not the multi stat processing for how it's written right now - #plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, ".", ['#E4002B', '#003087', '#46990f'], 0) - - #TODO: this is not channel based, should be using flatten? 
- #plot_experiment_comparison_multi_stat(experiment_timeseries, experiment_list, ".", "8", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways - - #Multi stat, all channels on the same plot - #plot_experiment_comparison_multi_stat_all_channel(experiment_timeseries, experiment_list, ".", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#003087', '#0085CA'], ['#E4002B', '#f2901f']], -0.2, 0.4) #TODO make accessible via code pathways - - #Multi stat, each channel on their own plot - #plot_experiment_comparison_multi_stat_per_channel(experiment_timeseries, experiment_list, ".", ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], array_metrics_list, [['#E4002B', '#f2901f'], ['#003087', '#0085CA'], ['#46990f', '#c1e67c']]) #TODO make accessible via code pathways #['#003087', '#0085CA'], colors for scout run - else: timeseries_data.print_init_time() From be38e9e1810a7f6baca11eebd46daea6efad5a0c Mon Sep 17 00:00:00 2001 From: Jessica Knezha Date: Wed, 12 Feb 2025 15:42:43 -0700 Subject: [PATCH 13/44] some clean up --- .../core_scripts/gsistats_timeseries.py | 42 ++++--------------- 1 file changed, 9 insertions(+), 33 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index e78d4a0..7769c5a 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -41,7 +41,7 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, select_sat_name=True, experiment_list=['scout_run_v1', 'NASA_GEOSIT_GSISTATS' - #'scout_runs_gsi3dvar_1979stream'fcdvwq + #'scout_runs_gsi3dvar_1979stream' ], array_metrics_list=['amsua_bias_post_corr_GSIstage_%', #'amsua_std_%', @@ -51,7 +51,7 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, sat_name = 'NOAA 15', 
channel_list = ['5','6','7'], start_date = '1979-01-01 00:00:00', - stop_date = '2024-06-01 00:00:00'): #default start and stop date should be 1979 - 2025? #TODO + stop_date = '2026-01-01 00:00:00'): """modify the above input variables to configure and generate time series data for various GSI related statistics @@ -77,15 +77,15 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, array_metric_types=array_metric_type, select_sat_name=select_sat_name, sat_name=sat_name) - # timeseries_data.build(all_channel_max=False, # set max or mean #TODO: uncomment this, it should be under make_plot most likely - # all_channel_mean=False, - # by_channel=True) # other False #TODO remove these hardcodes + timeseries_data.build(all_channel_max=False, # set max or mean + all_channel_mean=False, + by_channel=True) # other False if make_plot: timeseries_data.plot() plt.suptitle(experiment_name) #plt.show() - metric_string = array_metric_type.split('%')[1] #this won't always work if you give a specific sensor value #TODO + metric_string = array_metric_type.split('%')[1] #this won't always work if you give a specific sensor value plt.savefig(os.path.join( 'results', f'gsi{metric_string}{experiment_name}.png'), @@ -94,7 +94,7 @@ def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, elif make_line_plot: stat_label = 'bias_post_corr_GSIstage_1' - #stat_label = 'std_GSIstage_1' #TODO + #stat_label = 'std_GSIstage_1' sensor_label = 'n15_amsua' y_min = -0.5 y_max = 0.6 @@ -113,7 +113,7 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, select_sat_name=True, multi_stat=True, per_channel=True, experiment_list=[ 'NASA_GEOSIT_GSISTATS', - 'scout_run_v1', #171 + 'scout_run_v1', 'replay_observer_diagnostic_v1' ], array_metrics_list=['amsua_std_%', @@ -443,10 +443,9 @@ def flatten_by_channel(self, channel_list): for i, channel in enumerate(row.array_index_values): - if channel_list is not None and channel not in channel_list: 
#TODO: check this functionality + if channel_list is not None and channel not in channel_list: continue - #value = np.nansum([np.nan if row.value[i] is None else row.value[i] for i in channel_indices if i < len(row.value)])#TODO - this is the line that needs to change value = np.nan if row.value[i] is None else row.value[i] # Check if stat_label exists in timestamp_dict @@ -853,29 +852,6 @@ def plot_experiment_comparison_per_channel(timeseries_dict, experiment_list, out for channel_label in channel_list: plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each stat-sensor-channel combination - # # Loop through each experiment in the experiment list - # for i, experiment_name in enumerate(experiment_list): - # # Access the corresponding GSIStatsTimeSeries object from the dictionary - # timeseries_obj = timeseries_dict.get(experiment_name) - - # if timeseries_obj: - # # Safely access the nested dictionary for time_valid and value - # time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, []) - # value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, []) - # #TODO: update to handle channels? 
- # # Check if data exists for the stat_label and sensor_label - # if time_valid and value: - # # Plot the data for this experiment - # color = expt_colors[i] if expt_colors else None - # experiment_label = experiment_name - # if experiment_name in friendly_names_dict: - # experiment_label = friendly_names_dict[experiment_name] - # plt.plot(time_valid, value, label=experiment_label, alpha=0.6, color=color) #set plot for line or bar for bar or scatter for scatter - # else: - # print(f"No data for {stat_label}, {sensor_label} in experiment: {experiment_name}") - # else: - # print(f"No data for experiment: {experiment_name}") - # Loop through each experiment in the experiment list for i, experiment_name in enumerate(experiment_list): # Access the corresponding GSIStatsTimeSeries object from the dictionary From e5dd2311df69339b9ca9b1e304e452fd8491e603 Mon Sep 17 00:00:00 2001 From: Jessica Knezha Date: Fri, 14 Feb 2025 12:13:29 -0700 Subject: [PATCH 14/44] clear plot titles with all info --- .../core_scripts/gsistats_timeseries.py | 61 ++++++------------- 1 file changed, 20 insertions(+), 41 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index 7769c5a..9675fcf 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -114,19 +114,21 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, experiment_list=[ 'NASA_GEOSIT_GSISTATS', 'scout_run_v1', - 'replay_observer_diagnostic_v1' + #'replay_observer_diagnostic_v1' ], array_metrics_list=['amsua_std_%', 'amsua_bias_post_corr_GSIstage_%', #'amsua_nobs_used_%' ], - sat_name = 'NOAA 15', + sensor_name = 'AMSUA', + sat_name = 'NOAA 18', channel_list = None, start_date = '1999-01-01 00:00:00', stop_date = '2024-12-01 00:00:00', color_list = [['#E4002B', '#f2901f'], ['#003087', '#0085CA'], - ['#46990f', '#c1e67c']], + #['#46990f', '#c1e67c'] + ], 
stat_pair = ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], y_min = None, y_max = None, @@ -186,17 +188,17 @@ def run_line_plot(make_line_plot=True, select_array_metric_types=True, if per_channel: if multi_stat: #mutli stat, each channel on their own plot - plot_experiment_comparison_multi_stat_per_channel(experiment_timeseries, experiment_list, output_directory, stat_pair, array_metrics_list, color_list, y_min, y_max) + plot_experiment_comparison_multi_stat_per_channel(experiment_timeseries, experiment_list, output_directory, stat_pair, array_metrics_list, sensor_name, sat_name, color_list, y_min, y_max) else: #singular stat, each channel on their own plot - plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, output_directory, color_list, y_min, y_max) + plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, output_directory, sensor_name, sat_name, color_list, y_min, y_max) else: if multi_stat: #multi stat, all channels same plot - plot_experiment_comparison_multi_stat_all_channel(experiment_timeseries, experiment_list, output_directory, stat_pair, array_metrics_list, color_list, y_min, y_max) + plot_experiment_comparison_multi_stat_all_channel(experiment_timeseries, experiment_list, output_directory, stat_pair, array_metrics_list, sensor_name, sat_name, color_list, y_min, y_max) else: #single stat, all channels same plot - plot_experiment_comparison(experiment_timeseries, experiment_list, output_directory, channel_list, color_list, y_min, y_max) + plot_experiment_comparison(experiment_timeseries, experiment_list, output_directory, channel_list, sensor_name, sat_name, color_list, y_min, y_max) else: timeseries_data.print_init_time() @@ -723,7 +725,7 @@ def make_line_plot_multi_expt(timeseries_dict, experiment_list): plt.show() #This function plots all channels on the same plot, but each stat and sensor combo gets it's own plot -def plot_experiment_comparison(timeseries_dict, experiment_list, output_dir, 
channel_list, expt_colors=None, y_min=None, y_max=None): +def plot_experiment_comparison(timeseries_dict, experiment_list, output_dir, channel_list, sensor_name, sat_name, expt_colors=None, y_min=None, y_max=None): """ Plot time series for multiple experiments for multiple stat and sensor combination, and save each plot. @@ -749,29 +751,6 @@ def plot_experiment_comparison(timeseries_dict, experiment_list, output_dir, cha for sensor_label in sensorlabel_list: plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each stat-sensor combination - # # Loop through each experiment in the experiment list - # for i, experiment_name in enumerate(experiment_list): - # # Access the corresponding GSIStatsTimeSeries object from the dictionary - # timeseries_obj = timeseries_dict.get(experiment_name) - - # if timeseries_obj: - # # Safely access the nested dictionary for time_valid and value - # time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, []) - # value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, []) - # #TODO: update to handle channels? 
- # # Check if data exists for the stat_label and sensor_label - # if time_valid and value: - # # Plot the data for this experiment - # color = expt_colors[i] if expt_colors else None - # experiment_label = experiment_name - # if experiment_name in friendly_names_dict: - # experiment_label = friendly_names_dict[experiment_name] - # plt.plot(time_valid, value, label=experiment_label, alpha=0.6, color=color) #set plot for line or bar for bar or scatter for scatter - # else: - # print(f"No data for {stat_label}, {sensor_label} in experiment: {experiment_name}") - # else: - # print(f"No data for experiment: {experiment_name}") - # Loop through each experiment in the experiment list for i, experiment_name in enumerate(experiment_list): # Access the corresponding GSIStatsTimeSeries object from the dictionary @@ -803,9 +782,9 @@ def plot_experiment_comparison(timeseries_dict, experiment_list, output_dir, cha plt.xlabel('Time Valid', fontsize=18) plt.ylabel(f'{stat_label}', fontsize=18) if channel_list is None: - plt.title(f'Comparison of {stat_label} and {sensor_label} across Experiments', fontsize=18) + plt.title(f'Comparison of {stat_label} and {sensor_label} for {sensor_name} {sat_name} across Experiments', fontsize=18) else: - plt.title(f'Comparison of {stat_label} and {sensor_label} across Experiments for Channels {channel_list}', fontsize=18) + plt.title(f'Comparison of {stat_label} and {sensor_label} for {sensor_name} {sat_name} across Experiments for Channels {channel_list}', fontsize=18) plt.legend(fontsize=20) # Rotate x-axis labels for readability @@ -823,7 +802,7 @@ def plot_experiment_comparison(timeseries_dict, experiment_list, output_dir, cha print(f"Plot saved: {plot_filepath}") #This function plots each channel, stat, sensor combo on it's own plot -def plot_experiment_comparison_per_channel(timeseries_dict, experiment_list, output_dir, expt_colors=None, y_min=None, y_max=None): +def plot_experiment_comparison_per_channel(timeseries_dict, experiment_list, 
output_dir, sensor_name, sat_name, expt_colors=None, y_min=None, y_max=None): """ Plot time series for multiple experiments for multiple stat and sensor combination, and save each plot. @@ -887,7 +866,7 @@ def plot_experiment_comparison_per_channel(timeseries_dict, experiment_list, out # Add labels and title for the plot plt.xlabel('Time Valid', fontsize=18) plt.ylabel(f'{stat_label}', fontsize=18) - plt.title(f'Comparison of {stat_label} and {sensor_label} across Experiments for Channel {channel_label}', fontsize=18) + plt.title(f'Comparison of {stat_label} and {sensor_label} for {sensor_name} {sat_name} across Experiments for Channel {channel_label}', fontsize=18) plt.legend(fontsize=20) # Rotate x-axis labels for readability @@ -905,7 +884,7 @@ def plot_experiment_comparison_per_channel(timeseries_dict, experiment_list, out print(f"Plot saved: {plot_filepath}") #This function plots stat and sensor combos on the same plot wihtout regard to separate channels (expects channels averaged and a list of which channels are included for title) -def plot_experiment_comparison_multi_stat(timeseries_dict, experiment_list, output_dir, channel_list, stat_pair, array_metrics_list, line_colors=None, y_min=None, y_max=None): +def plot_experiment_comparison_multi_stat(timeseries_dict, experiment_list, output_dir, channel_list, stat_pair, array_metrics_list, sensor_name, sat_name, line_colors=None, y_min=None, y_max=None): """ Plot time series for multiple experiments for each stat and sensor combination, and save each plot. 
@@ -977,7 +956,7 @@ def plot_experiment_comparison_multi_stat(timeseries_dict, experiment_list, outp # Add labels and title for the plot plt.xlabel('Time Valid', fontsize=18) plt.ylabel(f'Statistic Values', fontsize=18) - plt.title(f'Comparison of NOAA and NASA Experiments for Channel {channel_list}', fontsize=18) + plt.title(f'{stat_pair[0]} and {stat_pair[1]} for {sensor_name} {sat_name} and Channel(s) {channel_list}', fontsize=18) plt.legend(fontsize=20) #Rotate x-axis labels for readability @@ -995,7 +974,7 @@ def plot_experiment_comparison_multi_stat(timeseries_dict, experiment_list, outp print(f"Plot saved: {plot_filepath}") #This function plots stat, sensor, and channels all on the same plot -def plot_experiment_comparison_multi_stat_all_channel(timeseries_dict, experiment_list, output_dir, stat_pair, array_metrics_list, line_colors=None, y_min=None, y_max=None): +def plot_experiment_comparison_multi_stat_all_channel(timeseries_dict, experiment_list, output_dir, stat_pair, array_metrics_list, sensor_name, sat_name, line_colors=None, y_min=None, y_max=None): """ Plot time series for multiple experiments for each stat and sensor combination, and save each plot. 
@@ -1074,7 +1053,7 @@ def plot_experiment_comparison_multi_stat_all_channel(timeseries_dict, experimen # Add labels and title for the plot plt.xlabel('Time Valid', fontsize=18) plt.ylabel(f'Statistic Values', fontsize=18) - plt.title(f'Comparison of NOAA and NASA Experiments for Channel {channel_list}', fontsize=18) + plt.title(f'{stat_pair[0]} and {stat_pair[1]} for {sensor_name} {sat_name} and Channel(s) {channel_list}', fontsize=18) plt.legend(fontsize=20) #Rotate x-axis labels for readability @@ -1092,7 +1071,7 @@ def plot_experiment_comparison_multi_stat_all_channel(timeseries_dict, experimen print(f"Plot saved: {plot_filepath}") #This function plots stat and sensor combos on the same plot but each channel receives it's own plot -def plot_experiment_comparison_multi_stat_per_channel(timeseries_dict, experiment_list, output_dir, stat_pair, array_metrics_list, line_colors=None, y_min=None, y_max=None): +def plot_experiment_comparison_multi_stat_per_channel(timeseries_dict, experiment_list, output_dir, stat_pair, array_metrics_list, sensor_name, sat_name, line_colors=None, y_min=None, y_max=None): """ Plot time series for multiple experiments for each stat and sensor combination, and save each plot. 
@@ -1171,7 +1150,7 @@ def plot_experiment_comparison_multi_stat_per_channel(timeseries_dict, experimen # Add labels and title for the plot plt.xlabel('Time Valid', fontsize=18) plt.ylabel(f'Statistic Values', fontsize=18) - plt.title(f'Comparison of NOAA and NASA Experiments for Channel {channel}', fontsize=18) + plt.title(f'{stat_pair[0]} and {stat_pair[1]} for {sensor_name} {sat_name} Channel {channel}', fontsize=18) plt.legend(fontsize=20) #Rotate x-axis labels for readability From 0cef34b0128ae56be4ab9563639cbcbad4f2c8c5 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Fri, 21 Feb 2025 17:21:54 -0700 Subject: [PATCH 15/44] automation for time series plot generation of brightness temperature (mean, rms) errors, organized by sensor and platform for reanalysis experiment inter-comparisons --- .../core_scripts/gsistats_plot_utils.py | 703 ++++++++++++++++ .../core_scripts/gsistats_timeseries.py | 758 +----------------- .../core_scripts/instrument_channel_nums.py | 35 + .../plot_gsi_radiance_fit_to_obs.py | 239 ++++++ .../core_scripts/satellite_names.py | 115 +++ .../style_lib/agu_full_3pg.mplstyle | 28 + 6 files changed, 1141 insertions(+), 737 deletions(-) create mode 100755 src/score_plotting/core_scripts/gsistats_plot_utils.py create mode 100644 src/score_plotting/core_scripts/instrument_channel_nums.py create mode 100755 src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py create mode 100644 src/score_plotting/core_scripts/satellite_names.py create mode 100644 src/score_plotting/style_lib/agu_full_3pg.mplstyle diff --git a/src/score_plotting/core_scripts/gsistats_plot_utils.py b/src/score_plotting/core_scripts/gsistats_plot_utils.py new file mode 100755 index 0000000..4187d92 --- /dev/null +++ b/src/score_plotting/core_scripts/gsistats_plot_utils.py @@ -0,0 +1,703 @@ +#!/usr/bin/env python + +"""build GSI stats time series for plotting + +usage: place this file in score-db/src, run using the python interpreter, i.e. 
+ +score-db/src$ python gsistats_timeseries.py + +users shouldn't need to edit anything besides the run() function, which can +be customized according to their needs + +This script uses a matplotlib styesheet. To make the style sheet available to +matplotlib, place the "agu_full.mplstyle" file in the "stylelib" direcotry under matplotlib.get_configdir(), which is usually either ~/.config/matplotlib/ or ~/.matplotlib/ (https://matplotlib.org/stable/users/explain/customizing.html#using-style-sheets) + +for any questions, please feel free to contact Adam Schneider +(Adam.Schneider@noaa.gov) +""" + +import os +import pathlib +from datetime import datetime +import warnings + +import numpy as np +from matplotlib import pyplot as plt +import matplotlib.colors as mcolors +import matplotlib.dates as mdates +import colorcet as cc + +from score_db import score_db_base + +def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, + select_sat_name=True, + experiment_list=['scout_run_v1', + 'NASA_GEOSIT_GSISTATS' + #'scout_runs_gsi3dvar_1979stream' + ], + array_metrics_list=['amsua_bias_post_corr_GSIstage_%', + #'amsua_std_%', + #'%_variance_%', + #'amsua_use_%' + ], + sat_name = 'NOAA 15', + channel_list = ['5','6','7'], + start_date = '1979-01-01 00:00:00', + stop_date = '2026-01-01 00:00:00'): + """modify the above input variables to configure and generate time series + data for various GSI related statistics + + experiment_list: list of experiments to plot in sequence + array_metrics_list: list of metrics to plot in sequence + """ + + style_file = os.path.join( + CONFIG_PATH, + CONFIG_FILE + ) + if make_plot or make_line_plot: + plt.style.use(style_file) + + if not select_array_metric_types: + array_metrics_list=['%_all_stats_%'] + + for experiment_name in experiment_list: + for array_metric_type in array_metrics_list: + timeseries_data = GSIStatsTimeSeries(start_date, stop_date, + experiment_name=experiment_name, + 
select_array_metric_types=select_array_metric_types, + array_metric_types=array_metric_type, + select_sat_name=select_sat_name, + sat_name=sat_name) + timeseries_data.build(all_channel_max=False, # set max or mean + all_channel_mean=False, + by_channel=True) # other False + + if make_plot: + timeseries_data.plot() + plt.suptitle(experiment_name) + #plt.show() + metric_string = array_metric_type.split('%')[1] #this won't always work if you give a specific sensor value + plt.savefig(os.path.join( + 'results', + f'gsi{metric_string}{experiment_name}.png'), + dpi=600) + plt.close() + + elif make_line_plot: + stat_label = 'bias_post_corr_GSIstage_1' + #stat_label = 'std_GSIstage_1' + sensor_label = 'n15_amsua' + y_min = -0.5 + y_max = 0.6 + timeseries_data.flatten_by_channel(channel_list) + timeseries_data.plot_line_plot_by_channel(stat_label=stat_label, sensor_label=sensor_label, experiment_name=experiment_name, channels_to_plot=channel_list, y_min=y_min, y_max=y_max) + metric_string = array_metric_type.split('%')[0] #again not expandable + plt.savefig(os.path.join( + #'results', + f'gsiline{metric_string}{experiment_name}.png'), + dpi=600) + plt.close() + else: + timeseries_data.print_init_time() + +def run_line_plot(make_line_plot=True, select_array_metric_types=True, + select_sat_name=True, multi_stat=True, per_channel=True, + experiment_list=[ + 'NASA_GEOSIT_GSISTATS', + 'scout_run_v1', + #'replay_observer_diagnostic_v1' + ], + array_metrics_list=['amsua_std_%', + 'amsua_bias_post_corr_GSIstage_%', + #'amsua_nobs_used_%' + ], + sensor_name = 'AMSUA', + sat_name = 'NOAA 18', + channel_list = None, + start_date = '1999-01-01 00:00:00', + stop_date = '2024-12-01 00:00:00', + color_list = [['#E4002B', '#f2901f'], + ['#003087', '#0085CA'], + #['#46990f', '#c1e67c'] + ], + stat_pair = ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], + y_min = None, + y_max = None, + output_directory="."): + """modify the above input variables to configure and generate time series + data 
for various GSI related statistics + + experiment_list: list of experiments to plot in sequence + array_metrics_list: list of metrics to plot in sequence + """ + + style_file = os.path.join( + CONFIG_PATH, + CONFIG_FILE + ) + if make_line_plot: + plt.style.use(style_file) + plt.rcParams['font.size'] = 20 + + if not select_array_metric_types: + array_metrics_list=['%_all_stats_%'] + + if multi_stat: + experiment_timeseries = dict() + + #for experiment_name in experiment_list: + for experiment_name in experiment_list: + experiment_timeseries[experiment_name] = dict() # Create a dictionary for each experiment + for array_metric_type in array_metrics_list: + timeseries_data = GSIStatsTimeSeries( + start_date, stop_date, + experiment_name=experiment_name, + select_array_metric_types=select_array_metric_types, + array_metric_types=array_metric_type, + select_sat_name=select_sat_name, + sat_name=sat_name) + + # Flatten data for the selected channels + timeseries_data.flatten_by_channel(channel_list=channel_list) + + # Store the timeseries data by experiment name and array metric type + experiment_timeseries[experiment_name][array_metric_type] = timeseries_data + else: + experiment_timeseries = dict() + for experiment_name in experiment_list: + for array_metric_type in array_metrics_list: + timeseries_data = GSIStatsTimeSeries(start_date, stop_date, + experiment_name=experiment_name, + select_array_metric_types=select_array_metric_types, + array_metric_types=array_metric_type, + select_sat_name=select_sat_name, + sat_name=sat_name) + timeseries_data.flatten_by_channel(channel_list=channel_list) + experiment_timeseries[experiment_name] = timeseries_data + + if make_line_plot: + if per_channel: + if multi_stat: + #mutli stat, each channel on their own plot + plot_experiment_comparison_multi_stat_per_channel(experiment_timeseries, experiment_list, output_directory, stat_pair, array_metrics_list, sensor_name, sat_name, color_list, y_min, y_max) + else: + #singular stat, each 
channel on their own plot + plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, output_directory, sensor_name, sat_name, color_list, y_min, y_max) + else: + if multi_stat: + #multi stat, all channels same plot + plot_experiment_comparison_multi_stat_all_channel(experiment_timeseries, experiment_list, output_directory, stat_pair, array_metrics_list, sensor_name, sat_name, color_list, y_min, y_max) + else: + #single stat, all channels same plot + plot_experiment_comparison(experiment_timeseries, experiment_list, output_directory, channel_list, sensor_name, sat_name, color_list, y_min, y_max) + + else: + timeseries_data.print_init_time() + +def get_colormap(cmap = cc.cm.CET_D1A, discrete_levels = 51, num_ticks=11, + vmin=-5, vmax=5): + # Create discrete colormap + colors = cmap(np.linspace(0, 1, discrete_levels)) + cmap_discrete = mcolors.ListedColormap(colors) + + # Create boundaries for the colormap + boundaries = np.linspace(vmin, vmax, discrete_levels) + norm = mcolors.BoundaryNorm(boundaries, cmap_discrete.N) + + # Adjust tick labels: Use fewer ticks + tick_positions = np.linspace(vmin, vmax, num_ticks) + + return(cmap_discrete, boundaries, norm, tick_positions) + +def make_line_plot_multi_expt(timeseries_dict, experiment_list): + """ + Plot time series for multiple experiments stored in a dictionary. + + Parameters: + - timeseries_dict: Dictionary where keys are experiment names and values are GSIStatsTimeSeries objects. + - experiment_list: List of experiment names to plot. 
+ """ + + # Prepare the plot + plt.figure(figsize=(10, 6)) + + # Loop through each experiment name in experiment_list + for experiment_name in experiment_list: + # Access the corresponding GSIStatsTimeSeries object from the dictionary + timeseries_obj = timeseries_dict.get(experiment_name) + + if timeseries_obj: + # Extract data from the timeseries object + time_valid = timeseries_obj.timestamp_dict # Replace with actual column name + value = timeseries_obj.value_dict # Replace with actual column name + + + # Plot the data + plt.plot(time_valid, value, label=experiment_name) + else: + print(f"No data found for experiment: {experiment_name}") + + # Add labels and title + plt.xlabel('Time') + plt.ylabel('Number AMSUA Obs Used') + plt.title(f'Time Series Comparison of Experiments') + plt.legend() + + # Rotate x-axis labels for readability + plt.xticks(rotation=45) + + # Show the plot + plt.tight_layout() + plt.show() + +#This function plots all channels on the same plot, but each stat and sensor combo gets it's own plot +def plot_experiment_comparison(timeseries_dict, experiment_list, output_dir, channel_list, sensor_name, sat_name, expt_colors=None, y_min=None, y_max=None): + """ + Plot time series for multiple experiments for multiple stat and sensor combination, and save each plot. + + Parameters: + - timeseries_dict: Dictionary where keys are experiment names and values are GSIStatsTimeSeries objects. + - experiment_list: List of experiment names to plot. + - output_dir: Directory where plots will be saved. 
+ """ + + # Get statlabel_list and sensorlabel_list from one of the GSIStatsTimeSeries objects + if not timeseries_dict: + print("Error: timeseries_dict is empty.") + return + + # Extract the statlabel_list and sensorlabel_list from the first object in timeseries_dict + first_timeseries_obj = list(timeseries_dict.values())[0] + statlabel_list = first_timeseries_obj.statlabel_list + sensorlabel_list = first_timeseries_obj.sensorlabel_list + + # Loop through each stat_label in the statlabel_list + for stat_label in statlabel_list: + # Loop through each sensor_label in the sensorlabel_list + for sensor_label in sensorlabel_list: + plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each stat-sensor combination + + # Loop through each experiment in the experiment list + for i, experiment_name in enumerate(experiment_list): + # Access the corresponding GSIStatsTimeSeries object from the dictionary + timeseries_obj = timeseries_dict.get(experiment_name) + + if timeseries_obj: + # Safely access the nested dictionary for time_valid and value + time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, {}) + for channel, timestamps in time_valid.items(): + values = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) + # Check if data exists for the stat_label and sensor_label + if timestamps and values: + # Plot the data for this experiment + color = expt_colors[i] if expt_colors else None + experiment_label = experiment_name + if experiment_name in friendly_names_dict: + experiment_label = friendly_names_dict[experiment_name] + plt.plot(timestamps, values, label=f"{experiment_label} - Ch {channel}", alpha=0.6, color=color) #set plot for line or bar for bar or scatter for scatter + else: + print(f"No data for {stat_label}, {sensor_label} in experiment: {experiment_name}") + else: + print(f"No data for experiment: {experiment_name}") + + # Set y-axis limits if specified + if y_min is not None or y_max is not 
None: + plt.ylim(y_min, y_max) + + # Add labels and title for the plot + plt.xlabel('Time Valid', fontsize=18) + plt.ylabel(f'{stat_label}', fontsize=18) + if channel_list is None: + plt.title(f'Comparison of {stat_label} and {sensor_label} for {sensor_name} {sat_name} across Experiments', fontsize=18) + else: + plt.title(f'Comparison of {stat_label} and {sensor_label} for {sensor_name} {sat_name} across Experiments for Channels {channel_list}', fontsize=18) + plt.legend(fontsize=20) + + # Rotate x-axis labels for readability + plt.xticks(rotation=45, fontsize=20) + plt.yticks(fontsize=20) + + # Save the plot to the specified output directory + plot_filename = f'{stat_label}_{sensor_label}_comparison.png' + plot_filepath = os.path.join(output_dir, plot_filename) + plt.savefig(plot_filepath) + + # Close the plot after saving + plt.close() + + print(f"Plot saved: {plot_filepath}") + +#This function plots each channel, stat, sensor combo on it's own plot +def plot_experiment_comparison_per_channel(timeseries_dict, experiment_list, output_dir, sensor_name, sat_name, expt_colors=None, y_min=None, y_max=None): + """ + Plot time series for multiple experiments for multiple stat and sensor combination, and save each plot. + + Parameters: + - timeseries_dict: Dictionary where keys are experiment names and values are GSIStatsTimeSeries objects. + - experiment_list: List of experiment names to plot. + - output_dir: Directory where plots will be saved. 
+ """ + + # Get statlabel_list and sensorlabel_list from one of the GSIStatsTimeSeries objects + if not timeseries_dict: + print("Error: timeseries_dict is empty.") + return + + # Extract the statlabel_list and sensorlabel_list from the first object in timeseries_dict + first_timeseries_obj = list(timeseries_dict.values())[0] + statlabel_list = first_timeseries_obj.statlabel_list + sensorlabel_list = first_timeseries_obj.sensorlabel_list + channel_list = first_timeseries_obj.channel_list + + + # Loop through each stat_label in the statlabel_list + for stat_label in statlabel_list: + # Loop through each sensor_label in the sensorlabel_list + for sensor_label in sensorlabel_list: + for channel_label in channel_list: + plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each stat-sensor-channel combination + + # Loop through each experiment in the experiment list + for i, experiment_name in enumerate(experiment_list): + # Access the corresponding GSIStatsTimeSeries object from the dictionary + timeseries_obj = timeseries_dict.get(experiment_name) + + if timeseries_obj: + # Safely access the nested dictionary for time_valid and value + timestamps = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, {}).get(channel_label, []) + values = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, {}).get(channel_label, []) + # Check if data exists for the stat_label and sensor_label + if timestamps and values: + time_range = (max(timestamps) - min(timestamps)).total_seconds() + new_width = min(max(time_range / 50000, 12), 25) + fig = plt.gcf() + width, height = fig.get_size_inches() + if new_width > width: + fig.set_size_inches(new_width, height) + # Plot the data for this experiment + color = expt_colors[i] if expt_colors else None + experiment_label = experiment_name + if experiment_name in friendly_names_dict: + experiment_label = friendly_names_dict[experiment_name] + plt.plot(timestamps, values, label=f"{experiment_label} - Ch 
{channel_label}", alpha=0.6, color=color) #set plot for line or bar for bar or scatter for scatter + else: + print(f"No data for {stat_label}, {sensor_label} in experiment: {experiment_name}") + else: + print(f"No data for experiment: {experiment_name}") + + # Set y-axis limits if specified + if y_min is not None or y_max is not None: + plt.ylim(y_min, y_max) + + # Add labels and title for the plot + plt.xlabel('Time Valid', fontsize=18) + plt.ylabel(f'{stat_label}', fontsize=18) + plt.title(f'Comparison of {stat_label} and {sensor_label} for {sensor_name} {sat_name} across Experiments for Channel {channel_label}', fontsize=18) + plt.legend(fontsize=20) + + # Rotate x-axis labels for readability + plt.xticks(rotation=45, fontsize=20) + plt.yticks(fontsize=20) + + # Save the plot to the specified output directory + plot_filename = f'{stat_label}_{sensor_label}_{channel_label}_comparison.png' + plot_filepath = os.path.join(output_dir, plot_filename) + plt.savefig(plot_filepath) + + # Close the plot after saving + plt.close() + + print(f"Plot saved: {plot_filepath}") + +#This function plots stat and sensor combos on the same plot wihtout regard to separate channels (expects channels averaged and a list of which channels are included for title) +def plot_experiment_comparison_multi_stat(timeseries_dict, experiment_list, output_dir, channel_list, stat_pair, array_metrics_list, sensor_name, sat_name, line_colors=None, y_min=None, y_max=None): + """ + Plot time series for multiple experiments for each stat and sensor combination, and save each plot. + + Parameters: + - timeseries_dict: Dictionary where keys are experiment names and values are nested dictionaries of array metric types. + - experiment_list: List of experiment names to plot. + - output_dir: Directory where plots will be saved. + - channel_list: List of channel indices to plot. + - stat_pair: List containing two stat labels to compare (e.g., ['std_GSIstage_1', 'bias_post_corr_GSIstage_1']). 
+ - array_metrics_list: List of array metric types corresponding to the stat labels. + - line_colors: List of colors for each line (optional). If None, default colors will be used. + """ + + # Get statlabel_list and sensorlabel_list from one of the GSIStatsTimeSeries objects + if not timeseries_dict: + print("Error: timeseries_dict is empty.") + return + + # Extract the statlabel_list and sensorlabel_list from the first object in timeseries_dict + first_timeseries_obj = list(timeseries_dict.values())[0].values() + sensorlabel_list = list(first_timeseries_obj)[0].sensorlabel_list + + # Set default line colors if none are provided + # if line_colors is None: + # line_colors = plt.cm.get_cmap('tab10', len(experiment_list)) + + # Loop through each sensor_label in the sensorlabel_list + for sensor_label in sensorlabel_list: + plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each sensor combination + + # Loop through each experiment in the experiment list + for i, experiment_name in enumerate(experiment_list): + # Access the corresponding dictionary for the experiment + experiment_data = timeseries_dict.get(experiment_name) + + if experiment_data: + # Loop through both stat labels in the stat_pair + for j, stat_label in enumerate(stat_pair): + array_metric_type = array_metrics_list[j] # Map stat_label to array_metric_type + timeseries_obj = experiment_data.get(array_metric_type) + + if timeseries_obj: + # Safely access the nested dictionary for time_valid and value + time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, []) + value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, []) + #TODO: update to handle channels? 
+ # Check if data exists for the stat_label and sensor_label + if time_valid and value: + # Plot the data for this experiment and stat_label with custom color + color = line_colors[i][j] if line_colors else None + experiment_label = experiment_name + stat_friendly = stat_label + if experiment_name in friendly_names_dict: + experiment_label = friendly_names_dict[experiment_name] + if stat_label in friendly_names_dict: + stat_friendly = friendly_names_dict[stat_label] + plt.plot(time_valid, value, label=f'{experiment_label} - {stat_friendly}', color=color, alpha=0.6) + else: + print(f"No data for {stat_label}, {sensor_label} in experiment: {experiment_name}") + else: + print(f"No data for array metric type {array_metric_type} in experiment: {experiment_name}") + else: + print(f"No data for experiment: {experiment_name}") + + # Set y-axis limits if specified + if y_min is not None or y_max is not None: + plt.ylim(y_min, y_max) + + # Add labels and title for the plot + plt.xlabel('Time Valid', fontsize=18) + plt.ylabel(f'Statistic Values', fontsize=18) + plt.title(f'{stat_pair[0]} and {stat_pair[1]} for {sensor_name} {sat_name} and Channel(s) {channel_list}', fontsize=18) + plt.legend(fontsize=20) + + #Rotate x-axis labels for readability + plt.xticks(rotation=45, fontsize=20) + plt.yticks(fontsize=20) + + # Save the plot to the specified output directory + plot_filename = f'{sensor_label}_comparison_{stat_pair[0]}_{stat_pair[1]}.png' + plot_filepath = os.path.join(output_dir, plot_filename) + plt.savefig(plot_filepath) + + # Close the plot after saving + plt.close() + + print(f"Plot saved: {plot_filepath}") + +#This function plots stat, sensor, and channels all on the same plot +def plot_experiment_comparison_multi_stat_all_channel(timeseries_dict, experiment_list, output_dir, stat_pair, array_metrics_list, sensor_name, sat_name, line_colors=None, y_min=None, y_max=None): + """ + Plot time series for multiple experiments for each stat and sensor combination, and save 
each plot. + + Parameters: + - timeseries_dict: Dictionary where keys are experiment names and values are nested dictionaries of array metric types. + - experiment_list: List of experiment names to plot. + - output_dir: Directory where plots will be saved. + - channel_list: List of channel indices to plot. + - stat_pair: List containing two stat labels to compare (e.g., ['std_GSIstage_1', 'bias_post_corr_GSIstage_1']). + - array_metrics_list: List of array metric types corresponding to the stat labels. + - line_colors: List of colors for each line (optional). If None, default colors will be used. + """ + + # Get statlabel_list and sensorlabel_list from one of the GSIStatsTimeSeries objects + if not timeseries_dict: + print("Error: timeseries_dict is empty.") + return + + # Extract the statlabel_list and sensorlabel_list from the first object in timeseries_dict + first_timeseries_obj = list(timeseries_dict.values())[0].values() + sensorlabel_list = list(first_timeseries_obj)[0].sensorlabel_list + channel_list = list(first_timeseries_obj)[0].channel_list + + # Set default line colors if none are provided + # if line_colors is None: + # line_colors = plt.cm.get_cmap('tab10', len(experiment_list)) + + # Loop through each sensor_label in the sensorlabel_list + for sensor_label in sensorlabel_list: + plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each sensor combination + + for channel in channel_list: + # Loop through each experiment in the experiment list + for i, experiment_name in enumerate(experiment_list): + # Access the corresponding dictionary for the experiment + experiment_data = timeseries_dict.get(experiment_name) + + if experiment_data: + # Loop through both stat labels in the stat_pair + for j, stat_label in enumerate(stat_pair): + array_metric_type = array_metrics_list[j] # Map stat_label to array_metric_type + timeseries_obj = experiment_data.get(array_metric_type) + + if timeseries_obj: + # Safely access the nested dictionary for 
time_valid and value + time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) + value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) + # Check if data exists for the stat_label and sensor_label + if time_valid and value: + time_range = (max(time_valid) - min(time_valid)).total_seconds() + new_width = min(max(time_range / 50000, 12), 25) + fig = plt.gcf() + width, height = fig.get_size_inches() + if new_width > width: + fig.set_size_inches(new_width, height) + # Plot the data for this experiment and stat_label with custom color + color = line_colors[i][j] if line_colors else None + experiment_label = experiment_name + stat_friendly = stat_label + if experiment_name in friendly_names_dict: + experiment_label = friendly_names_dict[experiment_name] + if stat_label in friendly_names_dict: + stat_friendly = friendly_names_dict[stat_label] + plt.plot(time_valid, value, label=f'{experiment_label} - {stat_friendly} - Ch {channel}', color=color, alpha=0.6) + else: + print(f"No data for {stat_label}, {sensor_label}, {channel} in experiment: {experiment_name}") + else: + print(f"No data for array metric type {array_metric_type} in experiment: {experiment_name}") + else: + print(f"No data for experiment: {experiment_name}") + + # Set y-axis limits if specified + if y_min is not None or y_max is not None: + plt.ylim(y_min, y_max) + + # Add labels and title for the plot + plt.xlabel('Time Valid', fontsize=18) + plt.ylabel(f'Statistic Values', fontsize=18) + plt.title(f'{stat_pair[0]} and {stat_pair[1]} for {sensor_name} {sat_name} and Channel(s) {channel_list}', fontsize=18) + plt.legend(fontsize=20) + + #Rotate x-axis labels for readability + plt.xticks(rotation=45, fontsize=20) + plt.yticks(fontsize=20) + + # Save the plot to the specified output directory + plot_filename = f'{sensor_label}_comparison_{stat_pair[0]}_{stat_pair[1]}.png' + plot_filepath = os.path.join(output_dir, plot_filename) 
+ plt.savefig(plot_filepath) + + # Close the plot after saving + plt.close() + + print(f"Plot saved: {plot_filepath}") + +#This function plots stat and sensor combos on the same plot but each channel receives it's own plot +def plot_experiment_comparison_multi_stat_per_channel(timeseries_dict, experiment_list, output_dir, stat_pair, array_metrics_list, sensor_name, sat_name, line_colors=None, y_min=None, y_max=None): + """ + Plot time series for multiple experiments for each stat and sensor combination, and save each plot. + + Parameters: + - timeseries_dict: Dictionary where keys are experiment names and values are nested dictionaries of array metric types. + - experiment_list: List of experiment names to plot. + - output_dir: Directory where plots will be saved. + - channel_list: List of channel indices to plot. + - stat_pair: List containing two stat labels to compare (e.g., ['std_GSIstage_1', 'bias_post_corr_GSIstage_1']). + - array_metrics_list: List of array metric types corresponding to the stat labels. + - line_colors: List of colors for each line (optional). If None, default colors will be used. 
+ """ + + # Get statlabel_list and sensorlabel_list from one of the GSIStatsTimeSeries objects + if not timeseries_dict: + print("Error: timeseries_dict is empty.") + return + + # Extract the statlabel_list and sensorlabel_list from the first object in timeseries_dict + first_timeseries_obj = list(timeseries_dict.values())[0].values() + sensorlabel_list = list(first_timeseries_obj)[0].sensorlabel_list + channel_list = list(first_timeseries_obj)[0].channel_list + + # Set default line colors if none are provided + # if line_colors is None: + # line_colors = plt.cm.get_cmap('tab10', len(experiment_list)) + + for channel in channel_list: + # Loop through each sensor_label in the sensorlabel_list + for sensor_label in sensorlabel_list: + plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each sensor combination + + # Loop through each experiment in the experiment list + for i, experiment_name in enumerate(experiment_list): + # Access the corresponding dictionary for the experiment + experiment_data = timeseries_dict.get(experiment_name) + + if experiment_data: + # Loop through both stat labels in the stat_pair + for j, stat_label in enumerate(stat_pair): + array_metric_type = array_metrics_list[j] # Map stat_label to array_metric_type + timeseries_obj = experiment_data.get(array_metric_type) + + if timeseries_obj: + # Safely access the nested dictionary for time_valid and value + time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) + value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) + # Check if data exists for the stat_label and sensor_label + if time_valid and value: + time_range = (max(time_valid) - min(time_valid)).total_seconds() + new_width = min(max(time_range / 50000, 12), 25) + fig = plt.gcf() + width, height = fig.get_size_inches() + if new_width > width: + fig.set_size_inches(new_width, height) + # Plot the data for this experiment and stat_label with 
custom color + color = line_colors[i][j] if line_colors else None + experiment_label = experiment_name + stat_friendly = stat_label + if experiment_name in friendly_names_dict: + experiment_label = friendly_names_dict[experiment_name] + if stat_label in friendly_names_dict: + stat_friendly = friendly_names_dict[stat_label] + plt.plot(time_valid, value, label=f'{experiment_label} - {stat_friendly}', color=color, alpha=0.6) + else: + print(f"No data for {stat_label}, {sensor_label}, {channel} in experiment: {experiment_name}") + else: + print(f"No data for array metric type {array_metric_type} in experiment: {experiment_name}") + else: + print(f"No data for experiment: {experiment_name}") + + # Set y-axis limits if specified + if y_min is not None or y_max is not None: + plt.ylim(y_min, y_max) + + # Add labels and title for the plot + plt.xlabel('Time Valid', fontsize=18) + plt.ylabel(f'Statistic Values', fontsize=18) + plt.title(f'{stat_pair[0]} and {stat_pair[1]} for {sensor_name} {sat_name} Channel {channel}', fontsize=18) + plt.legend(fontsize=20) + + #Rotate x-axis labels for readability + plt.xticks(rotation=45, fontsize=20) + plt.yticks(fontsize=20) + + # Save the plot to the specified output directory + plot_filename = f'{sensor_label}_ch{channel}_comparison_{stat_pair[0]}_{stat_pair[1]}.png' + plot_filepath = os.path.join(output_dir, plot_filename) + plt.savefig(plot_filepath) + + # Close the plot after saving + plt.close() + + print(f"Plot saved: {plot_filepath}") + + +def main(): + #run() + run_line_plot() + +if __name__=='__main__': + main() diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index 9675fcf..b3d2047 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -1,207 +1,29 @@ -#!/usr/bin/env python - -"""build GSI stats time series for plotting - -usage: place this file in score-db/src, run using 
the python interpreter, i.e. - -score-db/src$ python gsistats_timeseries.py - -users shouldn't need to edit anything besides the run() function, which can -be customized according to their needs - -This script uses a matplotlib styesheet. To make the style sheet available to -matplotlib, place the "agu_full.mplstyle" file in the "stylelib" direcotry under matplotlib.get_configdir(), which is usually either ~/.config/matplotlib/ or ~/.matplotlib/ (https://matplotlib.org/stable/users/explain/customizing.html#using-style-sheets) - -for any questions, please feel free to contact Adam Schneider -(Adam.Schneider@noaa.gov) -""" - -import os -import pathlib from datetime import datetime -import warnings - -import numpy as np -from matplotlib import pyplot as plt -import matplotlib.colors as mcolors -import matplotlib.dates as mdates -import colorcet as cc from score_db import score_db_base -CONFIG_PATH = os.path.join( - pathlib.Path(__file__).parent.parent.resolve(), - 'style_lib' -) -CONFIG_FILE = 'agu_full.mplstyle' - -friendly_names_dict={"scout_run_v1":"NOAA Scout Run", "NASA_GEOSIT_GSISTATS":"NASA GEOS-IT", "std_GSIstage_1":"STD", "bias_post_corr_GSIstage_1":"Bias"} # could this be done by string matching for the std/bias etc part? 
we could have a basic friendly dict for that - -def run(make_plot=False, make_line_plot=True, select_array_metric_types=True, - select_sat_name=True, - experiment_list=['scout_run_v1', - 'NASA_GEOSIT_GSISTATS' - #'scout_runs_gsi3dvar_1979stream' - ], - array_metrics_list=['amsua_bias_post_corr_GSIstage_%', - #'amsua_std_%', - #'%_variance_%', - #'amsua_use_%' - ], - sat_name = 'NOAA 15', - channel_list = ['5','6','7'], - start_date = '1979-01-01 00:00:00', - stop_date = '2026-01-01 00:00:00'): - """modify the above input variables to configure and generate time series - data for various GSI related statistics - - experiment_list: list of experiments to plot in sequence - array_metrics_list: list of metrics to plot in sequence - """ - - style_file = os.path.join( - CONFIG_PATH, - CONFIG_FILE - ) - if make_plot or make_line_plot: - plt.style.use(style_file) - - if not select_array_metric_types: - array_metrics_list=['%_all_stats_%'] +def extract_unique_stats(strings): + # Create sets to store unique values in the second and last positions + second_position_set = set() + last_position_set = set() - for experiment_name in experiment_list: - for array_metric_type in array_metrics_list: - timeseries_data = GSIStatsTimeSeries(start_date, stop_date, - experiment_name=experiment_name, - select_array_metric_types=select_array_metric_types, - array_metric_types=array_metric_type, - select_sat_name=select_sat_name, - sat_name=sat_name) - timeseries_data.build(all_channel_max=False, # set max or mean - all_channel_mean=False, - by_channel=True) # other False - - if make_plot: - timeseries_data.plot() - plt.suptitle(experiment_name) - #plt.show() - metric_string = array_metric_type.split('%')[1] #this won't always work if you give a specific sensor value - plt.savefig(os.path.join( - 'results', - f'gsi{metric_string}{experiment_name}.png'), - dpi=600) - plt.close() - - elif make_line_plot: - stat_label = 'bias_post_corr_GSIstage_1' - #stat_label = 'std_GSIstage_1' - sensor_label 
= 'n15_amsua' - y_min = -0.5 - y_max = 0.6 - timeseries_data.flatten_by_channel(channel_list) - timeseries_data.plot_line_plot_by_channel(stat_label=stat_label, sensor_label=sensor_label, experiment_name=experiment_name, channels_to_plot=channel_list, y_min=y_min, y_max=y_max) - metric_string = array_metric_type.split('%')[0] #again not expandable - plt.savefig(os.path.join( - #'results', - f'gsiline{metric_string}{experiment_name}.png'), - dpi=600) - plt.close() - else: - timeseries_data.print_init_time() - -def run_line_plot(make_line_plot=True, select_array_metric_types=True, - select_sat_name=True, multi_stat=True, per_channel=True, - experiment_list=[ - 'NASA_GEOSIT_GSISTATS', - 'scout_run_v1', - #'replay_observer_diagnostic_v1' - ], - array_metrics_list=['amsua_std_%', - 'amsua_bias_post_corr_GSIstage_%', - #'amsua_nobs_used_%' - ], - sensor_name = 'AMSUA', - sat_name = 'NOAA 18', - channel_list = None, - start_date = '1999-01-01 00:00:00', - stop_date = '2024-12-01 00:00:00', - color_list = [['#E4002B', '#f2901f'], - ['#003087', '#0085CA'], - #['#46990f', '#c1e67c'] - ], - stat_pair = ['std_GSIstage_1', 'bias_post_corr_GSIstage_1'], - y_min = None, - y_max = None, - output_directory="."): - """modify the above input variables to configure and generate time series - data for various GSI related statistics + # Iterate over the set of strings + for s in strings: + parts = s.split('_') - experiment_list: list of experiments to plot in sequence - array_metrics_list: list of metrics to plot in sequence - """ + # Add the second and last elements to their respective sets + if len(parts) > 1 and parts[-1] != 'None': # Ensure there are at least 2 parts, + second_position_set.add('_'.join(parts[1:-2])) + last_position_set.add(parts[-1]) - style_file = os.path.join( - CONFIG_PATH, - CONFIG_FILE - ) - if make_line_plot: - plt.style.use(style_file) - plt.rcParams['font.size'] = 20 - - if not select_array_metric_types: - array_metrics_list=['%_all_stats_%'] - - if 
multi_stat: - experiment_timeseries = dict() - - #for experiment_name in experiment_list: - for experiment_name in experiment_list: - experiment_timeseries[experiment_name] = dict() # Create a dictionary for each experiment - for array_metric_type in array_metrics_list: - timeseries_data = GSIStatsTimeSeries( - start_date, stop_date, - experiment_name=experiment_name, - select_array_metric_types=select_array_metric_types, - array_metric_types=array_metric_type, - select_sat_name=select_sat_name, - sat_name=sat_name) - - # Flatten data for the selected channels - timeseries_data.flatten_by_channel(channel_list=channel_list) - - # Store the timeseries data by experiment name and array metric type - experiment_timeseries[experiment_name][array_metric_type] = timeseries_data - else: - experiment_timeseries = dict() - for experiment_name in experiment_list: - for array_metric_type in array_metrics_list: - timeseries_data = GSIStatsTimeSeries(start_date, stop_date, - experiment_name=experiment_name, - select_array_metric_types=select_array_metric_types, - array_metric_types=array_metric_type, - select_sat_name=select_sat_name, - sat_name=sat_name) - timeseries_data.flatten_by_channel(channel_list=channel_list) - experiment_timeseries[experiment_name] = timeseries_data - - if make_line_plot: - if per_channel: - if multi_stat: - #mutli stat, each channel on their own plot - plot_experiment_comparison_multi_stat_per_channel(experiment_timeseries, experiment_list, output_directory, stat_pair, array_metrics_list, sensor_name, sat_name, color_list, y_min, y_max) - else: - #singular stat, each channel on their own plot - plot_experiment_comparison_per_channel(experiment_timeseries, experiment_list, output_directory, sensor_name, sat_name, color_list, y_min, y_max) - else: - if multi_stat: - #multi stat, all channels same plot - plot_experiment_comparison_multi_stat_all_channel(experiment_timeseries, experiment_list, output_directory, stat_pair, array_metrics_list, sensor_name, 
sat_name, color_list, y_min, y_max) - else: - #single stat, all channels same plot - plot_experiment_comparison(experiment_timeseries, experiment_list, output_directory, channel_list, sensor_name, sat_name, color_list, y_min, y_max) - - else: - timeseries_data.print_init_time() + # Convert sets to sorted lists to maintain a consistent order + second_position_list = sorted(list(second_position_set)) + last_position_list = sorted(list(last_position_set)) + + # Combine the two lists into a 2D array (list of lists) + unique_positions = [second_position_list, last_position_list] + + return unique_positions class GSIStatsTimeSeries(object): def __init__(self, start_date, stop_date, @@ -224,7 +46,6 @@ def __init__(self, start_date, stop_date, self.experiment_id = experiment_id self.get_data_frame(start_date, stop_date) - def get_data_frame(self, start_date, stop_date): """request from the score-db application experiment data Database requests are submitted via score-db with a request dictionary @@ -278,7 +99,7 @@ def get_data_frame(self, start_date, stop_date): self.data_frame.drop_duplicates(subset=['metric_name', 'time_valid'], keep='last', inplace=True) - def build(self, all_channel_max=True, all_channel_mean=False, by_channel=False): + def build(self, all_channel_max=False, all_channel_mean=False, by_channel=True): self.unique_stat_list = extract_unique_stats( set(self.data_frame['metric_name'])) @@ -291,7 +112,6 @@ def build(self, all_channel_max=True, all_channel_mean=False, by_channel=False): self.timelabel_dict[f'{stat_name}_GSIstage_{gsi_stage}'] = dict() self.value_dict[f'{stat_name}_GSIstage_{gsi_stage}'] = dict() - for key in self.value_dict.keys(): for sat_short_name in set(self.data_frame.sat_short_name): for instrument_name in set( @@ -336,7 +156,6 @@ def build(self, all_channel_max=True, all_channel_mean=False, by_channel=False): elif by_channel: value = row.value - #print(gsi_stage, stat_name, sensor_label, time_label, value) 
self.timestamp_dict[stat_label][sensor_label].append(timestamp) self.timelabel_dict[stat_label][sensor_label].append(time_label) @@ -348,7 +167,6 @@ def build(self, all_channel_max=True, all_channel_mean=False, by_channel=False): #print(gsi_stage, stat_name, sensor_label, self.sensorlabel_dict[sensor_label]) - def flatten(self): self.unique_stat_list = extract_unique_stats( set(self.data_frame['metric_name'])) @@ -443,7 +261,6 @@ def flatten_by_channel(self, channel_list): row.time_valid.day, row.time_valid.year,) - for i, channel in enumerate(row.array_index_values): if channel_list is not None and channel not in channel_list: continue @@ -640,537 +457,4 @@ def plot_line_plot_by_channel(self, stat_label, sensor_label, experiment_name, c plt.ylim(top=y_max) # Adjust layout to avoid label clipping - plt.tight_layout() - - - - -def get_colormap(cmap = cc.cm.CET_D1A, discrete_levels = 51, num_ticks=11, - vmin=-5, vmax=5): - # Create discrete colormap - colors = cmap(np.linspace(0, 1, discrete_levels)) - cmap_discrete = mcolors.ListedColormap(colors) - - # Create boundaries for the colormap - boundaries = np.linspace(vmin, vmax, discrete_levels) - norm = mcolors.BoundaryNorm(boundaries, cmap_discrete.N) - - # Adjust tick labels: Use fewer ticks - tick_positions = np.linspace(vmin, vmax, num_ticks) - - return(cmap_discrete, boundaries, norm, tick_positions) - -def extract_unique_stats(strings): - # Create sets to store unique values in the second and last positions - second_position_set = set() - last_position_set = set() - - # Iterate over the set of strings - for s in strings: - parts = s.split('_') - - # Add the second and last elements to their respective sets - if len(parts) > 1 and parts[-1] != 'None': # Ensure there are at least 2 parts, - second_position_set.add('_'.join(parts[1:-2])) - last_position_set.add(parts[-1]) - - # Convert sets to sorted lists to maintain a consistent order - second_position_list = sorted(list(second_position_set)) - last_position_list 
= sorted(list(last_position_set)) - - # Combine the two lists into a 2D array (list of lists) - unique_positions = [second_position_list, last_position_list] - - return unique_positions - -def make_line_plot_multi_expt(timeseries_dict, experiment_list): - """ - Plot time series for multiple experiments stored in a dictionary. - - Parameters: - - timeseries_dict: Dictionary where keys are experiment names and values are GSIStatsTimeSeries objects. - - experiment_list: List of experiment names to plot. - """ - - # Prepare the plot - plt.figure(figsize=(10, 6)) - - # Loop through each experiment name in experiment_list - for experiment_name in experiment_list: - # Access the corresponding GSIStatsTimeSeries object from the dictionary - timeseries_obj = timeseries_dict.get(experiment_name) - - if timeseries_obj: - # Extract data from the timeseries object - time_valid = timeseries_obj.timestamp_dict # Replace with actual column name - value = timeseries_obj.value_dict # Replace with actual column name - - - # Plot the data - plt.plot(time_valid, value, label=experiment_name) - else: - print(f"No data found for experiment: {experiment_name}") - - # Add labels and title - plt.xlabel('Time') - plt.ylabel('Number AMSUA Obs Used') - plt.title(f'Time Series Comparison of Experiments') - plt.legend() - - # Rotate x-axis labels for readability - plt.xticks(rotation=45) - - # Show the plot - plt.tight_layout() - plt.show() - -#This function plots all channels on the same plot, but each stat and sensor combo gets it's own plot -def plot_experiment_comparison(timeseries_dict, experiment_list, output_dir, channel_list, sensor_name, sat_name, expt_colors=None, y_min=None, y_max=None): - """ - Plot time series for multiple experiments for multiple stat and sensor combination, and save each plot. - - Parameters: - - timeseries_dict: Dictionary where keys are experiment names and values are GSIStatsTimeSeries objects. - - experiment_list: List of experiment names to plot. 
- - output_dir: Directory where plots will be saved. - """ - - # Get statlabel_list and sensorlabel_list from one of the GSIStatsTimeSeries objects - if not timeseries_dict: - print("Error: timeseries_dict is empty.") - return - - # Extract the statlabel_list and sensorlabel_list from the first object in timeseries_dict - first_timeseries_obj = list(timeseries_dict.values())[0] - statlabel_list = first_timeseries_obj.statlabel_list - sensorlabel_list = first_timeseries_obj.sensorlabel_list - - # Loop through each stat_label in the statlabel_list - for stat_label in statlabel_list: - # Loop through each sensor_label in the sensorlabel_list - for sensor_label in sensorlabel_list: - plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each stat-sensor combination - - # Loop through each experiment in the experiment list - for i, experiment_name in enumerate(experiment_list): - # Access the corresponding GSIStatsTimeSeries object from the dictionary - timeseries_obj = timeseries_dict.get(experiment_name) - - if timeseries_obj: - # Safely access the nested dictionary for time_valid and value - time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, {}) - for channel, timestamps in time_valid.items(): - values = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) - # Check if data exists for the stat_label and sensor_label - if timestamps and values: - # Plot the data for this experiment - color = expt_colors[i] if expt_colors else None - experiment_label = experiment_name - if experiment_name in friendly_names_dict: - experiment_label = friendly_names_dict[experiment_name] - plt.plot(timestamps, values, label=f"{experiment_label} - Ch {channel}", alpha=0.6, color=color) #set plot for line or bar for bar or scatter for scatter - else: - print(f"No data for {stat_label}, {sensor_label} in experiment: {experiment_name}") - else: - print(f"No data for experiment: {experiment_name}") - - # Set y-axis limits 
if specified - if y_min is not None or y_max is not None: - plt.ylim(y_min, y_max) - - # Add labels and title for the plot - plt.xlabel('Time Valid', fontsize=18) - plt.ylabel(f'{stat_label}', fontsize=18) - if channel_list is None: - plt.title(f'Comparison of {stat_label} and {sensor_label} for {sensor_name} {sat_name} across Experiments', fontsize=18) - else: - plt.title(f'Comparison of {stat_label} and {sensor_label} for {sensor_name} {sat_name} across Experiments for Channels {channel_list}', fontsize=18) - plt.legend(fontsize=20) - - # Rotate x-axis labels for readability - plt.xticks(rotation=45, fontsize=20) - plt.yticks(fontsize=20) - - # Save the plot to the specified output directory - plot_filename = f'{stat_label}_{sensor_label}_comparison.png' - plot_filepath = os.path.join(output_dir, plot_filename) - plt.savefig(plot_filepath) - - # Close the plot after saving - plt.close() - - print(f"Plot saved: {plot_filepath}") - -#This function plots each channel, stat, sensor combo on it's own plot -def plot_experiment_comparison_per_channel(timeseries_dict, experiment_list, output_dir, sensor_name, sat_name, expt_colors=None, y_min=None, y_max=None): - """ - Plot time series for multiple experiments for multiple stat and sensor combination, and save each plot. - - Parameters: - - timeseries_dict: Dictionary where keys are experiment names and values are GSIStatsTimeSeries objects. - - experiment_list: List of experiment names to plot. - - output_dir: Directory where plots will be saved. 
- """ - - # Get statlabel_list and sensorlabel_list from one of the GSIStatsTimeSeries objects - if not timeseries_dict: - print("Error: timeseries_dict is empty.") - return - - # Extract the statlabel_list and sensorlabel_list from the first object in timeseries_dict - first_timeseries_obj = list(timeseries_dict.values())[0] - statlabel_list = first_timeseries_obj.statlabel_list - sensorlabel_list = first_timeseries_obj.sensorlabel_list - channel_list = first_timeseries_obj.channel_list - - - # Loop through each stat_label in the statlabel_list - for stat_label in statlabel_list: - # Loop through each sensor_label in the sensorlabel_list - for sensor_label in sensorlabel_list: - for channel_label in channel_list: - plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each stat-sensor-channel combination - - # Loop through each experiment in the experiment list - for i, experiment_name in enumerate(experiment_list): - # Access the corresponding GSIStatsTimeSeries object from the dictionary - timeseries_obj = timeseries_dict.get(experiment_name) - - if timeseries_obj: - # Safely access the nested dictionary for time_valid and value - timestamps = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, {}).get(channel_label, []) - values = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, {}).get(channel_label, []) - # Check if data exists for the stat_label and sensor_label - if timestamps and values: - time_range = (max(timestamps) - min(timestamps)).total_seconds() - new_width = min(max(time_range / 50000, 12), 25) - fig = plt.gcf() - width, height = fig.get_size_inches() - if new_width > width: - fig.set_size_inches(new_width, height) - # Plot the data for this experiment - color = expt_colors[i] if expt_colors else None - experiment_label = experiment_name - if experiment_name in friendly_names_dict: - experiment_label = friendly_names_dict[experiment_name] - plt.plot(timestamps, values, label=f"{experiment_label} - Ch 
{channel_label}", alpha=0.6, color=color) #set plot for line or bar for bar or scatter for scatter - else: - print(f"No data for {stat_label}, {sensor_label} in experiment: {experiment_name}") - else: - print(f"No data for experiment: {experiment_name}") - - # Set y-axis limits if specified - if y_min is not None or y_max is not None: - plt.ylim(y_min, y_max) - - # Add labels and title for the plot - plt.xlabel('Time Valid', fontsize=18) - plt.ylabel(f'{stat_label}', fontsize=18) - plt.title(f'Comparison of {stat_label} and {sensor_label} for {sensor_name} {sat_name} across Experiments for Channel {channel_label}', fontsize=18) - plt.legend(fontsize=20) - - # Rotate x-axis labels for readability - plt.xticks(rotation=45, fontsize=20) - plt.yticks(fontsize=20) - - # Save the plot to the specified output directory - plot_filename = f'{stat_label}_{sensor_label}_{channel_label}_comparison.png' - plot_filepath = os.path.join(output_dir, plot_filename) - plt.savefig(plot_filepath) - - # Close the plot after saving - plt.close() - - print(f"Plot saved: {plot_filepath}") - -#This function plots stat and sensor combos on the same plot wihtout regard to separate channels (expects channels averaged and a list of which channels are included for title) -def plot_experiment_comparison_multi_stat(timeseries_dict, experiment_list, output_dir, channel_list, stat_pair, array_metrics_list, sensor_name, sat_name, line_colors=None, y_min=None, y_max=None): - """ - Plot time series for multiple experiments for each stat and sensor combination, and save each plot. - - Parameters: - - timeseries_dict: Dictionary where keys are experiment names and values are nested dictionaries of array metric types. - - experiment_list: List of experiment names to plot. - - output_dir: Directory where plots will be saved. - - channel_list: List of channel indices to plot. - - stat_pair: List containing two stat labels to compare (e.g., ['std_GSIstage_1', 'bias_post_corr_GSIstage_1']). 
- - array_metrics_list: List of array metric types corresponding to the stat labels. - - line_colors: List of colors for each line (optional). If None, default colors will be used. - """ - - # Get statlabel_list and sensorlabel_list from one of the GSIStatsTimeSeries objects - if not timeseries_dict: - print("Error: timeseries_dict is empty.") - return - - # Extract the statlabel_list and sensorlabel_list from the first object in timeseries_dict - first_timeseries_obj = list(timeseries_dict.values())[0].values() - sensorlabel_list = list(first_timeseries_obj)[0].sensorlabel_list - - # Set default line colors if none are provided - # if line_colors is None: - # line_colors = plt.cm.get_cmap('tab10', len(experiment_list)) - - # Loop through each sensor_label in the sensorlabel_list - for sensor_label in sensorlabel_list: - plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each sensor combination - - # Loop through each experiment in the experiment list - for i, experiment_name in enumerate(experiment_list): - # Access the corresponding dictionary for the experiment - experiment_data = timeseries_dict.get(experiment_name) - - if experiment_data: - # Loop through both stat labels in the stat_pair - for j, stat_label in enumerate(stat_pair): - array_metric_type = array_metrics_list[j] # Map stat_label to array_metric_type - timeseries_obj = experiment_data.get(array_metric_type) - - if timeseries_obj: - # Safely access the nested dictionary for time_valid and value - time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, []) - value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, []) - #TODO: update to handle channels? 
- # Check if data exists for the stat_label and sensor_label - if time_valid and value: - # Plot the data for this experiment and stat_label with custom color - color = line_colors[i][j] if line_colors else None - experiment_label = experiment_name - stat_friendly = stat_label - if experiment_name in friendly_names_dict: - experiment_label = friendly_names_dict[experiment_name] - if stat_label in friendly_names_dict: - stat_friendly = friendly_names_dict[stat_label] - plt.plot(time_valid, value, label=f'{experiment_label} - {stat_friendly}', color=color, alpha=0.6) - else: - print(f"No data for {stat_label}, {sensor_label} in experiment: {experiment_name}") - else: - print(f"No data for array metric type {array_metric_type} in experiment: {experiment_name}") - else: - print(f"No data for experiment: {experiment_name}") - - # Set y-axis limits if specified - if y_min is not None or y_max is not None: - plt.ylim(y_min, y_max) - - # Add labels and title for the plot - plt.xlabel('Time Valid', fontsize=18) - plt.ylabel(f'Statistic Values', fontsize=18) - plt.title(f'{stat_pair[0]} and {stat_pair[1]} for {sensor_name} {sat_name} and Channel(s) {channel_list}', fontsize=18) - plt.legend(fontsize=20) - - #Rotate x-axis labels for readability - plt.xticks(rotation=45, fontsize=20) - plt.yticks(fontsize=20) - - # Save the plot to the specified output directory - plot_filename = f'{sensor_label}_comparison_{stat_pair[0]}_{stat_pair[1]}.png' - plot_filepath = os.path.join(output_dir, plot_filename) - plt.savefig(plot_filepath) - - # Close the plot after saving - plt.close() - - print(f"Plot saved: {plot_filepath}") - -#This function plots stat, sensor, and channels all on the same plot -def plot_experiment_comparison_multi_stat_all_channel(timeseries_dict, experiment_list, output_dir, stat_pair, array_metrics_list, sensor_name, sat_name, line_colors=None, y_min=None, y_max=None): - """ - Plot time series for multiple experiments for each stat and sensor combination, and save 
each plot. - - Parameters: - - timeseries_dict: Dictionary where keys are experiment names and values are nested dictionaries of array metric types. - - experiment_list: List of experiment names to plot. - - output_dir: Directory where plots will be saved. - - channel_list: List of channel indices to plot. - - stat_pair: List containing two stat labels to compare (e.g., ['std_GSIstage_1', 'bias_post_corr_GSIstage_1']). - - array_metrics_list: List of array metric types corresponding to the stat labels. - - line_colors: List of colors for each line (optional). If None, default colors will be used. - """ - - # Get statlabel_list and sensorlabel_list from one of the GSIStatsTimeSeries objects - if not timeseries_dict: - print("Error: timeseries_dict is empty.") - return - - # Extract the statlabel_list and sensorlabel_list from the first object in timeseries_dict - first_timeseries_obj = list(timeseries_dict.values())[0].values() - sensorlabel_list = list(first_timeseries_obj)[0].sensorlabel_list - channel_list = list(first_timeseries_obj)[0].channel_list - - # Set default line colors if none are provided - # if line_colors is None: - # line_colors = plt.cm.get_cmap('tab10', len(experiment_list)) - - # Loop through each sensor_label in the sensorlabel_list - for sensor_label in sensorlabel_list: - plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each sensor combination - - for channel in channel_list: - # Loop through each experiment in the experiment list - for i, experiment_name in enumerate(experiment_list): - # Access the corresponding dictionary for the experiment - experiment_data = timeseries_dict.get(experiment_name) - - if experiment_data: - # Loop through both stat labels in the stat_pair - for j, stat_label in enumerate(stat_pair): - array_metric_type = array_metrics_list[j] # Map stat_label to array_metric_type - timeseries_obj = experiment_data.get(array_metric_type) - - if timeseries_obj: - # Safely access the nested dictionary for 
time_valid and value - time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) - value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) - # Check if data exists for the stat_label and sensor_label - if time_valid and value: - time_range = (max(time_valid) - min(time_valid)).total_seconds() - new_width = min(max(time_range / 50000, 12), 25) - fig = plt.gcf() - width, height = fig.get_size_inches() - if new_width > width: - fig.set_size_inches(new_width, height) - # Plot the data for this experiment and stat_label with custom color - color = line_colors[i][j] if line_colors else None - experiment_label = experiment_name - stat_friendly = stat_label - if experiment_name in friendly_names_dict: - experiment_label = friendly_names_dict[experiment_name] - if stat_label in friendly_names_dict: - stat_friendly = friendly_names_dict[stat_label] - plt.plot(time_valid, value, label=f'{experiment_label} - {stat_friendly} - Ch {channel}', color=color, alpha=0.6) - else: - print(f"No data for {stat_label}, {sensor_label}, {channel} in experiment: {experiment_name}") - else: - print(f"No data for array metric type {array_metric_type} in experiment: {experiment_name}") - else: - print(f"No data for experiment: {experiment_name}") - - # Set y-axis limits if specified - if y_min is not None or y_max is not None: - plt.ylim(y_min, y_max) - - # Add labels and title for the plot - plt.xlabel('Time Valid', fontsize=18) - plt.ylabel(f'Statistic Values', fontsize=18) - plt.title(f'{stat_pair[0]} and {stat_pair[1]} for {sensor_name} {sat_name} and Channel(s) {channel_list}', fontsize=18) - plt.legend(fontsize=20) - - #Rotate x-axis labels for readability - plt.xticks(rotation=45, fontsize=20) - plt.yticks(fontsize=20) - - # Save the plot to the specified output directory - plot_filename = f'{sensor_label}_comparison_{stat_pair[0]}_{stat_pair[1]}.png' - plot_filepath = os.path.join(output_dir, plot_filename) 
- plt.savefig(plot_filepath) - - # Close the plot after saving - plt.close() - - print(f"Plot saved: {plot_filepath}") - -#This function plots stat and sensor combos on the same plot but each channel receives it's own plot -def plot_experiment_comparison_multi_stat_per_channel(timeseries_dict, experiment_list, output_dir, stat_pair, array_metrics_list, sensor_name, sat_name, line_colors=None, y_min=None, y_max=None): - """ - Plot time series for multiple experiments for each stat and sensor combination, and save each plot. - - Parameters: - - timeseries_dict: Dictionary where keys are experiment names and values are nested dictionaries of array metric types. - - experiment_list: List of experiment names to plot. - - output_dir: Directory where plots will be saved. - - channel_list: List of channel indices to plot. - - stat_pair: List containing two stat labels to compare (e.g., ['std_GSIstage_1', 'bias_post_corr_GSIstage_1']). - - array_metrics_list: List of array metric types corresponding to the stat labels. - - line_colors: List of colors for each line (optional). If None, default colors will be used. 
- """ - - # Get statlabel_list and sensorlabel_list from one of the GSIStatsTimeSeries objects - if not timeseries_dict: - print("Error: timeseries_dict is empty.") - return - - # Extract the statlabel_list and sensorlabel_list from the first object in timeseries_dict - first_timeseries_obj = list(timeseries_dict.values())[0].values() - sensorlabel_list = list(first_timeseries_obj)[0].sensorlabel_list - channel_list = list(first_timeseries_obj)[0].channel_list - - # Set default line colors if none are provided - # if line_colors is None: - # line_colors = plt.cm.get_cmap('tab10', len(experiment_list)) - - for channel in channel_list: - # Loop through each sensor_label in the sensorlabel_list - for sensor_label in sensorlabel_list: - plt.figure(figsize=(16, 12), dpi=300) # Create a new figure for each sensor combination - - # Loop through each experiment in the experiment list - for i, experiment_name in enumerate(experiment_list): - # Access the corresponding dictionary for the experiment - experiment_data = timeseries_dict.get(experiment_name) - - if experiment_data: - # Loop through both stat labels in the stat_pair - for j, stat_label in enumerate(stat_pair): - array_metric_type = array_metrics_list[j] # Map stat_label to array_metric_type - timeseries_obj = experiment_data.get(array_metric_type) - - if timeseries_obj: - # Safely access the nested dictionary for time_valid and value - time_valid = timeseries_obj.timestamp_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) - value = timeseries_obj.value_dict.get(stat_label, {}).get(sensor_label, {}).get(channel, []) - # Check if data exists for the stat_label and sensor_label - if time_valid and value: - time_range = (max(time_valid) - min(time_valid)).total_seconds() - new_width = min(max(time_range / 50000, 12), 25) - fig = plt.gcf() - width, height = fig.get_size_inches() - if new_width > width: - fig.set_size_inches(new_width, height) - # Plot the data for this experiment and stat_label with 
custom color - color = line_colors[i][j] if line_colors else None - experiment_label = experiment_name - stat_friendly = stat_label - if experiment_name in friendly_names_dict: - experiment_label = friendly_names_dict[experiment_name] - if stat_label in friendly_names_dict: - stat_friendly = friendly_names_dict[stat_label] - plt.plot(time_valid, value, label=f'{experiment_label} - {stat_friendly}', color=color, alpha=0.6) - else: - print(f"No data for {stat_label}, {sensor_label}, {channel} in experiment: {experiment_name}") - else: - print(f"No data for array metric type {array_metric_type} in experiment: {experiment_name}") - else: - print(f"No data for experiment: {experiment_name}") - - # Set y-axis limits if specified - if y_min is not None or y_max is not None: - plt.ylim(y_min, y_max) - - # Add labels and title for the plot - plt.xlabel('Time Valid', fontsize=18) - plt.ylabel(f'Statistic Values', fontsize=18) - plt.title(f'{stat_pair[0]} and {stat_pair[1]} for {sensor_name} {sat_name} Channel {channel}', fontsize=18) - plt.legend(fontsize=20) - - #Rotate x-axis labels for readability - plt.xticks(rotation=45, fontsize=20) - plt.yticks(fontsize=20) - - # Save the plot to the specified output directory - plot_filename = f'{sensor_label}_ch{channel}_comparison_{stat_pair[0]}_{stat_pair[1]}.png' - plot_filepath = os.path.join(output_dir, plot_filename) - plt.savefig(plot_filepath) - - # Close the plot after saving - plt.close() - - print(f"Plot saved: {plot_filepath}") - - -def main(): - #run() - run_line_plot() - -if __name__=='__main__': - main() + plt.tight_layout() \ No newline at end of file diff --git a/src/score_plotting/core_scripts/instrument_channel_nums.py b/src/score_plotting/core_scripts/instrument_channel_nums.py new file mode 100644 index 0000000..8e522eb --- /dev/null +++ b/src/score_plotting/core_scripts/instrument_channel_nums.py @@ -0,0 +1,35 @@ +def get_instrument_channels(): + instrument_channels = { + 'abi': [7, 8, 9, 10, 11, 12, 13, 14, 
15, 16], + 'ahi': [7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + 'airs': [1, 6, 7, 10, 11, 15, 16, 17, 20, 21, 22, 24, 27, 28, 30, 36, 39, 40, 42, 51, 52, 54, 55, 56, 59, 62, 63, 68, 69, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 83, 84, 86, 92, 93, 98, 99, 101, 104, 105, 108, 110, 111, 113, 116, 117, 123, 124, 128, 129, 138, 139, 144, 145, 150, 151, 156, 157, 159, 162, 165, 168, 169, 170, 172, 173, 174, 175, 177, 179, 180, 182, 185, 186, 190, 192, 198, 201, 204, 207, 210, 215, 216, 221, 226, 227, 232, 252, 253, 256, 257, 261, 262, 267, 272, 295, 299, 300, 305, 310, 321, 325, 333, 338, 355, 362, 375, 453, 475, 484, 497, 528, 587, 672, 787, 791, 843, 870, 914, 950, 1003, 1012, 1019, 1024, 1030, 1038, 1048, 1069, 1079, 1082, 1083, 1088, 1090, 1092, 1095, 1104, 1111, 1115, 1116, 1119, 1120, 1123, 1130, 1138, 1142, 1178, 1199, 1206, 1221, 1237, 1252, 1260, 1263, 1266, 1285, 1301, 1304, 1329, 1371, 1382, 1415, 1424, 1449, 1455, 1466, 1477, 1500, 1519, 1538, 1545, 1565, 1574, 1583, 1593, 1614, 1627, 1636, 1644, 1652, 1669, 1674, 1681, 1694, 1708, 1717, 1723, 1740, 1748, 1751, 1756, 1763, 1766, 1771, 1777, 1780, 1783, 1794, 1800, 1803, 1806, 1812, 1826, 1843, 1852, 1865, 1866, 1868, 1869, 1872, 1873, 1876, 1881, 1882, 1883, 1911, 1917, 1918, 1924, 1928, 1937, 1941, 2099, 2100, 2101, 2103, 2104, 2106, 2107, 2108, 2109, 2110, 2111, 2112, 2113, 2114, 2115, 2116, 2117, 2118, 2119, 2120, 2121, 2122, 2123, 2128, 2134, 2141, 2145, 2149, 2153, 2164, 2189, 2197, 2209, 2226, 2234, 2280, 2318, 2321, 2325, 2328, 2333, 2339, 2348, 2353, 2355, 2357, 2363, 2370, 2371, 2377], + 'amsre' : [1,2,3,4,5,6,7,8,9,10,11,12], + 'amsr2': [1,2,3,4,5,6,7,8,9,10,11,12,13,14], + 'amsua': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + 'amsub': [1, 2, 3, 4, 5], + 'atms': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], + 'avhrr2': [3, 4, 5], + 'avhrr3': [3, 4, 5], + 'cris': [27, 28, 31, 32, 33, 37, 49, 51, 53, 59, 61, 63, 64, 65, 67, 69, 71, 73, 75, 79, 80, 81, 83, 85, 
87, 88, 89, 93, 95, 96, 99, 101, 102, 104, 106, 107, 111, 113, 116, 120, 123, 124, 125, 126, 130, 132, 133, 136, 137, 138, 142, 143, 144, 145, 147, 148, 150, 151, 153, 154, 155, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 168, 170, 171, 173, 175, 181, 183, 198, 208, 211, 216, 224, 228, 236, 238, 242, 248, 266, 268, 279, 283, 311, 317, 330, 333, 334, 338, 340, 341, 342, 349, 352, 358, 361, 364, 366, 367, 368, 378, 390, 391, 392, 394, 395, 396, 397, 398, 399, 404, 427, 447, 464, 473, 482, 484, 501, 529, 556, 557, 558, 560, 561, 562, 564, 565, 566, 569, 573, 574, 577, 580, 581, 584, 585, 587, 590, 591, 594, 597, 598, 601, 604, 607, 611, 614, 616, 617, 619, 622, 626, 628, 634, 637, 638, 640, 641, 642, 644, 646, 647, 650, 651, 652, 654, 655, 657, 659, 663, 667, 670, 707, 710, 713, 716, 730, 735, 736, 739, 743, 744, 746, 748, 751, 754, 755, 756, 757, 758, 760, 761, 762, 763, 766, 767, 768, 771, 772, 773, 776, 777, 778, 779, 780, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 794, 796, 798, 800, 802, 803, 804, 806, 807, 808, 809, 811, 812, 814, 816, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 838, 839, 840, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 856, 861, 862, 864, 865, 866, 867, 869, 871, 872, 874, 876, 878, 879, 880, 884, 886, 887, 888, 889, 890, 900, 921, 924, 927, 945, 991, 994, 1007, 1015, 1030, 1094, 1106, 1130, 1132, 1133, 1135, 1142, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1177, 1178, 1179, 1180, 1181, 1187, 1189, 1190, 1192, 1193, 1194, 1196, 1197, 1198, 1199, 1200, 1202, 1203, 1204, 1206, 1207, 1208, 1210, 1212, 1214, 1215, 1217, 1218, 1220, 1222, 1224, 1226, 1228, 1229, 1231, 1232, 1234, 1235, 1236, 1237, 1238, 1239, 1241, 1242, 1243, 1244, 1245, 1247, 1250, 1270, 1271, 1282, 1285, 1288, 1290, 1293, 1298, 1301], + 'cris-fsr': [19, 24, 26, 27, 
28, 31, 32, 33, 37, 39, 42, 44, 47, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 208, 211, 216, 224, 234, 236, 238, 239, 242, 246, 248, 255, 264, 266, 268, 275, 279, 283, 285, 291, 295, 301, 305, 311, 332, 342, 389, 400, 402, 404, 406, 410, 427, 439, 440, 441, 445, 449, 455, 458, 461, 464, 467, 470, 473, 475, 482, 486, 487, 490, 493, 496, 499, 501, 503, 505, 511, 513, 514, 518, 519, 520, 522, 529, 534, 563, 568, 575, 592, 594, 596, 598, 600, 602, 604, 611, 614, 616, 618, 620, 622, 626, 631, 638, 646, 648, 652, 659, 673, 675, 678, 684, 688, 694, 700, 707, 710, 713, 714, 718, 720, 722, 725, 728, 735, 742, 748, 753, 762, 780, 784, 798, 849, 860, 862, 866, 874, 882, 890, 898, 906, 907, 908, 914, 937, 972, 973, 978, 980, 981, 988, 995, 998, 1000, 1003, 1008, 1009, 1010, 1014, 1017, 1018, 1020, 1022, 1024, 1026, 1029, 1030, 1032, 1034, 1037, 1038, 1041, 1042, 1044, 1046, 1049, 1050, 1053, 1054, 1058, 1060, 1062, 1064, 1066, 1069, 1076, 1077, 1080, 1086, 1091, 1095, 1101, 1109, 1112, 1121, 1128, 1133, 1163, 1172, 1187, 1189, 1205, 1211, 1219, 1231, 1245, 1271, 1289, 1300, 1313, 1316, 1325, 1329, 1346, 1347, 1473, 1474, 1491, 1499, 1553, 1570, 1596, 1602, 1619, 1624, 1635, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 
1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 2119, 2140, 2143, 2147, 2153, 2158, 2161, 2168, 2171, 2175, 2182], + 'gmi': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + 'hirs2': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + 'hirs3': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + 'hirs4': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + 'iasi': [16, 29, 32, 35, 38, 41, 44, 47, 49, 50, 51, 53, 55, 56, 57, 59, 61, 62, 63, 66, 68, 70, 72, 74, 76, 78, 79, 81, 82, 83, 84, 85, 86, 87, 89, 92, 93, 95, 97, 99, 101, 103, 104, 106, 109, 110, 111, 113, 116, 119, 122, 125, 128, 131, 133, 135, 138, 141, 144, 146, 148, 150, 151, 154, 157, 159, 160, 161, 163, 167, 170, 173, 176, 179, 180, 185, 187, 191, 193, 197, 199, 200, 202, 203, 205, 207, 210, 212, 213, 214, 217, 218, 219, 222, 224, 225, 226, 228, 230, 231, 232, 236, 237, 239, 243, 246, 249, 252, 254, 259, 260, 262, 265, 267, 269, 275, 279, 282, 285, 294, 296, 299, 300, 303, 306, 309, 313, 320, 323, 326, 327, 329, 332, 335, 345, 347, 350, 354, 356, 360, 363, 366, 371, 372, 373, 375, 377, 379, 381, 383, 386, 389, 398, 401, 404, 405, 407, 408, 410, 411, 414, 416, 418, 423, 426, 428, 432, 433, 434, 439, 442, 445, 450, 457, 459, 472, 477, 483, 509, 515, 546, 552, 559, 566, 571, 573, 578, 584, 594, 625, 646, 662, 668, 705, 739, 756, 797, 867, 906, 921, 1027, 1046, 1090, 1098, 1121, 1133, 1173, 1191, 1194, 1222, 1271, 1283, 1338, 1409, 1414, 1420, 1424, 1427, 1430, 1434, 1440, 1442, 1445, 1450, 1454, 1460, 1463, 1469, 1474, 1479, 1483, 1487, 1494, 1496, 1502, 1505, 1509, 1510, 1513, 1518, 1521, 1526, 1529, 1532, 1536, 1537, 1541, 1545, 1548, 1553, 1560, 1568, 1574, 1579, 1583, 1585, 1587, 1606, 1626, 1639, 1643, 1652, 1658, 1659, 1666, 1671, 1675, 1681, 1694, 1697, 1710, 1786, 1791, 1805, 1839, 1884, 1913, 1946, 1947, 1991, 2019, 2094, 2119, 2213, 2239, 2271, 2289, 
2321, 2333, 2346, 2349, 2352, 2359, 2367, 2374, 2398, 2426, 2562, 2701, 2741, 2745, 2760, 2819, 2889, 2907, 2910, 2919, 2921, 2939, 2944, 2945, 2948, 2951, 2958, 2971, 2977, 2985, 2988, 2990, 2991, 2993, 3002, 3008, 3014, 3027, 3029, 3030, 3036, 3047, 3049, 3052, 3053, 3055, 3058, 3064, 3069, 3087, 3093, 3098, 3105, 3107, 3110, 3116, 3127, 3129, 3136, 3146, 3151, 3160, 3165, 3168, 3175, 3178, 3189, 3207, 3228, 3244, 3248, 3252, 3256, 3263, 3281, 3295, 3303, 3309, 3312, 3322, 3326, 3354, 3366, 3375, 3378, 3411, 3416, 3432, 3438, 3440, 3442, 3444, 3446, 3448, 3450, 3452, 3454, 3458, 3467, 3476, 3484, 3491, 3497, 3499, 3504, 3506, 3509, 3518, 3527, 3555, 3575, 3577, 3580, 3582, 3586, 3589, 3599, 3610, 3626, 3638, 3646, 3653, 3658, 3661, 3673, 3689, 3700, 3710, 3726, 3763, 3814, 3841, 3888, 4032, 4059, 4068, 4082, 4095, 4160, 4234, 4257, 4411, 4498, 4520, 4552, 4567, 4608, 4646, 4698, 4808, 4849, 4920, 4939, 4947, 4967, 4991, 4996, 5015, 5028, 5056, 5128, 5130, 5144, 5170, 5178, 5183, 5188, 5191, 5368, 5371, 5379, 5381, 5383, 5397, 5399, 5401, 5403, 5405, 5446, 5455, 5472, 5480, 5483, 5485, 5492, 5497, 5502, 5507, 5509, 5517, 5528, 5558, 5697, 5714, 5749, 5766, 5785, 5798, 5799, 5801, 5817, 5833, 5834, 5836, 5849, 5851, 5852, 5865, 5869, 5881, 5884, 5897, 5900, 5916, 5932, 5948, 5963, 5968, 5978, 5988, 5992, 5994, 5997, 6003, 6008, 6023, 6026, 6039, 6053, 6056, 6067, 6071, 6082, 6085, 6098, 6112, 6126, 6135, 6140, 6149, 6154, 6158, 6161, 6168, 6174, 6182, 6187, 6205, 6209, 6213, 6317, 6339, 6342, 6366, 6381, 6391, 6489, 6962, 6966, 6970, 6975, 6977, 6982, 6985, 6987, 6989, 6991, 6993, 6995, 6997, 6999, 7000, 7004, 7008, 7013, 7016, 7021, 7024, 7027, 7029, 7032, 7038, 7043, 7046, 7049, 7069, 7072, 7076, 7081, 7084, 7089, 7099, 7209, 7222, 7231, 7235, 7247, 7267, 7269, 7284, 7389, 7419, 7423, 7424, 7426, 7428, 7431, 7436, 7444, 7475, 7549, 7584, 7665, 7666, 7831, 7836, 7853, 7865, 7885, 7888, 7912, 7950, 7972, 7980, 7995, 8007, 8015, 8055, 8078], + 'mhs': [1, 2, 3, 4, 
5], + 'msu': [1, 2, 3, 4], + 'saphir': [1, 2, 3, 4, 5, 6], + 'seviri': [4, 5, 6, 7, 8, 9, 10, 11], + 'sndrD1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + 'sndrD2': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + 'sndrD3': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + 'sndrD4': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + 'sndr': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + 'ssmi': [1, 2, 3, 4, 5, 6, 7], + 'ssmis': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], + 'ssu': [1, 2, 3], + 'tmi': [1, 2, 3, 4, 5, 6, 7,8,9], # NASA + 'imgr': [2,3,4,5], # GDAS + } + return instrument_channels \ No newline at end of file diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py new file mode 100755 index 0000000..ae3b92a --- /dev/null +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -0,0 +1,239 @@ +#!/usr/bin/env python + +""" +""" + +import os +import pathlib + +import numpy as np +from matplotlib import pyplot as plt +import matplotlib.dates as mdates +import pandas as pd + +from gsistats_timeseries import GSIStatsTimeSeries +from instrument_channel_nums import get_instrument_channels +import satellite_names + +def config(): + config_dict = { + 'config_path': + os.path.join(pathlib.Path(__file__).parent.parent.resolve(), + 'style_lib'), + 'config_file': 'agu_full_3pg.mplstyle', + 'output_path': + os.path.join('/', 'Users', 'aschneider', 'projects', + 'coupled_global_reanalysis', 'results'), + 'experiment_list': ['NASA_GEOSIT_GSISTATS', + 'GDAS', + 'replay_observer_diagnostic_v1', + 'scout_run_v1' + ], + 'color_list': ['#E4002B', '#A2A4A3', '#003087', '#0085CA'], + #'fmt_list': ['|', "1", "2"], + 'ls_list': [':', '-.', '--', '-'], + 'lw_list': [0.75, 1.0, 1.25, 1.5], + 'sensor_list': ['amsua'], + 'start_date': '1979-01-01 
00:00:00', + 'stop_date': '2025-01-01 00:00:00', + } + + ''' + could this be done by string matching for the std/bias etc part? we could + have a basic friendly dict for that + ''' + friendly_names_dict={"scout_run_v1": "NOAA atmo-scout", + "NASA_GEOSIT_GSISTATS": "NASA GEOS-IT", + "GDAS": "GDAS", + "replay_observer_diagnostic_v1": "NOAA ROD", + "std_GSIstage_1": "STD", + "variance_GSIstage_1": "obs error variance", + "bias_post_corr_GSIstage_1": "ME", + "sqrt_bias_GSIstage_1": "RMSE"} + + return(config_dict, friendly_names_dict) + +class GSIRadianceFit2ObsFig(object): + """ + """ + def __init__(self): + self.config_dict, self.friendly_names_dict = config() + self.channel_dict = get_instrument_channels() + self.experiment_list = self.config_dict['experiment_list'] + + if self.config_dict['config_path'] and self.config_dict['config_file']: + style_file = os.path.join(self.config_dict['config_path'], + self.config_dict['config_file']) + plt.style.use(style_file) + + for sensor, channel_list in self.channel_dict.items(): + if sensor in self.config_dict['sensor_list']: + array_metric_list = [f'{sensor}_bias_post_corr_GSIstage_1', + f'{sensor}_std_GSIstage_1', + f'{sensor}_variance_GSIstage_1', + f'{sensor}_sqrt_bias_GSIstage_1', + #f'{sensor}_nobs_used_GSIstage_1', + #f'{sensor}_nobs_tossed_GSIstage_1' + ] + + self.experiment_timeseries_dict = dict() + for experiment in self.config_dict['experiment_list']: + self.experiment_timeseries_dict[experiment] = dict() + for array_metric in array_metric_list: + self.experiment_timeseries_dict[ + experiment][array_metric] = GSIStatsTimeSeries( + self.config_dict['start_date'], + self.config_dict['stop_date'], + experiment_name=experiment, + select_array_metric_types=True, + array_metric_types=array_metric) + self.experiment_timeseries_dict[experiment][array_metric].build() + + self.make_figures(sensor) + + def make_figures(self, sensor, ncols=2): + nrows = 0 + sat_set = set() + for experiment, timeseries_dict in 
self.experiment_timeseries_dict.items(): + for full_stat_name, timeseries_data in timeseries_dict.items(): + for stat_label, value_dict in timeseries_data.value_dict.items(): + for sat_sensor in value_dict.keys(): + sat_set.add(sat_sensor) + + for channel_idx, channel_num in enumerate(self.channel_dict[sensor]): + fig, axes = plt.subplots(len(sat_set), ncols, sharex=True,sharey=True) + fig.suptitle(f"{sensor} channel {channel_num} global mean (left) and RMS (right) error (o - b)") + #axes[-1, 0].set_xlabel = 'cycle date (Gregorian)' + #axes[-1, 1].set_xlabel = 'cycle date (Gregorian)' + + for row, sat_sensor in enumerate(sorted(sat_set)): + sat_short_name = sat_sensor.split('_')[:-1][0] + sat_label = satellite_names.get_longname(sat_short_name) + axes[row, 0].set_title(f"{sat_label} mean error") + axes[row, 1].set_title(f"{sat_label} RMS error") + axes[row, 0].set_ylabel('temperature (K)') + axes[row, 0].axhline(color='black', lw=0.75) + axes[row, 1].axhline(color='black', lw=0.75) + + experiment_idx = 0 + for experiment, timeseries_dict in self.experiment_timeseries_dict.items(): + yerrs=list() + yerrs2=list() + for full_stat_name, timeseries_data in timeseries_dict.items(): + for stat_label, value_dict in timeseries_data.value_dict.items(): + if stat_label == 'bias_post_corr_GSIstage_1' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): + bias_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] + bias_values = np.array(value_dict[sat_sensor])[:,channel_idx] + std_timestamps = timeseries_dict[ + f'{sensor}_std_GSIstage_1'].timestamp_dict[ + 'std_GSIstage_1'][sat_sensor] + std_values = timeseries_dict[ + f'{sensor}_std_GSIstage_1'].value_dict[ + 'std_GSIstage_1'][sat_sensor] + + for time_idx, bias_timestamp in enumerate(bias_timestamps): + if bias_timestamp in std_timestamps: + std_time_idx = std_timestamps.index(bias_timestamp) + yerr = np.array(std_values)[std_time_idx, channel_idx] + if yerr: + yerrs.append(yerr) + else: + 
yerrs.append(0) + else: + yerrs.append(0) + + yerrs_plot = np.array([np.nan if x is None else float(x) for x in yerrs]) + mean_values_plot = np.array([np.nan if x is None else float(x) for x in bias_values]) + + axes[row, 0].bar( + bias_timestamps, + 2.*yerrs_plot, + width=pd.Timedelta(hours=6), + bottom=mean_values_plot - yerrs_plot, + color=self.config_dict['color_list'][experiment_idx], + alpha=0.2 + ) + + axes[row, 0].errorbar( + bias_timestamps, + mean_values_plot, + xerr=pd.Timedelta(hours=3), + fmt=self.config_dict['ls_list'][experiment_idx], + lw=self.config_dict['lw_list'][experiment_idx], + color=self.config_dict['color_list'][experiment_idx], + alpha = 0.5, + ) + + elif stat_label == 'sqrt_bias_GSIstage_1' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): + rmse_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] + rmse_values = np.array(value_dict[sat_sensor])[:,channel_idx] + obs_err_var_timestamps = timeseries_dict[ + f'{sensor}_variance_GSIstage_1'].timestamp_dict[ + 'variance_GSIstage_1'][sat_sensor] + obs_err_var_values = timeseries_dict[ + f'{sensor}_variance_GSIstage_1'].value_dict[ + 'variance_GSIstage_1'][sat_sensor] + + for time_idx, rmse_timestamp in enumerate(rmse_timestamps): + if rmse_timestamp in obs_err_var_timestamps: + obs_err_var_time_idx = obs_err_var_timestamps.index(rmse_timestamp) + yerr2 = np.array(obs_err_var_values)[obs_err_var_time_idx, channel_idx] + if yerr2: + yerrs2.append(yerr) + else: + yerrs2.append(0) + else: + yerrs2.append(0) + + yerrs_plot = np.sqrt( + np.array( + [0 if x is None else float(x) for x in yerrs2]) + ) + rmse_values_plot = np.array([np.nan if x is None else float(x) for x in rmse_values]) + + axes[row, 1].bar( + rmse_timestamps, + 2.*yerrs_plot, + width=pd.Timedelta(hours=6), + bottom=rmse_values_plot - yerrs_plot, + color=self.config_dict['color_list'][experiment_idx], + alpha=0.2 + ) + + axes[row, 1].errorbar( + rmse_timestamps, + rmse_values_plot, + 
xerr=pd.Timedelta(hours=3), + fmt=self.config_dict['ls_list'][experiment_idx], + lw=self.config_dict['lw_list'][experiment_idx], + color=self.config_dict['color_list'][experiment_idx], + alpha = 0.5, + label=self.friendly_names_dict[experiment] + ) + + axes[row,1].legend(loc=4) + + axes[row, 0].xaxis.set_major_formatter( + mdates.ConciseDateFormatter( + axes[row, 0].xaxis.get_major_locator())) + axes[row, 1].xaxis.set_major_formatter( + mdates.ConciseDateFormatter( + axes[row, 1].xaxis.get_major_locator())) + + experiment_idx += 1 + plt.savefig(os.path.join(self.config_dict['output_path'], + f'gsi_radiance_omb_{sensor}_ch{channel_num}.png'), + dpi=600) + plt.close() + +def run(): + + experiment_metrics_timeseries_data = GSIRadianceFit2ObsFig() + +def main(): + """ + """ + run() + +if __name__ == "__main__": + main() diff --git a/src/score_plotting/core_scripts/satellite_names.py b/src/score_plotting/core_scripts/satellite_names.py new file mode 100644 index 0000000..fab0cb4 --- /dev/null +++ b/src/score_plotting/core_scripts/satellite_names.py @@ -0,0 +1,115 @@ +def get_satellite_names(): + sats = { + "NOAA 5": {"sat_id": 705, "short_name": "n05"}, + "NOAA 6": {"sat_id": 706, "short_name": "n06"}, + "NOAA 7": {"sat_id": 707, "short_name": "n07"}, + "NOAA 8": {"sat_id": 708, "short_name": "n08"}, + "NOAA 9": {"sat_id": 709, "short_name": "n09"}, + "NOAA 10": {"sat_id": 710, "short_name": "n10"}, + "NOAA 11": {"sat_id": 711, "short_name": "n11"}, + "NOAA 12": {"sat_id": 712, "short_name": "n12"}, + "NOAA 13": {"sat_id": 713, "short_name": "n13"}, + "NOAA 14": {"sat_id": 714, "short_name": "n14"}, + "NOAA 15": {"sat_id": 715, "short_name": "n15"}, + "NOAA 16": {"sat_id": 716, "short_name": "n16"}, + "NOAA 17": {"sat_id": 717, "short_name": "n17"}, + "NOAA 18": {"sat_id": 718, "short_name": "n18"}, + "NOAA 19": {"sat_id": 719, "short_name": "n19"}, + "NOAA 20": {"sat_id": 720, "short_name": "n20"}, + "NOAA 21": {"sat_id": 721, "short_name": "n21"}, + "METOP-1": 
{"sat_id": 3, "short_name": "metop-b"}, + "METOP-2": {"sat_id": 4, "short_name": "metop-a"}, + "METOP-3": {"sat_id": 5, "short_name": "metop-c"}, + "METOP-1 (Metop-A": {"sat_id": 4, "short_name": "metop-b"}, + "METOP-2 (Metop-B": {"sat_id": 3, "short_name": "metop-a"}, + "METOP-3 (Metop-C": {"sat_id": 5, "short_name": "metop-c"}, + "AQUA": {"sat_id": 784, "short_name": "aqua"}, + "NPP": {"sat_id": 224, "short_name": "npp"}, + "GOES 7": {"sat_id": 251, "short_name": "g07"}, + "GOES 8": {"sat_id": 252, "short_name": "g08"}, + "GOES 9": {"sat_id": 253, "short_name": "g09"}, + "GOES 10": {"sat_id": 254, "short_name": "g10"}, + "GOES 11": {"sat_id": 255, "short_name": "g11"}, + "GOES 12": {"sat_id": 256, "short_name": "g12"}, + "GOES 13": {"sat_id": 257, "short_name": "g13"}, + "GOES 14": {"sat_id": 258, "short_name": "g14"}, + "GOES 15": {"sat_id": 259, "short_name": "g15"}, + "GOES 16": {"sat_id": 270, "short_name": "g16"}, + "GOES 17": {"sat_id": 271, "short_name": "g17"}, + "GOES 18": {"sat_id": 272, "short_name": "g18"}, + "MTSAT-2": {"sat_id": 172, "short_name": "MTSAT-2"}, + "MTSAT-1R": {"sat_id": 171, "short_name": "MTSAT-1R"}, + "METEOSAT 2": {"sat_id": 59, "short_name": "m02"}, + "METEOSAT 3": {"sat_id": 51, "short_name": "m03"}, + "METEOSAT 4": {"sat_id": 52, "short_name": "m04"}, + "METEOSAT 5": {"sat_id": 53, "short_name": "m05"}, + "METEOSAT 6": {"sat_id": 54, "short_name": "m06"}, + "METEOSAT 7": {"sat_id": 55, "short_name": "m07"}, + "METEOSAT 8": {"sat_id": 56, "short_name": "m08"}, + "METEOSAT 9": {"sat_id": 57, "short_name": "m09"}, + "METEOSAT 10": {"sat_id": 67, "short_name": "m10"}, + "METEOSAT 11": {"sat_id": 70, "short_name": "m11"}, + "DMSP 8": {"sat_id": 241, "short_name": "f08"}, + "DMSP 9": {"sat_id": 242, "short_name": "f09"}, + "DMSP 10": {"sat_id": 243, "short_name": "f10"}, + "DMSP 11": {"sat_id": 244, "short_name": "f11"}, + "DMSP 12": {"sat_id": 245, "short_name": "f12"}, + "DMSP 13": {"sat_id": 246, "short_name": "f13"}, + "DMSP 14": 
{"sat_id": 247, "short_name": "f14"}, + "DMSP 15": {"sat_id": 248, "short_name": "f15"}, + "DMSP 16": {"sat_id": 249, "short_name": "f16"}, + "DMSP17": {"sat_id": 285, "short_name": "f17"}, + "DMSP18": {"sat_id": 286, "short_name": "f18"}, + "DMSP-19": {"sat_id": 287, "short_name": "f19"}, + "DMSP20": {"sat_id": 'xxx', "short_name": "f20"}, + "CHAMP": {"sat_id": 41, "short_name": "CHAMP"}, + "COSMIC-1": {"sat_id": 740, "short_name": "COSMIC-1"}, + "COSMIC-2": {"sat_id": 741, "short_name": "COSMIC-2"}, + "COSMIC-3": {"sat_id": 742, "short_name": "COSMIC-3"}, + "COSMIC-4": {"sat_id": 743, "short_name": "COSMIC-4"}, + "COSMIC-5": {"sat_id": 744, "short_name": "COSMIC-5"}, + "COSMIC-6": {"sat_id": 745, "short_name": "COSMIC-6"}, + "COSMIC-2 E1": {"sat_id": 750, "short_name": "COSMIC-2 E1"}, + "COSMIC-2 E2": {"sat_id": 751, "short_name": "COSMIC-2 E2"}, + "COSMIC-2 E3": {"sat_id": 752, "short_name": "COSMIC-2 E3"}, + "COSMIC-2 E4": {"sat_id": 753, "short_name": "COSMIC-2 E4"}, + "COSMIC-2 E5": {"sat_id": 754, "short_name": "COSMIC-2 E5"}, + "COSMIC-2 E6": {"sat_id": 755, "short_name": "COSMIC-2 E6"}, + "GRACE A": {"sat_id": 722, "short_name": "GRACE A"}, + "GRACE B": {"sat_id": 723, "short_name": "GRACE B"}, + "GRACE C (GRACE-F": {"sat_id": 803, "short_name": "GRACE C"}, + "GRACE D (GRACE-F": {"sat_id": 804, "short_name": "GRACE D"}, + "SAC-C": {"sat_id": 820, "short_name": "SAC C"}, + "TerraSAR-X": {"sat_id": 42, "short_name": "TerraSAR-X"}, + "TERRA": {"sat_id": 783, "short_name": "TERRA"}, + "ERS 2": {"sat_id": 2, "short_name": "ERS 2"}, + "GMS 3": {"sat_id": 150, "short_name": "GMS 3"}, + "GMS 4": {"sat_id": 151, "short_name": "GMS 4"}, + "GMS 5": {"sat_id": 152, "short_name": "GMS 5"}, + "INSAT 3A": {"sat_id": 470, "short_name": "INSAT 3A"}, + "INSAT 3D": {"sat_id": 471, "short_name": "INSAT 3D"}, + "INSAT 3DR": {"sat_id": 472, "short_name": "INSAT 3DR"}, + "TIROS-N": {"sat_id": 254, "short_name": "tirosn"}, + "Megha-Tropiques": {"sat_id": 367, "short_name": 
"meghat"}, + "TanDEM-X": {"sat_id": 551, "short_name": "TanDEM-X"}, + "PAZ": {"sat_id": 431, "short_name": "PAZ"}, + "KOMPSAT-5": {"sat_id": 536, "short_name": "KOMPSAT-5"}, + "LANDSAT 5": {"sat_id": 207, "short_name": "LANDSAT 5"}, + "GPM-core": {"sat_id": 371, "short_name": "gpm"}, + "TRMM": {"sat_id": 241, "short_name": "TRMM"}, + "Himawari-8": {"sat_id": 370, "short_name": "himawari8"}, + "Himawari-9": {"sat_id": 372, "short_name": "himawari9"}, + "Spire Lemur 3U C": {"sat_id": 409, "short_name": "Spire L3UC"}, + "Sentinel 6A": {"sat_id": 835, "short_name": "Sentinel 6A"}, + "PlanetiQ GNOMES-": {"sat_id": 687, "short_name": "PlanetiQ GNOMES"}, + "AURA": {"sat_id": 296, "short_name": "AURA"}, + "NIMBUS 7": {"sat_id": 16, "short_name": "nim07"} + } + return sats + +def get_longname(short_name): + dict_of_dicts = get_satellite_names() + for top_level_key, inner_dict in dict_of_dicts.items(): + if 'short_name' in inner_dict and inner_dict['short_name'] == short_name: + return top_level_key + return short_name # return short_name if no match is found \ No newline at end of file diff --git a/src/score_plotting/style_lib/agu_full_3pg.mplstyle b/src/score_plotting/style_lib/agu_full_3pg.mplstyle new file mode 100644 index 0000000..917646c --- /dev/null +++ b/src/score_plotting/style_lib/agu_full_3pg.mplstyle @@ -0,0 +1,28 @@ +#### matplotlib style file for AGU figures + +### FONT +font.size : 8 +font.family : serif +font.serif : Times +font.sans-serif : Helvetica, Arial + +### TEXT +#text.usetex : True + +### AXES +axes.titlesize : 8 +axes.labelsize : 8 + +### TICKS +xtick.labelsize : 8 +ytick.labelsize : 8 + +### Legend +legend.fontsize : 8 + +### FIGURE +figure.titlesize : 8 +figure.figsize : 7.48, 27.18 + +### SAVING FIGURES +savefig.format : pdf From d86f876e46e605b42ffaf31139d22c802f4f523e Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Mon, 24 Feb 2025 18:36:01 -0700 Subject: [PATCH 16/44] MPI support for plotting multiple sensors in parallel --- 
.../plot_gsi_radiance_fit_to_obs.py | 280 +++++++++++------- 1 file changed, 166 insertions(+), 114 deletions(-) diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index ae3b92a..84a52b3 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -5,11 +5,13 @@ import os import pathlib +import warnings import numpy as np from matplotlib import pyplot as plt import matplotlib.dates as mdates import pandas as pd +from mpi4py import MPI from gsistats_timeseries import GSIStatsTimeSeries from instrument_channel_nums import get_instrument_channels @@ -22,8 +24,8 @@ def config(): 'style_lib'), 'config_file': 'agu_full_3pg.mplstyle', 'output_path': - os.path.join('/', 'Users', 'aschneider', 'projects', - 'coupled_global_reanalysis', 'results'), + os.path.join('/', 'media', 'darr', 'results', 'figures', + 'brightness_temperature_error_timeseries'), 'experiment_list': ['NASA_GEOSIT_GSISTATS', 'GDAS', 'replay_observer_diagnostic_v1', @@ -33,7 +35,7 @@ def config(): #'fmt_list': ['|', "1", "2"], 'ls_list': [':', '-.', '--', '-'], 'lw_list': [0.75, 1.0, 1.25, 1.5], - 'sensor_list': ['amsua'], + 'sensor_list': get_instrument_channels().keys(),#['amsua'], 'start_date': '1979-01-01 00:00:00', 'stop_date': '2025-01-01 00:00:00', } @@ -65,7 +67,8 @@ def __init__(self): style_file = os.path.join(self.config_dict['config_path'], self.config_dict['config_file']) plt.style.use(style_file) - + + def build_timeseries(self): for sensor, channel_list in self.channel_dict.items(): if sensor in self.config_dict['sensor_list']: array_metric_list = [f'{sensor}_bias_post_corr_GSIstage_1', @@ -80,18 +83,29 @@ def __init__(self): for experiment in self.config_dict['experiment_list']: self.experiment_timeseries_dict[experiment] = dict() for array_metric in array_metric_list: - self.experiment_timeseries_dict[ - 
experiment][array_metric] = GSIStatsTimeSeries( + try: + self.experiment_timeseries_dict[ + experiment][array_metric] = GSIStatsTimeSeries( self.config_dict['start_date'], self.config_dict['stop_date'], experiment_name=experiment, select_array_metric_types=True, array_metric_types=array_metric) - self.experiment_timeseries_dict[experiment][array_metric].build() + self.experiment_timeseries_dict[experiment][array_metric].build() + except KeyError: # remove array_metric from dict if no records returned + self.experiment_timeseries_dict.pop(experiment, None) + warnings.warn(f'missing {sensor} records for ' + f'{experiment} experiment') self.make_figures(sensor) def make_figures(self, sensor, ncols=2): + output_dir = os.path.join(self.config_dict['output_path'], f"{sensor}") + + # Check if the directory exists, and create it if it doesn't + if not os.path.exists(output_dir): + os.makedirs(output_dir) + nrows = 0 sat_set = set() for experiment, timeseries_dict in self.experiment_timeseries_dict.items(): @@ -101,139 +115,177 @@ def make_figures(self, sensor, ncols=2): sat_set.add(sat_sensor) for channel_idx, channel_num in enumerate(self.channel_dict[sensor]): - fig, axes = plt.subplots(len(sat_set), ncols, sharex=True,sharey=True) - fig.suptitle(f"{sensor} channel {channel_num} global mean (left) and RMS (right) error (o - b)") - #axes[-1, 0].set_xlabel = 'cycle date (Gregorian)' - #axes[-1, 1].set_xlabel = 'cycle date (Gregorian)' + if len(sat_set) > 0: + fig, axes = plt.subplots(len(sat_set), ncols, sharex=True,sharey=True, + squeeze=False) + fig.suptitle(f"{sensor} channel {channel_num} global mean (left) and RMS (right) error (o - b)") + #axes[-1, 0].set_xlabel = 'cycle date (Gregorian)' + #axes[-1, 1].set_xlabel = 'cycle date (Gregorian)' - for row, sat_sensor in enumerate(sorted(sat_set)): - sat_short_name = sat_sensor.split('_')[:-1][0] - sat_label = satellite_names.get_longname(sat_short_name) - axes[row, 0].set_title(f"{sat_label} mean error") - axes[row, 
1].set_title(f"{sat_label} RMS error") - axes[row, 0].set_ylabel('temperature (K)') - axes[row, 0].axhline(color='black', lw=0.75) - axes[row, 1].axhline(color='black', lw=0.75) + for row, sat_sensor in enumerate(sorted(sat_set)): + sat_short_name = sat_sensor.split('_')[:-1][0] + sat_label = satellite_names.get_longname(sat_short_name) + axes[row, 0].set_title(f"{sat_label} mean error") + axes[row, 1].set_title(f"{sat_label} RMS error") + axes[row, 0].set_ylabel('temperature (K)') + axes[row, 0].axhline(color='black', lw=0.75) + axes[row, 1].axhline(color='black', lw=0.75) - experiment_idx = 0 - for experiment, timeseries_dict in self.experiment_timeseries_dict.items(): - yerrs=list() - yerrs2=list() - for full_stat_name, timeseries_data in timeseries_dict.items(): - for stat_label, value_dict in timeseries_data.value_dict.items(): - if stat_label == 'bias_post_corr_GSIstage_1' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): - bias_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] - bias_values = np.array(value_dict[sat_sensor])[:,channel_idx] - std_timestamps = timeseries_dict[ - f'{sensor}_std_GSIstage_1'].timestamp_dict[ - 'std_GSIstage_1'][sat_sensor] - std_values = timeseries_dict[ - f'{sensor}_std_GSIstage_1'].value_dict[ - 'std_GSIstage_1'][sat_sensor] + experiment_idx = 0 + for experiment, timeseries_dict in self.experiment_timeseries_dict.items(): + yerrs=list() + yerrs2=list() + for full_stat_name, timeseries_data in timeseries_dict.items(): + for stat_label, value_dict in timeseries_data.value_dict.items(): + if stat_label == 'bias_post_corr_GSIstage_1' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): + bias_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] + bias_values = np.array(value_dict[sat_sensor])[:,channel_idx] + std_timestamps = timeseries_dict[ + f'{sensor}_std_GSIstage_1'].timestamp_dict[ + 'std_GSIstage_1'][sat_sensor] + std_values = timeseries_dict[ + 
f'{sensor}_std_GSIstage_1'].value_dict[ + 'std_GSIstage_1'][sat_sensor] - for time_idx, bias_timestamp in enumerate(bias_timestamps): - if bias_timestamp in std_timestamps: - std_time_idx = std_timestamps.index(bias_timestamp) - yerr = np.array(std_values)[std_time_idx, channel_idx] - if yerr: - yerrs.append(yerr) + for time_idx, bias_timestamp in enumerate(bias_timestamps): + if bias_timestamp in std_timestamps: + std_time_idx = std_timestamps.index(bias_timestamp) + yerr = np.array(std_values)[std_time_idx, channel_idx] + if yerr: + yerrs.append(yerr) + else: + yerrs.append(0) else: yerrs.append(0) - else: - yerrs.append(0) - yerrs_plot = np.array([np.nan if x is None else float(x) for x in yerrs]) - mean_values_plot = np.array([np.nan if x is None else float(x) for x in bias_values]) + yerrs_plot = np.array([np.nan if x is None else float(x) for x in yerrs]) + mean_values_plot = np.array([np.nan if x is None else float(x) for x in bias_values]) - axes[row, 0].bar( - bias_timestamps, - 2.*yerrs_plot, - width=pd.Timedelta(hours=6), - bottom=mean_values_plot - yerrs_plot, - color=self.config_dict['color_list'][experiment_idx], - alpha=0.2 - ) + axes[row, 0].bar( + bias_timestamps, + 2.*yerrs_plot, + width=pd.Timedelta(hours=6), + bottom=mean_values_plot - yerrs_plot, + color=self.config_dict['color_list'][experiment_idx], + alpha=0.2 + ) - axes[row, 0].errorbar( - bias_timestamps, - mean_values_plot, - xerr=pd.Timedelta(hours=3), - fmt=self.config_dict['ls_list'][experiment_idx], - lw=self.config_dict['lw_list'][experiment_idx], - color=self.config_dict['color_list'][experiment_idx], - alpha = 0.5, - ) + axes[row, 0].errorbar( + bias_timestamps, + mean_values_plot, + xerr=pd.Timedelta(hours=3), + fmt=self.config_dict['ls_list'][experiment_idx], + lw=self.config_dict['lw_list'][experiment_idx], + color=self.config_dict['color_list'][experiment_idx], + alpha = 0.5, + ) - elif stat_label == 'sqrt_bias_GSIstage_1' and sat_sensor in 
timeseries_data.timestamp_dict[stat_label].keys(): - rmse_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] - rmse_values = np.array(value_dict[sat_sensor])[:,channel_idx] - obs_err_var_timestamps = timeseries_dict[ - f'{sensor}_variance_GSIstage_1'].timestamp_dict[ - 'variance_GSIstage_1'][sat_sensor] - obs_err_var_values = timeseries_dict[ - f'{sensor}_variance_GSIstage_1'].value_dict[ - 'variance_GSIstage_1'][sat_sensor] + elif stat_label == 'sqrt_bias_GSIstage_1' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): + rmse_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] + rmse_values = np.array(value_dict[sat_sensor])[:,channel_idx] + obs_err_var_timestamps = timeseries_dict[ + f'{sensor}_variance_GSIstage_1'].timestamp_dict[ + 'variance_GSIstage_1'][sat_sensor] + obs_err_var_values = timeseries_dict[ + f'{sensor}_variance_GSIstage_1'].value_dict[ + 'variance_GSIstage_1'][sat_sensor] - for time_idx, rmse_timestamp in enumerate(rmse_timestamps): - if rmse_timestamp in obs_err_var_timestamps: - obs_err_var_time_idx = obs_err_var_timestamps.index(rmse_timestamp) - yerr2 = np.array(obs_err_var_values)[obs_err_var_time_idx, channel_idx] - if yerr2: - yerrs2.append(yerr) + for time_idx, rmse_timestamp in enumerate(rmse_timestamps): + if rmse_timestamp in obs_err_var_timestamps: + obs_err_var_time_idx = obs_err_var_timestamps.index(rmse_timestamp) + yerr2 = np.array(obs_err_var_values)[obs_err_var_time_idx, channel_idx] + if yerr2: + yerrs2.append(yerr) + else: + yerrs2.append(0) else: yerrs2.append(0) - else: - yerrs2.append(0) - yerrs_plot = np.sqrt( - np.array( - [0 if x is None else float(x) for x in yerrs2]) - ) - rmse_values_plot = np.array([np.nan if x is None else float(x) for x in rmse_values]) + yerrs_plot = np.sqrt( + np.array( + [0 if x is None else float(x) for x in yerrs2]) + ) + rmse_values_plot = np.array([np.nan if x is None else float(x) for x in rmse_values]) - axes[row, 1].bar( - rmse_timestamps, 
- 2.*yerrs_plot, - width=pd.Timedelta(hours=6), - bottom=rmse_values_plot - yerrs_plot, - color=self.config_dict['color_list'][experiment_idx], - alpha=0.2 - ) + axes[row, 1].bar( + rmse_timestamps, + 2.*yerrs_plot, + width=pd.Timedelta(hours=6), + bottom=rmse_values_plot - yerrs_plot, + color=self.config_dict['color_list'][experiment_idx], + alpha=0.2 + ) - axes[row, 1].errorbar( - rmse_timestamps, - rmse_values_plot, - xerr=pd.Timedelta(hours=3), - fmt=self.config_dict['ls_list'][experiment_idx], - lw=self.config_dict['lw_list'][experiment_idx], - color=self.config_dict['color_list'][experiment_idx], - alpha = 0.5, - label=self.friendly_names_dict[experiment] - ) + axes[row, 1].errorbar( + rmse_timestamps, + rmse_values_plot, + xerr=pd.Timedelta(hours=3), + fmt=self.config_dict['ls_list'][experiment_idx], + lw=self.config_dict['lw_list'][experiment_idx], + color=self.config_dict['color_list'][experiment_idx], + alpha = 0.5, + label=self.friendly_names_dict[experiment] + ) - axes[row,1].legend(loc=4) + axes[row,1].legend(loc=4) - axes[row, 0].xaxis.set_major_formatter( - mdates.ConciseDateFormatter( - axes[row, 0].xaxis.get_major_locator())) - axes[row, 1].xaxis.set_major_formatter( - mdates.ConciseDateFormatter( - axes[row, 1].xaxis.get_major_locator())) + axes[row, 0].xaxis.set_major_formatter( + mdates.ConciseDateFormatter( + axes[row, 0].xaxis.get_major_locator())) + axes[row, 1].xaxis.set_major_formatter( + mdates.ConciseDateFormatter( + axes[row, 1].xaxis.get_major_locator())) - experiment_idx += 1 - plt.savefig(os.path.join(self.config_dict['output_path'], - f'gsi_radiance_omb_{sensor}_ch{channel_num}.png'), - dpi=600) - plt.close() + experiment_idx += 1 + + plt.savefig(os.path.join(self.config_dict['output_path'], + f'gsi_radiance_omb_{sensor}_ch{channel_num}.png'), + dpi=600) + plt.close() def run(): + # Initialize MPI communicator + comm = MPI.COMM_WORLD + rank = comm.Get_rank() # Get the rank of the current process + size = comm.Get_size() # Get the 
total number of processes + + # Load global configurations and friendly names + global_config_dict, global_friendly_names_dict = config() + sensor_list = list() + for sensor in global_config_dict['sensor_list']: + sensor_list.append(sensor) + # Calculate how many sensors each process should handle + sensors_per_process = len(sensor_list) // size + + # Handle leftover sensors (remaining sensors are distributed to the first few processes) + leftover_sensors = len(sensor_list) % size + + # Calculate the start and end indices for each process + start_idx = rank * sensors_per_process + min(rank, leftover_sensors) # Adjust start index for extra sensors + end_idx = start_idx + sensors_per_process + (1 if rank < leftover_sensors else 0) # Adjust end index for extra sensors + + # Slice the sensor list for this process + local_sensor_list = sensor_list[start_idx:end_idx] + + # Create an instance of GSIRadianceFit2ObsFig for experiment data experiment_metrics_timeseries_data = GSIRadianceFit2ObsFig() + # Each process handles its portion of the sensor list + for sensor in local_sensor_list: + # Set the current sensor for the experiment + experiment_metrics_timeseries_data.config_dict['sensor_list'] = [sensor] + + #print(f'{sensor}', rank) + + # Build time series data for the current sensor + experiment_metrics_timeseries_data.build_timeseries() + + def main(): """ """ run() if __name__ == "__main__": - main() + main() \ No newline at end of file From 03a3ef3f9ef5fdd44b67591915edfc5bd690fea6 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Wed, 26 Feb 2025 17:08:58 -0700 Subject: [PATCH 17/44] added n_obs_tossed to GSI analysis fit to obs plots and improved figure readability --- .../core_scripts/gsistats_timeseries.py | 2 +- .../plot_gsi_radiance_fit_to_obs.py | 293 ++++++++++++++---- .../style_lib/full_3x3pg.mplstyle | 27 ++ 3 files changed, 261 insertions(+), 61 deletions(-) create mode 100644 src/score_plotting/style_lib/full_3x3pg.mplstyle diff --git 
a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index b3d2047..85f9bb3 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -77,7 +77,7 @@ def get_data_frame(self, start_date, stop_date): if self.select_array_metric_types: request_dict['params']['filters']['array_metric_types'] = { - 'name': {'like': self.array_metric_types} + 'name': {'exact': self.array_metric_types} } if self.select_sat_name: diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index 84a52b3..462ba9a 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -17,37 +17,43 @@ from instrument_channel_nums import get_instrument_channels import satellite_names +DA_CYCLE = 6. # hours + +YMIN = -2.5 # temperature (K) +YMAX = 2.5 # temperature (K) + def config(): config_dict = { 'config_path': os.path.join(pathlib.Path(__file__).parent.parent.resolve(), 'style_lib'), - 'config_file': 'agu_full_3pg.mplstyle', + 'config_file': ['full_3x3pg.mplstyle'], 'output_path': os.path.join('/', 'media', 'darr', 'results', 'figures', 'brightness_temperature_error_timeseries'), - 'experiment_list': ['NASA_GEOSIT_GSISTATS', - 'GDAS', + 'experiment_list': ['GDAS', 'replay_observer_diagnostic_v1', + 'NASA_GEOSIT_GSISTATS', 'scout_run_v1' ], - 'color_list': ['#E4002B', '#A2A4A3', '#003087', '#0085CA'], + 'color_list': ['#A2A4A3', '#0085CA', '#E4002B', 'black'], #'fmt_list': ['|', "1", "2"], - 'ls_list': [':', '-.', '--', '-'], - 'lw_list': [0.75, 1.0, 1.25, 1.5], + #'ls_list': [':', '-.', '--', '-'], + 'ls_list': ['-', '-', '-', '-'], + 'lw_list': [2.0, 1.5, 1.0, 0.5], 'sensor_list': get_instrument_channels().keys(),#['amsua'], 'start_date': '1979-01-01 00:00:00', - 'stop_date': '2025-01-01 00:00:00', 
+ 'stop_date': '2026-01-01 00:00:00', } ''' could this be done by string matching for the std/bias etc part? we could have a basic friendly dict for that ''' - friendly_names_dict={"scout_run_v1": "NOAA atmo-scout", - "NASA_GEOSIT_GSISTATS": "NASA GEOS-IT", + friendly_names_dict={"scout_run_v1": "scout run (3DVar)", + "NASA_GEOSIT_GSISTATS": "GEOS-IT", "GDAS": "GDAS", - "replay_observer_diagnostic_v1": "NOAA ROD", + "replay_observer_diagnostic_v1": "UFS Replay", "std_GSIstage_1": "STD", "variance_GSIstage_1": "obs error variance", "bias_post_corr_GSIstage_1": "ME", @@ -64,19 +70,21 @@ def __init__(self): self.experiment_list = self.config_dict['experiment_list'] if self.config_dict['config_path'] and self.config_dict['config_file']: - style_file = os.path.join(self.config_dict['config_path'], - self.config_dict['config_file']) - plt.style.use(style_file) + for style_file in self.config_dict['config_file']: + style_file_path = os.path.join(self.config_dict['config_path'], + style_file) + plt.style.use(style_file_path) def build_timeseries(self): for sensor, channel_list in self.channel_dict.items(): if sensor in self.config_dict['sensor_list']: + experiment_timeseries_datetime_init=None array_metric_list = [f'{sensor}_bias_post_corr_GSIstage_1', f'{sensor}_std_GSIstage_1', f'{sensor}_variance_GSIstage_1', f'{sensor}_sqrt_bias_GSIstage_1', - #f'{sensor}_nobs_used_GSIstage_1', - #f'{sensor}_nobs_tossed_GSIstage_1' + f'{sensor}_nobs_used_GSIstage_1', + f'{sensor}_nobs_tossed_GSIstage_1' ] self.experiment_timeseries_dict = dict() @@ -91,15 +99,20 @@ def build_timeseries(self): experiment_name=experiment, select_array_metric_types=True, array_metric_types=array_metric) + experiment_timeseries_datetime_init = self.experiment_timeseries_dict[ + experiment][array_metric].init_datetime self.experiment_timeseries_dict[experiment][array_metric].build() except KeyError: # remove array_metric from dict if no records returned self.experiment_timeseries_dict.pop(experiment, None) 
warnings.warn(f'missing {sensor} records for ' f'{experiment} experiment') + + self.db_name = os.getenv('SCORE_POSTGRESQL_DB_NAME') + self.make_figures( + sensor, + init_datetime=experiment_timeseries_datetime_init) - self.make_figures(sensor) - - def make_figures(self, sensor, ncols=2): + def make_figures(self, sensor, ncols=3, init_datetime=None): output_dir = os.path.join(self.config_dict['output_path'], f"{sensor}") # Check if the directory exists, and create it if it doesn't @@ -116,28 +129,66 @@ def make_figures(self, sensor, ncols=2): for channel_idx, channel_num in enumerate(self.channel_dict[sensor]): if len(sat_set) > 0: - fig, axes = plt.subplots(len(sat_set), ncols, sharex=True,sharey=True, + fig, axes = plt.subplots(len(sat_set), ncols, sharex=True,sharey=False, squeeze=False) - fig.suptitle(f"{sensor} channel {channel_num} global mean (left) and RMS (right) error (o - b)") + title_str0 = f"GSI radiance data analysis fit to observations (O-B) [metrics downloaded from RDB {self.db_name}" + + if init_datetime: + init_ctime = init_datetime.ctime() + title_str1 = f" {init_ctime}]" + + else: + title_str1 = "]" + + fig.suptitle(f"{title_str0}{title_str1}") #axes[-1, 0].set_xlabel = 'cycle date (Gregorian)' #axes[-1, 1].set_xlabel = 'cycle date (Gregorian)' for row, sat_sensor in enumerate(sorted(sat_set)): sat_short_name = sat_sensor.split('_')[:-1][0] sat_label = satellite_names.get_longname(sat_short_name) - axes[row, 0].set_title(f"{sat_label} mean error") - axes[row, 1].set_title(f"{sat_label} RMS error") - axes[row, 0].set_ylabel('temperature (K)') - axes[row, 0].axhline(color='black', lw=0.75) - axes[row, 1].axhline(color='black', lw=0.75) + + # subplot titles + axes[row, 0].set_title(f"{sat_label} {sensor} channel {channel_num}") + axes[row, 1].set_title(f"{sat_label} {sensor} channel {channel_num}") + axes[row, 2].set_title(f"{sat_label} {sensor} channel {channel_num}") + + # vertical axes labels + axes[row, 0].set_ylabel('Temperature mean error 
(K)') + axes[row, 1].set_ylabel('Temperature RMS error (K)') + axes[row, 2].set_ylabel('Number of observations tossed') + rejection_ratio_ax = axes[row, 2].twinx() + rejection_ratio_ax.set_ylabel('Percentage of observations tossed (%%)') + + axes[row, 0].axhline(color='black', lw=0.5) + axes[row, 0].set_ylim(YMIN, YMAX) + axes[row, 1].set_ylim(0, 2.*YMAX) + rejection_ratio_ax.set_ylim(0, 100) + + + axes[row, 0].set_yticks(np.arange(YMIN, YMAX + 0.1, YMAX/5.)) + axes[row, 1].set_yticks(np.arange(0, 2.*YMAX + 0.1, YMAX/5.)) + rejection_ratio_ax.set_yticks(np.arange(0, 100.1, 20)) + + # Set minor ticks at 0.1 intervals + axes[row, 0].set_yticks(np.arange(YMIN, YMAX, 0.1), minor=True) + axes[row, 1].set_yticks(np.arange(0, 2*YMAX, 0.1), minor=True) + rejection_ratio_ax.set_yticks(np.arange(0, 100.1, 5), minor=True) + + # Set ticks on both left and right vertical axes + axes[row, 0].tick_params(axis='y', which='both', left=True, right=True) + axes[row, 1].tick_params(axis='y', which='both', left=True, right=True) + + axes[row, 0].tick_params(axis='x', which='both', top=True, bottom=True) + axes[row, 1].tick_params(axis='x', which='both', top=True, bottom=True) experiment_idx = 0 for experiment, timeseries_dict in self.experiment_timeseries_dict.items(): - yerrs=list() - yerrs2=list() for full_stat_name, timeseries_data in timeseries_dict.items(): for stat_label, value_dict in timeseries_data.value_dict.items(): if stat_label == 'bias_post_corr_GSIstage_1' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): + """ mean error plot + """ bias_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] bias_values = np.array(value_dict[sat_sensor])[:,channel_idx] std_timestamps = timeseries_dict[ @@ -146,41 +197,86 @@ def make_figures(self, sensor, ncols=2): std_values = timeseries_dict[ f'{sensor}_std_GSIstage_1'].value_dict[ 'std_GSIstage_1'][sat_sensor] + + nobs_used_timestamps = timeseries_dict[ + f'{sensor}_nobs_used_GSIstage_1' + 
].timestamp_dict['nobs_used_GSIstage_1' + ][sat_sensor] + nobs_used_values = timeseries_dict[ + f'{sensor}_nobs_used_GSIstage_1' + ].value_dict[f'nobs_used_GSIstage_1' + ][sat_sensor] + yerrs=list() + nobs_used_arr=list() for time_idx, bias_timestamp in enumerate(bias_timestamps): - if bias_timestamp in std_timestamps: + if bias_timestamp in std_timestamps and bias_timestamp in nobs_used_timestamps: std_time_idx = std_timestamps.index(bias_timestamp) + nobs_used_time_idx = nobs_used_timestamps.index(bias_timestamp) yerr = np.array(std_values)[std_time_idx, channel_idx] - if yerr: + nobs_used_channel = np.array(nobs_used_values)[nobs_used_time_idx, channel_idx] + if yerr and nobs_used_channel: yerrs.append(yerr) + nobs_used_arr.append(nobs_used_channel) + else: yerrs.append(0) + nobs_used_arr.append(np.nan) else: yerrs.append(0) - - yerrs_plot = np.array([np.nan if x is None else float(x) for x in yerrs]) + nobs_used_arr.append(np.nan) + mean_values_plot = np.array([np.nan if x is None else float(x) for x in bias_values]) + yerrs_plot = np.array([np.nan if x is None else float(x) for x in yerrs]) + nobs_used_plot = np.array([np.nan if x is None else float(x) for x in nobs_used_arr]) + standard_errs = np.array(yerrs_plot) / np.sqrt(nobs_used_plot) axes[row, 0].bar( bias_timestamps, - 2.*yerrs_plot, - width=pd.Timedelta(hours=6), - bottom=mean_values_plot - yerrs_plot, + 2.*standard_errs, + width=pd.Timedelta(hours=DA_CYCLE), + bottom=mean_values_plot - standard_errs, color=self.config_dict['color_list'][experiment_idx], alpha=0.2 ) + ''' + axes[row, 0].barh(np.clip(mean_values_plot, + YMIN, + YMAX), + pd.Timedelta(hours=2), + height=0.1, + left=bias_timestamps - pd.Timedelta(hours=1), + color=self.config_dict['color_list'][experiment_idx], + alpha = 1.0) + ''' + axes[row, 0].plot( + bias_timestamps, + np.clip(mean_values_plot, YMIN, YMAX), + marker='none', + color=self.config_dict['color_list'][experiment_idx], + alpha=0.8, + 
lw=self.config_dict['lw_list'][experiment_idx], + ls=self.config_dict['ls_list'][experiment_idx], + label=self.friendly_names_dict[experiment] + ) + ''' axes[row, 0].errorbar( bias_timestamps, - mean_values_plot, + np.clip(mean_values_plot, YMIN, YMAX), xerr=pd.Timedelta(hours=3), - fmt=self.config_dict['ls_list'][experiment_idx], - lw=self.config_dict['lw_list'][experiment_idx], + fmt='none',#self.config_dict['ls_list'][experiment_idx], + #lw=self.config_dict['lw_list'][experiment_idx], + elinewidth=self.config_dict['lw_list'][experiment_idx], color=self.config_dict['color_list'][experiment_idx], - alpha = 0.5, + alpha = 1.0, ) + ''' + axes[row,0].legend(loc='upper left') elif stat_label == 'sqrt_bias_GSIstage_1' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): + """RMS error plot + """ rmse_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] rmse_values = np.array(value_dict[sat_sensor])[:,channel_idx] obs_err_var_timestamps = timeseries_dict[ @@ -190,6 +286,7 @@ def make_figures(self, sensor, ncols=2): f'{sensor}_variance_GSIstage_1'].value_dict[ 'variance_GSIstage_1'][sat_sensor] + yerrs2=list() for time_idx, rmse_timestamp in enumerate(rmse_timestamps): if rmse_timestamp in obs_err_var_timestamps: obs_err_var_time_idx = obs_err_var_timestamps.index(rmse_timestamp) @@ -201,49 +298,122 @@ def make_figures(self, sensor, ncols=2): else: yerrs2.append(0) - yerrs_plot = np.sqrt( - np.array( - [0 if x is None else float(x) for x in yerrs2]) - ) + yerrs_plot = np.sqrt(np.array([np.nan if x is None else float(x) for x in yerrs2])) rmse_values_plot = np.array([np.nan if x is None else float(x) for x in rmse_values]) axes[row, 1].bar( rmse_timestamps, 2.*yerrs_plot, - width=pd.Timedelta(hours=6), + width=pd.Timedelta(hours=DA_CYCLE), bottom=rmse_values_plot - yerrs_plot, color=self.config_dict['color_list'][experiment_idx], alpha=0.2 ) - axes[row, 1].errorbar( + axes[row, 1].plot( rmse_timestamps, - rmse_values_plot, - 
xerr=pd.Timedelta(hours=3), - fmt=self.config_dict['ls_list'][experiment_idx], - lw=self.config_dict['lw_list'][experiment_idx], + np.clip(rmse_values_plot, + 0, 2.*YMAX), + marker='none', color=self.config_dict['color_list'][experiment_idx], - alpha = 0.5, - label=self.friendly_names_dict[experiment] + alpha=1.0, + lw=self.config_dict['lw_list'][experiment_idx], + ls=self.config_dict['ls_list'][experiment_idx], + label=self.friendly_names_dict[experiment], + #xerr=pd.Timedelta(hours=3), + #fmt='none',#,self.config_dict['ls_list'][experiment_idx], + #lw=self.config_dict['lw_list'][experiment_idx], + #elinewidth=self.config_dict['lw_list'][experiment_idx], ) - - axes[row,1].legend(loc=4) + axes[row,1].legend(loc='upper left') + elif stat_label == 'nobs_tossed_GSIstage_1' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): + """ nobs tossed and rejection ratio plot + """ + nobs_tossed_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] + nobs_tossed_values = np.array(value_dict[sat_sensor])[:,channel_idx] + nobs_used_timestamps = timeseries_dict[ + f'{sensor}_nobs_used_GSIstage_1' + ].timestamp_dict['nobs_used_GSIstage_1' + ][sat_sensor] + nobs_used_values = timeseries_dict[ + f'{sensor}_nobs_used_GSIstage_1' + ].value_dict[f'nobs_used_GSIstage_1' + ][sat_sensor] + + nobs_used_arr=list() + nobs_tossed_arr = list() + for time_idx, nobs_tossed_timestamp in enumerate(nobs_tossed_timestamps): + if nobs_tossed_timestamp in nobs_used_timestamps: + nobs_used_time_idx = nobs_used_timestamps.index(nobs_tossed_timestamp) + nobs_used_channel = np.array(nobs_used_values)[nobs_used_time_idx, channel_idx] + if nobs_used_channel: + nobs_used_arr.append(nobs_used_channel) + + else: + nobs_used_arr.append(np.nan) + else: + nobs_used_arr.append(np.nan) + + nobs_tossed_plot = np.array([np.nan if x is None else float(x) for x in nobs_tossed_values]) + nobs_used_plot = np.array([np.nan if x is None else float(x) for x in nobs_used_arr]) + + rejection_rate 
= nobs_tossed_plot / ( + nobs_used_plot + nobs_tossed_plot) + + axes[row, 2].bar( + nobs_tossed_timestamps, + nobs_tossed_plot, + width=pd.Timedelta(hours=DA_CYCLE), + color=self.config_dict['color_list'][experiment_idx], + alpha=0.2, + label=f"n tossed ({self.friendly_names_dict[experiment]})" + ) + + rejection_ratio_ax.plot( + nobs_tossed_timestamps, + 100.*rejection_rate, + marker='none', + color=self.config_dict['color_list'][experiment_idx], + alpha=0.8, + lw=self.config_dict['lw_list'][experiment_idx], + ls=self.config_dict['ls_list'][experiment_idx], + label=f"%% tossed ({self.friendly_names_dict[experiment]})" + ) + + axes[row,2].legend(loc='upper left') + rejection_ratio_ax.legend(loc='') + axes[row, 0].xaxis.set_major_formatter( mdates.ConciseDateFormatter( axes[row, 0].xaxis.get_major_locator())) axes[row, 1].xaxis.set_major_formatter( mdates.ConciseDateFormatter( axes[row, 1].xaxis.get_major_locator())) + axes[row, 2].xaxis.set_major_formatter( + mdates.ConciseDateFormatter( + axes[row, 2].xaxis.get_major_locator())) experiment_idx += 1 - plt.savefig(os.path.join(self.config_dict['output_path'], + plt.savefig(os.path.join(output_dir, f'gsi_radiance_omb_{sensor}_ch{channel_num}.png'), - dpi=600) + dpi=300) plt.close() -def run(): +def run_microwave_sounders(sensor_list=['amsua', 'amsub', 'atms', 'ssmis']): + prun(sensor_list=sensor_list) + +def run_airs(sensor_list=['airs']): + prun(sensor_list=sensor_list) + +def run_tovs(sensor_list = ['hirs2', 'hirs3', 'hirs4', 'ssu', 'msu']): + prun(sensor_list = sensor_list) + +def run_avhrr(sensor_list = ['avhrr2', 'avhrr3']): + prun(sensor_list=sensor_list) + +def prun(sensor_list=None): # Initialize MPI communicator comm = MPI.COMM_WORLD rank = comm.Get_rank() # Get the rank of the current process @@ -251,9 +421,11 @@ def run(): # Load global configurations and friendly names global_config_dict, global_friendly_names_dict = config() - sensor_list = list() - for sensor in global_config_dict['sensor_list']: - 
sensor_list.append(sensor) + + if sensor_list==None: + sensor_list = list() + for sensor in global_config_dict['sensor_list']: + sensor_list.append(sensor) # Calculate how many sensors each process should handle sensors_per_process = len(sensor_list) // size @@ -281,11 +453,12 @@ def run(): # Build time series data for the current sensor experiment_metrics_timeseries_data.build_timeseries() - def main(): """ """ - run() + #run_avhrr() + #run_tovs() + run_microwave_sounders() if __name__ == "__main__": main() \ No newline at end of file diff --git a/src/score_plotting/style_lib/full_3x3pg.mplstyle b/src/score_plotting/style_lib/full_3x3pg.mplstyle new file mode 100644 index 0000000..fcc8996 --- /dev/null +++ b/src/score_plotting/style_lib/full_3x3pg.mplstyle @@ -0,0 +1,27 @@ +#### matplotlib style file for AGU figures + +### FONT +font.size : 11 +font.family : sans-serif +font.sans-serif : Arial + +### TEXT +#text.usetex : True + +### AXES +axes.titlesize : 11 +axes.labelsize : 11 + +### TICKS +xtick.labelsize : 11 +ytick.labelsize : 11 + +### Legend +legend.fontsize : 11 + +### FIGURE +figure.titlesize : 11 +figure.figsize : 24, 27.18 + +### SAVING FIGURES +savefig.format : pdf From b30a1f00c647813f82c611fd8261e3e9ed868891 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Tue, 4 Mar 2025 18:33:54 -0700 Subject: [PATCH 18/44] readability improvements to figures including temporal smoothing and explicit mpi4py send/recv (instead of bcast) calls to distribute filtered data frames to child processes --- .../core_scripts/gsistats_timeseries.py | 152 +++++--- .../plot_gsi_radiance_fit_to_obs.py | 363 ++++++++++++++---- .../style_lib/full_3x3pg.mplstyle | 2 +- 3 files changed, 384 insertions(+), 133 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index 85f9bb3..b1c902d 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ 
b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -12,7 +12,7 @@ def extract_unique_stats(strings): parts = s.split('_') # Add the second and last elements to their respective sets - if len(parts) > 1 and parts[-1] != 'None': # Ensure there are at least 2 parts, + if len(parts) > 1: #and parts[-1] != 'None': # Ensure there are at least 2 parts, second_position_set.add('_'.join(parts[1:-2])) last_position_set.add(parts[-1]) @@ -25,8 +25,75 @@ def extract_unique_stats(strings): return unique_positions +def get_data_frame(experiment_list, + array_metric_list, + start_date='1979-01-01 00:00:00', + stop_date='2026-01-01 00:00:00', + select_sat_name=False, + sat_name=None): + """request from the score-db application experiment data + Database requests are submitted via score-db with a request dictionary + """ + request_dict = { + 'db_request_name': 'expt_array_metrics', + 'method': 'GET', + 'params': {'filters': + {'experiment':{ + 'experiment_name': + {'exact': + experiment_list} + }, + 'regions': { + 'name': { + 'exact': ['global'] + }, + }, + + 'time_valid': { + 'from': start_date, + 'to': stop_date, + }, + }, + 'ordering': [ {'name': 'time_valid', 'order_by': 'asc'}] + } + + } + + request_dict['params']['filters']['array_metric_types'] = { + 'name': {'exact': array_metric_list} + } + + if select_sat_name: + request_dict['params']['filters']['sat_meta'] = { + 'sat_name': {'like': sat_name} + } + + db_action_response = score_db_base.handle_request(request_dict) + data_frame = db_action_response.details['records'] + + # sort by timestamp, created at + data_frame.sort_values(by=['expt_name', + 'metric_name', + 'sat_short_name', + 'time_valid', + 'created_at'], + inplace=True) + + # remove duplicate data + data_frame.drop_duplicates(subset=['expt_name', + 'metric_name', + 'sat_short_name', + 'time_valid'], + keep='last', inplace=True) + + return data_frame + class GSIStatsTimeSeries(object): - def __init__(self, start_date, stop_date, + def __init__(self, + 
start_date, + stop_date, + data_frame=None, + input_data_frame=False, experiment_name = 'scout_run_v1',# #'scout_runs_gsi3dvar_1979stream',# select_array_metric_types = True, @@ -44,60 +111,19 @@ def __init__(self, start_date, stop_date, self.select_sat_name = select_sat_name self.sat_name = sat_name self.experiment_id = experiment_id - self.get_data_frame(start_date, stop_date) - - def get_data_frame(self, start_date, stop_date): - """request from the score-db application experiment data - Database requests are submitted via score-db with a request dictionary - """ - request_dict = { - 'db_request_name': 'expt_array_metrics', - 'method': 'GET', - 'params': {'filters': - {'experiment':{ - 'experiment_name': - {'exact': - self.experiment_name} - }, - 'regions': { - 'name': { - 'exact': ['global'] - }, - }, - - 'time_valid': { - 'from': start_date, - 'to': stop_date, - }, - }, - 'ordering': [ {'name': 'time_valid', 'order_by': 'asc'}] - } + if input_data_frame and type(self.array_metric_types) == str: + self.data_frame = data_frame[( + data_frame['expt_name'] == self.experiment_name) & + (data_frame['metric_name'] == self.array_metric_types)] - } - - if self.select_array_metric_types: - request_dict['params']['filters']['array_metric_types'] = { - 'name': {'exact': self.array_metric_types} - } - - if self.select_sat_name: - request_dict['params']['filters']['sat_meta'] = { - 'sat_name': {'exact': self.sat_name} - } - - db_action_response = score_db_base.handle_request(request_dict) - self.data_frame = db_action_response.details['records'] - - # sort by timestamp, created at - self.data_frame.sort_values(by=['metric_instrument_name', - 'sat_short_name', - 'time_valid', - 'created_at'], - inplace=True) - - # remove duplicate data - self.data_frame.drop_duplicates(subset=['metric_name', 'time_valid'], - keep='last', inplace=True) + else: + self.data_frame = get_data_frame( + [self.experiment_name], + [self.array_metric_types], + start_date=start_date, + 
stop_date=stop_date, + select_sat_name=self.select_sat_name, + sat_name=self.sat_name) def build(self, all_channel_max=False, all_channel_mean=False, by_channel=True): self.unique_stat_list = extract_unique_stats( @@ -109,7 +135,7 @@ def build(self, all_channel_max=False, all_channel_mean=False, by_channel=True): for i, stat_name in enumerate(self.unique_stat_list[0]): for j, gsi_stage in enumerate(self.unique_stat_list[1]): self.timestamp_dict[f'{stat_name}_GSIstage_{gsi_stage}'] = dict() - self.timelabel_dict[f'{stat_name}_GSIstage_{gsi_stage}'] = dict() + #self.timelabel_dict[f'{stat_name}_GSIstage_{gsi_stage}'] = dict() self.value_dict[f'{stat_name}_GSIstage_{gsi_stage}'] = dict() for key in self.value_dict.keys(): @@ -119,7 +145,7 @@ def build(self, all_channel_max=False, all_channel_mean=False, by_channel=True): sensor_label = f'{sat_short_name}_{instrument_name}' self.timestamp_dict[key][sensor_label] = list() - self.timelabel_dict[key][sensor_label] = list() + #self.timelabel_dict[key][sensor_label] = list() self.value_dict[key][sensor_label] = list() self.sensorlabel_dict = dict() @@ -127,7 +153,7 @@ def build(self, all_channel_max=False, all_channel_mean=False, by_channel=True): for row in self.data_frame.itertuples(): metric_name_parts = row.metric_name.split('_') - if metric_name_parts[0] == row.metric_instrument_name and metric_name_parts[-1] != 'None': + if metric_name_parts[0] == row.metric_instrument_name and row.expt_name == self.experiment_name: stat_name = '_'.join(metric_name_parts[1:-2]) gsi_stage = metric_name_parts[-1] @@ -135,9 +161,11 @@ def build(self, all_channel_max=False, all_channel_mean=False, by_channel=True): sensor_label = f'{row.sat_short_name}_{row.metric_instrument_name}' timestamp = row.time_valid#.timestamp() - time_label = '%02d-%02d-%04d' % (row.time_valid.month, - row.time_valid.day, - row.time_valid.year,) + + if False: + time_label = '%02d-%02d-%04d' % (row.time_valid.month, + row.time_valid.day, + row.time_valid.year,) 
if all_channel_mean and all_channel_max: warnings.warn("got both channel mean and max, returning " @@ -158,7 +186,7 @@ def build(self, all_channel_max=False, all_channel_mean=False, by_channel=True): #print(gsi_stage, stat_name, sensor_label, time_label, value) self.timestamp_dict[stat_label][sensor_label].append(timestamp) - self.timelabel_dict[stat_label][sensor_label].append(time_label) + #self.timelabel_dict[stat_label][sensor_label].append(time_label) self.value_dict[stat_label][sensor_label].append(value) if not sensor_label in self.sensorlabel_dict.keys(): diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index 462ba9a..ea9199d 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -6,6 +6,7 @@ import os import pathlib import warnings +import argparse import numpy as np from matplotlib import pyplot as plt @@ -13,34 +14,41 @@ import pandas as pd from mpi4py import MPI -from gsistats_timeseries import GSIStatsTimeSeries +import gsistats_timeseries from instrument_channel_nums import get_instrument_channels import satellite_names +HOURS_PER_DAY = 24. # hours DA_CYCLE = 6. # hours +DAYS_TO_SMOOTH = 8. 
# days -YMIN = -2.5 # temperature (K) -YMAX = 2.5 # temperature (K) +def parse_arguments(): + parser = argparse.ArgumentParser( + description='script to create GSI analysis timeseries figures for ' + 'radiance error monitoring') + parser.add_argument('figure_output_path', type=str, + help='path to where figures will be saved.') + args = parser.parse_args() + + return args def config(): + args = parse_arguments() config_dict = { 'config_path': os.path.join(pathlib.Path(__file__).parent.parent.resolve(), 'style_lib'), 'config_file': ['full_3x3pg.mplstyle'], - 'output_path': - os.path.join('/', 'media', 'darr', 'results', 'figures', - 'brightness_temperature_error_timeseries'), + 'output_path': args.figure_output_path, 'experiment_list': ['GDAS', 'replay_observer_diagnostic_v1', 'NASA_GEOSIT_GSISTATS', 'scout_run_v1' ], - 'color_list': ['#A2A4A3', '#0085CA', '#E4002B', 'black'], - #'fmt_list': ['|', "1", "2"], + 'color_list': ['#CFB87C', '#0085CA', '#E4002B', 'black'], #'ls_list': [':', '-.', '--', '-'], - 'ls_list': ['-', '-', '-', '-'], - 'lw_list': [2.0, 1.5, 1.0, 0.5], + 'ls_list': ['-', '-.', '-', '-'], + 'lw_list': [2.5, 2.0, 1.5, 1.0], 'sensor_list': get_instrument_channels().keys(),#['amsua'], 'start_date': '1979-01-01 00:00:00', 'stop_date': '2026-01-01 00:00:00', @@ -61,10 +69,34 @@ def config(): return(config_dict, friendly_names_dict) +def get_data_frame(experiment_list, sensor_list, + start_date='1979-01-01 00:00:00', + stop_date='2026-01-01 00:00:00'): + + array_metric_list = list() + for sensor in sensor_list: + array_metric_list.append(f'{sensor}_bias_post_corr_GSIstage_1') + array_metric_list.append(f'{sensor}_std_GSIstage_1') + array_metric_list.append(f'{sensor}_variance_GSIstage_1') + array_metric_list.append(f'{sensor}_sqrt_bias_GSIstage_1') + array_metric_list.append(f'{sensor}_nobs_used_GSIstage_1') + array_metric_list.append(f'{sensor}_nobs_tossed_GSIstage_1') + array_metric_list.append(f'{sensor}_use_GSIstage_None') + + return 
gsistats_timeseries.get_data_frame( + experiment_list, + array_metric_list, + start_date=start_date, + stop_date=stop_date, + select_sat_name=False, + sat_name=None) + class GSIRadianceFit2ObsFig(object): """ """ - def __init__(self): + def __init__(self, data_frame=None, input_data_frame=False): + """ + """ self.config_dict, self.friendly_names_dict = config() self.channel_dict = get_instrument_channels() self.experiment_list = self.config_dict['experiment_list'] @@ -74,6 +106,13 @@ def __init__(self): style_file_path = os.path.join(self.config_dict['config_path'], style_file) plt.style.use(style_file_path) + + if input_data_frame: + self.data_frame = data_frame + else: + self.data_frame = get_data_frame(self.experiment_list, + self.config_dict['start_date'], + self.config_dict['stop_date']) def build_timeseries(self): for sensor, channel_list in self.channel_dict.items(): @@ -84,37 +123,42 @@ def build_timeseries(self): f'{sensor}_variance_GSIstage_1', f'{sensor}_sqrt_bias_GSIstage_1', f'{sensor}_nobs_used_GSIstage_1', - f'{sensor}_nobs_tossed_GSIstage_1' + f'{sensor}_nobs_tossed_GSIstage_1', + f'{sensor}_use_GSIstage_None' ] - + self.experiment_timeseries_dict = dict() - for experiment in self.config_dict['experiment_list']: + for experiment in self.experiment_list: self.experiment_timeseries_dict[experiment] = dict() for array_metric in array_metric_list: try: self.experiment_timeseries_dict[ - experiment][array_metric] = GSIStatsTimeSeries( + experiment][array_metric] = gsistats_timeseries.GSIStatsTimeSeries( self.config_dict['start_date'], self.config_dict['stop_date'], + data_frame=self.data_frame, + input_data_frame=True, experiment_name=experiment, - select_array_metric_types=True, array_metric_types=array_metric) experiment_timeseries_datetime_init = self.experiment_timeseries_dict[ experiment][array_metric].init_datetime self.experiment_timeseries_dict[experiment][array_metric].build() except KeyError: # remove array_metric from dict if no records 
returned - self.experiment_timeseries_dict.pop(experiment, None) + self.experiment_timeseries_dict[experiment].pop( + array_metric, None) warnings.warn(f'missing {sensor} records for ' - f'{experiment} experiment') + f'{experiment} experiment: {array_metric}') self.db_name = os.getenv('SCORE_POSTGRESQL_DB_NAME') self.make_figures( sensor, init_datetime=experiment_timeseries_datetime_init) - def make_figures(self, sensor, ncols=3, init_datetime=None): + def make_figures(self, sensor, ncols=3, init_datetime=None, + alpha_foreground=0.9, + alpha_background=0.25): output_dir = os.path.join(self.config_dict['output_path'], f"{sensor}") - + window_size = int(DAYS_TO_SMOOTH * (HOURS_PER_DAY / DA_CYCLE)) # Check if the directory exists, and create it if it doesn't if not os.path.exists(output_dir): os.makedirs(output_dir) @@ -129,8 +173,10 @@ def make_figures(self, sensor, ncols=3, init_datetime=None): for channel_idx, channel_num in enumerate(self.channel_dict[sensor]): if len(sat_set) > 0: + max_yerr=0.1 # temperature (K) fig, axes = plt.subplots(len(sat_set), ncols, sharex=True,sharey=False, - squeeze=False) + squeeze=False, + figsize=(2*ncols*3.74, len(sat_set)*4.53)) title_str0 = f"GSI radiance data analysis fit to observations (O-B) [metrics downloaded from RDB {self.db_name}" if init_datetime: @@ -158,21 +204,12 @@ def make_figures(self, sensor, ncols=3, init_datetime=None): axes[row, 1].set_ylabel('Temperature RMS error (K)') axes[row, 2].set_ylabel('Number of observations tossed') rejection_ratio_ax = axes[row, 2].twinx() - rejection_ratio_ax.set_ylabel('Percentage of observations tossed (%%)') + rejection_ratio_ax.set_ylabel('Percentage of observations tossed (%)') axes[row, 0].axhline(color='black', lw=0.5) - axes[row, 0].set_ylim(YMIN, YMAX) - axes[row, 1].set_ylim(0, 2.*YMAX) - rejection_ratio_ax.set_ylim(0, 100) - - axes[row, 0].set_yticks(np.arange(YMIN, YMAX + 0.1, YMAX/5.)) - axes[row, 1].set_yticks(np.arange(0, 2.*YMAX + 0.1, YMAX/5.)) - 
rejection_ratio_ax.set_yticks(np.arange(0, 100.1, 20)) - - # Set minor ticks at 0.1 intervals - axes[row, 0].set_yticks(np.arange(YMIN, YMAX, 0.1), minor=True) - axes[row, 1].set_yticks(np.arange(0, 2*YMAX, 0.1), minor=True) + rejection_ratio_ax.set_ylim(0, 100) + rejection_ratio_ax.set_yticks(np.arange(0, 100.1, 20)) rejection_ratio_ax.set_yticks(np.arange(0, 100.1, 5), minor=True) # Set ticks on both left and right vertical axes @@ -206,38 +243,73 @@ def make_figures(self, sensor, ncols=3, init_datetime=None): f'{sensor}_nobs_used_GSIstage_1' ].value_dict[f'nobs_used_GSIstage_1' ][sat_sensor] + use_timestamps = timeseries_dict[ + f'{sensor}_use_GSIstage_None'].timestamp_dict[ + 'use_GSIstage_None'][sat_sensor] + use_values = timeseries_dict[ + f'{sensor}_use_GSIstage_None'].value_dict[ + 'use_GSIstage_None'][sat_sensor] yerrs=list() nobs_used_arr=list() + use_flags=list() for time_idx, bias_timestamp in enumerate(bias_timestamps): - if bias_timestamp in std_timestamps and bias_timestamp in nobs_used_timestamps: + if bias_timestamp in std_timestamps: std_time_idx = std_timestamps.index(bias_timestamp) - nobs_used_time_idx = nobs_used_timestamps.index(bias_timestamp) yerr = np.array(std_values)[std_time_idx, channel_idx] - nobs_used_channel = np.array(nobs_used_values)[nobs_used_time_idx, channel_idx] - if yerr and nobs_used_channel: + + if yerr: yerrs.append(yerr) + else: + yerrs.append(np.nan) + else: + yerrs.append(np.nan) + + if bias_timestamp in nobs_used_timestamps: + nobs_used_time_idx = nobs_used_timestamps.index(bias_timestamp) + nobs_used_channel = np.array(nobs_used_values)[nobs_used_time_idx, channel_idx] + + if nobs_used_channel: nobs_used_arr.append(nobs_used_channel) - else: - yerrs.append(0) nobs_used_arr.append(np.nan) else: - yerrs.append(0) nobs_used_arr.append(np.nan) + + if bias_timestamp in use_timestamps: + use_time_idx = use_timestamps.index(bias_timestamp) + use_flag = np.array(use_values)[use_time_idx, channel_idx] - mean_values_plot = 
np.array([np.nan if x is None else float(x) for x in bias_values]) - yerrs_plot = np.array([np.nan if x is None else float(x) for x in yerrs]) - nobs_used_plot = np.array([np.nan if x is None else float(x) for x in nobs_used_arr]) - standard_errs = np.array(yerrs_plot) / np.sqrt(nobs_used_plot) + if use_flag: + use_flags.append(use_flag) + else: + use_flags.append(np.nan) + else: + use_flags.append(np.nan) + + use_flags_plot = np.array([np.nan if x is None else float(x) for x in use_flags]) + mean_values_plot = np.ma.masked_where( + use_flags_plot < 1, + np.array([np.nan if x is None else float(x) for x in bias_values]) + ) + + yerrs_plot = np.ma.masked_where( + use_flags_plot < 1, + np.array([np.nan if x is None else float(x) for x in yerrs]) + ) + mean_values_smooth = pd.Series( + mean_values_plot, + index=bias_timestamps) + #nobs_used_plot = np.array([np.nan if x is None else float(x) for x in nobs_used_arr]) + #standard_errs = np.array(yerrs_plot) / np.sqrt(nobs_used_plot) axes[row, 0].bar( bias_timestamps, - 2.*standard_errs, + 2.*yerrs_plot, width=pd.Timedelta(hours=DA_CYCLE), - bottom=mean_values_plot - standard_errs, + bottom=mean_values_plot - yerrs_plot, color=self.config_dict['color_list'][experiment_idx], - alpha=0.2 + alpha=alpha_background ) ''' @@ -252,10 +324,22 @@ def make_figures(self, sensor, ncols=3, init_datetime=None): ''' axes[row, 0].plot( bias_timestamps, - np.clip(mean_values_plot, YMIN, YMAX), + mean_values_plot, + marker='none', + color=self.config_dict['color_list'][experiment_idx], + alpha=alpha_background, + lw=0.5, + ls='-', + ) + axes[row, 0].plot( + bias_timestamps, + mean_values_smooth.rolling( + window=window_size, + center=True, + win_type='triang').mean(), marker='none', color=self.config_dict['color_list'][experiment_idx], - alpha=0.8, + alpha=alpha_foreground, lw=self.config_dict['lw_list'][experiment_idx], ls=self.config_dict['ls_list'][experiment_idx], label=self.friendly_names_dict[experiment] @@ -285,46 +369,86 @@ def 
make_figures(self, sensor, ncols=3, init_datetime=None): obs_err_var_values = timeseries_dict[ f'{sensor}_variance_GSIstage_1'].value_dict[ 'variance_GSIstage_1'][sat_sensor] + use_timestamps = timeseries_dict[ + f'{sensor}_use_GSIstage_None'].timestamp_dict[ + 'use_GSIstage_None'][sat_sensor] + use_values = timeseries_dict[ + f'{sensor}_use_GSIstage_None'].value_dict[ + 'use_GSIstage_None'][sat_sensor] yerrs2=list() + use_flags=list() for time_idx, rmse_timestamp in enumerate(rmse_timestamps): if rmse_timestamp in obs_err_var_timestamps: obs_err_var_time_idx = obs_err_var_timestamps.index(rmse_timestamp) yerr2 = np.array(obs_err_var_values)[obs_err_var_time_idx, channel_idx] + if yerr2: yerrs2.append(yerr) else: yerrs2.append(0) else: yerrs2.append(0) + + if rmse_timestamp in use_timestamps: + use_time_idx = use_timestamps.index(rmse_timestamp) + use_flag = np.array(use_values)[use_time_idx, channel_idx] + + if use_flag: + use_flags.append(use_flag) + else: + use_flags.append(np.nan) + else: + use_flags.append(np.nan) yerrs_plot = np.sqrt(np.array([np.nan if x is None else float(x) for x in yerrs2])) - rmse_values_plot = np.array([np.nan if x is None else float(x) for x in rmse_values]) + use_flags_plot = np.array([np.nan if x is None else float(x) for x in use_flags]) + max_yerr = np.max(yerrs_plot, initial=max_yerr) + rmse_values_plot = np.ma.masked_where( + use_flags_plot < 1, + np.array([np.nan if x is None else float(x) for x in rmse_values]) + ) + rmse_values_smooth = pd.Series(rmse_values_plot, + index=rmse_timestamps) + ''' axes[row, 1].bar( rmse_timestamps, - 2.*yerrs_plot, + np.ma.masked_where(use_flags_plot < 1, 2.*yerrs_plot), width=pd.Timedelta(hours=DA_CYCLE), bottom=rmse_values_plot - yerrs_plot, color=self.config_dict['color_list'][experiment_idx], - alpha=0.2 + alpha=alpha_background ) - + ''' + axes[row, 1].plot( rmse_timestamps, - np.clip(rmse_values_plot, - 0, 2.*YMAX), + rmse_values_plot, marker='none', 
color=self.config_dict['color_list'][experiment_idx], - alpha=1.0, - lw=self.config_dict['lw_list'][experiment_idx], - ls=self.config_dict['ls_list'][experiment_idx], - label=self.friendly_names_dict[experiment], + alpha=alpha_background, + lw=0.5, + ls='-', #xerr=pd.Timedelta(hours=3), #fmt='none',#,self.config_dict['ls_list'][experiment_idx], #lw=self.config_dict['lw_list'][experiment_idx], #elinewidth=self.config_dict['lw_list'][experiment_idx], ) + axes[row, 1].plot( + rmse_timestamps, + rmse_values_smooth.rolling( + window=window_size, + center=True, + win_type='triang' + ).mean(), + marker='none', + color=self.config_dict['color_list'][experiment_idx], + alpha=alpha_foreground, + lw=self.config_dict['lw_list'][experiment_idx], + ls=self.config_dict['ls_list'][experiment_idx], + label=self.friendly_names_dict[experiment], + ) axes[row,1].legend(loc='upper left') elif stat_label == 'nobs_tossed_GSIstage_1' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): @@ -366,7 +490,7 @@ def make_figures(self, sensor, ncols=3, init_datetime=None): nobs_tossed_plot, width=pd.Timedelta(hours=DA_CYCLE), color=self.config_dict['color_list'][experiment_idx], - alpha=0.2, + alpha=alpha_background, label=f"n tossed ({self.friendly_names_dict[experiment]})" ) @@ -375,14 +499,14 @@ def make_figures(self, sensor, ncols=3, init_datetime=None): 100.*rejection_rate, marker='none', color=self.config_dict['color_list'][experiment_idx], - alpha=0.8, + alpha=alpha_foreground, lw=self.config_dict['lw_list'][experiment_idx], ls=self.config_dict['ls_list'][experiment_idx], - label=f"%% tossed ({self.friendly_names_dict[experiment]})" + label=f"{self.friendly_names_dict[experiment]}" ) - axes[row,2].legend(loc='upper left') - rejection_ratio_ax.legend(loc='') + #axes[row,2].legend(loc='upper left') + rejection_ratio_ax.legend(loc='upper right') axes[row, 0].xaxis.set_major_formatter( mdates.ConciseDateFormatter( @@ -396,6 +520,17 @@ def make_figures(self, sensor, ncols=3, 
init_datetime=None): experiment_idx += 1 + for row, sat_sensor in enumerate(sorted(sat_set)): + # set ylim, ticks + axes[row, 1].set_yticks(np.arange(0, 3.*max_yerr + 0.1, 0.1), minor=True) + axes[row, 0].set_yticks(np.arange(-1.5*max_yerr, 1.5*max_yerr + 0.1, 0.1), minor=True) + + axes[row, 0].set_yticks(np.arange(np.around(-1.5*max_yerr - 1), np.around(1.5*max_yerr + 2), 0.5)) + axes[row, 1].set_yticks(np.arange(0, np.around(3.*max_yerr + 2), 0.5)) + + axes[row, 0].set_ylim(-1.5*max_yerr, 1.5*max_yerr) + axes[row, 1].set_ylim(0, 3.*max_yerr) + plt.savefig(os.path.join(output_dir, f'gsi_radiance_omb_{sensor}_ch{channel_num}.png'), dpi=300) @@ -404,6 +539,9 @@ def make_figures(self, sensor, ncols=3, init_datetime=None): def run_microwave_sounders(sensor_list=['amsua', 'amsub', 'atms', 'ssmis']): prun(sensor_list=sensor_list) +def run_atms(sensor_list=['atms']): + prun(sensor_list=sensor_list) + def run_airs(sensor_list=['airs']): prun(sensor_list=sensor_list) @@ -414,6 +552,77 @@ def run_avhrr(sensor_list = ['avhrr2', 'avhrr3']): prun(sensor_list=sensor_list) def prun(sensor_list=None): + # Initialize MPI + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + size = comm.Get_size() + + # Load global configurations and friendly names + global_config_dict, global_friendly_names_dict = config() + + if sensor_list==None: + sensor_list = list() + for sensor in global_config_dict['sensor_list']: + sensor_list.append(sensor) + + # Rank 0 prepares the data + if rank == 0: + global_data_frame = get_data_frame( + global_config_dict['experiment_list'], + sensor_list, + start_date=global_config_dict['start_date'], + stop_date=global_config_dict['stop_date'] + ) + + # Split the data by sensor (one part per sensor) + data_frame_parts_dict = dict() + for sensor in sensor_list: + data_frame_parts_dict[sensor] = global_data_frame[ + global_data_frame['metric_instrument_name'] == sensor] + + else: + data_frame_parts_dict = None + + # Calculate how many sensors each process should handle 
+ sensors_per_process = len(sensor_list) // size + + # Handle leftover sensors (remaining sensors are distributed to the first few processes) + leftover_sensors = len(sensor_list) % size + + if rank == 0: + for i in range(0, size): + # Calculate the subset of sensors for this rank + start_idx = i * sensors_per_process + min(i, leftover_sensors) + end_idx = start_idx + sensors_per_process + (1 if i < leftover_sensors else 0) + rank_sensors = sensor_list[start_idx:end_idx] + + # Prepare the data for this rank + data_to_send = {sensor: data_frame_parts_dict[sensor] for sensor in rank_sensors} + + if i==0: + local_data_frames = data_to_send + else: + comm.send(data_to_send, dest=i, tag=11+i) + else: + local_data_frames = comm.recv(source=0, tag=11+rank) + + # Each process works on its part of the data + if local_data_frames is not None: + for sensor, data_frame in local_data_frames.items(): + # Only work on data for the specific sensor assigned to the process + ''' + print(f"Rank {rank} is processing sensor: {sensor} and here is " + f"the data frame: {data_frame.metric_instrument_name}") + + ''' + experiment_metrics_timeseries_data = GSIRadianceFit2ObsFig( + data_frame=data_frame, + input_data_frame=True + ) + experiment_metrics_timeseries_data.config_dict['sensor_list'] = [sensor] + experiment_metrics_timeseries_data.build_timeseries() + +def prun0(sensor_list=None): # Initialize MPI communicator comm = MPI.COMM_WORLD rank = comm.Get_rank() # Get the rank of the current process @@ -427,6 +636,16 @@ def prun(sensor_list=None): for sensor in global_config_dict['sensor_list']: sensor_list.append(sensor) + if rank == 0: + global_data_frame = get_data_frame(global_config_dict['experiment_list'], + sensor_list, + start_date=global_config_dict['start_date'], + stop_date=global_config_dict['stop_date']) + else: + global_data_frame = None + + global_data_frame = comm.bcast(global_data_frame, root=0) + # Calculate how many sensors each process should handle sensors_per_process 
= len(sensor_list) // size @@ -439,15 +658,17 @@ def prun(sensor_list=None): # Slice the sensor list for this process local_sensor_list = sensor_list[start_idx:end_idx] - - # Create an instance of GSIRadianceFit2ObsFig for experiment data - experiment_metrics_timeseries_data = GSIRadianceFit2ObsFig() # Each process handles its portion of the sensor list for sensor in local_sensor_list: + experiment_metrics_timeseries_data = GSIRadianceFit2ObsFig( + data_frame=global_data_frame[ + global_data_frame['metric_instrument_name']==sensor], + input_data_frame=True + ) + # Set the current sensor for the experiment experiment_metrics_timeseries_data.config_dict['sensor_list'] = [sensor] - #print(f'{sensor}', rank) # Build time series data for the current sensor @@ -458,7 +679,9 @@ def main(): """ #run_avhrr() #run_tovs() - run_microwave_sounders() + #run_microwave_sounders() + #run_atms() + prun() if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/src/score_plotting/style_lib/full_3x3pg.mplstyle b/src/score_plotting/style_lib/full_3x3pg.mplstyle index fcc8996..a460648 100644 --- a/src/score_plotting/style_lib/full_3x3pg.mplstyle +++ b/src/score_plotting/style_lib/full_3x3pg.mplstyle @@ -3,7 +3,7 @@ ### FONT font.size : 11 font.family : sans-serif -font.sans-serif : Arial +font.sans-serif : Roboto, Arial ### TEXT #text.usetex : True From 7bbbc89a6abba376dbb88516ad80c9308ccc8b13 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Wed, 5 Mar 2025 16:46:32 +0000 Subject: [PATCH 19/44] bug fix for calculation of max obs error --- .../core_scripts/plot_gsi_radiance_fit_to_obs.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index ea9199d..2ff5d8f 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ 
b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -47,7 +47,7 @@ def config(): ], 'color_list': ['#CFB87C', '#0085CA', '#E4002B', 'black'], #'ls_list': [':', '-.', '--', '-'], - 'ls_list': ['-', '-.', '-', '-'], + 'ls_list': ['-', '-', '-', '-'], 'lw_list': [2.5, 2.0, 1.5, 1.0], 'sensor_list': get_instrument_channels().keys(),#['amsua'], 'start_date': '1979-01-01 00:00:00', @@ -386,9 +386,9 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, if yerr2: yerrs2.append(yerr) else: - yerrs2.append(0) + yerrs2.append(np.nan) else: - yerrs2.append(0) + yerrs2.append(np.nan) if rmse_timestamp in use_timestamps: use_time_idx = use_timestamps.index(rmse_timestamp) @@ -403,7 +403,7 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, yerrs_plot = np.sqrt(np.array([np.nan if x is None else float(x) for x in yerrs2])) use_flags_plot = np.array([np.nan if x is None else float(x) for x in use_flags]) - max_yerr = np.max(yerrs_plot, initial=max_yerr) + max_yerr = np.max(np.nan_to_num(yerrs_plot), initial=max_yerr) rmse_values_plot = np.ma.masked_where( use_flags_plot < 1, np.array([np.nan if x is None else float(x) for x in rmse_values]) @@ -679,9 +679,9 @@ def main(): """ #run_avhrr() #run_tovs() - #run_microwave_sounders() + run_microwave_sounders() #run_atms() - prun() + #prun() if __name__ == "__main__": main() From 367416145649898cef5a818d90b3097de6c192fb Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Wed, 5 Mar 2025 13:59:04 -0700 Subject: [PATCH 20/44] interactive plotting with cli options for single satellite, sensor and channel --- .../core_scripts/gsistats_timeseries.py | 4 +- .../plot_gsi_radiance_fit_to_obs.py | 121 ++++++++++++++---- 2 files changed, 96 insertions(+), 29 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index b1c902d..4838f7a 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ 
b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -65,7 +65,7 @@ def get_data_frame(experiment_list, if select_sat_name: request_dict['params']['filters']['sat_meta'] = { - 'sat_name': {'like': sat_name} + 'sat_name': {'exact': sat_name} } db_action_response = score_db_base.handle_request(request_dict) @@ -130,7 +130,7 @@ def build(self, all_channel_max=False, all_channel_mean=False, by_channel=True): set(self.data_frame['metric_name'])) self.timestamp_dict = dict() - self.timelabel_dict = dict() + #self.timelabel_dict = dict() self.value_dict = dict() for i, stat_name in enumerate(self.unique_stat_list[0]): for j, gsi_stage in enumerate(self.unique_stat_list[1]): diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index 2ff5d8f..7cb82cd 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -22,18 +22,10 @@ DA_CYCLE = 6. # hours DAYS_TO_SMOOTH = 8. 
# days -def parse_arguments(): - parser = argparse.ArgumentParser( - description='script to create GSI analysis timeseries figures for ' - 'radiance error monitoring') - parser.add_argument('figure_output_path', type=str, - help='path to where figures will be saved.') - args = parser.parse_args() - - return args +import argparse def config(): - args = parse_arguments() + args = parse_arguments() config_dict = { 'config_path': os.path.join(pathlib.Path(__file__).parent.parent.resolve(), @@ -68,10 +60,42 @@ def config(): "sqrt_bias_GSIstage_1": "RMSE"} return(config_dict, friendly_names_dict) + +def parse_arguments(): + parser = argparse.ArgumentParser( + description='Script to create GSI analysis timeseries figures for ' + 'radiance error monitoring') + + # Make figure_output_path optional (defaults to $HOME) + parser.add_argument('figure_output_path', type=str, nargs='?', + default=pathlib.Path.home(), + help='Path to where figures will be saved') + + # Add an argument for interactive plotting + parser.add_argument('--interactive', action='store_true', + help='Enable interactive plotting. 
If specified, plots will be displayed interactively.') + + # Add optional argument for satellite + parser.add_argument('--satellite', type=str, default='all', + help='Satellite name') + + # Add optional argument for channel + parser.add_argument('--channel', type=int, default='all', + help='Channel number (e.g., 1, 2, 3, ...)') + + # Add optional argument for sensor + parser.add_argument('--sensor', type=str, default='all', + help='Sensor name (e.g., atms, amsua)') + + args = parser.parse_args() + + return args def get_data_frame(experiment_list, sensor_list, start_date='1979-01-01 00:00:00', - stop_date='2026-01-01 00:00:00'): + stop_date='2026-01-01 00:00:00', + select_sat_name=False, + sat_name=None): array_metric_list = list() for sensor in sensor_list: @@ -88,8 +112,8 @@ def get_data_frame(experiment_list, sensor_list, array_metric_list, start_date=start_date, stop_date=stop_date, - select_sat_name=False, - sat_name=None) + select_sat_name=select_sat_name, + sat_name=sat_name) class GSIRadianceFit2ObsFig(object): """ @@ -114,7 +138,7 @@ def __init__(self, data_frame=None, input_data_frame=False): self.config_dict['start_date'], self.config_dict['stop_date']) - def build_timeseries(self): + def build_timeseries(self, interactive_figure=False): for sensor, channel_list in self.channel_dict.items(): if sensor in self.config_dict['sensor_list']: experiment_timeseries_datetime_init=None @@ -152,11 +176,13 @@ def build_timeseries(self): self.db_name = os.getenv('SCORE_POSTGRESQL_DB_NAME') self.make_figures( sensor, - init_datetime=experiment_timeseries_datetime_init) + init_datetime=experiment_timeseries_datetime_init, + interactive=interactive_figure) def make_figures(self, sensor, ncols=3, init_datetime=None, alpha_foreground=0.9, - alpha_background=0.25): + alpha_background=0.25, + interactive=False): output_dir = os.path.join(self.config_dict['output_path'], f"{sensor}") window_size = int(DAYS_TO_SMOOTH * (HOURS_PER_DAY / DA_CYCLE)) # Check if the directory 
exists, and create it if it doesn't @@ -522,16 +548,35 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, for row, sat_sensor in enumerate(sorted(sat_set)): # set ylim, ticks - axes[row, 1].set_yticks(np.arange(0, 3.*max_yerr + 0.1, 0.1), minor=True) - axes[row, 0].set_yticks(np.arange(-1.5*max_yerr, 1.5*max_yerr + 0.1, 0.1), minor=True) - - axes[row, 0].set_yticks(np.arange(np.around(-1.5*max_yerr - 1), np.around(1.5*max_yerr + 2), 0.5)) - axes[row, 1].set_yticks(np.arange(0, np.around(3.*max_yerr + 2), 0.5)) + axes[row, 0].set_yticks( + np.arange(np.around(-1.5 * max_yerr - 0.2, decimals=1), + 1.5 * max_yerr + 0.2, + 0.1), + minor=True + ) + + axes[row, 1].set_yticks( + np.arange(0, 3. * max_yerr + 0.1, 0.1), + minor=True + ) + + axes[row, 0].set_yticks( + np.arange(np.around(-1.5 * max_yerr - 1), + 1.5*max_yerr + 1, + 0.5) + ) + + axes[row, 1].set_yticks( + np.arange(0, 3. * max_yerr + 1, 0.5) + ) axes[row, 0].set_ylim(-1.5*max_yerr, 1.5*max_yerr) axes[row, 1].set_ylim(0, 3.*max_yerr) - plt.savefig(os.path.join(output_dir, + if interactive: + plt.show() + else: + plt.savefig(os.path.join(output_dir, f'gsi_radiance_omb_{sensor}_ch{channel_num}.png'), dpi=300) plt.close() @@ -552,6 +597,8 @@ def run_avhrr(sensor_list = ['avhrr2', 'avhrr3']): prun(sensor_list=sensor_list) def prun(sensor_list=None): + args = parse_arguments() + # Initialize MPI comm = MPI.COMM_WORLD rank = comm.Get_rank() @@ -560,10 +607,16 @@ def prun(sensor_list=None): # Load global configurations and friendly names global_config_dict, global_friendly_names_dict = config() + if args.sensor != 'all': + sensor_list = [args.sensor] + if sensor_list==None: sensor_list = list() for sensor in global_config_dict['sensor_list']: sensor_list.append(sensor) + + if args.satellite != 'all': + select_sat_name = True # Rank 0 prepares the data if rank == 0: @@ -571,8 +624,9 @@ def prun(sensor_list=None): global_config_dict['experiment_list'], sensor_list, 
start_date=global_config_dict['start_date'], - stop_date=global_config_dict['stop_date'] - ) + stop_date=global_config_dict['stop_date'], + select_sat_name = select_sat_name, + sat_name=args.satellite) # Split the data by sensor (one part per sensor) data_frame_parts_dict = dict() @@ -619,10 +673,13 @@ def prun(sensor_list=None): data_frame=data_frame, input_data_frame=True ) + if args.channel != 'all': + experiment_metrics_timeseries_data.channel_dict = {sensor: [args.channel]} experiment_metrics_timeseries_data.config_dict['sensor_list'] = [sensor] - experiment_metrics_timeseries_data.build_timeseries() + experiment_metrics_timeseries_data.build_timeseries(interactive_figure=args.interactive) def prun0(sensor_list=None): + args = parser.parse_args() # Initialize MPI communicator comm = MPI.COMM_WORLD rank = comm.Get_rank() # Get the rank of the current process @@ -631,16 +688,24 @@ def prun0(sensor_list=None): # Load global configurations and friendly names global_config_dict, global_friendly_names_dict = config() + if args.sensor != 'all': + sensor_list = [args.sensor] + if sensor_list==None: sensor_list = list() for sensor in global_config_dict['sensor_list']: sensor_list.append(sensor) + + if args.satellite != 'all': + select_sat_name = True if rank == 0: global_data_frame = get_data_frame(global_config_dict['experiment_list'], sensor_list, start_date=global_config_dict['start_date'], - stop_date=global_config_dict['stop_date']) + stop_date=global_config_dict['stop_date'], + select_sat_name = select_sat_name, + sat_name=args.satellite) else: global_data_frame = None @@ -666,13 +731,15 @@ def prun0(sensor_list=None): global_data_frame['metric_instrument_name']==sensor], input_data_frame=True ) + if args.channel != 'all': + experiment_metrics_timeseries_data.channel_dict = {sensor: [args.channel]} # Set the current sensor for the experiment experiment_metrics_timeseries_data.config_dict['sensor_list'] = [sensor] #print(f'{sensor}', rank) # Build time series data 
for the current sensor - experiment_metrics_timeseries_data.build_timeseries() + experiment_metrics_timeseries_data.build_timeseries(interactive_figure=args.interactive) def main(): """ From 7176228345389394347bc2b28bd06ef75d1b6254 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Thu, 6 Mar 2025 18:59:34 -0700 Subject: [PATCH 21/44] add usage to plot background --- .../plot_gsi_radiance_fit_to_obs.py | 91 ++++++++++++------- 1 file changed, 56 insertions(+), 35 deletions(-) diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index 7cb82cd..82f079f 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -40,7 +40,7 @@ def config(): 'color_list': ['#CFB87C', '#0085CA', '#E4002B', 'black'], #'ls_list': [':', '-.', '--', '-'], 'ls_list': ['-', '-', '-', '-'], - 'lw_list': [2.5, 2.0, 1.5, 1.0], + 'lw_list': [4.0, 3.0, 2.0, 1.0], 'sensor_list': get_instrument_channels().keys(),#['amsua'], 'start_date': '1979-01-01 00:00:00', 'stop_date': '2026-01-01 00:00:00', @@ -181,10 +181,11 @@ def build_timeseries(self, interactive_figure=False): def make_figures(self, sensor, ncols=3, init_datetime=None, alpha_foreground=0.9, - alpha_background=0.25, + alpha_background=0.3, interactive=False): output_dir = os.path.join(self.config_dict['output_path'], f"{sensor}") window_size = int(DAYS_TO_SMOOTH * (HOURS_PER_DAY / DA_CYCLE)) + vbar_width = pd.Timedelta(hours=DA_CYCLE) # Check if the directory exists, and create it if it doesn't if not os.path.exists(output_dir): os.makedirs(output_dir) @@ -261,6 +262,7 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, f'{sensor}_std_GSIstage_1'].value_dict[ 'std_GSIstage_1'][sat_sensor] + nobs_used_timestamps = timeseries_dict[ f'{sensor}_nobs_used_GSIstage_1' ].timestamp_dict['nobs_used_GSIstage_1' @@ -269,6 +271,7 @@ def make_figures(self, sensor, 
ncols=3, init_datetime=None, f'{sensor}_nobs_used_GSIstage_1' ].value_dict[f'nobs_used_GSIstage_1' ][sat_sensor] + use_timestamps = timeseries_dict[ f'{sensor}_use_GSIstage_None'].timestamp_dict[ 'use_GSIstage_None'][sat_sensor] @@ -291,6 +294,7 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, else: yerrs.append(np.nan) + if bias_timestamp in nobs_used_timestamps: nobs_used_time_idx = nobs_used_timestamps.index(bias_timestamp) nobs_used_channel = np.array(nobs_used_values)[nobs_used_time_idx, channel_idx] @@ -314,26 +318,38 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, use_flags.append(np.nan) use_flags_plot = np.array([np.nan if x is None else float(x) for x in use_flags]) - mean_values_plot = np.ma.masked_where( - use_flags_plot < 1, - np.array([np.nan if x is None else float(x) for x in bias_values]) - ) - - yerrs_plot = np.ma.masked_where( - use_flags_plot < 1, - np.array([np.nan if x is None else float(x) for x in yerrs]) - ) + mean_values_plot = np.array([np.nan if x is None else float(x) for x in bias_values]) + yerrs_plot = np.array([np.nan if x is None else float(x) for x in yerrs]) + nobs_used_plot = np.array([np.nan if x is None else float(x) for x in nobs_used_arr]) + standard_errs = np.array(yerrs_plot) / np.sqrt(nobs_used_plot) + mean_values_smooth = pd.Series( mean_values_plot, - index=bias_timestamps) - #nobs_used_plot = np.array([np.nan if x is None else float(x) for x in nobs_used_arr]) - #standard_errs = np.array(yerrs_plot) / np.sqrt(nobs_used_plot) - + index=bias_timestamps).rolling( + window=window_size, + center=True, + win_type='triang').mean() + + standard_errs_times_2 = 2.*standard_errs + yerr_bot = mean_values_plot - standard_errs + + use_flags_plot_mask = np.ma.masked_where( + use_flags_plot < 1, 30.*use_flags_plot) + axes[row, 0].bar( bias_timestamps, - 2.*yerrs_plot, - width=pd.Timedelta(hours=DA_CYCLE), - bottom=mean_values_plot - yerrs_plot, + use_flags_plot_mask, + width=vbar_width, + bottom=-15, + 
color=self.config_dict['color_list'][experiment_idx], + alpha=0.5*alpha_background + ) + + axes[row, 0].bar( + bias_timestamps, + standard_errs_times_2, + width=vbar_width, + bottom=yerr_bot, color=self.config_dict['color_list'][experiment_idx], alpha=alpha_background ) @@ -359,10 +375,7 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, ) axes[row, 0].plot( bias_timestamps, - mean_values_smooth.rolling( - window=window_size, - center=True, - win_type='triang').mean(), + mean_values_smooth, marker='none', color=self.config_dict['color_list'][experiment_idx], alpha=alpha_foreground, @@ -430,12 +443,15 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, yerrs_plot = np.sqrt(np.array([np.nan if x is None else float(x) for x in yerrs2])) use_flags_plot = np.array([np.nan if x is None else float(x) for x in use_flags]) max_yerr = np.max(np.nan_to_num(yerrs_plot), initial=max_yerr) - rmse_values_plot = np.ma.masked_where( - use_flags_plot < 1, - np.array([np.nan if x is None else float(x) for x in rmse_values]) - ) + rmse_values_plot = np.array([np.nan if x is None else float(x) for x in rmse_values]) rmse_values_smooth = pd.Series(rmse_values_plot, - index=rmse_timestamps) + index=rmse_timestamps).rolling( + window=window_size, + center=True, + win_type='triang' + ).mean() + use_flags_plot_mask = np.ma.masked_where( + use_flags_plot < 1, 30.*use_flags_plot) ''' axes[row, 1].bar( @@ -448,6 +464,15 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, ) ''' + axes[row, 1].bar( + rmse_timestamps, + use_flags_plot_mask, + width=vbar_width, + bottom=0, + color=self.config_dict['color_list'][experiment_idx], + alpha=0.5*alpha_background + ) + axes[row, 1].plot( rmse_timestamps, rmse_values_plot, @@ -463,11 +488,7 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, ) axes[row, 1].plot( rmse_timestamps, - rmse_values_smooth.rolling( - window=window_size, - center=True, - win_type='triang' - ).mean(), + rmse_values_smooth, 
marker='none', color=self.config_dict['color_list'][experiment_idx], alpha=alpha_foreground, @@ -508,7 +529,7 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, nobs_tossed_plot = np.array([np.nan if x is None else float(x) for x in nobs_tossed_values]) nobs_used_plot = np.array([np.nan if x is None else float(x) for x in nobs_used_arr]) - rejection_rate = nobs_tossed_plot / ( + rejection_percent = (100.*nobs_tossed_plot) / ( nobs_used_plot + nobs_tossed_plot) axes[row, 2].bar( @@ -522,7 +543,7 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, rejection_ratio_ax.plot( nobs_tossed_timestamps, - 100.*rejection_rate, + rejection_percent, marker='none', color=self.config_dict['color_list'][experiment_idx], alpha=alpha_foreground, @@ -581,7 +602,7 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, dpi=300) plt.close() -def run_microwave_sounders(sensor_list=['amsua', 'amsub', 'atms', 'ssmis']): +def run_microwave_sounders(sensor_list=['amsua', 'amsub', 'atms', 'ssmi', 'ssmis']): prun(sensor_list=sensor_list) def run_atms(sensor_list=['atms']): From e97f64191676aa8246046dc2d6b2d4261427b791 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Fri, 7 Mar 2025 02:03:12 +0000 Subject: [PATCH 22/44] sbatch script for generating GSI radiance fit to obs figures --- .../plot_gsi_radiance_fit_to_obs.sbatch | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch new file mode 100644 index 0000000..1d8f324 --- /dev/null +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch @@ -0,0 +1,17 @@ +#!/bin/bash +#SBATCH --job-name=gsi_radiance_plots # Name of the job +#SBATCH --output=plot_gsi%j.log # Output log file with job ID +#SBATCH --ntasks=12 +#SBATCH --ntasks-per-node=12 # Number of tasks 
(CPUs) per node +#SBATCH --cpus-per-task=2 +#SBATCH --nodes=1 # Number of nodes (adjust based on your requirements) +#SBATCH --time=48:00:00 # Max wall time (adjust as needed) +#SBATCH --partition=batch # Partition to use +#SBATCH --mail-type=ALL # Send an email when the job starts, ends, or fails +#SBATCH --mail-user=Adam.Schneider@noaa.gov # Email address for notifications + +#micromamba activate darr_score_sqlalchemy1_env + +# Run the MPI Python script using srun +mpirun python plot_gsi_radiance_fit_to_obs.py /media/darr/results/figures/brightness_temperature_error_timeseries2 + From 5fe520f4501c684f50bb61b4cc13850c9e283f25 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Fri, 7 Mar 2025 03:39:57 +0000 Subject: [PATCH 23/44] fixes for noninteractive batch jobs --- .../core_scripts/plot_gsi_radiance_fit_to_obs.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index 82f079f..6abdf2d 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -80,7 +80,7 @@ def parse_arguments(): help='Satellite name') # Add optional argument for channel - parser.add_argument('--channel', type=int, default='all', + parser.add_argument('--channel', type=int, default=9999, help='Channel number (e.g., 1, 2, 3, ...)') # Add optional argument for sensor @@ -204,11 +204,11 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, fig, axes = plt.subplots(len(sat_set), ncols, sharex=True,sharey=False, squeeze=False, figsize=(2*ncols*3.74, len(sat_set)*4.53)) - title_str0 = f"GSI radiance data analysis fit to observations (O-B) [metrics downloaded from RDB {self.db_name}" + title_str0 = f"GSI radiance data analysis fit to observations (O-B) [metrics downloaded from {self.db_name}" if init_datetime: init_ctime = init_datetime.ctime() - 
title_str1 = f" {init_ctime}]" + title_str1 = f" {init_ctime} UTC]" else: title_str1 = "]" @@ -638,6 +638,8 @@ def prun(sensor_list=None): if args.satellite != 'all': select_sat_name = True + else: + select_sat_name = False # Rank 0 prepares the data if rank == 0: @@ -694,7 +696,7 @@ def prun(sensor_list=None): data_frame=data_frame, input_data_frame=True ) - if args.channel != 'all': + if args.channel != 9999: experiment_metrics_timeseries_data.channel_dict = {sensor: [args.channel]} experiment_metrics_timeseries_data.config_dict['sensor_list'] = [sensor] experiment_metrics_timeseries_data.build_timeseries(interactive_figure=args.interactive) @@ -752,7 +754,7 @@ def prun0(sensor_list=None): global_data_frame['metric_instrument_name']==sensor], input_data_frame=True ) - if args.channel != 'all': + if args.channel != 9999: experiment_metrics_timeseries_data.channel_dict = {sensor: [args.channel]} # Set the current sensor for the experiment From 1701d4073bec4edd871b0d840a1a2db9bb7c78ca Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Wed, 12 Mar 2025 12:31:37 -0600 Subject: [PATCH 24/44] support for O-A statistics and plotting n_obs_used instead of n_obs_tossed --- .../core_scripts/gsistats_timeseries.py | 2 +- .../plot_gsi_radiance_fit_to_obs.py | 351 +++++++++--------- 2 files changed, 179 insertions(+), 174 deletions(-) diff --git a/src/score_plotting/core_scripts/gsistats_timeseries.py b/src/score_plotting/core_scripts/gsistats_timeseries.py index 4838f7a..7888964 100755 --- a/src/score_plotting/core_scripts/gsistats_timeseries.py +++ b/src/score_plotting/core_scripts/gsistats_timeseries.py @@ -70,7 +70,7 @@ def get_data_frame(experiment_list, db_action_response = score_db_base.handle_request(request_dict) data_frame = db_action_response.details['records'] - + # sort by timestamp, created at data_frame.sort_values(by=['expt_name', 'metric_name', diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py 
b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index 6abdf2d..b92ba1f 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -32,18 +32,18 @@ def config(): 'style_lib'), 'config_file': ['full_3x3pg.mplstyle'], 'output_path': args.figure_output_path, - 'experiment_list': ['GDAS', + 'experiment_list': ['NASA_GEOSIT_GSISTATS', + 'GDAS', 'replay_observer_diagnostic_v1', - 'NASA_GEOSIT_GSISTATS', 'scout_run_v1' ], - 'color_list': ['#CFB87C', '#0085CA', '#E4002B', 'black'], + 'color_list': ['#E4002B', '#CFB87C','#0085CA', 'black'], #'ls_list': [':', '-.', '--', '-'], 'ls_list': ['-', '-', '-', '-'], 'lw_list': [4.0, 3.0, 2.0, 1.0], 'sensor_list': get_instrument_channels().keys(),#['amsua'], - 'start_date': '1979-01-01 00:00:00', - 'stop_date': '2026-01-01 00:00:00', + 'start_date': '2018-01-01 00:00:00', + 'stop_date': '2022-01-01 00:00:00', } ''' @@ -86,6 +86,9 @@ def parse_arguments(): # Add optional argument for sensor parser.add_argument('--sensor', type=str, default='all', help='Sensor name (e.g., atms, amsua)') + + parser.add_argument('--gsi_stage', type=int, default=1, + help='GSI analysis iteration') args = parser.parse_args() @@ -95,16 +98,18 @@ def get_data_frame(experiment_list, sensor_list, start_date='1979-01-01 00:00:00', stop_date='2026-01-01 00:00:00', select_sat_name=False, - sat_name=None): + sat_name=None, + gsi_it=1): + gsi_it = int(gsi_it) array_metric_list = list() for sensor in sensor_list: - array_metric_list.append(f'{sensor}_bias_post_corr_GSIstage_1') - array_metric_list.append(f'{sensor}_std_GSIstage_1') - array_metric_list.append(f'{sensor}_variance_GSIstage_1') - array_metric_list.append(f'{sensor}_sqrt_bias_GSIstage_1') - array_metric_list.append(f'{sensor}_nobs_used_GSIstage_1') - array_metric_list.append(f'{sensor}_nobs_tossed_GSIstage_1') + array_metric_list.append(f'{sensor}_bias_post_corr_GSIstage_{gsi_it}') + 
array_metric_list.append(f'{sensor}_std_GSIstage_{gsi_it}') + array_metric_list.append(f'{sensor}_variance_GSIstage_{gsi_it}') + array_metric_list.append(f'{sensor}_sqrt_bias_GSIstage_{gsi_it}') + array_metric_list.append(f'{sensor}_nobs_used_GSIstage_{gsi_it}') + array_metric_list.append(f'{sensor}_nobs_tossed_GSIstage_{gsi_it}') array_metric_list.append(f'{sensor}_use_GSIstage_None') return gsistats_timeseries.get_data_frame( @@ -118,9 +123,11 @@ def get_data_frame(experiment_list, sensor_list, class GSIRadianceFit2ObsFig(object): """ """ - def __init__(self, data_frame=None, input_data_frame=False): + def __init__(self, data_frame=None, input_data_frame=False, + gsi_it=1): """ """ + self.gsi_it = int(gsi_it) self.config_dict, self.friendly_names_dict = config() self.channel_dict = get_instrument_channels() self.experiment_list = self.config_dict['experiment_list'] @@ -136,18 +143,19 @@ def __init__(self, data_frame=None, input_data_frame=False): else: self.data_frame = get_data_frame(self.experiment_list, self.config_dict['start_date'], - self.config_dict['stop_date']) + self.config_dict['stop_date'], + gsi_it=self.gsi_it) def build_timeseries(self, interactive_figure=False): for sensor, channel_list in self.channel_dict.items(): if sensor in self.config_dict['sensor_list']: experiment_timeseries_datetime_init=None - array_metric_list = [f'{sensor}_bias_post_corr_GSIstage_1', - f'{sensor}_std_GSIstage_1', - f'{sensor}_variance_GSIstage_1', - f'{sensor}_sqrt_bias_GSIstage_1', - f'{sensor}_nobs_used_GSIstage_1', - f'{sensor}_nobs_tossed_GSIstage_1', + array_metric_list = [f'{sensor}_bias_post_corr_GSIstage_{self.gsi_it}', + f'{sensor}_std_GSIstage_{self.gsi_it}', + f'{sensor}_variance_GSIstage_{self.gsi_it}', + f'{sensor}_sqrt_bias_GSIstage_{self.gsi_it}', + f'{sensor}_nobs_used_GSIstage_{self.gsi_it}', + f'{sensor}_nobs_tossed_GSIstage_{self.gsi_it}', f'{sensor}_use_GSIstage_None' ] @@ -184,7 +192,7 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, 
alpha_background=0.3, interactive=False): output_dir = os.path.join(self.config_dict['output_path'], f"{sensor}") - window_size = int(DAYS_TO_SMOOTH * (HOURS_PER_DAY / DA_CYCLE)) + window_size = pd.Timedelta(hours=24.*DAYS_TO_SMOOTH) vbar_width = pd.Timedelta(hours=DA_CYCLE) # Check if the directory exists, and create it if it doesn't if not os.path.exists(output_dir): @@ -200,11 +208,15 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, for channel_idx, channel_num in enumerate(self.channel_dict[sensor]): if len(sat_set) > 0: - max_yerr=0.1 # temperature (K) + max_yerr=0.5 # temperature (K) fig, axes = plt.subplots(len(sat_set), ncols, sharex=True,sharey=False, squeeze=False, figsize=(2*ncols*3.74, len(sat_set)*4.53)) - title_str0 = f"GSI radiance data analysis fit to observations (O-B) [metrics downloaded from {self.db_name}" + + if self.gsi_it == 1: + title_str0 = f"GSI radiance data analysis fit to observations (O-B) [metrics downloaded from {self.db_name}" + elif self.gsi_it >= 2: + title_str0 = f"GSI radiance data analysis fit to observations (O-A) [metrics downloaded from {self.db_name}" if init_datetime: init_ctime = init_datetime.ctime() @@ -229,15 +241,15 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, # vertical axes labels axes[row, 0].set_ylabel('Temperature mean error (K)') axes[row, 1].set_ylabel('Temperature RMS error (K)') - axes[row, 2].set_ylabel('Number of observations tossed') - rejection_ratio_ax = axes[row, 2].twinx() - rejection_ratio_ax.set_ylabel('Percentage of observations tossed (%)') + axes[row, 2].set_ylabel('Number of observations used') + #rejection_ratio_ax = axes[row, 2].twinx() + #rejection_ratio_ax.set_ylabel('Percentage of observations tossed (%)') axes[row, 0].axhline(color='black', lw=0.5) - rejection_ratio_ax.set_ylim(0, 100) - rejection_ratio_ax.set_yticks(np.arange(0, 100.1, 20)) - rejection_ratio_ax.set_yticks(np.arange(0, 100.1, 5), minor=True) + #rejection_ratio_ax.set_ylim(0, 100) + 
#rejection_ratio_ax.set_yticks(np.arange(0, 100.1, 20)) + #rejection_ratio_ax.set_yticks(np.arange(0, 100.1, 5), minor=True) # Set ticks on both left and right vertical axes axes[row, 0].tick_params(axis='y', which='both', left=True, right=True) @@ -250,26 +262,25 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, for experiment, timeseries_dict in self.experiment_timeseries_dict.items(): for full_stat_name, timeseries_data in timeseries_dict.items(): for stat_label, value_dict in timeseries_data.value_dict.items(): - if stat_label == 'bias_post_corr_GSIstage_1' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): + if stat_label == f'bias_post_corr_GSIstage_{self.gsi_it}' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): """ mean error plot """ bias_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] bias_values = np.array(value_dict[sat_sensor])[:,channel_idx] std_timestamps = timeseries_dict[ - f'{sensor}_std_GSIstage_1'].timestamp_dict[ - 'std_GSIstage_1'][sat_sensor] + f'{sensor}_std_GSIstage_{self.gsi_it}'].timestamp_dict[ + f'std_GSIstage_{self.gsi_it}'][sat_sensor] std_values = timeseries_dict[ - f'{sensor}_std_GSIstage_1'].value_dict[ - 'std_GSIstage_1'][sat_sensor] + f'{sensor}_std_GSIstage_{self.gsi_it}'].value_dict[ + f'std_GSIstage_{self.gsi_it}'][sat_sensor] - nobs_used_timestamps = timeseries_dict[ - f'{sensor}_nobs_used_GSIstage_1' - ].timestamp_dict['nobs_used_GSIstage_1' + f'{sensor}_nobs_used_GSIstage_{self.gsi_it}' + ].timestamp_dict[f'nobs_used_GSIstage_{self.gsi_it}' ][sat_sensor] nobs_used_values = timeseries_dict[ - f'{sensor}_nobs_used_GSIstage_1' - ].value_dict[f'nobs_used_GSIstage_1' + f'{sensor}_nobs_used_GSIstage_{self.gsi_it}' + ].value_dict[f'nobs_used_GSIstage_{self.gsi_it}' ][sat_sensor] use_timestamps = timeseries_dict[ @@ -287,19 +298,18 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, std_time_idx = std_timestamps.index(bias_timestamp) yerr = 
np.array(std_values)[std_time_idx, channel_idx] - if yerr: + if yerr is not None: yerrs.append(yerr) else: yerrs.append(np.nan) else: yerrs.append(np.nan) - if bias_timestamp in nobs_used_timestamps: nobs_used_time_idx = nobs_used_timestamps.index(bias_timestamp) nobs_used_channel = np.array(nobs_used_values)[nobs_used_time_idx, channel_idx] - if nobs_used_channel: + if nobs_used_channel is not None: nobs_used_arr.append(nobs_used_channel) else: nobs_used_arr.append(np.nan) @@ -310,32 +320,34 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, use_time_idx = use_timestamps.index(bias_timestamp) use_flag = np.array(use_values)[use_time_idx, channel_idx] - if use_flag: + if use_flag is not None: use_flags.append(use_flag) else: use_flags.append(np.nan) else: use_flags.append(np.nan) - use_flags_plot = np.array([np.nan if x is None else float(x) for x in use_flags]) + #use_flags_plot = np.array([np.nan if x is None else float(x) for x in use_flags]) mean_values_plot = np.array([np.nan if x is None else float(x) for x in bias_values]) - yerrs_plot = np.array([np.nan if x is None else float(x) for x in yerrs]) - nobs_used_plot = np.array([np.nan if x is None else float(x) for x in nobs_used_arr]) - standard_errs = np.array(yerrs_plot) / np.sqrt(nobs_used_plot) + #yerrs_plot = np.array([np.nan if x is None else float(x) for x in yerrs]) + #nobs_used_plot = np.array([np.nan if x is None else float(x) for x in nobs_used_arr]) + standard_errs = np.array(yerrs) / np.sqrt(nobs_used_arr) mean_values_smooth = pd.Series( - mean_values_plot, + np.ma.masked_where( + np.array(use_flags) < 1, + mean_values_plot), index=bias_timestamps).rolling( window=window_size, + min_periods=1, center=True, - win_type='triang').mean() + #win_type='triang' + ).mean() standard_errs_times_2 = 2.*standard_errs yerr_bot = mean_values_plot - standard_errs - use_flags_plot_mask = np.ma.masked_where( - use_flags_plot < 1, 30.*use_flags_plot) - + ''' axes[row, 0].bar( bias_timestamps, 
use_flags_plot_mask, @@ -344,7 +356,7 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, color=self.config_dict['color_list'][experiment_idx], alpha=0.5*alpha_background ) - + ''' axes[row, 0].bar( bias_timestamps, standard_errs_times_2, @@ -395,19 +407,19 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, alpha = 1.0, ) ''' - axes[row,0].legend(loc='upper left') + axes[row,0].legend(loc='lower right') - elif stat_label == 'sqrt_bias_GSIstage_1' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): + elif stat_label == f'sqrt_bias_GSIstage_{self.gsi_it}' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): """RMS error plot """ rmse_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] rmse_values = np.array(value_dict[sat_sensor])[:,channel_idx] obs_err_var_timestamps = timeseries_dict[ - f'{sensor}_variance_GSIstage_1'].timestamp_dict[ - 'variance_GSIstage_1'][sat_sensor] + f'{sensor}_variance_GSIstage_{self.gsi_it}'].timestamp_dict[ + f'variance_GSIstage_{self.gsi_it}'][sat_sensor] obs_err_var_values = timeseries_dict[ - f'{sensor}_variance_GSIstage_1'].value_dict[ - 'variance_GSIstage_1'][sat_sensor] + f'{sensor}_variance_GSIstage_{self.gsi_it}'].value_dict[ + f'variance_GSIstage_{self.gsi_it}'][sat_sensor] use_timestamps = timeseries_dict[ f'{sensor}_use_GSIstage_None'].timestamp_dict[ 'use_GSIstage_None'][sat_sensor] @@ -422,8 +434,8 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, obs_err_var_time_idx = obs_err_var_timestamps.index(rmse_timestamp) yerr2 = np.array(obs_err_var_values)[obs_err_var_time_idx, channel_idx] - if yerr2: - yerrs2.append(yerr) + if yerr2 is not None: + yerrs2.append(yerr2) else: yerrs2.append(np.nan) else: @@ -433,25 +445,27 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, use_time_idx = use_timestamps.index(rmse_timestamp) use_flag = np.array(use_values)[use_time_idx, channel_idx] - if use_flag: + if use_flag is not None: 
use_flags.append(use_flag) else: use_flags.append(np.nan) else: use_flags.append(np.nan) - yerrs_plot = np.sqrt(np.array([np.nan if x is None else float(x) for x in yerrs2])) - use_flags_plot = np.array([np.nan if x is None else float(x) for x in use_flags]) - max_yerr = np.max(np.nan_to_num(yerrs_plot), initial=max_yerr) + #yerrs_plot = np.sqrt(np.array([np.nan if x is None else float(x) for x in yerrs2])) + #use_flags_plot = np.array([np.nan if x is None else float(x) for x in use_flags]) + max_yerr = np.max(np.nan_to_num(np.sqrt(yerrs2)), initial=max_yerr) rmse_values_plot = np.array([np.nan if x is None else float(x) for x in rmse_values]) - rmse_values_smooth = pd.Series(rmse_values_plot, - index=rmse_timestamps).rolling( - window=window_size, - center=True, - win_type='triang' - ).mean() - use_flags_plot_mask = np.ma.masked_where( - use_flags_plot < 1, 30.*use_flags_plot) + rmse_values_smooth = pd.Series( + np.ma.masked_where( + np.array(use_flags) < 1, + rmse_values_plot), + index=rmse_timestamps).rolling( + window=window_size, + min_periods=1, + center=True, + #win_type='triang' + ).mean() ''' axes[row, 1].bar( @@ -462,7 +476,7 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, color=self.config_dict['color_list'][experiment_idx], alpha=alpha_background ) - ''' + axes[row, 1].bar( rmse_timestamps, @@ -472,7 +486,7 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, color=self.config_dict['color_list'][experiment_idx], alpha=0.5*alpha_background ) - + ''' axes[row, 1].plot( rmse_timestamps, rmse_values_plot, @@ -496,42 +510,66 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, ls=self.config_dict['ls_list'][experiment_idx], label=self.friendly_names_dict[experiment], ) - axes[row,1].legend(loc='upper left') + axes[row,1].legend(loc='lower right') - elif stat_label == 'nobs_tossed_GSIstage_1' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): + elif stat_label == f'nobs_used_GSIstage_{self.gsi_it}' and 
sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): """ nobs tossed and rejection ratio plot """ - nobs_tossed_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] - nobs_tossed_values = np.array(value_dict[sat_sensor])[:,channel_idx] + nobs_used_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] + nobs_used_values = np.array(value_dict[sat_sensor])[:,channel_idx] + ''' nobs_used_timestamps = timeseries_dict[ - f'{sensor}_nobs_used_GSIstage_1' - ].timestamp_dict['nobs_used_GSIstage_1' + f'{sensor}_nobs_used_GSIstage_{self.gsi_it}' + ].timestamp_dict[f'nobs_used_GSIstage_{self.gsi_it}' ][sat_sensor] nobs_used_values = timeseries_dict[ - f'{sensor}_nobs_used_GSIstage_1' - ].value_dict[f'nobs_used_GSIstage_1' + f'{sensor}_nobs_used_GSIstage_{self.gsi_it}' + ].value_dict[f'nobs_used_GSIstage_{self.gsi_it}' ][sat_sensor] - - nobs_used_arr=list() - nobs_tossed_arr = list() - for time_idx, nobs_tossed_timestamp in enumerate(nobs_tossed_timestamps): - if nobs_tossed_timestamp in nobs_used_timestamps: - nobs_used_time_idx = nobs_used_timestamps.index(nobs_tossed_timestamp) - nobs_used_channel = np.array(nobs_used_values)[nobs_used_time_idx, channel_idx] - if nobs_used_channel: - nobs_used_arr.append(nobs_used_channel) - + ''' + use_timestamps = timeseries_dict[ + f'{sensor}_use_GSIstage_None'].timestamp_dict[ + 'use_GSIstage_None'][sat_sensor] + use_values = timeseries_dict[ + f'{sensor}_use_GSIstage_None'].value_dict[ + 'use_GSIstage_None'][sat_sensor] + + #nobs_used_arr=list() + #nobs_tossed_arr = list() + use_flags=list() + + for time_idx, nobs_use_timestamp in enumerate(nobs_used_timestamps): + if nobs_use_timestamp in use_timestamps: + use_time_idx = use_timestamps.index(nobs_use_timestamp) + use_flag = np.array(use_values)[use_time_idx, channel_idx] + + if use_flag is not None: + use_flags.append(use_flag) else: - nobs_used_arr.append(np.nan) + use_flags.append(np.nan) else: - nobs_used_arr.append(np.nan) - - 
nobs_tossed_plot = np.array([np.nan if x is None else float(x) for x in nobs_tossed_values]) - nobs_used_plot = np.array([np.nan if x is None else float(x) for x in nobs_used_arr]) + use_flags.append(np.nan) + + + #nobs_tossed_plot = np.array([np.nan if x is None else float(x) for x in nobs_tossed_values]) + nobs_used_plot = np.array([np.nan if x is None else float(x) for x in nobs_used_values]) + ''' rejection_percent = (100.*nobs_tossed_plot) / ( nobs_used_plot + nobs_tossed_plot) - + ''' + + nobs_used_smooth = pd.Series( + np.ma.masked_where( + np.array(use_flags) < 1, + nobs_used_plot), + index=nobs_used_timestamps).rolling( + window=window_size, + min_periods=1, + center=True, + #win_type='triang' + ).mean() + ''' axes[row, 2].bar( nobs_tossed_timestamps, nobs_tossed_plot, @@ -551,9 +589,29 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, ls=self.config_dict['ls_list'][experiment_idx], label=f"{self.friendly_names_dict[experiment]}" ) - - #axes[row,2].legend(loc='upper left') - rejection_ratio_ax.legend(loc='upper right') + ''' + axes[row, 2].plot( + nobs_used_timestamps, + nobs_used_plot, + marker='none', + color=self.config_dict['color_list'][experiment_idx], + alpha=alpha_background, + lw=0.5, + ls='-', + ) + + axes[row, 2].plot( + nobs_used_timestamps, + nobs_used_smooth, + marker='none', + color=self.config_dict['color_list'][experiment_idx], + alpha=alpha_foreground, + lw=self.config_dict['lw_list'][experiment_idx], + ls=self.config_dict['ls_list'][experiment_idx], + label=f"{self.friendly_names_dict[experiment]}" + ) + axes[row,2].legend(loc='lower right') + #rejection_ratio_ax.legend(loc='upper right') axes[row, 0].xaxis.set_major_formatter( mdates.ConciseDateFormatter( @@ -591,19 +649,26 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, np.arange(0, 3. 
* max_yerr + 1, 0.5) ) - axes[row, 0].set_ylim(-1.5*max_yerr, 1.5*max_yerr) - axes[row, 1].set_ylim(0, 3.*max_yerr) + axes[row, 0].set_ylim(-1.0*max_yerr, 1.0*max_yerr) + axes[row, 1].set_ylim(0, 2.*max_yerr) if interactive: plt.show() else: - plt.savefig(os.path.join(output_dir, - f'gsi_radiance_omb_{sensor}_ch{channel_num}.png'), - dpi=300) + if self.gsi_it ==1: + fig_title=f'gsi_radiance_omb_{sensor}_ch{channel_num}.png' + elif self.gsi_it >=2: + fig_title=f'gsi_radiance_oma_{sensor}_ch{channel_num}.png' + plt.savefig(os.path.join(output_dir, fig_title), dpi=300) plt.close() def run_microwave_sounders(sensor_list=['amsua', 'amsub', 'atms', 'ssmi', 'ssmis']): prun(sensor_list=sensor_list) + +def run_microwave_sounders2(sensor_list=['amsua', 'amsub', 'atms', 'ssmi', + 'ssmis','hirs2', 'hirs3', 'hirs4', + 'ssu', 'msu']): + prun(sensor_list=sensor_list) def run_atms(sensor_list=['atms']): prun(sensor_list=sensor_list) @@ -619,6 +684,7 @@ def run_avhrr(sensor_list = ['avhrr2', 'avhrr3']): def prun(sensor_list=None): args = parse_arguments() + gsi_it = args.gsi_stage # Initialize MPI comm = MPI.COMM_WORLD @@ -649,7 +715,8 @@ def prun(sensor_list=None): start_date=global_config_dict['start_date'], stop_date=global_config_dict['stop_date'], select_sat_name = select_sat_name, - sat_name=args.satellite) + sat_name=args.satellite, + gsi_it=gsi_it) # Split the data by sensor (one part per sensor) data_frame_parts_dict = dict() @@ -694,82 +761,20 @@ def prun(sensor_list=None): ''' experiment_metrics_timeseries_data = GSIRadianceFit2ObsFig( data_frame=data_frame, - input_data_frame=True + input_data_frame=True, + gsi_it=gsi_it ) if args.channel != 9999: experiment_metrics_timeseries_data.channel_dict = {sensor: [args.channel]} experiment_metrics_timeseries_data.config_dict['sensor_list'] = [sensor] experiment_metrics_timeseries_data.build_timeseries(interactive_figure=args.interactive) -def prun0(sensor_list=None): - args = parser.parse_args() - # Initialize MPI communicator 
- comm = MPI.COMM_WORLD - rank = comm.Get_rank() # Get the rank of the current process - size = comm.Get_size() # Get the total number of processes - - # Load global configurations and friendly names - global_config_dict, global_friendly_names_dict = config() - - if args.sensor != 'all': - sensor_list = [args.sensor] - - if sensor_list==None: - sensor_list = list() - for sensor in global_config_dict['sensor_list']: - sensor_list.append(sensor) - - if args.satellite != 'all': - select_sat_name = True - - if rank == 0: - global_data_frame = get_data_frame(global_config_dict['experiment_list'], - sensor_list, - start_date=global_config_dict['start_date'], - stop_date=global_config_dict['stop_date'], - select_sat_name = select_sat_name, - sat_name=args.satellite) - else: - global_data_frame = None - - global_data_frame = comm.bcast(global_data_frame, root=0) - - # Calculate how many sensors each process should handle - sensors_per_process = len(sensor_list) // size - - # Handle leftover sensors (remaining sensors are distributed to the first few processes) - leftover_sensors = len(sensor_list) % size - - # Calculate the start and end indices for each process - start_idx = rank * sensors_per_process + min(rank, leftover_sensors) # Adjust start index for extra sensors - end_idx = start_idx + sensors_per_process + (1 if rank < leftover_sensors else 0) # Adjust end index for extra sensors - - # Slice the sensor list for this process - local_sensor_list = sensor_list[start_idx:end_idx] - - # Each process handles its portion of the sensor list - for sensor in local_sensor_list: - experiment_metrics_timeseries_data = GSIRadianceFit2ObsFig( - data_frame=global_data_frame[ - global_data_frame['metric_instrument_name']==sensor], - input_data_frame=True - ) - if args.channel != 9999: - experiment_metrics_timeseries_data.channel_dict = {sensor: [args.channel]} - - # Set the current sensor for the experiment - experiment_metrics_timeseries_data.config_dict['sensor_list'] = [sensor] 
- #print(f'{sensor}', rank) - - # Build time series data for the current sensor - experiment_metrics_timeseries_data.build_timeseries(interactive_figure=args.interactive) - def main(): """ """ #run_avhrr() #run_tovs() - run_microwave_sounders() + run_microwave_sounders2() #run_atms() #prun() From 54ab703a83abdba401332dac231a2e4dfd1b62f6 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Thu, 13 Mar 2025 21:46:47 -0600 Subject: [PATCH 25/44] mean bias standard error plot update --- .../core_scripts/plot_gsi_radiance_fit_to_obs.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index b92ba1f..ea4e828 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -41,7 +41,8 @@ def config(): #'ls_list': [':', '-.', '--', '-'], 'ls_list': ['-', '-', '-', '-'], 'lw_list': [4.0, 3.0, 2.0, 1.0], - 'sensor_list': get_instrument_channels().keys(),#['amsua'], + 'sensor_list': get_instrument_channels().keys(), + #'sensor_list':['amsua'], 'start_date': '2018-01-01 00:00:00', 'stop_date': '2022-01-01 00:00:00', } @@ -344,8 +345,9 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, #win_type='triang' ).mean() - standard_errs_times_2 = 2.*standard_errs + #standard_errs_times_2 = 2.*standard_errs yerr_bot = mean_values_plot - standard_errs + yerr_top = mean_values_plot + standard_errs ''' axes[row, 0].bar( @@ -357,9 +359,10 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, alpha=0.5*alpha_background ) ''' - axes[row, 0].bar( + axes[row, 0].fill_between( bias_timestamps, - standard_errs_times_2, + yerr_top, + y2=yerr_bot, width=vbar_width, bottom=yerr_bot, color=self.config_dict['color_list'][experiment_idx], From 0cda8f3df46916e304ef5a541769d745a279baf7 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Fri, 21 
Mar 2025 16:46:35 -0600 Subject: [PATCH 26/44] GSI analysis fit to obs figure enhancements with dark theme support --- .../plot_gsi_radiance_fit_to_obs.py | 750 ++++++++++-------- 1 file changed, 401 insertions(+), 349 deletions(-) diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index ea4e828..041053c 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -7,6 +7,7 @@ import pathlib import warnings import argparse +from datetime import datetime import numpy as np from matplotlib import pyplot as plt @@ -19,32 +20,58 @@ import satellite_names HOURS_PER_DAY = 24. # hours -DA_CYCLE = 6. # hours -DAYS_TO_SMOOTH = 8. # days import argparse def config(): - args = parse_arguments() + """Add experiment name entries to experiment_list and + experiment_plot_dict. The order of experiment_list is used + to determine the order of plotting. + """ + args = parse_arguments() + if args.dark_theme: + mpl_style_sheet = 'darrmonitor.mplstyle' + else: + mpl_style_sheet = 'full_3x3pg.mplstyle' + config_dict = { 'config_path': os.path.join(pathlib.Path(__file__).parent.parent.resolve(), 'style_lib'), - 'config_file': ['full_3x3pg.mplstyle'], + 'config_file': [mpl_style_sheet], 'output_path': args.figure_output_path, 'experiment_list': ['NASA_GEOSIT_GSISTATS', 'GDAS', 'replay_observer_diagnostic_v1', 'scout_run_v1' ], - 'color_list': ['#E4002B', '#CFB87C','#0085CA', 'black'], - #'ls_list': [':', '-.', '--', '-'], - 'ls_list': ['-', '-', '-', '-'], - 'lw_list': [4.0, 3.0, 2.0, 1.0], + + 'experiment_plot_dict': { + 'NASA_GEOSIT_GSISTATS' : + {'color' : '#E4002B', + 'ls': '-', + 'lw': 1.5 + }, + 'GDAS' : { + 'color' : '#003087', + 'ls': '-', + 'lw': 1.25 + }, + 'replay_observer_diagnostic_v1' : { + 'color' : '#0085CA', + 'ls': '-', + 'lw': 1. 
+ }, + 'scout_run_v1' : { + 'color' : 'black', + 'ls': '-', + 'lw': 0.75 + } + + }, 'sensor_list': get_instrument_channels().keys(), - #'sensor_list':['amsua'], - 'start_date': '2018-01-01 00:00:00', - 'stop_date': '2022-01-01 00:00:00', + 'start_date': '2018-10-01 00:00:00', + 'stop_date': '2019-09-30 00:00:00', } ''' @@ -54,7 +81,7 @@ def config(): friendly_names_dict={"scout_run_v1": "scout run (3DVar)", "NASA_GEOSIT_GSISTATS": "GEOS-IT", "GDAS": "GDAS", - "replay_observer_diagnostic_v1": "UFS Replay", + "replay_observer_diagnostic_v1": "UFS-replay", "std_GSIstage_1": "STD", "variance_GSIstage_1": "obs error variance", "bias_post_corr_GSIstage_1": "ME", @@ -90,6 +117,20 @@ def parse_arguments(): parser.add_argument('--gsi_stage', type=int, default=1, help='GSI analysis iteration') + + # Add DA cycle as an argument (optional, default to 6.0) + parser.add_argument('--da_cycle', type=float, default=6., + help='The DA cycle duration in hours (default: 6.0)') + + # Add days to smooth as an argument (optional, default to 8.0) + parser.add_argument('--days_to_smooth', type=float, default=8., + help='Number of days to smooth (default: 8.0)') + + parser.add_argument( + '--dark_theme', + action='store_true', # If this argument is provided, dark_theme will be True + help="Enable dark theme (default is False)" + ) args = parser.parse_args() @@ -132,12 +173,6 @@ def __init__(self, data_frame=None, input_data_frame=False, self.config_dict, self.friendly_names_dict = config() self.channel_dict = get_instrument_channels() self.experiment_list = self.config_dict['experiment_list'] - - if self.config_dict['config_path'] and self.config_dict['config_file']: - for style_file in self.config_dict['config_file']: - style_file_path = os.path.join(self.config_dict['config_path'], - style_file) - plt.style.use(style_file_path) if input_data_frame: self.data_frame = data_frame @@ -147,7 +182,55 @@ def __init__(self, data_frame=None, input_data_frame=False, self.config_dict['stop_date'], 
gsi_it=self.gsi_it) - def build_timeseries(self, interactive_figure=False): + def config_figure_params(self, days_to_smooth=1.): + if self.config_dict['config_path'] and self.config_dict['config_file']: + for style_file in self.config_dict['config_file']: + style_file_path = os.path.join(self.config_dict['config_path'], + style_file) + plt.style.use(style_file_path) + + if self.dark_theme: + self.default_plot_color = '#CFB87C' + self.fill_color = '#565A5C' + if 'GDAS' in self.config_dict['experiment_plot_dict'].keys(): + self.config_dict['experiment_plot_dict']['GDAS']['color'] = 'white' + + for expt_name in self.config_dict['experiment_plot_dict'].keys(): + if self.config_dict['experiment_plot_dict'][expt_name]['color'] == 'black': + self.config_dict['experiment_plot_dict'][expt_name]['color'] = self.default_plot_color + else: + self.default_plot_color = 'black' + self.fill_color = '#A2A4A3' + for expt_name in self.config_dict['experiment_plot_dict'].keys(): + if self.config_dict['experiment_plot_dict'][expt_name]['color'] == '#CFB87C': + self.config_dict['experiment_plot_dict'][expt_name]['color'] = self.default_plot_color + + self.window_size = pd.Timedelta(hours=HOURS_PER_DAY * days_to_smooth) + self.min_periods = int( + np.around(days_to_smooth * (HOURS_PER_DAY / self.da_cycle)) + ) + + self.time_domain = pd.Series( + data = np.nan, + index = pd.date_range( + start = datetime.strptime( + self.config_dict['start_date'], '%Y-%m-%d %H:%M:%S' + ), + end = datetime.strptime( + self.config_dict['stop_date'], '%Y-%m-%d %H:%M:%S' + ), + freq = pd.Timedelta(hours = self.da_cycle) + ) + ) + + def build_timeseries(self, interactive_figure=False, + da_cycle = 6., # hours + days_to_smooth = 1., # days + dark_theme=False): + self.dark_theme = dark_theme + self.da_cycle = da_cycle + self.config_figure_params(days_to_smooth=days_to_smooth) + for sensor, channel_list in self.channel_dict.items(): if sensor in self.config_dict['sensor_list']: 
experiment_timeseries_datetime_init=None @@ -187,14 +270,16 @@ def build_timeseries(self, interactive_figure=False): sensor, init_datetime=experiment_timeseries_datetime_init, interactive=interactive_figure) - + def make_figures(self, sensor, ncols=3, init_datetime=None, alpha_foreground=0.9, - alpha_background=0.3, + alpha_background=0.5, interactive=False): output_dir = os.path.join(self.config_dict['output_path'], f"{sensor}") - window_size = pd.Timedelta(hours=24.*DAYS_TO_SMOOTH) - vbar_width = pd.Timedelta(hours=DA_CYCLE) + locator = mdates.AutoDateLocator(minticks=5, maxticks=10) + formatter = mdates.ConciseDateFormatter(locator) + month_locator = mdates.MonthLocator(interval=1) + # Check if the directory exists, and create it if it doesn't if not os.path.exists(output_dir): os.makedirs(output_dir) @@ -221,14 +306,15 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, if init_datetime: init_ctime = init_datetime.ctime() - title_str1 = f" {init_ctime} UTC]" + title_str1 = f" {init_ctime}]" else: title_str1 = "]" fig.suptitle(f"{title_str0}{title_str1}") - #axes[-1, 0].set_xlabel = 'cycle date (Gregorian)' - #axes[-1, 1].set_xlabel = 'cycle date (Gregorian)' + #axes[-1, 0].set_xlabel = 'Cycle date (Gregorian)' + #axes[-1, 1].set_xlabel = 'Cycle date (Gregorian)' + #axes[-1, 2].set_xlabel = 'Cycle date (Gregorian)' for row, sat_sensor in enumerate(sorted(sat_set)): sat_short_name = sat_sensor.split('_')[:-1][0] @@ -240,13 +326,15 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, axes[row, 2].set_title(f"{sat_label} {sensor} channel {channel_num}") # vertical axes labels - axes[row, 0].set_ylabel('Temperature mean error (K)') - axes[row, 1].set_ylabel('Temperature RMS error (K)') + axes[row, 0].set_ylabel('Brightness temperature mean error (K)') + axes[row, 1].set_ylabel('Brightness temperature RMS error (K)') axes[row, 2].set_ylabel('Number of observations used') #rejection_ratio_ax = axes[row, 2].twinx() 
#rejection_ratio_ax.set_ylabel('Percentage of observations tossed (%)') - - axes[row, 0].axhline(color='black', lw=0.5) + if self.dark_theme: + axes[row, 0].axhline(color='#A2A4A3', lw=0.5) + else: + axes[row, 0].axhline(color='black', lw=0.5) #rejection_ratio_ax.set_ylim(0, 100) #rejection_ratio_ax.set_yticks(np.arange(0, 100.1, 20)) @@ -255,406 +343,367 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, # Set ticks on both left and right vertical axes axes[row, 0].tick_params(axis='y', which='both', left=True, right=True) axes[row, 1].tick_params(axis='y', which='both', left=True, right=True) + axes[row, 2].tick_params(axis='y', which='both', left=True, right=True) - axes[row, 0].tick_params(axis='x', which='both', top=True, bottom=True) - axes[row, 1].tick_params(axis='x', which='both', top=True, bottom=True) + axes[row, 0].tick_params(axis='x', which='both', top=True, bottom=True, + labelbottom=True) + axes[row, 1].tick_params(axis='x', which='both', top=True, bottom=True, + labelbottom=True) + axes[row, 2].tick_params(axis='x', which='both', top=True, bottom=True, + labelbottom=True) - experiment_idx = 0 + #experiment_idx = 0 for experiment, timeseries_dict in self.experiment_timeseries_dict.items(): for full_stat_name, timeseries_data in timeseries_dict.items(): for stat_label, value_dict in timeseries_data.value_dict.items(): if stat_label == f'bias_post_corr_GSIstage_{self.gsi_it}' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): """ mean error plot """ - bias_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] - bias_values = np.array(value_dict[sat_sensor])[:,channel_idx] - std_timestamps = timeseries_dict[ - f'{sensor}_std_GSIstage_{self.gsi_it}'].timestamp_dict[ - f'std_GSIstage_{self.gsi_it}'][sat_sensor] - std_values = timeseries_dict[ - f'{sensor}_std_GSIstage_{self.gsi_it}'].value_dict[ - f'std_GSIstage_{self.gsi_it}'][sat_sensor] - - nobs_used_timestamps = timeseries_dict[ - 
f'{sensor}_nobs_used_GSIstage_{self.gsi_it}' - ].timestamp_dict[f'nobs_used_GSIstage_{self.gsi_it}' - ][sat_sensor] - nobs_used_values = timeseries_dict[ - f'{sensor}_nobs_used_GSIstage_{self.gsi_it}' - ].value_dict[f'nobs_used_GSIstage_{self.gsi_it}' - ][sat_sensor] + bias_timeseries = pd.Series( + data=np.array( + value_dict[sat_sensor] + )[:, channel_idx], + index=timeseries_data.timestamp_dict[stat_label][sat_sensor] + ).astype(float) - use_timestamps = timeseries_dict[ - f'{sensor}_use_GSIstage_None'].timestamp_dict[ - 'use_GSIstage_None'][sat_sensor] - use_values = timeseries_dict[ - f'{sensor}_use_GSIstage_None'].value_dict[ - 'use_GSIstage_None'][sat_sensor] - - yerrs=list() - nobs_used_arr=list() - use_flags=list() - for time_idx, bias_timestamp in enumerate(bias_timestamps): - if bias_timestamp in std_timestamps: - std_time_idx = std_timestamps.index(bias_timestamp) - yerr = np.array(std_values)[std_time_idx, channel_idx] - - if yerr is not None: - yerrs.append(yerr) - else: - yerrs.append(np.nan) - else: - yerrs.append(np.nan) - - if bias_timestamp in nobs_used_timestamps: - nobs_used_time_idx = nobs_used_timestamps.index(bias_timestamp) - nobs_used_channel = np.array(nobs_used_values)[nobs_used_time_idx, channel_idx] - - if nobs_used_channel is not None: - nobs_used_arr.append(nobs_used_channel) - else: - nobs_used_arr.append(np.nan) - else: - nobs_used_arr.append(np.nan) - - if bias_timestamp in use_timestamps: - use_time_idx = use_timestamps.index(bias_timestamp) - use_flag = np.array(use_values)[use_time_idx, channel_idx] - - if use_flag is not None: - use_flags.append(use_flag) - else: - use_flags.append(np.nan) - else: - use_flags.append(np.nan) - - #use_flags_plot = np.array([np.nan if x is None else float(x) for x in use_flags]) - mean_values_plot = np.array([np.nan if x is None else float(x) for x in bias_values]) - #yerrs_plot = np.array([np.nan if x is None else float(x) for x in yerrs]) - #nobs_used_plot = np.array([np.nan if x is None else 
float(x) for x in nobs_used_arr]) - standard_errs = np.array(yerrs) / np.sqrt(nobs_used_arr) - - mean_values_smooth = pd.Series( - np.ma.masked_where( - np.array(use_flags) < 1, - mean_values_plot), - index=bias_timestamps).rolling( - window=window_size, - min_periods=1, - center=True, - #win_type='triang' - ).mean() + std_timeseries = pd.Series( + data=np.array( + timeseries_dict[f'{sensor}_std_GSIstage_{self.gsi_it}'].value_dict[ + f'std_GSIstage_{self.gsi_it}'][sat_sensor] + )[:, channel_idx], + index=timeseries_dict[f'{sensor}_std_GSIstage_{self.gsi_it}'].timestamp_dict[ + f'std_GSIstage_{self.gsi_it}'][sat_sensor] + ).astype(float) - #standard_errs_times_2 = 2.*standard_errs - yerr_bot = mean_values_plot - standard_errs - yerr_top = mean_values_plot + standard_errs + nobs_used_timeseries = pd.Series( + data=np.array( + timeseries_dict[f'{sensor}_nobs_used_GSIstage_{self.gsi_it}'].value_dict[ + f'nobs_used_GSIstage_{self.gsi_it}'][sat_sensor] + )[:, channel_idx], + index=timeseries_dict[f'{sensor}_nobs_used_GSIstage_{self.gsi_it}'].timestamp_dict[ + f'nobs_used_GSIstage_{self.gsi_it}'][sat_sensor] + ).astype(float) - ''' - axes[row, 0].bar( - bias_timestamps, - use_flags_plot_mask, - width=vbar_width, - bottom=-15, - color=self.config_dict['color_list'][experiment_idx], - alpha=0.5*alpha_background + use_flag_timeseries = pd.Series( + data=np.array( + timeseries_dict[ + f'{sensor}_use_GSIstage_None'].value_dict[ + 'use_GSIstage_None'][sat_sensor] + )[:, channel_idx], + index=timeseries_dict[f'{sensor}_use_GSIstage_None'].timestamp_dict[ + 'use_GSIstage_None'][sat_sensor] + ).astype(float) + + standard_errs = std_timeseries / np.sqrt(nobs_used_timeseries) + + bias_timeseries = bias_timeseries.combine_first( + self.time_domain ) - ''' + use_flag_timeseries = use_flag_timeseries.combine_first( + self.time_domain + ) + + mean_values_smooth = bias_timeseries.rolling( + window=self.window_size, + min_periods=self.min_periods, + center=True, + #win_type='triang' + 
).mean() + + yerr_bot = ( + bias_timeseries - standard_errs + ).combine_first(self.time_domain) + yerr_top = ( + bias_timeseries + standard_errs + ).combine_first(self.time_domain) + axes[row, 0].fill_between( - bias_timestamps, - yerr_top, - y2=yerr_bot, - width=vbar_width, - bottom=yerr_bot, - color=self.config_dict['color_list'][experiment_idx], - alpha=alpha_background + yerr_bot.index, + yerr_top.values, + y2=yerr_bot.values, + lw=0, + edgecolor='none', + color=self.config_dict['experiment_plot_dict'] + [experiment]['color'], + alpha=alpha_background, + zorder=2 ) - ''' - axes[row, 0].barh(np.clip(mean_values_plot, - YMIN, - YMAX), - pd.Timedelta(hours=2), - height=0.1, - left=bias_timestamps - pd.Timedelta(hours=1), - color=self.config_dict['color_list'][experiment_idx], - alpha = 1.0) ''' axes[row, 0].plot( - bias_timestamps, - mean_values_plot, + bias_timeseries.index, + bias_timeseries.values, marker='none', - color=self.config_dict['color_list'][experiment_idx], + color=self.config_dict['experiment_plot_dict'] + [experiment]['color'], alpha=alpha_background, lw=0.5, ls='-', ) + ''' + axes[row, 0].plot( - bias_timestamps, - mean_values_smooth, + mean_values_smooth.index, + mean_values_smooth.where( + use_flag_timeseries < 1 + ).values, marker='none', - color=self.config_dict['color_list'][experiment_idx], + color=self.config_dict['experiment_plot_dict'] + [experiment]['color'], alpha=alpha_foreground, - lw=self.config_dict['lw_list'][experiment_idx], - ls=self.config_dict['ls_list'][experiment_idx], - label=self.friendly_names_dict[experiment] + lw=self.config_dict['experiment_plot_dict'] + [experiment]['lw'], + ls=':', + zorder=3, #label=self.friendly_names_dict[experiment] ) - ''' - axes[row, 0].errorbar( - bias_timestamps, - np.clip(mean_values_plot, YMIN, YMAX), - xerr=pd.Timedelta(hours=3), - fmt='none',#self.config_dict['ls_list'][experiment_idx], - #lw=self.config_dict['lw_list'][experiment_idx], - 
elinewidth=self.config_dict['lw_list'][experiment_idx], - color=self.config_dict['color_list'][experiment_idx], - alpha = 1.0, + + axes[row, 0].plot( + mean_values_smooth.index, + mean_values_smooth.where( + use_flag_timeseries > 0 + ).values, + marker='none', + color=self.config_dict['experiment_plot_dict'] + [experiment]['color'], + alpha=1.0, + lw=4.*self.config_dict['experiment_plot_dict'] + [experiment]['lw'], + ls=self.config_dict['experiment_plot_dict'] + [experiment]['ls'], + label=self.friendly_names_dict[experiment], + zorder=3 ) - ''' - axes[row,0].legend(loc='lower right') + + axes[row,0].legend(loc='upper right') elif stat_label == f'sqrt_bias_GSIstage_{self.gsi_it}' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): """RMS error plot """ - rmse_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] - rmse_values = np.array(value_dict[sat_sensor])[:,channel_idx] - obs_err_var_timestamps = timeseries_dict[ - f'{sensor}_variance_GSIstage_{self.gsi_it}'].timestamp_dict[ - f'variance_GSIstage_{self.gsi_it}'][sat_sensor] - obs_err_var_values = timeseries_dict[ - f'{sensor}_variance_GSIstage_{self.gsi_it}'].value_dict[ + rmse_timeseries = pd.Series( + data=np.array( + value_dict[sat_sensor] + )[:, channel_idx], + index=timeseries_data.timestamp_dict[stat_label][sat_sensor] + ).astype(float) + + obs_err_var_timeseries = pd.Series( + data=np.array( + timeseries_dict[f'{sensor}_variance_GSIstage_{self.gsi_it}'].value_dict[ + f'variance_GSIstage_{self.gsi_it}'][sat_sensor] + )[:, channel_idx], + index=timeseries_dict[f'{sensor}_variance_GSIstage_{self.gsi_it}'].timestamp_dict[ f'variance_GSIstage_{self.gsi_it}'][sat_sensor] - use_timestamps = timeseries_dict[ - f'{sensor}_use_GSIstage_None'].timestamp_dict[ - 'use_GSIstage_None'][sat_sensor] - use_values = timeseries_dict[ - f'{sensor}_use_GSIstage_None'].value_dict[ - 'use_GSIstage_None'][sat_sensor] - - yerrs2=list() - use_flags=list() - for time_idx, rmse_timestamp in 
enumerate(rmse_timestamps): - if rmse_timestamp in obs_err_var_timestamps: - obs_err_var_time_idx = obs_err_var_timestamps.index(rmse_timestamp) - yerr2 = np.array(obs_err_var_values)[obs_err_var_time_idx, channel_idx] - - if yerr2 is not None: - yerrs2.append(yerr2) - else: - yerrs2.append(np.nan) - else: - yerrs2.append(np.nan) - - if rmse_timestamp in use_timestamps: - use_time_idx = use_timestamps.index(rmse_timestamp) - use_flag = np.array(use_values)[use_time_idx, channel_idx] - - if use_flag is not None: - use_flags.append(use_flag) - else: - use_flags.append(np.nan) - else: - use_flags.append(np.nan) - - #yerrs_plot = np.sqrt(np.array([np.nan if x is None else float(x) for x in yerrs2])) - #use_flags_plot = np.array([np.nan if x is None else float(x) for x in use_flags]) - max_yerr = np.max(np.nan_to_num(np.sqrt(yerrs2)), initial=max_yerr) - rmse_values_plot = np.array([np.nan if x is None else float(x) for x in rmse_values]) - rmse_values_smooth = pd.Series( - np.ma.masked_where( - np.array(use_flags) < 1, - rmse_values_plot), - index=rmse_timestamps).rolling( - window=window_size, - min_periods=1, - center=True, - #win_type='triang' - ).mean() - - ''' - axes[row, 1].bar( - rmse_timestamps, - np.ma.masked_where(use_flags_plot < 1, 2.*yerrs_plot), - width=pd.Timedelta(hours=DA_CYCLE), - bottom=rmse_values_plot - yerrs_plot, - color=self.config_dict['color_list'][experiment_idx], - alpha=alpha_background - ) + ).astype(float) + use_flag_timeseries = pd.Series( + data=np.array( + timeseries_dict[ + f'{sensor}_use_GSIstage_None'].value_dict[ + 'use_GSIstage_None'][sat_sensor] + )[:, channel_idx], + index=timeseries_dict[f'{sensor}_use_GSIstage_None'].timestamp_dict[ + 'use_GSIstage_None'][sat_sensor] + ).astype(float) + + max_yerr = np.max( + np.nan_to_num( + np.sqrt(obs_err_var_timeseries.values) + ), + initial=max_yerr + ) - axes[row, 1].bar( - rmse_timestamps, - use_flags_plot_mask, - width=vbar_width, - bottom=0, - 
color=self.config_dict['color_list'][experiment_idx], - alpha=0.5*alpha_background + rmse_timeseries = rmse_timeseries.combine_first( + self.time_domain ) - ''' + use_flag_timeseries = use_flag_timeseries.combine_first( + self.time_domain + ) + + rmse_values_smooth = rmse_timeseries.rolling( + window=self.window_size, + min_periods=self.min_periods, + center=True, + #win_type='triang' + ).mean() + axes[row, 1].plot( - rmse_timestamps, - rmse_values_plot, + rmse_timeseries.index, + rmse_timeseries.values, marker='none', - color=self.config_dict['color_list'][experiment_idx], + color=self.config_dict['experiment_plot_dict'] + [experiment]['color'], alpha=alpha_background, lw=0.5, ls='-', - #xerr=pd.Timedelta(hours=3), - #fmt='none',#,self.config_dict['ls_list'][experiment_idx], - #lw=self.config_dict['lw_list'][experiment_idx], - #elinewidth=self.config_dict['lw_list'][experiment_idx], + zorder=2 ) + axes[row, 1].plot( - rmse_timestamps, - rmse_values_smooth, + rmse_values_smooth.index, + rmse_values_smooth.where( + use_flag_timeseries < 1 + ).values, marker='none', - color=self.config_dict['color_list'][experiment_idx], + color=self.config_dict['experiment_plot_dict'] + [experiment]['color'], alpha=alpha_foreground, - lw=self.config_dict['lw_list'][experiment_idx], - ls=self.config_dict['ls_list'][experiment_idx], + lw=self.config_dict['experiment_plot_dict'] + [experiment]['lw'], + ls=':', + zorder=3 + #label=self.friendly_names_dict[experiment], + ) + + axes[row, 1].plot( + rmse_values_smooth.index, + rmse_values_smooth.where( + use_flag_timeseries > 0 + ), + marker='none', + color=self.config_dict['experiment_plot_dict'] + [experiment]['color'], + alpha=1.0, + lw=4.*self.config_dict['experiment_plot_dict'] + [experiment]['lw'], + ls=self.config_dict['experiment_plot_dict'] + [experiment]['ls'], label=self.friendly_names_dict[experiment], + zorder=3 ) - axes[row,1].legend(loc='lower right') + + #axes[row,1].legend(loc='lower right') elif stat_label == 
f'nobs_used_GSIstage_{self.gsi_it}' and sat_sensor in timeseries_data.timestamp_dict[stat_label].keys(): """ nobs tossed and rejection ratio plot """ - nobs_used_timestamps = timeseries_data.timestamp_dict[stat_label][sat_sensor] - nobs_used_values = np.array(value_dict[sat_sensor])[:,channel_idx] - ''' - nobs_used_timestamps = timeseries_dict[ - f'{sensor}_nobs_used_GSIstage_{self.gsi_it}' - ].timestamp_dict[f'nobs_used_GSIstage_{self.gsi_it}' - ][sat_sensor] - nobs_used_values = timeseries_dict[ - f'{sensor}_nobs_used_GSIstage_{self.gsi_it}' - ].value_dict[f'nobs_used_GSIstage_{self.gsi_it}' - ][sat_sensor] - ''' - use_timestamps = timeseries_dict[ - f'{sensor}_use_GSIstage_None'].timestamp_dict[ - 'use_GSIstage_None'][sat_sensor] - use_values = timeseries_dict[ - f'{sensor}_use_GSIstage_None'].value_dict[ - 'use_GSIstage_None'][sat_sensor] - - #nobs_used_arr=list() - #nobs_tossed_arr = list() - use_flags=list() + nobs_used_timeseries = pd.Series( + data=np.array( + value_dict[sat_sensor] + )[:, channel_idx], + index=timeseries_data.timestamp_dict[stat_label][sat_sensor] + ).astype(float) + + use_flag_timeseries = pd.Series( + data=np.array( + timeseries_dict[ + f'{sensor}_use_GSIstage_None'].value_dict[ + 'use_GSIstage_None'][sat_sensor] + )[:, channel_idx], + index=timeseries_dict[f'{sensor}_use_GSIstage_None'].timestamp_dict[ + 'use_GSIstage_None'][sat_sensor] + ).astype(float) + + nobs_used_timeseries = nobs_used_timeseries.combine_first( + self.time_domain + ) - for time_idx, nobs_use_timestamp in enumerate(nobs_used_timestamps): - if nobs_use_timestamp in use_timestamps: - use_time_idx = use_timestamps.index(nobs_use_timestamp) - use_flag = np.array(use_values)[use_time_idx, channel_idx] - - if use_flag is not None: - use_flags.append(use_flag) - else: - use_flags.append(np.nan) - else: - use_flags.append(np.nan) - + use_flag_timeseries = use_flag_timeseries.combine_first( + self.time_domain + ) - #nobs_tossed_plot = np.array([np.nan if x is None else 
float(x) for x in nobs_tossed_values]) + nobs_used_smooth = nobs_used_timeseries.rolling( + window=self.window_size, + min_periods=self.min_periods, + center=True, + #win_type='triang' + ).mean() - nobs_used_plot = np.array([np.nan if x is None else float(x) for x in nobs_used_values]) - ''' - rejection_percent = (100.*nobs_tossed_plot) / ( - nobs_used_plot + nobs_tossed_plot) - ''' - - nobs_used_smooth = pd.Series( - np.ma.masked_where( - np.array(use_flags) < 1, - nobs_used_plot), - index=nobs_used_timestamps).rolling( - window=window_size, - min_periods=1, - center=True, - #win_type='triang' - ).mean() - ''' - axes[row, 2].bar( - nobs_tossed_timestamps, - nobs_tossed_plot, - width=pd.Timedelta(hours=DA_CYCLE), - color=self.config_dict['color_list'][experiment_idx], - alpha=alpha_background, - label=f"n tossed ({self.friendly_names_dict[experiment]})" - ) - - rejection_ratio_ax.plot( - nobs_tossed_timestamps, - rejection_percent, - marker='none', - color=self.config_dict['color_list'][experiment_idx], - alpha=alpha_foreground, - lw=self.config_dict['lw_list'][experiment_idx], - ls=self.config_dict['ls_list'][experiment_idx], - label=f"{self.friendly_names_dict[experiment]}" - ) - ''' axes[row, 2].plot( - nobs_used_timestamps, - nobs_used_plot, + nobs_used_timeseries.index, + nobs_used_timeseries.values, marker='none', - color=self.config_dict['color_list'][experiment_idx], + color=self.config_dict['experiment_plot_dict'] + [experiment]['color'], alpha=alpha_background, lw=0.5, ls='-', + zorder=2 ) axes[row, 2].plot( - nobs_used_timestamps, - nobs_used_smooth, + nobs_used_smooth.index, + nobs_used_smooth.where( + use_flag_timeseries < 1 + ).values, marker='none', - color=self.config_dict['color_list'][experiment_idx], + color=self.config_dict['experiment_plot_dict'] + [experiment]['color'], alpha=alpha_foreground, - lw=self.config_dict['lw_list'][experiment_idx], - ls=self.config_dict['ls_list'][experiment_idx], - label=f"{self.friendly_names_dict[experiment]}" + 
lw=self.config_dict['experiment_plot_dict'] + [experiment]['lw'], + ls=':', + zorder=3 + #label=f"{self.friendly_names_dict[experiment]}" + ) + + axes[row, 2].plot( + nobs_used_smooth.index, + nobs_used_smooth.where( + use_flag_timeseries > 0 + ).values, + marker='none', + color=self.config_dict['experiment_plot_dict'] + [experiment]['color'], + alpha=1.0, + lw=4.*self.config_dict['experiment_plot_dict'] + [experiment]['lw'], + ls=self.config_dict['experiment_plot_dict'] + [experiment]['ls'], + label=f"{self.friendly_names_dict[experiment]}", + zorder=3 ) - axes[row,2].legend(loc='lower right') + + #axes[row,2].legend(loc='lower right') #rejection_ratio_ax.legend(loc='upper right') - axes[row, 0].xaxis.set_major_formatter( - mdates.ConciseDateFormatter( - axes[row, 0].xaxis.get_major_locator())) - axes[row, 1].xaxis.set_major_formatter( - mdates.ConciseDateFormatter( - axes[row, 1].xaxis.get_major_locator())) - axes[row, 2].xaxis.set_major_formatter( - mdates.ConciseDateFormatter( - axes[row, 2].xaxis.get_major_locator())) + for col_idx in range(ncols): + if self.dark_theme: + axes[row, col_idx].grid(True) + axes[row, col_idx].xaxis.set_major_locator(locator) + axes[row, col_idx].xaxis.set_major_formatter(formatter) + axes[row, col_idx].xaxis.set_minor_locator(month_locator) - experiment_idx += 1 + #experiment_idx += 1 for row, sat_sensor in enumerate(sorted(sat_set)): # set ylim, ticks - axes[row, 0].set_yticks( - np.arange(np.around(-1.5 * max_yerr - 0.2, decimals=1), - 1.5 * max_yerr + 0.2, - 0.1), - minor=True + if max_yerr < 1: + axes[row, 0].set_yticks( + np.arange(np.around(-0.5 * max_yerr - 0.2, decimals=1), + 0.5 * max_yerr + 0.2, + 0.1), + ) + else: + axes[row, 0].set_yticks( + np.arange(np.around(-0.5 * max_yerr - 1), + 0.5 * max_yerr + 1, + 0.5), + ) + axes[row, 0].set_yticks( + np.arange(np.around(-0.5 * max_yerr - 0.2, decimals=1), + 0.5 * max_yerr + 0.2, + 0.1), + minor=True ) axes[row, 1].set_yticks( np.arange(0, 3. 
* max_yerr + 0.1, 0.1), minor=True ) - - axes[row, 0].set_yticks( - np.arange(np.around(-1.5 * max_yerr - 1), - 1.5*max_yerr + 1, - 0.5) - ) axes[row, 1].set_yticks( np.arange(0, 3. * max_yerr + 1, 0.5) ) - axes[row, 0].set_ylim(-1.0*max_yerr, 1.0*max_yerr) - axes[row, 1].set_ylim(0, 2.*max_yerr) + axes[row, 0].set_ylim(-0.5*max_yerr, 0.5*max_yerr) + axes[row, 1].set_ylim(0, 3.*max_yerr) + + nobs_ylims = axes[row, 2].get_ylim() + if nobs_ylims[0] < 0: + axes[row, 2].set_ylim(bottom=0) + plt.tight_layout() + plt.subplots_adjust(top=0.96) if interactive: plt.show() else: @@ -770,7 +819,10 @@ def prun(sensor_list=None): if args.channel != 9999: experiment_metrics_timeseries_data.channel_dict = {sensor: [args.channel]} experiment_metrics_timeseries_data.config_dict['sensor_list'] = [sensor] - experiment_metrics_timeseries_data.build_timeseries(interactive_figure=args.interactive) + experiment_metrics_timeseries_data.build_timeseries(interactive_figure=args.interactive, + days_to_smooth=args.days_to_smooth, + da_cycle=args.da_cycle, + dark_theme=args.dark_theme) def main(): """ From bbeec9d5ddebca0c15dbde3e76e56fde0e61507d Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Mon, 24 Mar 2025 11:09:24 -0600 Subject: [PATCH 27/44] plt.close() calls for file count and replay increment plot scripts --- src/score_plotting/core_scripts/plot_file_counts.py | 1 + src/score_plotting/core_scripts/plot_increments.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/score_plotting/core_scripts/plot_file_counts.py b/src/score_plotting/core_scripts/plot_file_counts.py index 351033c..febead8 100755 --- a/src/score_plotting/core_scripts/plot_file_counts.py +++ b/src/score_plotting/core_scripts/plot_file_counts.py @@ -393,6 +393,7 @@ def plot_file_counts(experiments, metric, metrics_df, work_dir, fig_base_fn, experiment_name=expt_name) save_figure(fig_fn) + plt.close() @dataclass class PlotFileCountRequest(PlotInnovStatsRequest): diff --git 
a/src/score_plotting/core_scripts/plot_increments.py b/src/score_plotting/core_scripts/plot_increments.py index 254f135..b0a8b7e 100755 --- a/src/score_plotting/core_scripts/plot_increments.py +++ b/src/score_plotting/core_scripts/plot_increments.py @@ -448,6 +448,7 @@ def plot_increments(experiments, stat, metric, metrics_df, work_dir, fig_base_fn experiment_name=expt_name) save_figure(fig_fn) + plt.close() @dataclass class PlotIncrementRequest(PlotInnovStatsRequest): From 645ce213939015c1765290233916f4ed15990043 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Mon, 24 Mar 2025 11:47:40 -0600 Subject: [PATCH 28/44] rename dark_theme.mplstyle --- src/score_plotting/core_scripts/plot_file_counts.py | 2 +- src/score_plotting/core_scripts/plot_increments.py | 2 +- .../style_lib/{darrmonitor.mplstyle => dark_theme.mplstyle} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename src/score_plotting/style_lib/{darrmonitor.mplstyle => dark_theme.mplstyle} (100%) diff --git a/src/score_plotting/core_scripts/plot_file_counts.py b/src/score_plotting/core_scripts/plot_file_counts.py index febead8..2d56acb 100755 --- a/src/score_plotting/core_scripts/plot_file_counts.py +++ b/src/score_plotting/core_scripts/plot_file_counts.py @@ -432,7 +432,7 @@ def submit(self): if __name__=='__main__': args = parse_arguments() if args.dark_theme: - style_file = 'darrmonitor.mplstyle' + style_file = 'dark_theme.mplstyle' else: style_file = 'half_horizontal.mplstyle' diff --git a/src/score_plotting/core_scripts/plot_increments.py b/src/score_plotting/core_scripts/plot_increments.py index b0a8b7e..cbeb853 100755 --- a/src/score_plotting/core_scripts/plot_increments.py +++ b/src/score_plotting/core_scripts/plot_increments.py @@ -494,7 +494,7 @@ def submit(self): if __name__=='__main__': args = parse_arguments() if args.dark_theme: - style_file = 'darrmonitor.mplstyle' + style_file = 'dark_theme.mplstyle' else: style_file = 'half_horizontal.mplstyle' diff --git 
a/src/score_plotting/style_lib/darrmonitor.mplstyle b/src/score_plotting/style_lib/dark_theme.mplstyle similarity index 100% rename from src/score_plotting/style_lib/darrmonitor.mplstyle rename to src/score_plotting/style_lib/dark_theme.mplstyle From cece05fe8254c249ad1e3eeff85b0b724a90b966 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Thu, 27 Mar 2025 13:07:21 -0600 Subject: [PATCH 29/44] update dark theme mpl_style_sheet name --- src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index 041053c..4093e88 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -30,7 +30,7 @@ def config(): """ args = parse_arguments() if args.dark_theme: - mpl_style_sheet = 'darrmonitor.mplstyle' + mpl_style_sheet = 'dark_theme.mplstyle' else: mpl_style_sheet = 'full_3x3pg.mplstyle' From 9908a87ab6e03497a12c1cadb140546474f3f3dc Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Fri, 4 Apr 2025 04:49:36 +0000 Subject: [PATCH 30/44] minor changes for plotting all sensors --- .../core_scripts/plot_file_counts.py | 4 ++-- .../plot_gsi_radiance_fit_to_obs.py | 17 ++++++++++------- .../core_scripts/plot_increments.py | 2 +- .../style_lib/dark_theme.mplstyle | 8 ++++---- .../style_lib/full_3x3pg.mplstyle | 5 +++-- .../style_lib/half_horizontal.mplstyle | 7 ++++--- 6 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/score_plotting/core_scripts/plot_file_counts.py b/src/score_plotting/core_scripts/plot_file_counts.py index 2d56acb..5789f22 100755 --- a/src/score_plotting/core_scripts/plot_file_counts.py +++ b/src/score_plotting/core_scripts/plot_file_counts.py @@ -253,7 +253,7 @@ def plot_file_counts(experiments, metric, metrics_df, work_dir, fig_base_fn, args = 
parse_arguments() if args.dark_theme: - default_plot_color = '#CFB87C' + default_plot_color = 'white'#'#CFB87C' fill_color = '#565A5C' else: default_plot_color = 'black' @@ -448,4 +448,4 @@ def submit(self): plot_control_dict_ext ]): plot_request = PlotFileCountRequest(plot_control_dict) - plot_request.submit() \ No newline at end of file + plot_request.submit() diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index 4093e88..266ca8d 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -70,8 +70,8 @@ def config(): }, 'sensor_list': get_instrument_channels().keys(), - 'start_date': '2018-10-01 00:00:00', - 'stop_date': '2019-09-30 00:00:00', + 'start_date': '1978-10-01 00:00:00', + 'stop_date': '2025-09-30 00:00:00', } ''' @@ -295,9 +295,11 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, for channel_idx, channel_num in enumerate(self.channel_dict[sensor]): if len(sat_set) > 0: max_yerr=0.5 # temperature (K) + figsize_width = 2 * 3.74 * ncols + figsize_length = 4.53 * len(sat_set) fig, axes = plt.subplots(len(sat_set), ncols, sharex=True,sharey=False, squeeze=False, - figsize=(2*ncols*3.74, len(sat_set)*4.53)) + figsize=(figsize_width, figsize_length)) if self.gsi_it == 1: title_str0 = f"GSI radiance data analysis fit to observations (O-B) [metrics downloaded from {self.db_name}" @@ -311,7 +313,7 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, else: title_str1 = "]" - fig.suptitle(f"{title_str0}{title_str1}") + #fig.suptitle(f"{title_str0}{title_str1}") #axes[-1, 0].set_xlabel = 'Cycle date (Gregorian)' #axes[-1, 1].set_xlabel = 'Cycle date (Gregorian)' #axes[-1, 2].set_xlabel = 'Cycle date (Gregorian)' @@ -703,7 +705,8 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, axes[row, 2].set_ylim(bottom=0) plt.tight_layout() - 
plt.subplots_adjust(top=0.96) + plt.subplots_adjust(top = 1. - 1.2 / figsize_length) + fig.suptitle(f"{title_str0}{title_str1}") if interactive: plt.show() else: @@ -829,9 +832,9 @@ def main(): """ #run_avhrr() #run_tovs() - run_microwave_sounders2() + #run_microwave_sounders2() #run_atms() - #prun() + prun() if __name__ == "__main__": main() diff --git a/src/score_plotting/core_scripts/plot_increments.py b/src/score_plotting/core_scripts/plot_increments.py index cbeb853..3719823 100755 --- a/src/score_plotting/core_scripts/plot_increments.py +++ b/src/score_plotting/core_scripts/plot_increments.py @@ -309,7 +309,7 @@ def plot_increments(experiments, stat, metric, metrics_df, work_dir, fig_base_fn args = parse_arguments() if args.dark_theme: - default_plot_color = '#CFB87C' + default_plot_color = 'white'#'#CFB87C' fill_color = '#565A5C' else: default_plot_color = 'black' diff --git a/src/score_plotting/style_lib/dark_theme.mplstyle b/src/score_plotting/style_lib/dark_theme.mplstyle index cfdac02..abc53d0 100644 --- a/src/score_plotting/style_lib/dark_theme.mplstyle +++ b/src/score_plotting/style_lib/dark_theme.mplstyle @@ -12,9 +12,9 @@ patch.edgecolor: white # if forced, or patch is not filled hatch.color: white ### FONT -font.family : serif -font.sans-serif : 'Noto Serif CJK JP' -font.size : 11 +font.family: serif +font.serif: Noto Serif CJK JP +font.size: 11 ## *************************************************************************** ## * TEXT * @@ -79,4 +79,4 @@ savefig.edgecolor: black # figure edgecolor when saving savefig.format: png # {png, ps, pdf, svg} ## Legend -legend.fontsize : 11 \ No newline at end of file +legend.fontsize : 11 diff --git a/src/score_plotting/style_lib/full_3x3pg.mplstyle b/src/score_plotting/style_lib/full_3x3pg.mplstyle index a460648..141eb91 100644 --- a/src/score_plotting/style_lib/full_3x3pg.mplstyle +++ b/src/score_plotting/style_lib/full_3x3pg.mplstyle @@ -2,8 +2,9 @@ ### FONT font.size : 11 -font.family : sans-serif 
-font.sans-serif : Roboto, Arial +font.family: sans-serif +font.sans-serif: FreeSans, Roboto, Arial +font.serif: Noto Serif CJK JP ### TEXT #text.usetex : True diff --git a/src/score_plotting/style_lib/half_horizontal.mplstyle b/src/score_plotting/style_lib/half_horizontal.mplstyle index 8568231..77c9878 100644 --- a/src/score_plotting/style_lib/half_horizontal.mplstyle +++ b/src/score_plotting/style_lib/half_horizontal.mplstyle @@ -1,9 +1,10 @@ #### matplotlib style file for AGU figures ### FONT -font.size : 11 -font.family : sans-serif -font.sans-serif : Roboto, Arial, Helvetica +font.size: 11 +font.family: sans-serif +font.sans-serif: FreeSans, Roboto, Arial, Helvetica +font.serif: Noto Serif CJK JP ### TEXT #text.usetex : True From 4d12650465f546139454bc7d7b50f524aa62ff03 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Mon, 7 Apr 2025 22:15:12 +0000 Subject: [PATCH 31/44] replay file count and increment plot readability improvements --- .../attrs/file_counts_plot_attrs.py | 2 +- .../attrs/increments_plot_attrs.py | 2 +- .../core_scripts/plot_file_counts.py | 68 ++++++-- .../core_scripts/plot_increments.py | 152 ++++++++++++++---- .../style_lib/dark_theme.mplstyle | 14 +- .../style_lib/half_horizontal.mplstyle | 16 +- 6 files changed, 193 insertions(+), 61 deletions(-) diff --git a/src/score_plotting/attrs/file_counts_plot_attrs.py b/src/score_plotting/attrs/file_counts_plot_attrs.py index e60eb6a..efc4629 100755 --- a/src/score_plotting/attrs/file_counts_plot_attrs.py +++ b/src/score_plotting/attrs/file_counts_plot_attrs.py @@ -30,6 +30,6 @@ xlabel=DEFAULT_XLABEL, ylabel=AxesLabel( axis='y', - label='Number of files', + label=None, horizontalalignment='center' ))} diff --git a/src/score_plotting/attrs/increments_plot_attrs.py b/src/score_plotting/attrs/increments_plot_attrs.py index 47141ca..1027914 100755 --- a/src/score_plotting/attrs/increments_plot_attrs.py +++ b/src/score_plotting/attrs/increments_plot_attrs.py @@ -12,7 +12,7 @@ AxesLabel = 
namedtuple('AxesLabel', ['axis', 'label', 'horizontalalignment']) -DEFAULT_LEGEND_ATTRS = LegendData(loc='lower left', fancybox=None, +DEFAULT_LEGEND_ATTRS = LegendData(loc='upper left', fancybox=None, edgecolor=None, framealpha=None, shadow=None, fontsize='large', facecolor=None) diff --git a/src/score_plotting/core_scripts/plot_file_counts.py b/src/score_plotting/core_scripts/plot_file_counts.py index 5789f22..e5fe86e 100755 --- a/src/score_plotting/core_scripts/plot_file_counts.py +++ b/src/score_plotting/core_scripts/plot_file_counts.py @@ -26,6 +26,8 @@ from score_plotting.attrs.file_counts_plot_attrs import plot_attrs from score_plotting.core_scripts.plot_innov_stats import PlotInnovStatsRequest +UFS_REPLAY_BUCKET = 'noaa-ufs-gefsv13replay-pds' + def parse_arguments(): parser = argparse.ArgumentParser() @@ -145,13 +147,29 @@ def parse_arguments(): 'file_{metric}'}], 'work_dir': parse_arguments().figure_output_path} +plot_control_dict_forward_ext = {'date_range': {'datetime_str': '%Y-%m-%d %H:%M:%S', + 'end': '2025-09-30 00:00:00', + 'start': '2023-10-01 00:00:00'}, + 'db_request_name': 'expt_metrics', + 'method': 'GET', + 'experiments': [{'graph_color': 'black', + 'graph_label': 'Number of objects uploaded per cycle', + 'name': 'ufs_replay_ext', + 'wallclock_start': '2024-10-01 00:00:00'}], + 'fig_base_fn': 'forward_ext_files', + 'stat_groups': [{'cycles': [0, 21600, 43200, 64800], + 'metrics': ['count'], + 'stat_group_frmt_str': + 'file_{metric}'}], + 'work_dir': parse_arguments().figure_output_path} + plot_control_dict_ext = {'date_range': {'datetime_str': '%Y-%m-%d %H:%M:%S', 'end': '2025-09-30 00:00:00', - 'start': '2020-10-01 00:00:00'}, + 'start': '1978-10-01 00:00:00'}, 'db_request_name': 'expt_metrics', 'method': 'GET', 'experiments': [{'graph_color': 'black', - 'graph_label': 'increments', + 'graph_label': 'Number of objects uploaded per cycle', 'name': 'ufs_replay_ext', 'wallclock_start': '2024-10-01 00:00:00'}], 'fig_base_fn': 'files', @@ -246,17 
+264,17 @@ def build_fig_dest(work_dir, fig_base_fn, metric, date_range, def save_figure(dest_full_path): print(f'saving figure to {dest_full_path}') - plt.savefig(dest_full_path, dpi=600) + plt.savefig(dest_full_path, dpi=300) def plot_file_counts(experiments, metric, metrics_df, work_dir, fig_base_fn, - date_range): + date_range, append_date_range=False): args = parse_arguments() if args.dark_theme: default_plot_color = 'white'#'#CFB87C' fill_color = '#565A5C' else: - default_plot_color = 'black' + default_plot_color = experiments[0]['graph_color'] fill_color = '#A2A4A3' da_cycle = args.da_cycle @@ -381,17 +399,30 @@ def plot_file_counts(experiments, metric, metrics_df, work_dir, fig_base_fn, today = date.today() plt.title(today, loc = "right") - locator = mdates.AutoDateLocator(minticks=5, maxticks=10) - month_locator = mdates.MonthLocator() + locator = mdates.AutoDateLocator(minticks=8, maxticks=16) + + if pd.Timedelta(date_range.end - date_range.start) > pd.Timedelta(days=10957): + # date range is greater than 30 years; set minor tick interval to 3 months + month_interval = 3 + else: + # date range is less than or equal to 30 years; set minor tick interval to 1 month + month_interval = 1 + month_locator = mdates.MonthLocator(interval=month_interval) formatter = mdates.ConciseDateFormatter(locator) ax.xaxis.set_major_locator(locator) ax.xaxis.set_major_formatter(formatter) ax.xaxis.set_minor_locator(month_locator) - plt.title(expt_name) + + if expt_name == 'ufs_replay_ext': + plot_title = f'{experiments[0]["graph_label"]} ({UFS_REPLAY_BUCKET})' + else: + plot_title = f'{experiments[0]["graph_label"]} ({expt_name})' + plt.title(plot_title, loc='left') format_figure(ax, pa) fig_fn = build_fig_dest(work_dir, fig_base_fn, metric, date_range, - experiment_name=expt_name) + experiment_name=expt_name, append_date_range=append_date_range) + plt.tight_layout() save_figure(fig_fn) plt.close() @@ -404,6 +435,7 @@ def submit(self): finished = False loop_count = 0 + for 
stat_group in self.stat_groups: metrics_data = [] # gather experiment metrics data for experiment and date range @@ -427,7 +459,8 @@ def submit(self): m_df, self.work_dir, self.fig_base_fn, - self.date_range) + self.date_range, + append_date_range=False) if __name__=='__main__': args = parse_arguments() @@ -439,13 +472,14 @@ def submit(self): style_file_path = os.path.join(pathlib.Path(__file__).parent.parent.resolve(), 'style_lib', style_file) plt.style.use(style_file_path) - for i, plot_control_dict in enumerate([plot_control_dict1, - plot_control_dict2, - plot_control_dict3, - plot_control_dict4, - plot_control_dict5, - plot_control_dict6, - plot_control_dict_ext + for i, plot_control_dict in enumerate([#plot_control_dict1, + #plot_control_dict2, + #plot_control_dict3, + #plot_control_dict4, + #plot_control_dict5, + #plot_control_dict6, + plot_control_dict_ext, + plot_control_dict_forward_ext ]): plot_request = PlotFileCountRequest(plot_control_dict) plot_request.submit() diff --git a/src/score_plotting/core_scripts/plot_increments.py b/src/score_plotting/core_scripts/plot_increments.py index 3719823..24761db 100755 --- a/src/score_plotting/core_scripts/plot_increments.py +++ b/src/score_plotting/core_scripts/plot_increments.py @@ -22,6 +22,7 @@ from score_plotting.core_scripts.plot_innov_stats import PlotInnovStatsRequest HOURS_PER_DAY = 24. 
# hours +UFS_REPLAY_BUCKET = 'noaa-ufs-gefsv13replay-pds' def parse_arguments(): parser = argparse.ArgumentParser() @@ -60,7 +61,7 @@ def parse_arguments(): 'db_request_name': 'expt_metrics', 'method': 'GET', 'experiments': [{'graph_color': 'black', - 'graph_label': 'increments', + 'graph_label': 'increment', 'name': 'replay_stream1', 'wallclock_start': '2023-07-08 16:25:57'}], 'fig_base_fn': 'increment', @@ -82,7 +83,7 @@ def parse_arguments(): 'db_request_name': 'expt_metrics', 'method': 'GET', 'experiments': [{'graph_color': 'black', - 'graph_label': 'increments', + 'graph_label': 'increment', 'name': 'replay_stream2', 'wallclock_start': '2023-07-24 17:56:40'}], 'fig_base_fn': 'increment', @@ -104,7 +105,7 @@ def parse_arguments(): 'db_request_name': 'expt_metrics', 'method': 'GET', 'experiments': [{'graph_color': 'black', - 'graph_label': 'increments', + 'graph_label': 'increment', 'name': 'replay_stream3', 'wallclock_start': '2023-01-22 09:22:05'}], 'fig_base_fn': 'increment', @@ -125,7 +126,7 @@ def parse_arguments(): 'db_request_name': 'expt_metrics', 'method': 'GET', 'experiments': [{'graph_color': 'black', - 'graph_label': 'increments', + 'graph_label': 'increment', 'name': 'replay_stream4', 'wallclock_start': '2023-01-22 09:22:05'}], 'fig_base_fn': 'increment', @@ -146,7 +147,7 @@ def parse_arguments(): 'db_request_name': 'expt_metrics', 'method': 'GET', 'experiments': [{'graph_color': 'black', - 'graph_label': 'increments', + 'graph_label': 'increment', 'name': 'replay_stream5', 'wallclock_start': '2023-07-08 06:20:22'}], 'fig_base_fn': 'increment', @@ -167,7 +168,7 @@ def parse_arguments(): 'db_request_name': 'expt_metrics', 'method': 'GET', 'experiments': [{'graph_color': 'black', - 'graph_label': 'increments', + 'graph_label': 'increment', 'name': 'replay_stream6', 'wallclock_start': '2023-07-24 20:29:23'}], 'fig_base_fn': 'increment', @@ -183,13 +184,35 @@ def parse_arguments(): 'metric_type_{stat}_{metric}'}], 'work_dir': 
parse_arguments().figure_output_path} +plot_control_dict_forward_ext = {'date_range': {'datetime_str': '%Y-%m-%d %H:%M:%S', + 'start': '2023-10-01 00:00:00', + 'end': '2025-09-30 00:00:00'}, + 'db_request_name': 'expt_metrics', + 'method': 'GET', + 'experiments': [{'graph_color': 'black', + 'graph_label': 'increment', + 'name': 'ufs_replay_ext', + 'wallclock_start': '2024-10-01 00:00:00'}], + 'fig_base_fn': 'forward_ext_increment', + 'stat_groups': [{'cycles': [0, 21600, 43200, 64800], + 'stats': ['mean', 'RMS'], + 'metrics': ['pt_inc', 's_inc','u_inc_ocn', + 'v_inc_ocn', 'u_inc_atm','v_inc_atm', + 'SSH', 'Salinity', 'Temperature', + 'Speed of Currents', 'o3mr_inc', + 'sphum_inc', 'T_inc', 'delp_inc', + 'delz_inc'], + 'stat_group_frmt_str': + 'metric_type_{stat}_{metric}'}], + 'work_dir': parse_arguments().figure_output_path} + plot_control_dict_ext = {'date_range': {'datetime_str': '%Y-%m-%d %H:%M:%S', - 'start': '2020-10-01 00:00:00', + 'start': '1978-10-01 00:00:00', 'end': '2025-09-30 00:00:00'}, 'db_request_name': 'expt_metrics', 'method': 'GET', 'experiments': [{'graph_color': 'black', - 'graph_label': 'increments', + 'graph_label': 'increment', 'name': 'ufs_replay_ext', 'wallclock_start': '2024-10-01 00:00:00'}], 'fig_base_fn': 'increment', @@ -302,17 +325,17 @@ def build_fig_dest(work_dir, fig_base_fn, stat, metric, date_range, def save_figure(dest_full_path): print(f'saving figure to {dest_full_path}') - plt.savefig(dest_full_path, dpi=600) + plt.savefig(dest_full_path, dpi=300) def plot_increments(experiments, stat, metric, metrics_df, work_dir, fig_base_fn, - date_range): + date_range, append_date_range=False): args = parse_arguments() if args.dark_theme: default_plot_color = 'white'#'#CFB87C' fill_color = '#565A5C' else: - default_plot_color = 'black' + default_plot_color = experiments[0]['graph_color'] fill_color = '#A2A4A3' time_domain = pd.Series( @@ -341,10 +364,6 @@ def plot_increments(experiments, stat, metric, metrics_df, work_dir, fig_base_fn 
metrics_to_show = metrics_df.drop_duplicates(subset='time_valid', keep='last') expt_name = experiments[0]['name']['exact'] - expt_graph_label = experiments[0]['graph_label'] - - if "_inc" not in metric: - expt_graph_label = stat timestamps = list() labels = list() @@ -383,8 +402,7 @@ def plot_increments(experiments, stat, metric, metrics_df, work_dir, fig_base_fn values_timeseries = pd.Series( data = values, index = timestamps - ).combine_first(time_domain) - + ).combine_first(time_domain) plt.fill_between( values_timeseries.index, @@ -428,25 +446,103 @@ def plot_increments(experiments, stat, metric, metrics_df, work_dir, fig_base_fn zorder=4) format_figure(ax, pa) - if stat == 'RMS': + ''' + ymin, ymax = ax.get_ylim() + if stat == 'RMS' and ymin < 0: ax.set_ylim(0, None) + if stat == 'RMS' and np.nanmin(values) > 0: + ax.set_ylim(np.nanmin(values, None)) + if "_inc" not in metric and np.nanmin(values) > 0: + ax.set_ylim(np.nanmin(values, None)) + ''' + ax.set_ylim(np.nanmin(values), np.nanmax(values)) - plt.title(stat+" "+metric+" " +expt_name, loc = "left") + if "_inc" not in metric: + # this is not an increment + if row.metric_unit == 'C': + if metric=='pt': + plot_basic_title = 'ocean potential temperature' + expt_graph_label = 'Potential temperature ($^\circ$C)' + elif metric=='Temperature': + plot_basic_title = 'ocean temperature' + expt_graph_label = 'Temperature ($^\circ$C)' + elif row.metric_unit == 'm': + if metric =='SSH': + plot_basic_title = 'sea surface height (SSH)' + expt_graph_label = 'SSH (m)' + elif row.metric_unit == 'PSU': + plot_basic_title = 'ocean salinity' + expt_graph_label = 'Salinity (psu)' + elif row.metric_unit == 'm/s': + if metric=='Speed of Currents': + plot_basic_title = 'speed of currents' + expt_graph_label = 'Speed (m s$^{-1}$)' + else: + if row.metric_unit == 'C': + if metric=='pt_inc': + plot_basic_title = 'ocean potential temperature increment' + expt_graph_label = 'Potential temperature increment ($^\circ$C)' + elif 
metric=='T_inc': + plot_basic_title = 'air temperature increment' + expt_graph_label = 'Temperature increment ($^\circ$C)' + elif row.metric_unit == 'PSU': + plot_basic_title = 'ocean salinity increment' + expt_graph_label = 'Salinity increment (psu)' + elif row.metric_unit == 'm/s': + expt_graph_label = 'Speed increment (m s$^{-1}$)' + if metric=='u_inc_atm': + plot_basic_title = 'westerly wind speed increment' + elif metric=='v_inc_atm': + plot_basic_title = 'southerly wind speed increment' + elif metric=='u_inc_ocn': + plot_basic_title = 'horizontal x-direction ocean speed increment' + elif metric=='v_inc_ocn': + plot_basic_title = 'horizontal y-direction ocean speed increment' + elif row.metric_unit == 'kg/kg': + if metric=='o3mr_inc': + plot_basic_title = 'ozone mixing ratio increment' + expt_graph_label = 'Mixing ratio increment (kg kg$^{-1}$)' + elif metric=='sphum_inc': + plot_basic_title = 'specific humidity increment' + expt_graph_label = 'Specific humidity increment (kg kg$^{-1}$)' + if metric=='delp_inc': + plot_basic_title = 'pressure differential increment' + expt_graph_label = 'Pressure differential increment (Pa)' + elif row.metric_unit == 'm': + if metric=='delz_inc': + plot_basic_title = 'geometric layer height differential increment' + expt_graph_label = 'Layer height differential increment (m)' + + if expt_name == 'ufs_replay_ext': + plot_expt_title = UFS_REPLAY_BUCKET + else: + plot_expt_title = expt_name + + plt.title(f'Global {stat} {plot_basic_title}', loc='left') today = date.today() - plt.title(today, loc = "right") - - plt.ylabel(expt_graph_label+" ("+row.metric_unit+")") + fig.suptitle(plot_expt_title, x=0.02, ha='left') + plt.title(today, loc='right') + plt.ylabel(expt_graph_label) + + locator = mdates.AutoDateLocator(minticks=8, maxticks=16) - locator = mdates.AutoDateLocator(minticks=5, maxticks=10) - month_locator = mdates.MonthLocator() + if pd.Timedelta(date_range.end - date_range.start) > pd.Timedelta(days=10957): + # date range 
is greater than 30 years; set minor tick interval to 3 months + month_interval = 3 + else: + # date range is less than or equal to 30 years; set minor tick interval to 1 month + month_interval = 1 + + month_locator = mdates.MonthLocator(interval=month_interval) formatter = mdates.ConciseDateFormatter(locator) ax.xaxis.set_major_locator(locator) ax.xaxis.set_major_formatter(formatter) ax.xaxis.set_minor_locator(month_locator) fig_fn = build_fig_dest(work_dir, fig_base_fn, stat, metric, date_range, - experiment_name=expt_name) + experiment_name=expt_name, append_date_range=append_date_range) + plt.tight_layout() save_figure(fig_fn) plt.close() @@ -489,7 +585,8 @@ def submit(self): m_df, self.work_dir, self.fig_base_fn, - self.date_range) + self.date_range, + append_date_range=False) if __name__=='__main__': args = parse_arguments() @@ -508,6 +605,7 @@ def submit(self): #plot_control_dict4, #plot_control_dict5, #plot_control_dict6 - plot_control_dict_ext]): + plot_control_dict_ext, + plot_control_dict_forward_ext]): plot_request = PlotIncrementRequest(plot_control_dict) plot_request.submit() diff --git a/src/score_plotting/style_lib/dark_theme.mplstyle b/src/score_plotting/style_lib/dark_theme.mplstyle index abc53d0..4a89031 100644 --- a/src/score_plotting/style_lib/dark_theme.mplstyle +++ b/src/score_plotting/style_lib/dark_theme.mplstyle @@ -14,7 +14,7 @@ hatch.color: white ### FONT font.family: serif font.serif: Noto Serif CJK JP -font.size: 11 +font.size: 12 ## *************************************************************************** ## * TEXT * @@ -25,7 +25,7 @@ font.size: 11 text.color: A2A4A3 ### FIGURE -figure.titlesize : 11 +figure.titlesize : 12 ## *************************************************************************** ## * AXES * @@ -36,8 +36,8 @@ figure.titlesize : 11 axes.facecolor: black # axes background color axes.edgecolor: A2A4A3 # axes edge color axes.labelcolor: A2A4A3 -axes.titlesize : 11 -axes.labelsize : 11 +axes.titlesize : 12 
+axes.labelsize : 12 ## *************************************************************************** @@ -46,8 +46,8 @@ axes.labelsize : 11 ## See https://matplotlib.org/api/axis_api.html#matplotlib.axis.Tick xtick.color: A2A4A3 # color of the tick labels ytick.color: A2A4A3 # color of the tick labels -xtick.labelsize : 11 -ytick.labelsize : 11 +xtick.labelsize : 12 +ytick.labelsize : 12 ## *************************************************************************** ## * GRIDS * @@ -79,4 +79,4 @@ savefig.edgecolor: black # figure edgecolor when saving savefig.format: png # {png, ps, pdf, svg} ## Legend -legend.fontsize : 11 +legend.fontsize : 12 diff --git a/src/score_plotting/style_lib/half_horizontal.mplstyle b/src/score_plotting/style_lib/half_horizontal.mplstyle index 77c9878..1dfb3ce 100644 --- a/src/score_plotting/style_lib/half_horizontal.mplstyle +++ b/src/score_plotting/style_lib/half_horizontal.mplstyle @@ -1,8 +1,8 @@ #### matplotlib style file for AGU figures ### FONT -font.size: 11 -font.family: sans-serif +font.size: 12 +font.family: serif font.sans-serif: FreeSans, Roboto, Arial, Helvetica font.serif: Noto Serif CJK JP @@ -10,18 +10,18 @@ font.serif: Noto Serif CJK JP #text.usetex : True ### AXES -axes.titlesize : 11 -axes.labelsize : 11 +axes.titlesize : 12 +axes.labelsize : 12 ### TICKS -xtick.labelsize : 11 -ytick.labelsize : 11 +xtick.labelsize : 12 +ytick.labelsize : 12 ### Legend -legend.fontsize : 11 +legend.fontsize : 12 ### FIGURE -figure.titlesize : 11 +figure.titlesize : 12 figure.figsize : 7.48, 4.53 ### SAVING FIGURES From d36628a3ad1052f6128894511f3c14c657917330 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Mon, 7 Apr 2025 22:21:23 +0000 Subject: [PATCH 32/44] increased number of tasks from 12 to 16 for all satellite sensor fit to obs batch plotting --- .../plot_gsi_radiance_fit_to_obs.sbatch | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) mode change 100644 => 100755 
src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch old mode 100644 new mode 100755 index 1d8f324..1b00647 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch @@ -2,16 +2,17 @@ #SBATCH --job-name=gsi_radiance_plots # Name of the job #SBATCH --output=plot_gsi%j.log # Output log file with job ID #SBATCH --ntasks=12 -#SBATCH --ntasks-per-node=12 # Number of tasks (CPUs) per node +#SBATCH --ntasks-per-node=16 # Number of tasks (CPUs) per node #SBATCH --cpus-per-task=2 #SBATCH --nodes=1 # Number of nodes (adjust based on your requirements) #SBATCH --time=48:00:00 # Max wall time (adjust as needed) -#SBATCH --partition=batch # Partition to use +#SBATCH --partition=bigmem #batch # Partition to use #SBATCH --mail-type=ALL # Send an email when the job starts, ends, or fails -#SBATCH --mail-user=Adam.Schneider@noaa.gov # Email address for notifications - -#micromamba activate darr_score_sqlalchemy1_env +#SBATCH --mail-user=Chesley.Mccoll@noaa.gov # Email address for notifications +source $HOME/.bashrc +micromamba activate darr_score_sqlalchemy1_env +which mpirun # Run the MPI Python script using srun -mpirun python plot_gsi_radiance_fit_to_obs.py /media/darr/results/figures/brightness_temperature_error_timeseries2 +/media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/mpirun /media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/python /media/darr/src/score-plotting/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py /media/darr/results/figures/brightness_temperature_error_timeseries From 2fa3b12c533f9c73bebe7993f7c9b1cd22a0c24e Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Tue, 8 Apr 2025 18:46:46 +0000 Subject: [PATCH 33/44] gsi radiance monitoring plot changes, including 
increased nominal text size to 12 and time axis label adjustments --- .../core_scripts/plot_gsi_radiance_fit_to_obs.py | 5 +++-- src/score_plotting/style_lib/full_3x3pg.mplstyle | 16 ++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index 266ca8d..b8216d6 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -276,8 +276,9 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, alpha_background=0.5, interactive=False): output_dir = os.path.join(self.config_dict['output_path'], f"{sensor}") - locator = mdates.AutoDateLocator(minticks=5, maxticks=10) + locator = mdates.AutoDateLocator(minticks=8, maxticks=16) formatter = mdates.ConciseDateFormatter(locator) + month_locator = mdates.MonthLocator(interval=1) # Check if the directory exists, and create it if it doesn't @@ -704,9 +705,9 @@ def make_figures(self, sensor, ncols=3, init_datetime=None, if nobs_ylims[0] < 0: axes[row, 2].set_ylim(bottom=0) + fig.suptitle(f"{title_str0}{title_str1}") plt.tight_layout() plt.subplots_adjust(top = 1. 
- 1.2 / figsize_length) - fig.suptitle(f"{title_str0}{title_str1}") if interactive: plt.show() else: diff --git a/src/score_plotting/style_lib/full_3x3pg.mplstyle b/src/score_plotting/style_lib/full_3x3pg.mplstyle index 141eb91..c58967e 100644 --- a/src/score_plotting/style_lib/full_3x3pg.mplstyle +++ b/src/score_plotting/style_lib/full_3x3pg.mplstyle @@ -1,8 +1,8 @@ #### matplotlib style file for AGU figures ### FONT -font.size : 11 -font.family: sans-serif +font.size : 12 +font.family: serif font.sans-serif: FreeSans, Roboto, Arial font.serif: Noto Serif CJK JP @@ -10,18 +10,18 @@ font.serif: Noto Serif CJK JP #text.usetex : True ### AXES -axes.titlesize : 11 -axes.labelsize : 11 +axes.titlesize : 12 +axes.labelsize : 12 ### TICKS -xtick.labelsize : 11 -ytick.labelsize : 11 +xtick.labelsize : 12 +ytick.labelsize : 12 ### Legend -legend.fontsize : 11 +legend.fontsize : 12 ### FIGURE -figure.titlesize : 11 +figure.titlesize : 12 figure.figsize : 24, 27.18 ### SAVING FIGURES From 32f2843799322cb9548d7f6db1688e9753ffe1c7 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Fri, 11 Apr 2025 22:19:51 +0000 Subject: [PATCH 34/44] python wrapper and sbatch scripts for GSI radiance figures to generate subset of sensors for more frequent (i.e., daily) updates --- src/score_plotting/core_scripts/__init__.py | 0 .../core_scripts/instrument_channel_nums.py | 0 .../plot_gsi_radiance_fit_to_obs.py | 8 ++--- .../plot_gsi_radiance_fit_to_obs.sbatch | 2 +- ...ot_gsi_radiance_fit_to_obs_daily_update.py | 32 +++++++++++++++++++ ...si_radiance_fit_to_obs_daily_update.sbatch | 18 +++++++++++ .../core_scripts/satellite_names.py | 0 7 files changed, 54 insertions(+), 6 deletions(-) mode change 100644 => 100755 src/score_plotting/core_scripts/__init__.py mode change 100644 => 100755 src/score_plotting/core_scripts/instrument_channel_nums.py create mode 100755 src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.py create mode 100755 
src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.sbatch mode change 100644 => 100755 src/score_plotting/core_scripts/satellite_names.py diff --git a/src/score_plotting/core_scripts/__init__.py b/src/score_plotting/core_scripts/__init__.py old mode 100644 new mode 100755 diff --git a/src/score_plotting/core_scripts/instrument_channel_nums.py b/src/score_plotting/core_scripts/instrument_channel_nums.py old mode 100644 new mode 100755 diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index b8216d6..9b2bda6 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -15,14 +15,12 @@ import pandas as pd from mpi4py import MPI -import gsistats_timeseries -from instrument_channel_nums import get_instrument_channels -import satellite_names +from score_plotting.core_scripts import gsistats_timeseries +from score_plotting.core_scripts.instrument_channel_nums import get_instrument_channels +from score_plotting.core_scripts import satellite_names HOURS_PER_DAY = 24. # hours -import argparse - def config(): """Add experiment name entries to experiment_list and experiment_plot_dict. 
The order of experiment_list is used diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch index 1b00647..e4d1027 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch @@ -1,7 +1,7 @@ #!/bin/bash #SBATCH --job-name=gsi_radiance_plots # Name of the job #SBATCH --output=plot_gsi%j.log # Output log file with job ID -#SBATCH --ntasks=12 +#SBATCH --ntasks=16 #SBATCH --ntasks-per-node=16 # Number of tasks (CPUs) per node #SBATCH --cpus-per-task=2 #SBATCH --nodes=1 # Number of nodes (adjust based on your requirements) diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.py new file mode 100755 index 0000000..4496b4a --- /dev/null +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python + +""" wrapper to call plot_gsi_radiance_fit_to_obs figure +generation for subset of sensors to be updated daily +""" + +from score_plotting.core_scripts import plot_gsi_radiance_fit_to_obs + +def main(): + plot_gsi_radiance_fit_to_obs.prun( + sensor_list = [ + # microwave sounders: + 'amsua', + 'amsub', + 'atms', + 'ssmi', + 'ssmis', + # infrared sounders: + 'airs', + # TIROS operational vertical sounders (TOVS): + 'hirs2', + 'hirs3', + 'hirs4', + 'ssu', + 'msu', + # Advanced Very-High-Resolution Radiometers: + 'avhrr2', + 'avhrr3' + ] + ) +if __name__=='__main__': + main() diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.sbatch b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.sbatch new file mode 100755 index 0000000..bf8d386 --- /dev/null +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.sbatch @@ -0,0 +1,18 @@ +#!/bin/bash
+#SBATCH --job-name=gsi_radiance_plots_daily_update # Name of the job +#SBATCH --output=plot_gsi%j.log # Output log file with job ID +#SBATCH --ntasks=12 +#SBATCH --ntasks-per-node=12 # Number of tasks (CPUs) per node +#SBATCH --cpus-per-task=2 +#SBATCH --nodes=1 # Number of nodes (adjust based on your requirements) +#SBATCH --time=6:00:00 # Max wall time (adjust as needed) +#SBATCH --partition=medmem #batch # Partition to use +#SBATCH --mail-type=ALL # Send an email when the job starts, ends, or fails +#SBATCH --mail-user=Chesley.Mccoll@noaa.gov # Email address for notifications + +source $HOME/.bashrc +micromamba activate darr_score_sqlalchemy1_env +which mpirun +# Run the MPI Python script using srun +/media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/mpirun /media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/python /media/darr/src/score-plotting/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.py /media/darr/results/figures/brightness_temperature_error_timeseries + diff --git a/src/score_plotting/core_scripts/satellite_names.py b/src/score_plotting/core_scripts/satellite_names.py old mode 100644 new mode 100755 From 2133a8ebea1a3e21d910398191d11c4a7bb38a88 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Wed, 23 Apr 2025 23:38:47 -0600 Subject: [PATCH 35/44] new scripts and source code changes to add figure generation for replay observer diagnostic overlapping time periods --- MANIFEST.in | 1 + .../plot_gsi_radiance_fit_to_obs.py | 35 ++++++++++++---- ...lot_gsi_radiance_fit_to_obs_rod_overlap.py | 22 ++++++++++ ...gsi_radiance_fit_to_obs_rod_overlap.sbatch | 18 ++++++++ ...nce_fit_to_obs_rod_overlap_daily_update.py | 41 +++++++++++++++++++ ...fit_to_obs_rod_overlap_daily_update.sbatch | 17 ++++++++ .../style_lib/dark_theme.mplstyle | 5 ++- .../style_lib/full_3x3pg.mplstyle | 2 +- .../style_lib/half_horizontal.mplstyle | 2 +- 9 files changed, 130 insertions(+), 13 deletions(-) create mode 100644 MANIFEST.in 
create mode 100755 src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.py create mode 100755 src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch create mode 100755 src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.py create mode 100755 src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..4ef1eb5 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include src/score_plotting/style_lib/*.mplstyle \ No newline at end of file diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index 9b2bda6..f31ca95 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -64,8 +64,12 @@ def config(): 'color' : 'black', 'ls': '-', 'lw': 0.75 - } - + }, + 'replay_observer_diagnostic_overlap' : { + 'color' : 'black', + 'ls': '-', + 'lw': 0.75 + } }, 'sensor_list': get_instrument_channels().keys(), 'start_date': '1978-10-01 00:00:00', @@ -80,6 +84,7 @@ def config(): "NASA_GEOSIT_GSISTATS": "GEOS-IT", "GDAS": "GDAS", "replay_observer_diagnostic_v1": "UFS-replay", + "replay_observer_diagnostic_overlap": "UFS-replay-overlap", "std_GSIstage_1": "STD", "variance_GSIstage_1": "obs error variance", "bias_post_corr_GSIstage_1": "ME", @@ -736,7 +741,7 @@ def run_tovs(sensor_list = ['hirs2', 'hirs3', 'hirs4', 'ssu', 'msu']): def run_avhrr(sensor_list = ['avhrr2', 'avhrr3']): prun(sensor_list=sensor_list) -def prun(sensor_list=None): +def prun(experiment_list=None, sensor_list=None, start_date=None, stop_date=None): args = parse_arguments() gsi_it = args.gsi_stage @@ -747,11 +752,20 @@ def prun(sensor_list=None): # Load global configurations and friendly names global_config_dict, global_friendly_names_dict = config() - + + if 
experiment_list is None: + experiment_list = global_config_dict['experiment_list'] + + if start_date is None: + start_date = global_config_dict['start_date'] + + if stop_date is None: + stop_date = global_config_dict['stop_date'] + if args.sensor != 'all': sensor_list = [args.sensor] - if sensor_list==None: + if sensor_list is None: sensor_list = list() for sensor in global_config_dict['sensor_list']: sensor_list.append(sensor) @@ -764,10 +778,10 @@ def prun(sensor_list=None): # Rank 0 prepares the data if rank == 0: global_data_frame = get_data_frame( - global_config_dict['experiment_list'], + experiment_list, sensor_list, - start_date=global_config_dict['start_date'], - stop_date=global_config_dict['stop_date'], + start_date=start_date, + stop_date=stop_date, select_sat_name = select_sat_name, sat_name=args.satellite, gsi_it=gsi_it) @@ -821,6 +835,9 @@ def prun(sensor_list=None): if args.channel != 9999: experiment_metrics_timeseries_data.channel_dict = {sensor: [args.channel]} experiment_metrics_timeseries_data.config_dict['sensor_list'] = [sensor] + experiment_metrics_timeseries_data.experiment_list = experiment_list + experiment_metrics_timeseries_data.config_dict['start_date'] = start_date + experiment_metrics_timeseries_data.config_dict['stop_date'] = stop_date experiment_metrics_timeseries_data.build_timeseries(interactive_figure=args.interactive, days_to_smooth=args.days_to_smooth, da_cycle=args.da_cycle, @@ -836,4 +853,4 @@ def main(): prun() if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.py new file mode 100755 index 0000000..b37ccff --- /dev/null +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python + +""" wrapper to call plot_gsi_radiance_fit_to_obs figure +generation for a subset of sensors for the 
replay overlap experiment +""" + +from score_plotting.core_scripts import plot_gsi_radiance_fit_to_obs + +def main(): + plot_gsi_radiance_fit_to_obs.prun( + experiment_list=[ + 'NASA_GEOSIT_GSISTATS', + 'GDAS', + 'replay_observer_diagnostic_v1', + 'replay_observer_diagnostic_overlap' + ], + start_date='2018-10-01 00:00:00', + stop_date='2024-09-30 00:00:00' + ) + +if __name__=='__main__': + main() \ No newline at end of file diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch new file mode 100755 index 0000000..6c45406 --- /dev/null +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch @@ -0,0 +1,18 @@ +#!/bin/bash +#SBATCH --job-name=gsi_radiance_plots # Name of the job +#SBATCH --output=plot_gsi%j.log # Output log file with job ID +#SBATCH --ntasks=16 +#SBATCH --ntasks-per-node=16 # Number of tasks (CPUs) per node +#SBATCH --cpus-per-task=2 +#SBATCH --nodes=1 # Number of nodes (adjust based on your requirements) +#SBATCH --time=48:00:00 # Max wall time (adjust as needed) +#SBATCH --partition=bigmem #batch # Partition to use +#SBATCH --mail-type=ALL # Send an email when the job starts, ends, or fails +#SBATCH --mail-user=Chesley.Mccoll@noaa.gov # Email address for notifications + +source $HOME/.bashrc +micromamba activate darr_score_sqlalchemy1_env +which mpirun +# Run the MPI Python script using srun +/media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/mpirun /media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/python /media/darr/src/score-plotting/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.py /media/darr/results/figures/brightness_temperature_error_timeseries/overlap + diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.py 
new file mode 100755 index 0000000..521004b --- /dev/null +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +""" wrapper to call plot_gsi_radiance_fit_to_obs figure +generation for a subset of sensors for the replay overlap experiment +""" + +from score_plotting.core_scripts import plot_gsi_radiance_fit_to_obs + +def main(): + plot_gsi_radiance_fit_to_obs.prun( + experiment_list=[ + 'NASA_GEOSIT_GSISTATS', + 'GDAS', + 'replay_observer_diagnostic_v1', + 'replay_observer_diagnostic_overlap' + ], + sensor_list = [ + # microwave sounders: + 'amsua', + 'amsub', + 'atms', + 'ssmi', + 'ssmis', + # infrared sounders: + 'airs', + # TIROS operational vertical sounders (TOVS): + 'hirs2', + 'hirs3', + 'hirs4', + 'ssu', + 'msu', + # Advanced Very-High-Resolution Radiometers: + 'avhrr2', + 'avhrr3' + ], + start_date='2018-10-01 00:00:00', + stop_date='2024-09-30 00:00:00' + ) + +if __name__=='__main__': + main() \ No newline at end of file diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch new file mode 100755 index 0000000..745ae01 --- /dev/null +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch @@ -0,0 +1,17 @@ +#!/bin/bash +#SBATCH --job-name=gsi_radiance_plots_daily_update # Name of the job +#SBATCH --output=plot_gsi%j.log # Output log file with job ID +#SBATCH --ntasks=12 +#SBATCH --ntasks-per-node=12 # Number of tasks (CPUs) per node +#SBATCH --cpus-per-task=2 +#SBATCH --nodes=1 # Number of nodes (adjust based on your requirements) +#SBATCH --time=6:00:00 # Max wall time (adjust as needed) +#SBATCH --partition=medmem #batch # Partition to use +#SBATCH --mail-type=ALL # Send an email when the job starts, ends, or fails +#SBATCH --mail-user=Chesley.Mccoll@noaa.gov # Email address for
notifications + +source $HOME/.bashrc +micromamba activate darr_score_sqlalchemy1_env +which mpirun +# Run the MPI Python script using srun +/media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/mpirun /media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/python /media/darr/src/score-plotting/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.py /media/darr/results/figures/brightness_temperature_error_timeseries/overlap \ No newline at end of file diff --git a/src/score_plotting/style_lib/dark_theme.mplstyle b/src/score_plotting/style_lib/dark_theme.mplstyle index 7dd90a2..c639f67 100644 --- a/src/score_plotting/style_lib/dark_theme.mplstyle +++ b/src/score_plotting/style_lib/dark_theme.mplstyle @@ -12,7 +12,8 @@ patch.edgecolor: white # if forced, or patch is not filled hatch.color: white ### FONT -font.family: serif +font.family: sans-serif +font.sans-serif: Helvetica Neue font.serif: Noto Serif CJK JP font.size: 12 @@ -79,4 +80,4 @@ savefig.edgecolor: black # figure edgecolor when saving savefig.format: png # {png, ps, pdf, svg} ## Legend -legend.fontsize : 12 \ No newline at end of file +legend.fontsize : 12 diff --git a/src/score_plotting/style_lib/full_3x3pg.mplstyle b/src/score_plotting/style_lib/full_3x3pg.mplstyle index c58967e..d2d2ac7 100644 --- a/src/score_plotting/style_lib/full_3x3pg.mplstyle +++ b/src/score_plotting/style_lib/full_3x3pg.mplstyle @@ -3,7 +3,7 @@ ### FONT font.size : 12 font.family: serif -font.sans-serif: FreeSans, Roboto, Arial +font.sans-serif: Helvetica Neue, FreeSans, Roboto, Arial font.serif: Noto Serif CJK JP ### TEXT diff --git a/src/score_plotting/style_lib/half_horizontal.mplstyle b/src/score_plotting/style_lib/half_horizontal.mplstyle index 1dfb3ce..b86449b 100644 --- a/src/score_plotting/style_lib/half_horizontal.mplstyle +++ b/src/score_plotting/style_lib/half_horizontal.mplstyle @@ -3,7 +3,7 @@ ### FONT font.size: 12 font.family: serif -font.sans-serif: FreeSans, 
Roboto, Arial, Helvetica +font.sans-serif: Helvetica Neue, FreeSans, Roboto, Arial, Helvetica font.serif: Noto Serif CJK JP ### TEXT From 1638860efe4e5d3b5adf41412d540189e3dd8eb6 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Wed, 23 Apr 2025 23:38:47 -0600 Subject: [PATCH 36/44] new scripts and source code changes to add figure generation for replay observer diagnostic overlapping time periods --- .../plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch | 3 +-- ...lot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch index 6c45406..c255e12 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch @@ -14,5 +14,4 @@ source $HOME/.bashrc micromamba activate darr_score_sqlalchemy1_env which mpirun # Run the MPI Python script using srun -/media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/mpirun /media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/python /media/darr/src/score-plotting/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.py /media/darr/results/figures/brightness_temperature_error_timeseries/overlap - +/media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/mpirun /media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/python /media/darr/src/score-plotting/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.py /media/darr/results/figures/brightness_temperature_error_timeseries_rod_overlap \ No newline at end of file diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch index 
745ae01..a240bd2 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch @@ -14,4 +14,4 @@ source $HOME/.bashrc micromamba activate darr_score_sqlalchemy1_env which mpirun # Run the MPI Python script using srun -/media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/mpirun /media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/python /media/darr/src/score-plotting/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.py /media/darr/results/figures/brightness_temperature_error_timeseries/overlap \ No newline at end of file +/media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/mpirun /media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/python /media/darr/src/score-plotting/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.py /media/darr/results/figures/brightness_temperature_error_timeseries_rod_overlap \ No newline at end of file From 67d5d2c1160fde0c69532deb10ae6a719fe6f958 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Thu, 24 Apr 2025 22:39:52 +0000 Subject: [PATCH 37/44] update environment in sbatch submission scripts --- .../core_scripts/plot_gsi_radiance_fit_to_obs.sbatch | 10 +++++----- .../plot_gsi_radiance_fit_to_obs_daily_update.sbatch | 10 +++++----- .../plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch | 10 +++++----- ...radiance_fit_to_obs_rod_overlap_daily_update.sbatch | 10 +++++----- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch index e4d1027..50907ab 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch @@ -10,9 +10,9 @@ #SBATCH --mail-type=ALL # 
Send an email when the job starts, ends, or fails #SBATCH --mail-user=Chesley.Mccoll@noaa.gov # Email address for notifications -source $HOME/.bashrc -micromamba activate darr_score_sqlalchemy1_env -which mpirun -# Run the MPI Python script using srun -/media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/mpirun /media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/python /media/darr/src/score-plotting/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py /media/darr/results/figures/brightness_temperature_error_timeseries +source $HOME/.bash_profile +micromamba activate darr_score_sqlalchemy1_dev_env +#which mpirun +# Run the MPI Python script using mpiexec +mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs /media/darr/results/figures/brightness_temperature_error_timeseries diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.sbatch b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.sbatch index bf8d386..f5d49ca 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.sbatch +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.sbatch @@ -10,9 +10,9 @@ #SBATCH --mail-type=ALL # Send an email when the job starts, ends, or fails #SBATCH --mail-user=Chesley.Mccoll@noaa.gov # Email address for notifications -source $HOME/.bashrc -micromamba activate darr_score_sqlalchemy1_env -which mpirun -# Run the MPI Python script using srun -/media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/mpirun /media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/python /media/darr/src/score-plotting/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.py /media/darr/results/figures/brightness_temperature_error_timeseries +source $HOME/.bash_profile +micromamba activate darr_score_sqlalchemy1_dev_env +#which mpirun +# Run the MPI Python script using mpiexec +mpiexec python -m 
score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs_daily_update /media/darr/results/figures/brightness_temperature_error_timeseries diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch index c255e12..302851b 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch @@ -10,8 +10,8 @@ #SBATCH --mail-type=ALL # Send an email when the job starts, ends, or fails #SBATCH --mail-user=Chesley.Mccoll@noaa.gov # Email address for notifications -source $HOME/.bashrc -micromamba activate darr_score_sqlalchemy1_env -which mpirun -# Run the MPI Python script using srun -/media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/mpirun /media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/python /media/darr/src/score-plotting/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.py /media/darr/results/figures/brightness_temperature_error_timeseries_rod_overlap \ No newline at end of file +source $HOME/.bash_profile +micromamba activate darr_score_sqlalchemy1_dev_env +#which mpiexec +# Run the MPI Python script using mpiexec +mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs_rod_overlap.py /media/darr/results/figures/brightness_temperature_error_timeseries_rod_overlap diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch index a240bd2..15efa20 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch @@ -10,8 +10,8 @@ #SBATCH --mail-type=ALL # Send an email when the job starts, ends, or fails 
#SBATCH --mail-user=Chesley.Mccoll@noaa.gov # Email address for notifications -source $HOME/.bashrc -micromamba activate darr_score_sqlalchemy1_env -which mpirun -# Run the MPI Python script using srun -/media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/mpirun /media/darr/opt/micromamba/envs/darr_score_sqlalchemy1_env/bin/python /media/darr/src/score-plotting/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.py /media/darr/results/figures/brightness_temperature_error_timeseries_rod_overlap \ No newline at end of file +source $HOME/.bash_profile +micromamba activate darr_score_sqlalchemy1_dev_env +#which mpiexec +# Run the MPI Python script using mpiexec +mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update /media/darr/results/figures/brightness_temperature_error_timeseries_rod_overlap From 41f469dfdd51c10d9516455aaa49f8bfc2cf7606 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Tue, 3 Jun 2025 23:01:07 +0000 Subject: [PATCH 38/44] Update font for dark theme figures --- src/score_plotting/style_lib/dark_theme.mplstyle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/score_plotting/style_lib/dark_theme.mplstyle b/src/score_plotting/style_lib/dark_theme.mplstyle index c639f67..1f90420 100644 --- a/src/score_plotting/style_lib/dark_theme.mplstyle +++ b/src/score_plotting/style_lib/dark_theme.mplstyle @@ -12,7 +12,7 @@ patch.edgecolor: white # if forced, or patch is not filled hatch.color: white ### FONT -font.family: sans-serif +font.family: serif font.sans-serif: Helvetica Neue font.serif: Noto Serif CJK JP font.size: 12 From dee3d34c1a47f9a71af852480d6a6a9a60c51984 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Fri, 13 Jun 2025 22:45:29 +0000 Subject: [PATCH 39/44] support for the first weakly coupled scout run experiment; also moved sbatch scripts out of the python package and into a new top level directory named batch_scripts --- 
.../plot_gsi_radiance_fit_to_obs.sbatch | 0 ...si_radiance_fit_to_obs_daily_update.sbatch | 0 ...gsi_radiance_fit_to_obs_rod_overlap.sbatch | 0 ...fit_to_obs_rod_overlap_daily_update.sbatch | 0 ...nce_fit_to_obs_weakly_coupled_scout.sbatch | 17 ++++++++++++++ .../core_scripts/plot_file_counts.py | 20 +++++++++++++++- .../plot_gsi_radiance_fit_to_obs.py | 16 +++++++++---- ...adiance_fit_to_obs_weakly_coupled_scout.py | 23 +++++++++++++++++++ 8 files changed, 71 insertions(+), 5 deletions(-) rename {src/score_plotting/core_scripts => batch_scripts}/plot_gsi_radiance_fit_to_obs.sbatch (100%) rename {src/score_plotting/core_scripts => batch_scripts}/plot_gsi_radiance_fit_to_obs_daily_update.sbatch (100%) rename {src/score_plotting/core_scripts => batch_scripts}/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch (100%) rename {src/score_plotting/core_scripts => batch_scripts}/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch (100%) create mode 100755 batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch create mode 100755 src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.py diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch b/batch_scripts/plot_gsi_radiance_fit_to_obs.sbatch similarity index 100% rename from src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.sbatch rename to batch_scripts/plot_gsi_radiance_fit_to_obs.sbatch diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.sbatch b/batch_scripts/plot_gsi_radiance_fit_to_obs_daily_update.sbatch similarity index 100% rename from src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_daily_update.sbatch rename to batch_scripts/plot_gsi_radiance_fit_to_obs_daily_update.sbatch diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch b/batch_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch similarity index 100% rename from 
src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch rename to batch_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.sbatch diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch b/batch_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch similarity index 100% rename from src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch rename to batch_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.sbatch diff --git a/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch b/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch new file mode 100755 index 0000000..d9e20b2 --- /dev/null +++ b/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch @@ -0,0 +1,17 @@ +#!/bin/bash +#SBATCH --job-name=gsi_radiance_plots # Name of the job +#SBATCH --output=plot_gsi%j.log # Output log file with job ID +#SBATCH --ntasks=16 +#SBATCH --ntasks-per-node=16 # Number of tasks (CPUs) per node +#SBATCH --cpus-per-task=2 +#SBATCH --nodes=1 # Number of nodes (adjust based on your requirements) +#SBATCH --time=48:00:00 # Max wall time (adjust as needed) +#SBATCH --partition=batch # Partition to use +#SBATCH --mail-type=ALL # Send an email when the job starts, ends, or fails +#SBATCH --mail-user=Chesley.Mccoll@noaa.gov # Email address for notifications + +source $HOME/.bash_profile +micromamba activate darr_score_sqlalchemy1_env +#which mpiexec +# Run the MPI Python script using mpiexec +mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs_weakly_coupled_scout /media/darr/results/figures/brightness_temperature_error_timeseries_weakly_coupled_scout --dark_theme diff --git a/src/score_plotting/core_scripts/plot_file_counts.py b/src/score_plotting/core_scripts/plot_file_counts.py index e5fe86e..25541fb 100755 --- a/src/score_plotting/core_scripts/plot_file_counts.py +++ 
b/src/score_plotting/core_scripts/plot_file_counts.py @@ -179,6 +179,23 @@ def parse_arguments(): 'file_{metric}'}], 'work_dir': parse_arguments().figure_output_path} +plot_control_dict_weakly_coupled_scout = {'date_range': {'datetime_str': '%Y-%m-%d %H:%M:%S', + 'end': '1979-02-01 00:00:00', + 'start': '1979-01-01 00:00:00'}, + 'db_request_name': 'expt_metrics', + 'method': 'GET', + 'experiments': [{'graph_color': 'black', + 'graph_label': 'Number of objects uploaded per cycle', + 'name': '3dvar_coupledreanl_scoutrun_1979streamv1_test', + 'wallclock_start': '2025-06-05 17:00:00'}], + 'fig_base_fn': 'files', + 'stat_groups': [{'cycles': [0, 21600, 43200, 64800], + 'metrics': ['count'], + 'stat_group_frmt_str': + 'file_{metric}'}], + 'work_dir': parse_arguments().figure_output_path} + + def get_experiment_file_counts(request_data): expt_metric_name = request_data.metric_format_str.replace( @@ -479,7 +496,8 @@ def submit(self): #plot_control_dict5, #plot_control_dict6, plot_control_dict_ext, - plot_control_dict_forward_ext + plot_control_dict_forward_ext, + plot_control_dict_weakly_coupled_scout, ]): plot_request = PlotFileCountRequest(plot_control_dict) plot_request.submit() diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index f31ca95..6b5f219 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -63,13 +63,18 @@ def config(): 'scout_run_v1' : { 'color' : 'black', 'ls': '-', - 'lw': 0.75 + 'lw': 1.5 }, 'replay_observer_diagnostic_overlap' : { 'color' : 'black', 'ls': '-', 'lw': 0.75 - } + }, + '3dvar_coupledreanl_scoutrun_1979streamv1_test' : { + 'color' : '#003087', + 'ls': '-', + 'lw': 1.25 + } }, 'sensor_list': get_instrument_channels().keys(), 'start_date': '1978-10-01 00:00:00', @@ -80,11 +85,12 @@ def config(): could this be done by string matching for the std/bias etc 
part? we could have a basic friendly dict for that ''' - friendly_names_dict={"scout_run_v1": "scout run (3DVar)", + friendly_names_dict={"scout_run_v1": "atmosphere scout (3DVar)", "NASA_GEOSIT_GSISTATS": "GEOS-IT", "GDAS": "GDAS", "replay_observer_diagnostic_v1": "UFS-replay", "replay_observer_diagnostic_overlap": "UFS-replay-overlap", + "3dvar_coupledreanl_scoutrun_1979streamv1_test":"weakly coupled scout (3DVar)", "std_GSIstage_1": "STD", "variance_GSIstage_1": "obs error variance", "bias_post_corr_GSIstage_1": "ME", @@ -201,6 +207,8 @@ def config_figure_params(self, days_to_smooth=1.): for expt_name in self.config_dict['experiment_plot_dict'].keys(): if self.config_dict['experiment_plot_dict'][expt_name]['color'] == 'black': self.config_dict['experiment_plot_dict'][expt_name]['color'] = self.default_plot_color + elif self.config_dict['experiment_plot_dict'][expt_name]['color'] == '#003087': + self.config_dict['experiment_plot_dict'][expt_name]['color'] = 'white' else: self.default_plot_color = 'black' self.fill_color = '#A2A4A3' @@ -853,4 +861,4 @@ def main(): prun() if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.py new file mode 100755 index 0000000..1d2c8eb --- /dev/null +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +""" wrapper to call plot_gsi_radiance_fit_to_obs figure +generation for a subset of sensors for the replay overlap experiment +""" + +from score_plotting.core_scripts import plot_gsi_radiance_fit_to_obs + +def main(): + plot_gsi_radiance_fit_to_obs.prun( + experiment_list=[ + 'NASA_GEOSIT_GSISTATS', + 'GDAS', + 'replay_observer_diagnostic_v1', + 'scout_run_v1', + '3dvar_coupledreanl_scoutrun_1979streamv1_test' + ], + start_date='1979-01-01 00:00:00', + 
stop_date='1979-02-01 00:00:00' + ) + +if __name__=='__main__': + main() From 99ceeea8dd67dc32e4a6db711c903165b723e56f Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Thu, 26 Jun 2025 21:16:11 +0000 Subject: [PATCH 40/44] corrected MetOp name and added weakly coly coupled scout run to standard list of experiments for radiance monitoring webpage --- batch_scripts/plot_gsi_radiance_fit_to_obs.sbatch | 2 +- ...si_radiance_fit_to_obs_weakly_coupled_scout.sbatch | 2 +- src/score_plotting/attrs/file_counts_plot_attrs.py | 2 +- .../core_scripts/plot_gsi_radiance_fit_to_obs.py | 3 ++- src/score_plotting/core_scripts/satellite_names.py | 11 ++++------- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/batch_scripts/plot_gsi_radiance_fit_to_obs.sbatch b/batch_scripts/plot_gsi_radiance_fit_to_obs.sbatch index 50907ab..d78cc7b 100755 --- a/batch_scripts/plot_gsi_radiance_fit_to_obs.sbatch +++ b/batch_scripts/plot_gsi_radiance_fit_to_obs.sbatch @@ -5,7 +5,7 @@ #SBATCH --ntasks-per-node=16 # Number of tasks (CPUs) per node #SBATCH --cpus-per-task=2 #SBATCH --nodes=1 # Number of nodes (adjust based on your requirements) -#SBATCH --time=48:00:00 # Max wall time (adjust as needed) +#SBATCH --time=12:00:00 # Max wall time (adjust as needed) #SBATCH --partition=bigmem #batch # Partition to use #SBATCH --mail-type=ALL # Send an email when the job starts, ends, or fails #SBATCH --mail-user=Chesley.Mccoll@noaa.gov # Email address for notifications diff --git a/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch b/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch index d9e20b2..26da41d 100755 --- a/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch +++ b/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch @@ -5,7 +5,7 @@ #SBATCH --ntasks-per-node=16 # Number of tasks (CPUs) per node #SBATCH --cpus-per-task=2 #SBATCH --nodes=1 # Number of nodes (adjust based on your requirements) -#SBATCH 
--time=48:00:00 # Max wall time (adjust as needed) +#SBATCH --time=12:00:00 # Max wall time (adjust as needed) #SBATCH --partition=batch # Partition to use #SBATCH --mail-type=ALL # Send an email when the job starts, ends, or fails #SBATCH --mail-user=Chesley.Mccoll@noaa.gov # Email address for notifications diff --git a/src/score_plotting/attrs/file_counts_plot_attrs.py b/src/score_plotting/attrs/file_counts_plot_attrs.py index efc4629..abe71cb 100755 --- a/src/score_plotting/attrs/file_counts_plot_attrs.py +++ b/src/score_plotting/attrs/file_counts_plot_attrs.py @@ -12,7 +12,7 @@ AxesLabel = namedtuple('AxesLabel', ['axis', 'label', 'horizontalalignment']) -DEFAULT_LEGEND_ATTRS = LegendData(loc='upper left', fancybox=None, +DEFAULT_LEGEND_ATTRS = LegendData(loc='upper right', fancybox=None, edgecolor=None, framealpha=None, shadow=None, fontsize='small', facecolor=None) diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index 6b5f219..c92c900 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -41,7 +41,8 @@ def config(): 'experiment_list': ['NASA_GEOSIT_GSISTATS', 'GDAS', 'replay_observer_diagnostic_v1', - 'scout_run_v1' + 'scout_run_v1', + '3dvar_coupledreanl_scoutrun_1979streamv1_test' ], 'experiment_plot_dict': { diff --git a/src/score_plotting/core_scripts/satellite_names.py b/src/score_plotting/core_scripts/satellite_names.py index fab0cb4..311c26f 100755 --- a/src/score_plotting/core_scripts/satellite_names.py +++ b/src/score_plotting/core_scripts/satellite_names.py @@ -17,12 +17,9 @@ def get_satellite_names(): "NOAA 19": {"sat_id": 719, "short_name": "n19"}, "NOAA 20": {"sat_id": 720, "short_name": "n20"}, "NOAA 21": {"sat_id": 721, "short_name": "n21"}, - "METOP-1": {"sat_id": 3, "short_name": "metop-b"}, - "METOP-2": {"sat_id": 4, "short_name": "metop-a"}, - 
"METOP-3": {"sat_id": 5, "short_name": "metop-c"}, - "METOP-1 (Metop-A": {"sat_id": 4, "short_name": "metop-b"}, - "METOP-2 (Metop-B": {"sat_id": 3, "short_name": "metop-a"}, - "METOP-3 (Metop-C": {"sat_id": 5, "short_name": "metop-c"}, + "MetOp-B": {"sat_id": 3, "short_name": "metop-b"}, + "MetOp-A": {"sat_id": 4, "short_name": "metop-a"}, + "MetOp-C": {"sat_id": 5, "short_name": "metop-c"}, "AQUA": {"sat_id": 784, "short_name": "aqua"}, "NPP": {"sat_id": 224, "short_name": "npp"}, "GOES 7": {"sat_id": 251, "short_name": "g07"}, @@ -112,4 +109,4 @@ def get_longname(short_name): for top_level_key, inner_dict in dict_of_dicts.items(): if 'short_name' in inner_dict and inner_dict['short_name'] == short_name: return top_level_key - return short_name # return short_name if no match is found \ No newline at end of file + return short_name # return short_name if no match is found From 3c836a2429904e84e35265f41f7c249273e3bcdb Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Fri, 27 Jun 2025 19:08:44 +0000 Subject: [PATCH 41/44] sat name corrections --- .../core_scripts/satellite_names.py | 193 +++++++++--------- 1 file changed, 97 insertions(+), 96 deletions(-) diff --git a/src/score_plotting/core_scripts/satellite_names.py b/src/score_plotting/core_scripts/satellite_names.py index 311c26f..263038a 100755 --- a/src/score_plotting/core_scripts/satellite_names.py +++ b/src/score_plotting/core_scripts/satellite_names.py @@ -1,106 +1,107 @@ def get_satellite_names(): sats = { - "NOAA 5": {"sat_id": 705, "short_name": "n05"}, - "NOAA 6": {"sat_id": 706, "short_name": "n06"}, - "NOAA 7": {"sat_id": 707, "short_name": "n07"}, - "NOAA 8": {"sat_id": 708, "short_name": "n08"}, - "NOAA 9": {"sat_id": 709, "short_name": "n09"}, - "NOAA 10": {"sat_id": 710, "short_name": "n10"}, - "NOAA 11": {"sat_id": 711, "short_name": "n11"}, - "NOAA 12": {"sat_id": 712, "short_name": "n12"}, - "NOAA 13": {"sat_id": 713, "short_name": "n13"}, - "NOAA 14": {"sat_id": 714, "short_name": "n14"}, - 
"NOAA 15": {"sat_id": 715, "short_name": "n15"}, - "NOAA 16": {"sat_id": 716, "short_name": "n16"}, - "NOAA 17": {"sat_id": 717, "short_name": "n17"}, - "NOAA 18": {"sat_id": 718, "short_name": "n18"}, - "NOAA 19": {"sat_id": 719, "short_name": "n19"}, - "NOAA 20": {"sat_id": 720, "short_name": "n20"}, - "NOAA 21": {"sat_id": 721, "short_name": "n21"}, - "MetOp-B": {"sat_id": 3, "short_name": "metop-b"}, - "MetOp-A": {"sat_id": 4, "short_name": "metop-a"}, + "NOAA-5": {"sat_id": 705, "short_name": "n05"}, + "NOAA-6": {"sat_id": 706, "short_name": "n06"}, + "NOAA-7": {"sat_id": 707, "short_name": "n07"}, + "NOAA-8": {"sat_id": 708, "short_name": "n08"}, + "NOAA-9": {"sat_id": 709, "short_name": "n09"}, + "NOAA-10": {"sat_id": 710, "short_name": "n10"}, + "NOAA-11": {"sat_id": 711, "short_name": "n11"}, + "NOAA-12": {"sat_id": 712, "short_name": "n12"}, + "NOAA-13": {"sat_id": 713, "short_name": "n13"}, + "NOAA-14": {"sat_id": 714, "short_name": "n14"}, + "NOAA-15": {"sat_id": 715, "short_name": "n15"}, + "NOAA-16": {"sat_id": 716, "short_name": "n16"}, + "NOAA-17": {"sat_id": 717, "short_name": "n17"}, + "NOAA-18": {"sat_id": 718, "short_name": "n18"}, + "NOAA-19": {"sat_id": 719, "short_name": "n19"}, + "NOAA-20": {"sat_id": 720, "short_name": "n20"}, + "NOAA-21": {"sat_id": 721, "short_name": "n21"}, + "MetOp-A": {"sat_id": 3, "short_name": "metop-a"}, + "MetOp-B": {"sat_id": 4, "short_name": "metop-b"}, "MetOp-C": {"sat_id": 5, "short_name": "metop-c"}, - "AQUA": {"sat_id": 784, "short_name": "aqua"}, - "NPP": {"sat_id": 224, "short_name": "npp"}, - "GOES 7": {"sat_id": 251, "short_name": "g07"}, - "GOES 8": {"sat_id": 252, "short_name": "g08"}, - "GOES 9": {"sat_id": 253, "short_name": "g09"}, - "GOES 10": {"sat_id": 254, "short_name": "g10"}, - "GOES 11": {"sat_id": 255, "short_name": "g11"}, - "GOES 12": {"sat_id": 256, "short_name": "g12"}, - "GOES 13": {"sat_id": 257, "short_name": "g13"}, - "GOES 14": {"sat_id": 258, "short_name": "g14"}, - "GOES 15": 
{"sat_id": 259, "short_name": "g15"}, - "GOES 16": {"sat_id": 270, "short_name": "g16"}, - "GOES 17": {"sat_id": 271, "short_name": "g17"}, - "GOES 18": {"sat_id": 272, "short_name": "g18"}, - "MTSAT-2": {"sat_id": 172, "short_name": "MTSAT-2"}, - "MTSAT-1R": {"sat_id": 171, "short_name": "MTSAT-1R"}, - "METEOSAT 2": {"sat_id": 59, "short_name": "m02"}, - "METEOSAT 3": {"sat_id": 51, "short_name": "m03"}, - "METEOSAT 4": {"sat_id": 52, "short_name": "m04"}, - "METEOSAT 5": {"sat_id": 53, "short_name": "m05"}, - "METEOSAT 6": {"sat_id": 54, "short_name": "m06"}, - "METEOSAT 7": {"sat_id": 55, "short_name": "m07"}, - "METEOSAT 8": {"sat_id": 56, "short_name": "m08"}, - "METEOSAT 9": {"sat_id": 57, "short_name": "m09"}, - "METEOSAT 10": {"sat_id": 67, "short_name": "m10"}, - "METEOSAT 11": {"sat_id": 70, "short_name": "m11"}, - "DMSP 8": {"sat_id": 241, "short_name": "f08"}, - "DMSP 9": {"sat_id": 242, "short_name": "f09"}, - "DMSP 10": {"sat_id": 243, "short_name": "f10"}, - "DMSP 11": {"sat_id": 244, "short_name": "f11"}, - "DMSP 12": {"sat_id": 245, "short_name": "f12"}, - "DMSP 13": {"sat_id": 246, "short_name": "f13"}, - "DMSP 14": {"sat_id": 247, "short_name": "f14"}, - "DMSP 15": {"sat_id": 248, "short_name": "f15"}, - "DMSP 16": {"sat_id": 249, "short_name": "f16"}, - "DMSP17": {"sat_id": 285, "short_name": "f17"}, - "DMSP18": {"sat_id": 286, "short_name": "f18"}, - "DMSP-19": {"sat_id": 287, "short_name": "f19"}, - "DMSP20": {"sat_id": 'xxx', "short_name": "f20"}, - "CHAMP": {"sat_id": 41, "short_name": "CHAMP"}, - "COSMIC-1": {"sat_id": 740, "short_name": "COSMIC-1"}, - "COSMIC-2": {"sat_id": 741, "short_name": "COSMIC-2"}, - "COSMIC-3": {"sat_id": 742, "short_name": "COSMIC-3"}, - "COSMIC-4": {"sat_id": 743, "short_name": "COSMIC-4"}, - "COSMIC-5": {"sat_id": 744, "short_name": "COSMIC-5"}, - "COSMIC-6": {"sat_id": 745, "short_name": "COSMIC-6"}, - "COSMIC-2 E1": {"sat_id": 750, "short_name": "COSMIC-2 E1"}, - "COSMIC-2 E2": {"sat_id": 751, "short_name": 
"COSMIC-2 E2"}, - "COSMIC-2 E3": {"sat_id": 752, "short_name": "COSMIC-2 E3"}, - "COSMIC-2 E4": {"sat_id": 753, "short_name": "COSMIC-2 E4"}, - "COSMIC-2 E5": {"sat_id": 754, "short_name": "COSMIC-2 E5"}, - "COSMIC-2 E6": {"sat_id": 755, "short_name": "COSMIC-2 E6"}, - "GRACE A": {"sat_id": 722, "short_name": "GRACE A"}, - "GRACE B": {"sat_id": 723, "short_name": "GRACE B"}, - "GRACE C (GRACE-F": {"sat_id": 803, "short_name": "GRACE C"}, - "GRACE D (GRACE-F": {"sat_id": 804, "short_name": "GRACE D"}, - "SAC-C": {"sat_id": 820, "short_name": "SAC C"}, - "TerraSAR-X": {"sat_id": 42, "short_name": "TerraSAR-X"}, - "TERRA": {"sat_id": 783, "short_name": "TERRA"}, - "ERS 2": {"sat_id": 2, "short_name": "ERS 2"}, - "GMS 3": {"sat_id": 150, "short_name": "GMS 3"}, - "GMS 4": {"sat_id": 151, "short_name": "GMS 4"}, - "GMS 5": {"sat_id": 152, "short_name": "GMS 5"}, - "INSAT 3A": {"sat_id": 470, "short_name": "INSAT 3A"}, - "INSAT 3D": {"sat_id": 471, "short_name": "INSAT 3D"}, - "INSAT 3DR": {"sat_id": 472, "short_name": "INSAT 3DR"}, + "Aqua": {"sat_id": 784, "short_name": "aqua"}, + "Suomi NPP": {"sat_id": 224, "short_name": "npp"}, + "GOES-7": {"sat_id": 251, "short_name": "g07"}, + "GOES-8": {"sat_id": 252, "short_name": "g08"}, + "GOES-9": {"sat_id": 253, "short_name": "g09"}, + "GOES-10": {"sat_id": 254, "short_name": "g10"}, + "GOES-11": {"sat_id": 255, "short_name": "g11"}, + "GOES-12": {"sat_id": 256, "short_name": "g12"}, + "GOES-13": {"sat_id": 257, "short_name": "g13"}, + "GOES-14": {"sat_id": 258, "short_name": "g14"}, + "GOES-15": {"sat_id": 259, "short_name": "g15"}, + "GOES-16": {"sat_id": 270, "short_name": "g16"}, + "GOES-17": {"sat_id": 271, "short_name": "g17"}, + "GOES-18": {"sat_id": 272, "short_name": "g18"}, + #"MTSAT-2": {"sat_id": 172, "short_name": "MTSAT-2"}, + #"MTSAT-1R": {"sat_id": 171, "short_name": "MTSAT-1R"}, + "Meteosat-2": {"sat_id": 59, "short_name": "m02"}, + "Meteosat-3": {"sat_id": 51, "short_name": "m03"}, + "Meteosat-4": 
{"sat_id": 52, "short_name": "m04"}, + "Meteosat-5": {"sat_id": 53, "short_name": "m05"}, + "Meteosat-6": {"sat_id": 54, "short_name": "m06"}, + "Meteosat-7": {"sat_id": 55, "short_name": "m07"}, + "Meteosat-8": {"sat_id": 56, "short_name": "m08"}, + "Meteosat-9": {"sat_id": 57, "short_name": "m09"}, + "Meteosat-10": {"sat_id": 67, "short_name": "m10"}, + "Meteosat-11": {"sat_id": 70, "short_name": "m11"}, + "DMSP-F08": {"sat_id": 241, "short_name": "f08"}, + "DMSP-F09": {"sat_id": 242, "short_name": "f09"}, + "DMSP-F10": {"sat_id": 243, "short_name": "f10"}, + "DMSP-F11": {"sat_id": 244, "short_name": "f11"}, + "DMSP-F12": {"sat_id": 245, "short_name": "f12"}, + "DMSP-F13": {"sat_id": 246, "short_name": "f13"}, + "DMSP-F14": {"sat_id": 247, "short_name": "f14"}, + "DMSP-F15": {"sat_id": 248, "short_name": "f15"}, + "DMSP-F16": {"sat_id": 249, "short_name": "f16"}, + "DMSP-F17": {"sat_id": 285, "short_name": "f17"}, + "DMSP-F18": {"sat_id": 286, "short_name": "f18"}, + "DMSP-F19": {"sat_id": 39630, "short_name": "f19"}, + "DMSP-F20": {"sat_id": 41705, "short_name": "f20"}, + #"CHAMP": {"sat_id": 41, "short_name": "CHAMP"}, + #"COSMIC-1": {"sat_id": 740, "short_name": "COSMIC-1"}, + #"COSMIC-2": {"sat_id": 741, "short_name": "COSMIC-2"}, + #"COSMIC-3": {"sat_id": 742, "short_name": "COSMIC-3"}, + #"COSMIC-4": {"sat_id": 743, "short_name": "COSMIC-4"}, + #"COSMIC-5": {"sat_id": 744, "short_name": "COSMIC-5"}, + #"COSMIC-6": {"sat_id": 745, "short_name": "COSMIC-6"}, + #"COSMIC-2 E1": {"sat_id": 750, "short_name": "COSMIC-2 E1"}, + #"COSMIC-2 E2": {"sat_id": 751, "short_name": "COSMIC-2 E2"}, + #"COSMIC-2 E3": {"sat_id": 752, "short_name": "COSMIC-2 E3"}, + #"COSMIC-2 E4": {"sat_id": 753, "short_name": "COSMIC-2 E4"}, + #"COSMIC-2 E5": {"sat_id": 754, "short_name": "COSMIC-2 E5"}, + #"COSMIC-2 E6": {"sat_id": 755, "short_name": "COSMIC-2 E6"}, + #"GRACE A": {"sat_id": 722, "short_name": "GRACE A"}, + #"GRACE B": {"sat_id": 723, "short_name": "GRACE B"}, + #"GRACE C 
(GRACE-F": {"sat_id": 803, "short_name": "GRACE C"}, + #"GRACE D (GRACE-F": {"sat_id": 804, "short_name": "GRACE D"}, + #"SAC-C": {"sat_id": 820, "short_name": "SAC C"}, + #"TerraSAR-X": {"sat_id": 42, "short_name": "TerraSAR-X"}, + #"Terra": {"sat_id": 783, "short_name": "TERRA"}, + #"ERS 2": {"sat_id": 2, "short_name": "ERS 2"}, + #"GMS-3": {"sat_id": 150, "short_name": "GMS 3"}, + #"GMS-4": {"sat_id": 151, "short_name": "GMS 4"}, + #"GMS-5": {"sat_id": 152, "short_name": "GMS 5"}, + #"INSAT 3A": {"sat_id": 470, "short_name": "INSAT 3A"}, + #"INSAT 3D": {"sat_id": 471, "short_name": "INSAT 3D"}, + #"INSAT 3DR": {"sat_id": 472, "short_name": "INSAT 3DR"}, "TIROS-N": {"sat_id": 254, "short_name": "tirosn"}, "Megha-Tropiques": {"sat_id": 367, "short_name": "meghat"}, - "TanDEM-X": {"sat_id": 551, "short_name": "TanDEM-X"}, - "PAZ": {"sat_id": 431, "short_name": "PAZ"}, - "KOMPSAT-5": {"sat_id": 536, "short_name": "KOMPSAT-5"}, - "LANDSAT 5": {"sat_id": 207, "short_name": "LANDSAT 5"}, - "GPM-core": {"sat_id": 371, "short_name": "gpm"}, - "TRMM": {"sat_id": 241, "short_name": "TRMM"}, + #"TanDEM-X": {"sat_id": 551, "short_name": "TanDEM-X"}, + #"PAZ": {"sat_id": 431, "short_name": "PAZ"}, + #"KOMPSAT-5": {"sat_id": 536, "short_name": "KOMPSAT-5"}, + #"LANDSAT 5": {"sat_id": 207, "short_name": "LANDSAT 5"}, + "GPM Core Observatory": {"sat_id": 371, "short_name": "gpm"}, + "TRMM": {"sat_id": 241, "short_name": "trmm"}, "Himawari-8": {"sat_id": 370, "short_name": "himawari8"}, "Himawari-9": {"sat_id": 372, "short_name": "himawari9"}, - "Spire Lemur 3U C": {"sat_id": 409, "short_name": "Spire L3UC"}, - "Sentinel 6A": {"sat_id": 835, "short_name": "Sentinel 6A"}, - "PlanetiQ GNOMES-": {"sat_id": 687, "short_name": "PlanetiQ GNOMES"}, - "AURA": {"sat_id": 296, "short_name": "AURA"}, - "NIMBUS 7": {"sat_id": 16, "short_name": "nim07"} + #"Spire Lemur 3U C": {"sat_id": 409, "short_name": "Spire L3UC"}, + #"Sentinel 6A": {"sat_id": 835, "short_name": "Sentinel 6A"}, + 
#"PlanetiQ GNOMES-": {"sat_id": 687, "short_name": "PlanetiQ GNOMES"}, + #"Aura": {"sat_id": 296, "short_name": "AURA"}, + #"NIMBUS 7": {"sat_id": 16, "short_name": "nim07"}, + "GCOM-W": {"sat_id":'xxx', "short_name": "gcom-w1"}, } return sats From 9c7aa91f56fac0ab75f5e1c70f1df61c40924e6b Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Thu, 10 Jul 2025 23:14:21 +0000 Subject: [PATCH 42/44] Adjusted figure end dates for default plotting scripts --- ...plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch | 2 +- .../core_scripts/plot_gsi_radiance_fit_to_obs.py | 2 +- .../plot_gsi_radiance_fit_to_obs_rod_overlap.py | 6 +++--- ...plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.py | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch b/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch index 26da41d..0688d1a 100755 --- a/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch +++ b/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch @@ -14,4 +14,4 @@ source $HOME/.bash_profile micromamba activate darr_score_sqlalchemy1_env #which mpiexec # Run the MPI Python script using mpiexec -mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs_weakly_coupled_scout /media/darr/results/figures/brightness_temperature_error_timeseries_weakly_coupled_scout --dark_theme +mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs_weakly_coupled_scout /media/darr/results/figures/brightness_temperature_error_timeseries_weakly_coupled_scout diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py index c92c900..8a74494 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs.py @@ -79,7 +79,7 @@ def config(): }, 'sensor_list': 
get_instrument_channels().keys(), 'start_date': '1978-10-01 00:00:00', - 'stop_date': '2025-09-30 00:00:00', + 'stop_date': '2025-09-30 23:59:59', } ''' diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.py index b37ccff..3ce1b4f 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap.py @@ -14,9 +14,9 @@ def main(): 'replay_observer_diagnostic_v1', 'replay_observer_diagnostic_overlap' ], - start_date='2018-10-01 00:00:00', - stop_date='2024-09-30 00:00:00' + start_date='2017-10-01 00:00:00', + stop_date='2023-09-30 23:59:59' ) if __name__=='__main__': - main() \ No newline at end of file + main() diff --git a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.py b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.py index 521004b..4967afe 100755 --- a/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.py +++ b/src/score_plotting/core_scripts/plot_gsi_radiance_fit_to_obs_rod_overlap_daily_update.py @@ -33,9 +33,9 @@ def main(): 'avhrr2', 'avhrr3' ], - start_date='2018-10-01 00:00:00', - stop_date='2024-09-30 00:00:00' + start_date='2017-10-01 00:00:00', + stop_date='2023-09-30 23:59:59' ) if __name__=='__main__': - main() \ No newline at end of file + main() From ad36db29ee7e30028d3b5c976da6052ab4365cd4 Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Wed, 13 Aug 2025 19:08:05 +0000 Subject: [PATCH 43/44] add timeout to full GSI radiance plotting batch submission script to prevent endless hangs --- batch_scripts/plot_gsi_radiance_fit_to_obs.sbatch | 4 ++-- .../plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/batch_scripts/plot_gsi_radiance_fit_to_obs.sbatch 
b/batch_scripts/plot_gsi_radiance_fit_to_obs.sbatch index d78cc7b..4dadedb 100755 --- a/batch_scripts/plot_gsi_radiance_fit_to_obs.sbatch +++ b/batch_scripts/plot_gsi_radiance_fit_to_obs.sbatch @@ -14,5 +14,5 @@ source $HOME/.bash_profile micromamba activate darr_score_sqlalchemy1_dev_env #which mpirun # Run the MPI Python script using mpiexec -mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs /media/darr/results/figures/brightness_temperature_error_timeseries - +timeout 21600 mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs /media/darr/results/figures/brightness_temperature_error_timeseries + diff --git a/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch b/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch index 0688d1a..d88180f 100755 --- a/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch +++ b/batch_scripts/plot_gsi_radiance_fit_to_obs_weakly_coupled_scout.sbatch @@ -11,7 +11,7 @@ #SBATCH --mail-user=Chesley.Mccoll@noaa.gov # Email address for notifications source $HOME/.bash_profile -micromamba activate darr_score_sqlalchemy1_env +micromamba activate darr_score_sqlalchemy1_dev_env #which mpiexec # Run the MPI Python script using mpiexec mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs_weakly_coupled_scout /media/darr/results/figures/brightness_temperature_error_timeseries_weakly_coupled_scout From 4b49dd3c7b81ccb369fc5bf1e268ad00cd03887a Mon Sep 17 00:00:00 2001 From: Adam Schneider Date: Wed, 13 Aug 2025 19:11:02 +0000 Subject: [PATCH 44/44] batch submission script to launch GSI radiance plotting for individual sensors --- ...plot_gsi_radiance_fit_to_obs_serial.sbatch | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100755 batch_scripts/plot_gsi_radiance_fit_to_obs_serial.sbatch diff --git a/batch_scripts/plot_gsi_radiance_fit_to_obs_serial.sbatch 
b/batch_scripts/plot_gsi_radiance_fit_to_obs_serial.sbatch new file mode 100755 index 0000000..7be0997 --- /dev/null +++ b/batch_scripts/plot_gsi_radiance_fit_to_obs_serial.sbatch @@ -0,0 +1,30 @@ +#!/bin/bash +#SBATCH --job-name=gsi_radiance_plots # Name of the job +#SBATCH --output=plot_gsi%j.log # Output log file with job ID +#SBATCH --ntasks=16 +#SBATCH --ntasks-per-node=16 # Number of tasks (CPUs) per node +#SBATCH --cpus-per-task=2 +#SBATCH --nodes=1 # Number of nodes (adjust based on your requirements) +#SBATCH --time=12:00:00 # Max wall time (adjust as needed) +#SBATCH --partition=bigmem #batch # Partition to use +#SBATCH --mail-type=ALL # Send an email when the job starts, ends, or fails +#SBATCH --mail-user=Chesley.Mccoll@noaa.gov # Email address for notifications + +source $HOME/.bash_profile +micromamba activate darr_score_sqlalchemy1_dev_env +#which mpirun +# Run the MPI Python script using mpiexec +timeout 3600 mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs --sensor msu /media/darr/results/figures/brightness_temperature_error_timeseries +timeout 3600 mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs --sensor gmi /media/darr/results/figures/brightness_temperature_error_timeseries +timeout 3600 mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs --sensor mhs /media/darr/results/figures/brightness_temperature_error_timeseries +timeout 3600 mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs --sensor saphir /media/darr/results/figures/brightness_temperature_error_timeseries +timeout 3600 mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs --sensor sndr /media/darr/results/figures/brightness_temperature_error_timeseries +timeout 3600 mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs --sensor sndrD1 /media/darr/results/figures/brightness_temperature_error_timeseries +timeout 3600 mpiexec python -m 
score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs --sensor sndrD2 /media/darr/results/figures/brightness_temperature_error_timeseries +timeout 3600 mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs --sensor sndrD3 /media/darr/results/figures/brightness_temperature_error_timeseries +timeout 3600 mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs --sensor sndrD4 /media/darr/results/figures/brightness_temperature_error_timeseries +timeout 3600 mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs --sensor avhrr2 /media/darr/results/figures/brightness_temperature_error_timeseries +timeout 3600 mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs --sensor amsre /media/darr/results/figures/brightness_temperature_error_timeseries +timeout 3600 mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs --sensor amsr2 /media/darr/results/figures/brightness_temperature_error_timeseries +timeout 3600 mpiexec python -m score_plotting.core_scripts.plot_gsi_radiance_fit_to_obs --sensor tmi /media/darr/results/figures/brightness_temperature_error_timeseries +