From 003315bb91f1d0324db6480bb818a3a3460a929f Mon Sep 17 00:00:00 2001
From: prasanna <maddilaprasanna10@gmail.com>
Date: Wed, 24 Apr 2024 21:07:29 +0200
Subject: [PATCH 1/3] Simplified the training_iteration_metric_plot function

---
 examples/plots/generate_plots.py | 90 +++++++++-----------------------
 1 file changed, 24 insertions(+), 66 deletions(-)

diff --git a/examples/plots/generate_plots.py b/examples/plots/generate_plots.py
index cc494bb..6395733 100644
--- a/examples/plots/generate_plots.py
+++ b/examples/plots/generate_plots.py
@@ -48,30 +48,10 @@ def generate_report(pdf_plots, params, path, num_samples):
             + " }"
         )
         latex += add_line("\end{table}")
-        # latex += add_line(pd.DataFrame.from_dict(params[key]).to_latex())
-        # latex += add_line("For each of the 5 environments, we sampled %d choice configurations where we sampled the following choices independently and uniformly from the following ranges:" % (num_wus // 5))
         latex += add_line(
             "For each of the sampled choice configurations, we train %d agents with different random seeds and compute the performance metrics."
             % num_samples
         )
-        # latex += add_line("\begin{figure}[ht]")
-        # latex += add_line("\begin{center}")
-        # latex += add_line("\centerline{\includegraphics[width=0.45\textwidth]{final_arch/perf__gin_study_design_choice_value_policy_and_value_function_network.pdf}\hspace{1cm}\includegraphics[width=0.45\textwidth]{final_arch/frequency__gin_study_design_choice_value_policy_and_value_function_network.pdf}}")
-        # latex += add_line("\caption{Analysis of choice \choicet{mlpshared}: "+str(percentile)+ "th percentile of performance scores conditioned on choice (left) and distribution of choices in top "+str(100-percentile)+"\% of configurations (right).}")
-        # latex += add_line("\label{fig:final_arch__mlpshared}")
-        # latex += add_line("\end{center}")
-        # latex += add_line("\end{figure}")
-        # latex += add_line("\subsection{Results}")
-        # latex += add_line("\label{exp_results_EXPNAME}")
-        # latex += add_line("We report aggregate statistics of the experiment in Table~\ref{tab:EXPNAME_overview} as well as training curves in Figure~\ref{fig:EXPNAME_training_curves}.")
-        # last = "fig:EXPNAME_"+params[-1].replace(".","_")
-        # if ename == 'final_setup':
-        #     last = "fig:final_setup2__gin_study_design_choice_value_batch_mode"
-        # latex += add_line("For each of the investigated choices in this experiment, we further provide a per-choice analysis in Figures~\ref{fig:EXPNAME_"+params[0].replace(".","_")+"}-\ref{"+last+"}.")
-        # t, pdf = plot_training_curves(frame)
-        # html += t
-        # pdf_plots.append(pdf)
-        # atex += add_line("")
         latex += add_line("\section{Training plots}")
         latex += add_line(
             "Plots that consider training iterations.\\footnote{A training iteration includes parallel sample collection by the environment workers as well as loss calculation on the collected batch and a model update.}"
         )
@@ -84,7 +64,6 @@ def generate_report(pdf_plots, params, path, num_samples):
         latex += add_line(
             "\caption{Training curves. Mean over different runs and 95\% confidence intervals bands. \label{fig:training_curves}}"
         )
-        # latex += add_line("\label{fig:training_curves}")
         latex += add_line("\end{center}")
         latex += add_line("\end{figure}")
         latex += add_line(
@@ -206,39 +185,24 @@ def training_iteration_metric_plot(
             counter = 0
             for ind in results.index:
                 for policy in results["info"][ind]["learner"].keys():
+                    # Get the metric value
+                    _row_results = results["info"][ind]["learner"][policy]
+                    row_metric = None
                     if (
-                        "learner_stats"
-                        in results["info"][ind]["learner"][policy].keys()
+                        "learner_stats" in _row_results.keys()
+                        and metric in _row_results["learner_stats"]
                     ):
-                        if (
-                            metric
-                            in results["info"][ind]["learner"][policy][
-                                "learner_stats"
-                            ].keys()
-                        ):
-                            new_row = {
-                                "Training iteration": counter,
-                                metric_title: results["info"][ind]["learner"][
-                                    policy
-                                ]["learner_stats"][metric],
-                                "Configuration": conf,
-                                "Policy": policy,
-                            }
-                            df.loc[len(df)] = new_row
-                    else:
-                        if (
-                            metric
-                            in results["info"][ind]["learner"][policy].keys()
-                        ):
-                            new_row = {
-                                "Training iteration": counter,
-                                metric_title: results["info"][ind]["learner"][
-                                    policy
-                                ][metric],
-                                "Configuration": conf,
-                                "Policy": policy,
-                            }
-                            df.loc[len(df)] = new_row
+                        row_metric = _row_results["learner_stats"][metric]
+                    elif metric in _row_results.keys():
+                        row_metric = _row_results[metric]
+
+                    # Define the new row
+                    df.loc[len(df)] = {
+                        "Training iteration": counter,
+                        metric_title: row_metric,
+                        "Configuration": conf,
+                        "Policy": policy,
+                    }
                 counter += 1
     plt.figure(figsize=(16, 6))
    if df["Policy"].nunique() == 1:  # single-RL
        sns.lineplot(
@@ -262,15 +226,6 @@ def training_iteration_metric_plot(
     return pdf
 
 
-def get_policies(all_results):
-    for conf in all_results.keys():
-        conf_results = all_results[conf]
-        for key in conf_results.keys():
-            results = conf_results[key]
-            # for ind in results.index:
-            #     print(results['config'][ind]['policies'])
-
-
 def training_policies_reward_plot(all_results):
     df = pd.DataFrame(
         columns=[
@@ -299,8 +254,10 @@ def training_policies_reward_plot(all_results):
                     )
                     >= n_episodes_iter
                 ), f"Found number of episodes rewards in training iterations lower than the number of episodes in the iteration {ind}"
-                # This can happen because rllib adds older historical episodes in order to reach the required smooting window "metrics_num_episodes_for_smoothing"
-                # I saw in the code that these episodes are added at the beginning of the list
+                # This can happen because rllib adds older historical
+                # episodes in order to reach the required smoothing
+                # window "metrics_num_episodes_for_smoothing". In the RLlib
+                # source, these episodes are added at the beginning of the list.
                 diff_n_episodes = (
                     len(
                         results["sampler_results"][ind]["hist_stats"][
@@ -362,7 +319,8 @@ def evaluation_policies_mean_reward(all_results):
         for key in conf_results.keys():
             results = conf_results[key]
             # Plot the reward for the evaluation graph
-            # We get the evaluation interval to make sure that we only get evaluation data in iterations for which we performed evaluation
+            # We get the evaluation interval to make sure that we only get
+            # evaluation data in iterations for which we performed evaluation
             evaluation_interval = results["config"][0]["evaluation_interval"]
             # We get also the train_batch_size
             train_batch_size = results["config"][0]["train_batch_size"]
@@ -465,7 +423,8 @@ def evaluation_exploitability(all_results):
         for key in conf_results.keys():
             results = conf_results[key]
             # Plot the reward for the evaluation graph
-            # We get the evaluation interval to make sure that we only get evaluation data in iterations for which we performed evaluation
+            # We get the evaluation interval to make sure that we only get
+            # evaluation data in iterations for which we performed evaluation
             evaluation_interval = results["config"][0]["evaluation_interval"]
             i = 1
             evaluation_iteration = 1
@@ -584,7 +543,6 @@ if __name__ == "__main__":
         all_results[logdir] = results
         num_samples = len(results)
 
-    get_policies(all_results)
     # Check that each logdir provided exists
     # for logdir in logdirs:
     #     print("Log directory ", logdir, " exists ", os.path.isdir(logdir))
-- 
GitLab


From 73df2c610d405b0a64e5992da1af7d1359a67c8e Mon Sep 17 00:00:00 2001
From: prasanna <maddilaprasanna10@gmail.com>
Date: Wed, 24 Apr 2024 22:53:25 +0200
Subject: [PATCH 2/3] TODO: make evaluation_exploitability work

Currently, the update to RLlib has moved the storage location of
exploitability within the results dictionary. This means the
exploitability results/dataframes are always empty. This needs to be
fixed.
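
Where exploitability appears to live now (hypothetical until verified
against the RLlib version in use; the follow-up patch in this series
adopts the second form):

    # Old location, always empty after the update:
    results["evaluation"][ind]["policy_exploitability"][policy]
    # Assumed new location, under the per-iteration learner info:
    results["info"][ind]["learner"]["policy_exploitability"][policy]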
---
 examples/plots/generate_plots.py | 121 ++++++++++++++++---------------
 1 file changed, 63 insertions(+), 58 deletions(-)

diff --git a/examples/plots/generate_plots.py b/examples/plots/generate_plots.py
index 6395733..2fc625a 100644
--- a/examples/plots/generate_plots.py
+++ b/examples/plots/generate_plots.py
@@ -175,35 +175,39 @@ def publish_report(pdf_plots, path):
 def training_iteration_metric_plot(
     metric, metric_title, graph_title, all_results
 ):
-    df = pd.DataFrame(
-        columns=["Training iteration", metric_title, "Configuration", "Policy"]
-    )
-    for conf in all_results.keys():
-        conf_results = all_results[conf]
-        for key in conf_results.keys():
-            results = conf_results[key]
+    df = []
+    for conf, conf_results in all_results.items():
+        for key, results in conf_results.items():
             counter = 0
             for ind in results.index:
                 for policy in results["info"][ind]["learner"].keys():
-                    # Get the metric value
+                    # Get the metric value, or skip if keys not found
                     _row_results = results["info"][ind]["learner"][policy]
+                    _result_keys = getattr(_row_results, "keys", None)
+                    if not _result_keys:
+                        continue
+
+                    # Now, get row_metric to fill the column
                     row_metric = None
                     if (
-                        "learner_stats" in _row_results.keys()
+                        "learner_stats" in _result_keys()
                         and metric in _row_results["learner_stats"]
                     ):
                         row_metric = _row_results["learner_stats"][metric]
                     elif metric in _row_results.keys():
                         row_metric = _row_results[metric]
 
-                    # Define the new row
-                    df.loc[len(df)] = {
-                        "Training iteration": counter,
-                        metric_title: row_metric,
-                        "Configuration": conf,
-                        "Policy": policy,
-                    }
+                    # Define the new row and append it to df
+                    df.append(
+                        {
+                            "Training iteration": counter,
+                            metric_title: row_metric,
+                            "Configuration": conf,
+                            "Policy": policy,
+                        }
+                    )
                 counter += 1
+    df = pd.DataFrame.from_dict(df)  # Convert to DataFrame
     plt.figure(figsize=(16, 6))
     if df["Policy"].nunique() == 1:  # single-RL
         sns.lineplot(
@@ -408,7 +412,7 @@ def evaluation_policies_mean_reward(all_results):
 
 
 def evaluation_exploitability(all_results):
-    # dfs = []
+    dfi, max_evaluation_iteration = [], 0
     df = pd.DataFrame(
         columns=[
             "Evaluation iteration",
@@ -417,52 +421,53 @@ def evaluation_exploitability(all_results):
             "Configuration",
         ]
     )
-    max_evaluation_iteration = 0
-    for conf in all_results.keys():
-        conf_results = all_results[conf]
-        for key in conf_results.keys():
-            results = conf_results[key]
+    breakpoint()
+    for conf, conf_results in all_results.items():
+        for key, results in conf_results.items():
             # Plot the reward for the evaluation graph
             # We get the evaluation interval to make sure that we only get
             # evaluation data in iterations for which we performed evaluation
+            i, evaluation_iteration = 1, 1
             evaluation_interval = results["config"][0]["evaluation_interval"]
-            i = 1
-            evaluation_iteration = 1
-            if "evaluation" in results.keys():
-                for ind in results["evaluation"].index:
-                    if i == evaluation_interval:
-                        i = 1
-                        if (
-                            "policy_exploitability"
-                            in results["evaluation"][ind].keys()
-                        ):
-                            for policy in results["evaluation"][ind][
-                                "policy_exploitability"
-                            ].keys():
-                                dfi = pd.DataFrame(
-                                    {
-                                        "Evaluation iteration": np.full(
-                                            results["evaluation"][ind][
-                                                "episodes_this_iter"
-                                            ],
-                                            evaluation_iteration,
-                                        ),
-                                        "Policy exploitability": results[
-                                            "evaluation"
-                                        ][ind]["policy_exploitability"][
-                                            policy
-                                        ],
-                                        "Policy": policy,
-                                        "Configuration": conf,
-                                    }
-                                )
-                                df = pd.concat([df, dfi], ignore_index=True)
-                        evaluation_iteration = evaluation_iteration + 1
-                    else:
-                        i = i + 1
-            max_evaluation_iteration = max(
-                max_evaluation_iteration, evaluation_iteration
-            )
+            if "evaluation" not in results.keys():
+                continue
+            eval_results = results["evaluation"]
+            for ind in eval_results.index:
+                # If not an eval iteration, continue
+                if i != evaluation_interval:
+                    i = i + 1
+                    continue
+
+                # else, reset the evaluation_iteration count
+                # and check if exploitability data was stored.
+                i = 1
+                if not ("policy_exploitability" in eval_results[ind].keys()):
+                    continue  # Skip, exploitability not here...
+
+                # Now processing exploitability data ...
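+                # (Each policy appends one dict to dfi; dfi is turned into
+                # a DataFrame after the loops.)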
+                for policy in eval_results[ind][
+                    "policy_exploitability"
+                ].keys():
+                    dfi.append(
+                        {
+                            "Evaluation iteration": np.full(
+                                eval_results[ind]["episodes_this_iter"],
+                                evaluation_iteration,
+                            ),
+                            "Policy exploitability": eval_results[ind][
+                                "policy_exploitability"
+                            ][policy],
+                            "Policy": policy,
+                            "Configuration": conf,
+                        }
+                    )
+                evaluation_iteration = evaluation_iteration + 1
+            max_evaluation_iteration = max(
+                max_evaluation_iteration, evaluation_iteration
+            )
+
+    # TODO: df is always empty; expl moved to info.learner etc.
+    df = pd.DataFrame.from_dict(dfi)
     plt.figure(figsize=(16, 6))
     sns.lineplot(
         data=df,
-- 
GitLab


From 2386f8e9c9179d85849ab72f2b1fc831a58e3953 Mon Sep 17 00:00:00 2001
From: ssmaddila <siva-sri-prasanna.maddila@inrae.fr>
Date: Thu, 25 Apr 2024 10:00:35 +0200
Subject: [PATCH 3/3] Fixed the exploitability graph

---
 examples/plots/generate_plots.py | 40 ++++++++++++++------------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/examples/plots/generate_plots.py b/examples/plots/generate_plots.py
index 2fc625a..0a95261 100644
--- a/examples/plots/generate_plots.py
+++ b/examples/plots/generate_plots.py
@@ -413,15 +413,13 @@ def evaluation_policies_mean_reward(all_results):
 
 def evaluation_exploitability(all_results):
     dfi, max_evaluation_iteration = [], 0
-    df = pd.DataFrame(
-        columns=[
-            "Evaluation iteration",
-            "Policy exploitability",
-            "Policy",
-            "Configuration",
-        ]
-    )
-    breakpoint()
+    df_columns = [
+        "Evaluation iteration",
+        "Policy exploitability",
+        "Policy",
+        "Configuration",
+    ]
+    df = pd.DataFrame(columns=df_columns)
     for conf, conf_results in all_results.items():
         for key, results in conf_results.items():
             # Plot the reward for the evaluation graph
@@ -431,8 +429,8 @@ def evaluation_exploitability(all_results):
             evaluation_interval = results["config"][0]["evaluation_interval"]
             if "evaluation" not in results.keys():
                 continue
-            eval_results = results["evaluation"]
-            for ind in eval_results.index:
+            expl_results = results["info"]
+            for ind in expl_results.index:
                 # If not an eval iteration, continue
                 if i != evaluation_interval:
                     i = i + 1
                     continue
@@ -441,22 +439,21 @@ def evaluation_exploitability(all_results):
 
                 # else, reset the evaluation_iteration count
                 # and check if exploitability data was stored.
                 i = 1
-                if not ("policy_exploitability" in eval_results[ind].keys()):
+                if "policy_exploitability" not in expl_results[ind]["learner"]:
                     continue  # Skip, exploitability not here...
 
                 # Now processing exploitability data ...
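                 # (Each policy appends one dict to dfi; dfi is turned into
                 # a DataFrame after the loops.)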
-                for policy in eval_results[ind][
+                for policy in expl_results[ind]["learner"][
                     "policy_exploitability"
                 ].keys():
                     dfi.append(
                         {
-                            "Evaluation iteration": np.full(
-                                eval_results[ind]["episodes_this_iter"],
-                                evaluation_iteration,
-                            ),
-                            "Policy exploitability": eval_results[ind][
-                                "policy_exploitability"
-                            ][policy],
+                            "Evaluation iteration": results[
+                                "num_env_steps_sampled"
+                            ][ind],
+                            "Policy exploitability": expl_results[ind][
+                                "learner"
+                            ]["policy_exploitability"][policy],
                             "Policy": policy,
                             "Configuration": conf,
                         }
                     )
                 evaluation_iteration = evaluation_iteration + 1
             max_evaluation_iteration = max(
                 max_evaluation_iteration, evaluation_iteration
             )
 
-    # TODO: df is always empty; expl moved to info.learner etc.
-    df = pd.DataFrame.from_dict(dfi)
+    df = pd.concat([df, pd.DataFrame.from_dict(dfi)])
     plt.figure(figsize=(16, 6))
     sns.lineplot(
         data=df,
-- 
GitLab