# Source code for sustaingym.envs.evcharging.plot_utils

"""
This module implements utility functions for plotting.
"""
from __future__ import annotations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style("darkgrid")

from sustaingym.envs.evcharging.utils import SiteStr, DefaultPeriodStr


def read_baseline(site: SiteStr, period: DefaultPeriodStr, algorithm: str) -> pd.DataFrame:
    """Load the gzip-compressed results table logged for one baseline.

    Args:
        site: site identifier, first component of the file name
        period: period identifier, second component of the file name
        algorithm: baseline name, last component of the file name

    Returns:
        contents of ``logs/baselines/{site}_{period}_{algorithm}.csv``
    """
    baseline_path = f'logs/baselines/{site}_{period}_{algorithm}.csv'
    return pd.read_csv(baseline_path, compression='gzip')
def read_rl(experiment_identifier: int, rl_algorithm: str, csv_file: str = 'test_results') -> pd.DataFrame:
    """Load one gzip-compressed results table logged by an RL experiment.

    Args:
        experiment_identifier: experiment number; selects the ``exp{N}`` folder
        rl_algorithm: name of the algorithm's folder under ``logs/``
        csv_file: file name without the ``.csv`` extension

    Returns:
        contents of
        ``logs/{rl_algorithm}/exp{experiment_identifier}/{csv_file}.csv``
    """
    results_path = f'logs/{rl_algorithm}/exp{experiment_identifier}/{csv_file}.csv'
    return pd.read_csv(results_path, compression='gzip')
def plot_lines(site: SiteStr, period: DefaultPeriodStr) -> None:
    """Plot line plots for baselines over evaluation period.

    For each baseline, the ``reward`` column returned by
    :func:`read_baseline` is smoothed with a rolling median over 7 rows and
    drawn against the row index. The figure is saved to
    ``plots/lines_{site}_{period}.png``.

    The plot is drawn on its own, newly created figure, so it never picks up
    artists left on pyplot's current figure by an earlier plotting call. The
    figure is left open (and current) after saving.

    Args:
        site: site identifier used to locate the baseline result files
        period: period identifier used to locate the baseline result files
    """
    algs = ['offline_optimal', 'greedy', 'random', 'random_discrete', 'mpc_6']

    frames = []
    for alg in algs:
        # Only the reward column is plotted, so only that column is smoothed.
        # The first 6 values are NaN because the window is not yet full.
        smoothed = read_baseline(site, period, alg)['reward'].rolling(7).median()
        frames.append(pd.DataFrame({
            'alg': alg,
            'reward': smoothed.to_numpy(),
            'day': smoothed.index.to_numpy(),
        }))
    df = pd.concat(frames, ignore_index=True)

    fig, ax = plt.subplots()
    sns.lineplot(data=df, x="day", y="reward", hue="alg", ax=ax)
    ax.set_title('7-Day Rolling Median of Rewards')
    ax.set_ylabel("reward ($)")
    fig.savefig(f'plots/lines_{site}_{period}.png', dpi=300, pad_inches=0)
def plot_violins(site: SiteStr, period: DefaultPeriodStr) -> None:
    """Plot violin plots of rewards for the baselines and the RL agents.

    The plot contains the fixed baselines, the MPC baseline whose window
    setting has the highest mean reward, and, for every RL algorithm and
    experiment group, the experiment with the highest mean reward. Results
    are read with :func:`read_baseline` and :func:`read_rl`; the figure is
    saved to ``plots/violins_{site}_{period}.png``.

    Args:
        site: site identifier used to locate the baseline result files
        period: period identifier used to locate the baseline result files
    """
    algs = ['offline_optimal', 'greedy', 'random', 'random_discrete']

    # Find best MPC. Ties go to the smaller window (first maximum wins).
    mpc_windows = [1, 3, 6, 12, 24]
    mpc_means = {
        window: np.mean(read_baseline(site, period, f'mpc_{window}').reward)
        for window in mpc_windows
    }
    best_window = max(mpc_windows, key=mpc_means.__getitem__)
    algs.append(f'mpc_{best_window}')

    pieces = []

    # Read baselines
    for alg in algs:
        rewards = read_baseline(site, period, alg).reward
        # Newlines in the label keep the x tick labels narrow.
        alg_label = alg.replace('random', 'rand').replace('_', '\n')
        # baselines are neither in dist or out of dist; they are tagged 2021
        pieces.append(pd.DataFrame({
            'alg': alg_label,
            'reward': rewards.to_numpy(),
            'in_dist': 2021,
        }))

    # method, in dist vs. out of dist
    # Each group is an inclusive range of experiment ids mapped to the tag
    # stored in the 'in_dist' hue column (presumably the training-data year --
    # confirm against the experiment configs). Groups starting above 1005
    # belong to the '_discrete' variant of the algorithm.
    exp_groups = {
        (1000, 1002): 2019,
        (1003, 1005): 2021,
        (1006, 1008): 2019,
        (1009, 1011): 2021,
    }
    for rl in ['A2C', 'SAC', 'PPO']:
        for (first_exp, last_exp), tag in exp_groups.items():
            discrete = first_exp > 1005
            if rl == 'SAC' and discrete:
                continue  # no discrete SAC
            rld = rl + '_discrete' if discrete else rl

            # Find best performing experiment of the three. Selecting with
            # max() always yields one of the runs just read, so rewards from
            # a previous group can never be reused by accident.
            runs = [read_rl(exp, rld) for exp in range(first_exp, last_exp + 1)]
            best_run = max(runs, key=lambda run: np.mean(run.reward))

            # Add reward to records
            pieces.append(pd.DataFrame({
                'alg': rld.replace('_', '\n'),
                'reward': best_run.reward.to_numpy(),
                'in_dist': tag,
            }))

    df = pd.concat(pieces, ignore_index=True)

    fig, ax = plt.subplots(figsize=(6, 4), tight_layout=True)
    print(df.head())
    print(df.tail())
    # x tick labels are taken from the 'alg' column values.
    sns.violinplot(data=df, x='alg', y='reward', hue='in_dist', ax=ax)
    ax.set(xlabel='Algorithm', ylabel="Daily Return ($)")
    print(f"Save to: 'plots/violins_{site}_{period}.png'")
    fig.savefig(f'plots/violins_{site}_{period}.png', dpi=300, pad_inches=0)
def reward_curve_separate() -> None:
    """Plot reward curves separately for 'caltech' site in Summer 2021.

    Draws a 2x3 grid with one panel per (RL algorithm, in/out of dist) group.
    For each group, the experiment with the highest mean reward in its
    default results file is selected. Its ``evaluations`` file is then
    summarized per ``timestep`` as mean +/- one standard deviation of the
    ``profit``, ``carbon_cost`` and ``reward`` columns. The figure is saved to
    ``plots/training_curves_separate.png``.
    """
    # reward curve w/o action projection on 2-week sample in period
    fig, axes = plt.subplots(2, 3, figsize=(12, 8), sharey=True)
    algs = {
        ('A2C', 'out of dist'): [1000, 1001, 1002],
        ('PPO', 'out of dist'): [1000, 1001, 1002],
        ('SAC', 'out of dist'): [1000, 1001, 1002],
        ('A2C', 'in dist'): [1003, 1004, 1005],
        ('PPO', 'in dist'): [1003, 1004, 1005],
        ('SAC', 'in dist'): [1003, 1004, 1005],
    }

    # determine best in each group; max() always returns one of the listed
    # experiments, and the first one wins a tie
    best = {}
    for group, exps in algs.items():
        mean_rewards = {exp: np.mean(read_rl(exp, group[0]).reward) for exp in exps}
        best[group] = max(exps, key=mean_rewards.__getitem__)

    # The panels share the y axis, so only the left column is labelled.
    axes[0][0].set_ylabel('reward')
    axes[1][0].set_ylabel('reward')

    # (column in the evaluations file, legend label), in drawing order
    curves = (('profit', 'profit'), ('carbon_cost', 'carbon cost'), ('reward', 'reward'))
    columns = [column for column, _ in curves]

    for i, group in enumerate(best):
        evals = read_rl(best[group], group[0], csv_file='evaluations')
        # One pass over the frame; sort=False keeps timesteps in the order
        # in which they first appear in the file.
        stats = evals.groupby('timestep', sort=False)[columns].agg(['mean', 'std'])
        timesteps = stats.index.to_numpy()

        ax = axes[i // 3][i % 3]
        for column, label in curves:
            means = stats[column]['mean'].to_numpy()
            stds = stats[column]['std'].to_numpy()
            ax.plot(timesteps, means, label=label)
            ax.fill_between(timesteps, means - stds, means + stds, alpha=0.2)
        ax.set_xlabel('timesteps')
        ax.legend()
        ax.set_title(f'{group[1]} {group[0]}')

    fig.suptitle('Reward Breakdown on 07/05/2021-07/18/2021 During Training')
    fig.tight_layout()
    print("Save to: 'plots/training_curves_separate.png'")
    fig.savefig('plots/training_curves_separate.png', dpi=300, bbox_inches='tight')
def reward_curve_all() -> None:
    """Plot reward curves together for 'caltech' site in Summer 2021.

    For each (RL algorithm, in/out of dist) group, the experiment with the
    highest mean reward in its default results file is selected. The
    ``reward`` column of its ``evaluations`` file is then summarized per
    ``timestep`` as mean +/- one standard deviation. All groups share one
    axes, and the figure is saved to ``plots/training_curves_all.png``.

    The curves are drawn on their own, newly created figure, so they never
    land on a figure left current by an earlier plotting call. The figure is
    left open (and current) after saving.
    """
    # reward curve w/o action projection on 2-week sample in period
    algs = {
        ('A2C', 'out of dist'): [1000, 1001, 1002],
        ('A2C', 'in dist'): [1003, 1004, 1005],
        ('PPO', 'out of dist'): [1000, 1001, 1002],
        ('PPO', 'in dist'): [1003, 1004, 1005],
    }

    # determine best in each group; max() always returns one of the listed
    # experiments, and the first one wins a tie
    best = {}
    for group, exps in algs.items():
        mean_rewards = {exp: np.mean(read_rl(exp, group[0]).reward) for exp in exps}
        best[group] = max(exps, key=mean_rewards.__getitem__)

    fig, ax = plt.subplots()
    for group, exp in best.items():
        evals = read_rl(exp, group[0], 'evaluations')
        # sort=False keeps timesteps in the order in which they first appear.
        stats = evals.groupby('timestep', sort=False)['reward'].agg(['mean', 'std'])
        timesteps = stats.index.to_numpy()
        reward_means = stats['mean'].to_numpy()
        reward_stds = stats['std'].to_numpy()

        ax.plot(timesteps, reward_means, label=f'{group[1]} {group[0]}')
        ax.fill_between(timesteps, reward_means - reward_stds, reward_means + reward_stds, alpha=0.2)

    ax.set_title('Reward on 07/05/2021-07/18/2021 During Training')
    ax.set_xlabel('timesteps')
    ax.set_ylabel('reward')
    ax.legend()
    print("Save to: 'plots/training_curves_all.png'")
    fig.savefig('plots/training_curves_all.png', dpi=300)
if __name__ == '__main__':
    # Script entry point. Only the separate training curves are generated;
    # uncomment a call below to regenerate one of the other figures.

    # # Plot violin plot
    # plot_violins('caltech', 'Summer 2021')

    # # Plot line plot
    # plot_lines('caltech', 'Summer 2021')

    # # Plot training curves in one place
    # reward_curve_all()

    # Plot training curves separately
    reward_curve_separate()