{
  "train": {
    "project": "LunarLanderContinuous-v2-QGPO-VPSDE",
    "device": "cuda",
    "wandb": {
      "project": "IQL-LunarLanderContinuous-v2-QGPO-VPSDE"
    },
    "simulator": {
      "type": "GymEnvSimulator",
      "args": {
        "env_id": "LunarLanderContinuous-v2"
      }
    },
    "model": {
      "QGPOPolicy": {
        "device": "cuda",
        "critic": {
          "device": "cuda",
          "q_alpha": 1.0,
          "DoubleQNetwork": {
            "backbone": {
              "type": "ConcatenateMLP",
              "args": {
                "hidden_sizes": [
                  10,
                  256,
                  256
                ],
                "output_size": 1,
                "activation": "relu"
              }
            }
          }
        },
        "diffusion_model": {
          "device": "cuda",
          "x_size": 2,
          "alpha": 1.0,
          "solver": {
            "type": "DPMSolver",
            "args": {
              "order": 2,
              "device": "cuda",
              "steps": 17
            }
          },
          "path": {
            "type": "linear_vp_sde",
            "beta_0": 0.1,
            "beta_1": 20.0
          },
          "reverse_path": {
            "type": "linear_vp_sde",
            "beta_0": 0.1,
            "beta_1": 20.0
          },
          "model": {
            "type": "noise_function",
            "args": {
              "t_encoder": {
                "type": "GaussianFourierProjectionTimeEncoder",
                "args": {
                  "embed_dim": 32,
                  "scale": 30.0
                }
              },
              "backbone": {
                "type": "TemporalSpatialResidualNet",
                "args": {
                  "hidden_sizes": [
                    512,
                    256,
                    128
                  ],
                  "output_dim": 2,
                  "t_dim": 32,
                  "condition_dim": 8,
                  "condition_hidden_dim": 32,
                  "t_condition_hidden_dim": 128
                }
              }
            }
          },
          "energy_guidance": {
            "t_encoder": {
              "type": "GaussianFourierProjectionTimeEncoder",
              "args": {
                "embed_dim": 32,
                "scale": 30.0
              }
            },
            "backbone": {
              "type": "ConcatenateMLP",
              "args": {
                "hidden_sizes": [
                  42,
                  256,
                  256
                ],
                "output_size": 1,
                "activation": "silu"
              }
            }
          }
        }
      }
    },
    "parameter": {
      "behaviour_policy": {
        "batch_size": 1024,
        "learning_rate": 0.0001,
        "epochs": 500
      },
      "action_augment_num": 16,
      "fake_data_t_span": null,
      "energy_guided_policy": {
        "batch_size": 256
      },
      "critic": {
        "stop_training_epochs": 500,
        "learning_rate": 0.0001,
        "discount_factor": 0.99,
        "update_momentum": 0.005
      },
      "energy_guidance": {
        "epochs": 1000,
        "learning_rate": 0.0001
      },
      "evaluation": {
        "evaluation_interval": 50,
        "guidance_scale": [
          0.0,
          1.0,
          2.0
        ]
      },
      "checkpoint_path": "./LunarLanderContinuous-v2-QGPO"
    }
  },
  "deploy": {
    "device": "cuda",
    "env": {
      "env_id": "LunarLanderContinuous-v2",
      "seed": 0
    },
    "num_deploy_steps": 1000,
    "t_span": null
  }
}