OpenDILabCommunity
/

LunarLanderContinuous-v2-QGPO

Reinforcement Learning

Generative Model

LunarLanderContinuous-v2

Eval Results (legacy)

Model card | Files and versions

LunarLanderContinuous-v2-QGPO / policy_config.json

zjowowen's picture

Upload policy_config.json with huggingface_hub

ad95118 verified over 1 year ago

history blame contribute delete

5.22 kB

	{
	"train": {
	"project": "LunarLanderContinuous-v2-QGPO-VPSDE",
	"device": "cuda",
	"wandb": {
	"project": "IQL-LunarLanderContinuous-v2-QGPO-VPSDE"
	},
	"simulator": {
	"type": "GymEnvSimulator",
	"args": {
	"env_id": "LunarLanderContinuous-v2"
	}
	},
	"model": {
	"QGPOPolicy": {
	"device": "cuda",
	"critic": {
	"device": "cuda",
	"q_alpha": 1.0,
	"DoubleQNetwork": {
	"backbone": {
	"type": "ConcatenateMLP",
	"args": {
	"hidden_sizes": [
	10,
	256,
	256
	],
	"output_size": 1,
	"activation": "relu"
	}
	}
	}
	},
	"diffusion_model": {
	"device": "cuda",
	"x_size": 2,
	"alpha": 1.0,
	"solver": {
	"type": "DPMSolver",
	"args": {
	"order": 2,
	"device": "cuda",
	"steps": 17
	}
	},
	"path": {
	"type": "linear_vp_sde",
	"beta_0": 0.1,
	"beta_1": 20.0
	},
	"reverse_path": {
	"type": "linear_vp_sde",
	"beta_0": 0.1,
	"beta_1": 20.0
	},
	"model": {
	"type": "noise_function",
	"args": {
	"t_encoder": {
	"type": "GaussianFourierProjectionTimeEncoder",
	"args": {
	"embed_dim": 32,
	"scale": 30.0
	}
	},
	"backbone": {
	"type": "TemporalSpatialResidualNet",
	"args": {
	"hidden_sizes": [
	512,
	256,
	128
	],
	"output_dim": 2,
	"t_dim": 32,
	"condition_dim": 8,
	"condition_hidden_dim": 32,
	"t_condition_hidden_dim": 128
	}
	}
	}
	},
	"energy_guidance": {
	"t_encoder": {
	"type": "GaussianFourierProjectionTimeEncoder",
	"args": {
	"embed_dim": 32,
	"scale": 30.0
	}
	},
	"backbone": {
	"type": "ConcatenateMLP",
	"args": {
	"hidden_sizes": [
	42,
	256,
	256
	],
	"output_size": 1,
	"activation": "silu"
	}
	}
	}
	}
	}
	},
	"parameter": {
	"behaviour_policy": {
	"batch_size": 1024,
	"learning_rate": 0.0001,
	"epochs": 500
	},
	"action_augment_num": 16,
	"fake_data_t_span": null,
	"energy_guided_policy": {
	"batch_size": 256
	},
	"critic": {
	"stop_training_epochs": 500,
	"learning_rate": 0.0001,
	"discount_factor": 0.99,
	"update_momentum": 0.005
	},
	"energy_guidance": {
	"epochs": 1000,
	"learning_rate": 0.0001
	},
	"evaluation": {
	"evaluation_interval": 50,
	"guidance_scale": [
	0.0,
	1.0,
	2.0
	]
	},
	"checkpoint_path": "./LunarLanderContinuous-v2-QGPO"
	}
	},
	"deploy": {
	"device": "cuda",
	"env": {
	"env_id": "LunarLanderContinuous-v2",
	"seed": 0
	},
	"num_deploy_steps": 1000,
	"t_span": null
	}
	}