Source code for marl.marl

import os
import marl
from .agent import TrainableAgent, Agent

class MAS(object):
    """
    The class for a multi-agent system (MAS): a container for a list of agents.

    :param agents_list: (list) The list of agents in the MAS
    :param name: (str) The name of the system
    """

    def __init__(self, agents_list=None, name="mas"):
        self.name = name
        # Use None as the default to avoid sharing one mutable list between instances
        self.agents = agents_list if agents_list is not None else []
    def append(self, agent):
        """
        Add an agent to the system.

        :param agent: (Agent) The agent to be added
        """
        self.agents.append(agent)
    def action(self, observation):
        """
        Return the joint action.

        :param observation: The joint observation
        """
        # Unbound call to Agent.action bypasses any subclass override of action()
        return [Agent.action(ag, obs) for ag, obs in zip(self.agents, observation)]
    def get_by_name(self, name):
        """
        Return the first agent whose name matches, or None if there is none.

        :param name: (str) The name of the agent to look up
        """
        for ag in self.agents:
            if ag.name == name:
                return ag
        return None
    def get_by_id(self, id):
        """
        Return the first agent whose id matches, or None if there is none.

        :param id: The id of the agent to look up
        """
        for ag in self.agents:
            if ag.id == id:
                return ag
        return None
    def __len__(self):
        return len(self.agents)
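
# --- Usage sketch (illustrative, not part of the library source) ---
# A minimal, hand-written outline of the MAS container API defined above.
# The agents `ag0`, `ag1`, `ag2` are hypothetical marl.agent.Agent instances;
# how they are constructed depends on the rest of the library and is not shown.
#
#     mas = MAS(agents_list=[ag0, ag1], name="demo-mas")
#     mas.append(ag2)                        # grow the system with a third agent
#     len(mas)                               # -> 3
#     mas.get_by_name(ag0.name)              # -> ag0 (or None if no match)
#     joint_action = mas.action(joint_obs)   # one action per agent, in agent order
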
class MARL(TrainableAgent, MAS):
    """
    The class for multi-agent reinforcement learning: a trainable multi-agent system.

    :param agents_list: (list) The list of agents in the MARL model
    :param name: (str) The name of the system
    """

    def __init__(self, agents_list=None, name='marl'):
        MAS.__init__(self, agents_list=agents_list, name=name)
        self.experience = marl.experience.make("ReplayMemory", capacity=10000)
    def store_experience(self, *args):
        # Record the joint transition via TrainableAgent, then pass each
        # trainable agent its own slice of the transition.
        TrainableAgent.store_experience(self, *args)
        observation, action, reward, next_observation, done = args
        for i, ag in enumerate(self.agents):
            if isinstance(ag, TrainableAgent):
                ag.store_experience(observation[i], action[i], reward[i], next_observation[i], done[i])
    def update_model(self, t):
        # TrainableAgent.update_model(self, t)
        for ag in self.agents:
            if isinstance(ag, TrainableAgent):
                ag.update_model(t)
    def reset_exploration(self, nb_timesteps):
        # TrainableAgent.reset_exploration(self, nb_timesteps)
        for ag in self.agents:
            if isinstance(ag, TrainableAgent):
                ag.reset_exploration(nb_timesteps)
    def update_exploration(self, t):
        # TrainableAgent.update_exploration(self, t)
        for ag in self.agents:
            if isinstance(ag, TrainableAgent):
                ag.exploration.update(t)
    def action(self, observation):
        # Delegate to each agent's own action(), which may include exploration
        return [ag.action(obs) for ag, obs in zip(self.agents, observation)]
    def greedy_action(self, observation):
        # Call Agent.action directly so each agent acts greedily, without exploration
        return [Agent.action(ag, obs) for ag, obs in zip(self.agents, observation)]
    def save_policy(self, folder='.', filename='', timestep=None):
        """
        Save the policy in a file called '<filename>-<agent_name>-<timestep>'.

        :param folder: (str) The path to the directory where to save the model(s)
        :param filename: (str) A specific name for the file (ex: 'test2')
        :param timestep: (int) The current timestep
        """
        if not os.path.exists(folder):
            os.makedirs(folder)
        filename_tmp = "{}-{}".format(filename, self.name) if filename != '' else "{}".format(self.name)
        for ag in self.agents:
            if isinstance(ag, TrainableAgent):
                ag.save_policy(folder=folder, filename=filename_tmp, timestep=timestep)
    def load_model(self, filename):
        """
        Load a saved model into every trainable agent of the system.
        """
        for ag in self.agents:
            if isinstance(ag, TrainableAgent):
                ag.load_model(filename)
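
# --- Usage sketch (illustrative, not part of the library source) ---
# A hand-written outline of how the MARL wrapper above can be driven; the
# library's own training entry points may differ. The trainable agents
# `tr0`/`tr1`, the environment `env`, and the timestep budget are hypothetical.
# It assumes a multi-agent environment whose observations, actions, rewards and
# done flags are lists with one entry per agent, matching what
# store_experience() above unpacks and indexes.
#
#     mas = MARL(agents_list=[tr0, tr1], name="demo-marl")
#     mas.reset_exploration(nb_timesteps=10000)
#     obs = env.reset()
#     for t in range(10000):
#         act = mas.action(obs)                          # joint action, one per agent
#         next_obs, rew, done, _ = env.step(act)
#         mas.store_experience(obs, act, rew, next_obs, done)
#         mas.update_model(t)                            # each trainable agent updates
#         mas.update_exploration(t)
#         obs = next_obs
#     mas.save_policy(folder="./models", filename="demo", timestep=10000)
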