import numpy as np


class SecondaryAgent:
    """Expert agent that pairs a predictive model with a specialty tag.

    Attributes:
        model: Object exposing ``predict(state)``; every prediction is
            delegated to it.
        specialty: Caller-supplied tag, stored as given. Nothing in this
            class reads it back.
    """

    def __init__(self, model, specialty):
        """Store the wrapped model and its specialty tag.

        Args:
            model: Object exposing ``predict(state)``.
            specialty: Tag describing this expert; kept verbatim.
        """
        self.model = model
        self.specialty = specialty

    def predict(self, state):
        """Return ``self.model.predict(state)`` unchanged.

        Args:
            state: Input forwarded as-is to the wrapped model.

        Returns:
            Whatever the wrapped model's ``predict`` returns.
        """
        return self.model.predict(state)
class PrimeAgent:
    """Agent that blends expert predictions using gating-network weights.

    Attributes:
        gating_network: Object exposing ``predict(state)`` and ``fit(...)``.
            Its prediction supplies one weight per expert.
        experts: Sequence of objects exposing ``predict(state)`` and a
            ``model`` attribute that has ``fit(...)``.
    """

    def __init__(self, gating_network, experts):
        """Store the gating network and the experts it arbitrates between.

        Args:
            gating_network: Object exposing ``predict`` and ``fit``.
            experts: Sequence of expert agents.
        """
        self.gating_network = gating_network
        self.experts = experts

    def act(self, state):
        """Pick the action with the highest gate-weighted expert score.

        Args:
            state: Input forwarded to the gating network and to every expert.
                NOTE(review): only row 0 of the gating prediction is used, so
                this presumably expects a batch holding a single state --
                confirm against callers.

        Returns:
            The ``np.argmax`` index into the flattened weighted sum of the
            expert outputs.

        Raises:
            ValueError: If there are no experts, or if the number of gating
                weights differs from the number of experts.
        """
        gating_weights = self.gating_network.predict(state)
        expert_outputs = [expert.predict(state) for expert in self.experts]

        # Without this guard the weighted sum of nothing is 0.0 and argmax
        # reports action 0, which looks like a real decision but is not one.
        if not expert_outputs:
            raise ValueError("PrimeAgent.act requires at least one expert")

        weights = gating_weights[0]
        # zip() would silently stop at the shorter sequence, dropping experts
        # (or weights) without any signal; fail loudly instead.
        if len(weights) != len(expert_outputs):
            raise ValueError(
                f"gating network produced {len(weights)} weights "
                f"for {len(expert_outputs)} experts"
            )

        weighted_outputs = [
            weight * output for weight, output in zip(weights, expert_outputs)
        ]
        combined_output = np.sum(weighted_outputs, axis=0)
        return np.argmax(combined_output)

    def train(self, states, actions, rewards):
        """Run one reward-weighted fit pass on the gate and on every expert.

        Args:
            states: Training inputs, passed positionally to each ``fit``.
            actions: Training targets, passed positionally to each ``fit``.
            rewards: Passed to each ``fit`` as ``sample_weight``.
        """
        # NOTE(review): the gating network is fit against the same action
        # targets as the experts, although act() reads its output as one
        # weight per expert. Confirm this is intended.
        fit_options = {"sample_weight": rewards, "epochs": 1, "verbose": 0}
        self.gating_network.fit(states, actions, **fit_options)
        for expert in self.experts:
            expert.model.fit(states, actions, **fit_options)