import ast
import re
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib.patches import Ellipse, Circle
# [docs] — Sphinx "view source" link artifact from the HTML docs; kept as a comment so it is not executed.
def animate_simulation_by_df(df, regime, start_frame, end_frame, grid_size=11, interval=100):
    """
    Create an animation for a given regime and frame range that supports multiple agents.

    Parameters:
        df : DataFrame containing columns with agent positions, reward_loc,
             steps_without_reward, activated, collected, terminated, r1, etc.
             Expected agent columns follow the format: a1x, a1y, a2x, a2y, ...
        regime : The regime index to animate (matched against the 'regime_idx' column).
        start_frame: Starting frame number (inclusive).
        end_frame : Ending frame number (inclusive).
        grid_size : The size of the board (default 11).
        interval : Interval (in ms) between frames.

    Returns:
        ani : The matplotlib.animation.FuncAnimation object.

    Raises:
        ValueError: If no agent position columns (a1x, a2x, ...) are found.
    """
    def rotate_point(x, y, center_x, center_y, angle):
        """Rotate a point (x, y) around a center (center_x, center_y) by a given angle (in degrees, CCW)."""
        angle_rad = np.radians(angle)
        x -= center_x
        y -= center_y
        x_new = x * np.cos(angle_rad) - y * np.sin(angle_rad)
        y_new = x * np.sin(angle_rad) + y * np.cos(angle_rad)
        return x_new + center_x, y_new + center_y

    def get_reward_coord(direction, grid_size):
        """
        Given a direction code ('u', 'd', 'l', 'r'),
        return the corresponding board coordinate:
            - 'u' => top center:    (grid_size//2, grid_size - 1)
            - 'd' => bottom center: (grid_size//2, 0)
            - 'l' => left center:   (0, grid_size//2)
            - 'r' => right center:  (grid_size - 1, grid_size//2)
        Returns None for an unrecognized code.
        """
        coords = {
            'u': (grid_size // 2, grid_size - 1),
            'd': (grid_size // 2, 0),
            'l': (0, grid_size // 2),
            'r': (grid_size - 1, grid_size // 2),
        }
        return coords.get(direction)

    def parse_reward_loc(raw):
        """
        Normalize a reward_loc cell to a sequence of direction codes, or None.
        Handles missing values (None or pandas' float NaN, which would otherwise
        crash `len()`) and tuples serialized as strings, e.g. "('u', 'd')".
        """
        if raw is None:
            return None
        # pandas stores a missing object cell as float NaN; NaN != None is True,
        # so the naive `!= None` check would fall through and crash on len(NaN).
        if isinstance(raw, float) and np.isnan(raw):
            return None
        if isinstance(raw, str):
            # If stored as a string, try parsing it back into a tuple/list.
            try:
                return ast.literal_eval(raw)
            except (ValueError, SyntaxError):
                # Not a literal; treat the raw string itself as a sequence of codes.
                return raw
        return raw

    def set_marker(marker, coord):
        """Place `marker` at board coordinate `coord`, or hide it when coord is None."""
        if coord is None:
            marker.set_data([], [])
        else:
            marker.set_data([coord[0]], [coord[1]])

    # Filter by regime and frame range (both frame bounds inclusive).
    subdf = df[(df['regime_idx'] == regime) &
               (df['frame_idx'] >= start_frame) &
               (df['frame_idx'] <= end_frame)].sort_values('frame_idx')

    # Determine the number of agents by looking for columns named exactly "a{i}x".
    # fullmatch avoids false positives such as "a1x_raw" that a prefix match would pick up.
    agent_nums = [int(re.findall(r'\d+', col)[0])
                  for col in subdf.columns if re.fullmatch(r'a\d+x', col)]
    num_agents = max(agent_nums) if agent_nums else 0
    if num_agents == 0:
        raise ValueError("No agent position columns found.")

    # Build the list of columns to extract in order.
    # NOTE(review): assumes agent numbering is contiguous 1..num_agents — a gap
    # (e.g. only a1x and a3x present) would raise a KeyError here.
    cols = []
    for i in range(num_agents):
        cols.append(f"a{i+1}x")
        cols.append(f"a{i+1}y")
    cols.extend(["reward_loc", "steps_without_reward", "activated", "collected", "terminated", "r1"])

    # Pre-extract necessary columns into a record array for fast per-frame access.
    steps = subdf[cols].to_records(index=False)

    # Set up the figure and axis.
    fig, ax = plt.subplots(figsize=(6, 6), dpi=80)
    ax.set_xlim(-0.5, grid_size - 0.5)
    ax.set_ylim(-0.5, grid_size - 0.5)
    ax.set_xticks(np.arange(-0.5, grid_size, 1))
    ax.set_yticks(np.arange(-0.5, grid_size, 1))
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.grid(True, color='gray')

    # Create agent shapes dynamically, cycling through a fixed palette.
    colors = ['lightblue', 'coral', 'green', 'orange', 'purple', 'pink', 'cyan']
    # For simplicity, use a default angle: even-indexed agents rotate by 45° and odd-indexed by -45°.
    agent_patches = []
    for i in range(num_agents):
        color = colors[i % len(colors)]
        default_angle = 45 if i % 2 == 0 else -45
        mouse = Ellipse((0, 0), 0.15, 0.4, angle=default_angle, color=color, alpha=0.75, label=f"Agent {i+1}")
        ear1 = Circle((0, 0), 0.08, color=color, alpha=0.75)
        ear2 = Circle((0, 0), 0.08, color=color, alpha=0.75)
        tail = Ellipse((0, 0), 0.03, 0.25, angle=default_angle, color=color, alpha=0.75)
        agent_patches.append({
            'mouse': mouse,
            'ear1': ear1,
            'ear2': ear2,
            'tail': tail,
            'default_angle': default_angle
        })
        ax.add_patch(mouse)
        ax.add_patch(ear1)
        ax.add_patch(ear2)
        ax.add_patch(tail)

    # Create reward markers (two possible locations) and the center activation marker.
    reward_marker1, = ax.plot([], [], 'go', alpha=0.5, markersize=16, label="Reward", antialiased=False)
    reward_marker2, = ax.plot([], [], 'go', alpha=0.5, markersize=16, label="Reward", antialiased=False)
    center_marker, = ax.plot([], [], 'ys', alpha=0.5, markersize=16, label="Center", antialiased=False)
    center_marker.set_data([grid_size // 2], [grid_size // 2])

    # Create a text object for per-frame metadata.
    meta_text = ax.text(0.05, grid_size - 0.5, '', fontsize=10, color='black',
                        transform=ax.transData, verticalalignment='top')

    def all_artists():
        """Every artist touched per frame, in a stable order for blitting."""
        return ([reward_marker1, reward_marker2, center_marker, meta_text] +
                [patch for agent in agent_patches
                 for patch in (agent['mouse'], agent['ear1'], agent['ear2'], agent['tail'])])

    def init():
        # Reset all agent patch positions.
        for patches in agent_patches:
            patches['mouse'].set_center((0, 0))
            patches['ear1'].set_center((0, 0))
            patches['ear2'].set_center((0, 0))
            patches['tail'].set_center((0, 0))
        reward_marker1.set_data([], [])
        reward_marker2.set_data([], [])
        center_marker.set_data([grid_size // 2], [grid_size // 2])
        meta_text.set_text('')
        # Return all drawn objects for blitting.
        return all_artists()

    def update(frame):
        step = steps[frame]
        # Update each agent's patches: body at (x, y), ears and tail rotated
        # around the body center by the agent's default angle.
        for i in range(num_agents):
            x = step[f"a{i+1}x"]
            y = step[f"a{i+1}y"]
            angle = agent_patches[i]['default_angle']
            ear1_x, ear1_y = rotate_point(x + 0.1, y + 0.1, x, y, angle)
            ear2_x, ear2_y = rotate_point(x - 0.1, y + 0.1, x, y, angle)
            tail_x, tail_y = rotate_point(x, y - 0.25, x, y, angle)
            agent_patches[i]['mouse'].set_center((x, y))
            agent_patches[i]['ear1'].set_center((ear1_x, ear1_y))
            agent_patches[i]['ear2'].set_center((ear2_x, ear2_y))
            agent_patches[i]['tail'].set_center((tail_x, tail_y))

        # Update reward markers and activation zone.
        reward_tuple = parse_reward_loc(step['reward_loc'])
        if reward_tuple is not None:
            # First and (optional) second reward direction.
            set_marker(reward_marker1,
                       get_reward_coord(reward_tuple[0], grid_size) if len(reward_tuple) > 0 else None)
            set_marker(reward_marker2,
                       get_reward_coord(reward_tuple[1], grid_size) if len(reward_tuple) > 1 else None)
            # Hide the center marker when rewards are active.
            center_marker.set_data([], [])
        else:
            set_marker(reward_marker1, None)
            set_marker(reward_marker2, None)
            center_marker.set_data([grid_size // 2], [grid_size // 2])

        meta_str = (f"steps_without_reward: {step['steps_without_reward']}\n"
                    f"activated: {step['activated']}\n"
                    f"collected: {step['collected']}\n"
                    f"terminated: {step['terminated']}\n"
                    f"reward: {step['r1']}")
        meta_text.set_text(meta_str)

        # Change board face color when reward is collected.
        # NOTE(review): with blit=True the axes background is cached, so this
        # facecolor flash may not render in some backends — confirm, or set blit=False.
        facecolor = 'green' if step['collected'] else 'white'
        ax.set_facecolor(f'xkcd:{facecolor}')

        # Return all objects that have been updated.
        return all_artists()

    ani = animation.FuncAnimation(fig, update, frames=len(steps), init_func=init,
                                  blit=True, interval=interval)
    return ani