"""
Simple baseline agents for establishing performance lower bounds.

Provides non-learning baseline policies:
- RandomAgent: Uniformly random actions
- ZeroAgent: Always outputs zero actions
"""

from pathlib import Path
import numpy as np
import gymnasium as gym


class RandomAgent:
    """
    Baseline policy that ignores observations and samples every action
    directly from the supplied action space.

    Serves as a lower bound: a trained agent should score above this
    policy if it has learned anything beyond random exploration.

    Note: the sampling distribution is whatever ``action_space.sample()``
    implements; this class adds no bounds handling or seeding of its own.
    """

    def __init__(self, action_space: gym.Space):
        """
        Args:
            action_space: Gymnasium space that actions are drawn from
                (intended to be a Box for continuous control; not checked here)
        """
        self.action_space = action_space

    def get_action(self, obs: np.ndarray, deterministic: bool = False) -> np.ndarray:
        """
        Draw a fresh sample from the stored action space.

        Args:
            obs: Ignored; accepted only to keep the agent interface uniform
            deterministic: Ignored; every call samples again

        Returns:
            Whatever ``self.action_space.sample()`` produces
        """
        sampled_action = self.action_space.sample()
        return sampled_action

    def save(self, path: Path) -> None:
        """Do nothing: this agent holds no state to write to ``path``."""
        return None

    def load(self, path: Path) -> None:
        """Do nothing: this agent has no state to read from ``path``."""
        return None


class ZeroAgent:
    """
    Baseline policy whose action is always an all-zeros array.

    Handy for debugging and as a "do nothing" reference point.
    """

    def __init__(self, action_dim: int):
        """
        Args:
            action_dim: Number of action components; passed to ``np.zeros``
                as the shape of every returned action
        """
        self.action_dim = action_dim

    def get_action(self, obs: np.ndarray, deterministic: bool = False) -> np.ndarray:
        """
        Build and return a new all-zeros action.

        Args:
            obs: Ignored
            deterministic: Ignored

        Returns:
            Newly allocated zero array of shape ``action_dim``
            (NumPy's default float dtype)
        """
        # Allocate on every call so callers never share one mutable buffer.
        zero_action = np.zeros(shape=self.action_dim)
        return zero_action

    def save(self, path: Path) -> None:
        """Do nothing: this agent holds no state to write to ``path``."""
        return None

    def load(self, path: Path) -> None:
        """Do nothing: this agent has no state to read from ``path``."""
        return None

