import numpy as np
from collections import deque

class Agent:
    
    directions = [(0, 1), (1, 0), (0, -1), (-1, 0)] # cardinal directions (up, right, down, left)
    index_cnt = 1
    all_agents = []

    def __init__(self, x, y, board, size=3, momentum=0.8, explore=0.9, max_tail=10, home=2, strategy_type=1):
        '''
        size: size of the agent's starting territory
        momentum: how likely it is for agent to continue moving in current direction
        explore: how likely it is for agent to move out of its territory and capture new territory
        max_tail: how long an agent will move outside its territory before starting to head back
        home: controls the strength of the bias that directs agents to return back to its territory
        '''
        
        self.alive = True
        self.x = x
        self.y = y
        self.index = Agent.index_cnt
        self.strategy_type = strategy_type

        # initialize starting territory
        for nx in range(x-size//2, x+size//2 + size%2):
            for ny in range(y-size//2, y+size//2 + size%2):
                board[nx, ny] = self.index
        
        Agent.index_cnt += 1
        Agent.all_agents.append(self)
        
        self.tail = []
        self.cur_dir = np.random.choice(len(Agent.directions)) # start with random direction

        self.momentum = momentum
        self.explore = explore
        self.max_tail = max_tail
        self.home = home
        
    def move(self, board):
        start_x = self.x
        start_y = self.y

        # decide whether or not movement direction will change
        r = np.random.random()
        r1 = np.random.random()
        if r < self.momentum: # momentum to go in current direction
            self.x += Agent.directions[self.cur_dir][0]
            self.y += Agent.directions[self.cur_dir][1]
        elif board[self.x, self.y] == self.index and r1 < self.explore: # moving in straight line guarantees leaving territory
            self.x += Agent.directions[self.cur_dir][0]
            self.y += Agent.directions[self.cur_dir][1]
        else: # changing directions (we have chosen not to go in current direction)
            if len(self.tail) > self.max_tail: # switches agent to returning home mode if tail too long
                weights = []
                for dx, dy in Agent.directions: # rank how good each direction is based on Manhattan Distance
                    nx = self.x + dx
                    ny = self.y + dy

                    x_list, y_list = np.where(board == self.index) # "home" is any territory marked by agent
                    min_dist = np.min(np.abs(x_list - nx) + np.abs(y_list - ny)) # get distance to closest territory
                    weights.append(-min_dist) # assign larger weight (less negative) to direction closer to home

                weights[np.mod(self.cur_dir + 2, len(Agent.directions))] = -np.inf # agent should not move backwards, only left, right, straight

                # calculate probabilities using softmax
                probs = np.exp(np.array(weights) * self.home)
                probs = probs / probs.sum()
                self.cur_dir = np.random.choice(len(Agent.directions), p=probs)
            else:
                r = np.random.random()
                # choose left or right with equal probability
                if r < 0.5:
                    self.cur_dir = np.mod(self.cur_dir - 1, len(Agent.directions))
                else:
                    self.cur_dir = np.mod(self.cur_dir + 1, len(Agent.directions))
            
            self.x += Agent.directions[self.cur_dir][0]
            self.y += Agent.directions[self.cur_dir][1]

        if not self.inbound(self.x, self.y, board, padding=1): # boundary conditions when at border
            self.x = start_x
            self.y = start_y
            self.cur_dir = np.mod(self.cur_dir + 1, len(Agent.directions)) # rotate 90 degrees
            self.move(board) # look for new move
            return

        if board[self.x, self.y] < 0: # hit tail
            if board[self.x, self.y] != -self.index: # hit enemy tail, eliminate enemy
                self.eliminate(self.x, self.y, board)
                self.tail.append((self.x, self.y, board[self.x, self.y]))
                board[self.x, self.y] = -self.index
        else:
            if board[self.x, self.y] == self.index:
                if len(self.tail) > 0: # agent successfully returned home, fill captured territory
                    self.floodfill(board)
            else:
                self.tail.append((self.x, self.y, board[self.x, self.y])) # add to tail, keep track of original territory in case agent dies
                board[self.x, self.y] = -self.index

    def floodfill(self, board):
        '''
        algorithm to fill captured zone: floodfill from the outside and treat the agent's territory as impassable areas
        after floodfill, all traversed areas are outside the captured territory
        all non-traversed areas are inside the captured territory
        '''
        
        for (x, y, old) in self.tail: # turn tail into captured zone to create zone border
            board[x, y] = self.index
        self.tail = []

        new_board = np.copy(board)

        # implementing standard floodfill
        # there is a one square border surrounding the board that agent cannot go to
        # this guarantees that an outside zone will always exist
        dq = deque([(0, 0)])
        new_board[0, 0] = self.index
        while dq:
            x, y = dq.popleft()
            for dx, dy in Agent.directions:
                nx = x + dx
                ny = y + dy
                if self.inbound(nx, ny, new_board) and new_board[nx, ny] != self.index:
                    new_board[nx, ny] = self.index
                    dq.append((nx, ny))

        # flip the floodfilled zone to get the inside zone
        for x in range(len(board)):
            for y in range(len(board[x])):
                if new_board[x, y] != self.index:
                    if board[x, y] < 0: # in case agent managed to enclose an enemy completely
                        self.eliminate(x, y, board)
                    board[x, y] = self.index

    def inbound(self, x, y, board, padding=0):
        return x >=padding and y >= padding and x < len(board)-padding and y < len(board[x])-padding

    def remove(self, board):
        for x, y, old in self.tail:
            board[x, y] = old # makes sure to revert back to original territory, since having a path does not count as capture yet
        self.tail = []
        self.alive= False
    
    def eliminate(self, x, y, board):
        target = int(abs(board[x, y]))
        Agent.all_agents[target - 1].remove(board)

    @classmethod
    def reset(cls):
        cls.index_cnt = 1
        cls.all_agents = []