deep-learning, state, reinforcement-learning, actor, ddpg

Complex state in reinforcement learning


In my RL code, the states are complex numbers. I pass them to the actor network, whose code is as follows:

class Actor(nn.Module):
    def __init__(self, state_dim, action_dim, max_action):
        super(Actor, self).__init__()

        # round the hidden size up to the next power of two
        hidden_dim = 1 if state_dim == 0 else 2 ** (state_dim - 1).bit_length()
        print("hidden_dim", hidden_dim)

        self.l1 = nn.Linear(state_dim, hidden_dim, dtype=torch.complex64)
        self.l2 = nn.Linear(hidden_dim, hidden_dim, dtype=torch.complex64)
        self.l3 = nn.Linear(hidden_dim, action_dim, dtype=torch.complex64)

        self.l1.weight = self.l1.weight.to(torch.complex64)
        self.l2.weight = self.l2.weight.to(torch.complex64)
        self.l3.weight = self.l3.weight.to(torch.complex64)

        # for param in self.parameters():
        #   param.data = param.data.to(torch.complex64)

        self.max_action = max_action

    
    def forward(self, state):
        a = F.relu(self.l1(state))
        a = F.relu(self.l2(a))
        return self.max_action * torch.tanh(self.l3(a))

    def select_action(self, state):
        # state = state.astype(np.float32)
        state_tensor = torch.tensor(state.reshape(1, -1))
        state = state_tensor.to(device)
        return self.actor(state).cpu().data.numpy().flatten()

but I get this error:

File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py", line 116, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 must have the same dtype, but got ComplexDouble and ComplexFloat

I want to pass the state in and select an action, but my code doesn't work, and I don't know how to solve this problem. I would be very grateful if anyone could help me.


Solution

  • There were a couple of problems with your code:

    1. To solve the dtype mismatch, create the input `state_tensor` with `dtype=torch.complex64` (the note below the list explains why).
    2. ReLU is not defined for complex tensors, so I created a `ComplexReLU` module that applies ReLU to the real and imaginary parts separately.
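
    The dtypes in the error message come from NumPy: its default complex dtype is complex128, which `torch.tensor` preserves, so the input arrives as ComplexDouble while the layer weights are ComplexFloat. A quick standalone check (assuming only NumPy and PyTorch are installed):

    import numpy as np
    import torch

    state = np.array([1 + 2j, 3 + 4j])
    print(state.dtype)                # complex128, NumPy's default
    print(torch.tensor(state).dtype)  # torch.complex128, i.e. ComplexDouble
    print(torch.tensor(state, dtype=torch.complex64).dtype)  # torch.complex64, i.e. ComplexFloat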

    This is a working example:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    import numpy as np
    
    class ComplexReLU(nn.Module):
        """ReLU for complex tensors: applied separately to the real and imaginary parts."""
        def __init__(self):
            super(ComplexReLU, self).__init__()

        def forward(self, input):
            return F.relu(input.real) + 1j * F.relu(input.imag)
    
    class Actor(nn.Module):
        def __init__(self, state_dim, action_dim, max_action):
            super(Actor, self).__init__()
    
            # round the hidden size up to the next power of two
            hidden_dim = 1 if state_dim == 0 else 2 ** (state_dim - 1).bit_length()
            print("hidden_dim", hidden_dim)
    
            # the layers are created directly with complex64 weights and biases,
            # so no extra casting is needed afterwards
            self.l1 = nn.Linear(state_dim, hidden_dim, dtype=torch.complex64)
            self.l2 = nn.Linear(hidden_dim, hidden_dim, dtype=torch.complex64)
            self.l3 = nn.Linear(hidden_dim, action_dim, dtype=torch.complex64)
    
    
            self.max_action = max_action
            self.complex_relu = ComplexReLU()
    
        def forward(self, state):
            a = self.complex_relu(self.l1(state))
            a = self.complex_relu(self.l2(a))
            # torch.tanh supports complex inputs; scale into the action range
            return self.max_action * torch.tanh(self.l3(a))
    
        def select_action(self, state):
            # cast the input to complex64 so it matches the layer weights
            state_tensor = torch.tensor(state.reshape(1, -1), dtype=torch.complex64).to(next(self.parameters()).device)
            with torch.no_grad():
                return self(state_tensor).cpu().numpy().flatten()
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    state_dim = 2
    action_dim = 1
    max_action = 1.0
    
    actor = Actor(state_dim, action_dim, max_action).to(device)
    
    sample_state = np.array([1+2j, 3+4j])  # NumPy defaults to complex128; select_action casts it to complex64
    
    action = actor.select_action(sample_state)
    
    print("Selected action:", action)