deep-learning, state, reinforcement-learning, actor, ddpg

Complex state in reinforcement learning


In my RL code, the states are complex numbers. I pass them to the actor network, whose code is as follows:

class Actor(nn.Module):
    def __init__(self, state_dim, action_dim, max_action):
        super(Actor, self).__init__()

        # round the hidden size up to the next power of two
        hidden_dim = 1 if state_dim == 0 else 2 ** (state_dim - 1).bit_length()
        print("hidden_dim", hidden_dim)

        self.l1 = nn.Linear(state_dim, hidden_dim, dtype=torch.complex64)
        self.l2 = nn.Linear(hidden_dim, hidden_dim, dtype=torch.complex64)
        self.l3 = nn.Linear(hidden_dim, action_dim, dtype=torch.complex64)

        self.l1.weight = self.l1.weight.to(torch.complex64)
        self.l2.weight = self.l2.weight.to(torch.complex64)
        self.l3.weight = self.l3.weight.to(torch.complex64)

        # for param in self.parameters():
        #   param.data = param.data.to(torch.complex64)

        self.max_action = max_action

    
    def forward(self, state):
        a = F.relu(self.l1(state))
        a = F.relu(self.l2(a))
        return self.max_action * torch.tanh(self.l3(a))

    def select_action(self, state):
        # state = state.astype(np.float32)
        state_tensor = torch.tensor(state.reshape(1, -1))
        state = state_tensor.to(device)
        return self.actor(state).cpu().data.numpy().flatten()

but I get this error:

File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py", line 116, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 must have the same dtype, but got ComplexDouble and ComplexFloat

I want to pass the state in and select an action, but my code doesn't work, and I don't know how to solve this problem. I would be very grateful if anyone could help me.


Solution

  • There were a couple of problems with your code:

    1. To solve the dtype mismatch, create the input `state_tensor` with `dtype=torch.complex64` (the note below the list explains why).
    2. ReLU is not defined for complex tensors, so I created a `ComplexReLU` module that applies ReLU to the real and imaginary parts separately.
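
    The dtypes in the error message come from NumPy: its default complex dtype is complex128, which `torch.tensor` preserves, so the input arrives as ComplexDouble while the layer weights are ComplexFloat. A quick standalone check (assuming only NumPy and PyTorch are installed):

    import numpy as np
    import torch

    state = np.array([1 + 2j, 3 + 4j])
    print(state.dtype)                # complex128, NumPy's default
    print(torch.tensor(state).dtype)  # torch.complex128, i.e. ComplexDouble
    print(torch.tensor(state, dtype=torch.complex64).dtype)  # torch.complex64, i.e. ComplexFloat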

    This is a working example:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    import numpy as np
    
    class ComplexReLU(nn.Module):
        """ReLU for complex tensors: applied separately to the real and imaginary parts."""
        def __init__(self):
            super(ComplexReLU, self).__init__()

        def forward(self, input):
            return F.relu(input.real) + 1j * F.relu(input.imag)
    
    class Actor(nn.Module):
        def __init__(self, state_dim, action_dim, max_action):
            super(Actor, self).__init__()
    
            # round the hidden size up to the next power of two
            hidden_dim = 1 if state_dim == 0 else 2 ** (state_dim - 1).bit_length()
            print("hidden_dim", hidden_dim)
    
            # the layers are created directly with complex64 weights and biases,
            # so no extra casting is needed afterwards
            self.l1 = nn.Linear(state_dim, hidden_dim, dtype=torch.complex64)
            self.l2 = nn.Linear(hidden_dim, hidden_dim, dtype=torch.complex64)
            self.l3 = nn.Linear(hidden_dim, action_dim, dtype=torch.complex64)
    
    
            self.max_action = max_action
            self.complex_relu = ComplexReLU()
    
        def forward(self, state):
            a = self.complex_relu(self.l1(state))
            a = self.complex_relu(self.l2(a))
            # torch.tanh supports complex inputs; scale into the action range
            return self.max_action * torch.tanh(self.l3(a))
    
        def select_action(self, state):
            # cast the input to complex64 so it matches the layer weights
            state_tensor = torch.tensor(state.reshape(1, -1), dtype=torch.complex64).to(next(self.parameters()).device)
            with torch.no_grad():
                return self(state_tensor).cpu().numpy().flatten()
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    state_dim = 2
    action_dim = 1
    max_action = 1.0
    
    actor = Actor(state_dim, action_dim, max_action).to(device)
    
    sample_state = np.array([1+2j, 3+4j])  # NumPy defaults to complex128; select_action casts it to complex64
    
    action = actor.select_action(sample_state)
    
    print("Selected action:", action)