I have code like this:
class MyModule(nn.Module):
    """Merge three parallel 1x1-conv bottleneck branches with learnable scalars.

    Each branch maps ``channel`` input channels to ``channel // reduction``
    output channels.  The branch outputs are scaled by ``W_1``, ``W_2`` and
    ``W_3``, added together and passed through ``self.avg_pool``.

    The three scalars are independent, unconstrained parameters: nothing in
    this class forces them to sum to 1.

    Args:
        channel: Number of input channels expected by the three convolutions.
        reduction: Divisor applied to ``channel`` to get the branch width.
        n_segment: Stored on the instance; not used by the code shown here.
    """

    def __init__(self, channel: int, reduction: int = 16, n_segment: int = 8):
        super(MyModule, self).__init__()
        self.channel = channel
        self.reduction = reduction
        self.n_segment = n_segment

        # All three branches share the same reduced width.
        reduced = self.channel // self.reduction
        self.conv1 = nn.Conv2d(in_channels=self.channel, out_channels=reduced,
                               kernel_size=1, bias=False)
        self.conv2 = nn.Conv2d(in_channels=self.channel, out_channels=reduced,
                               kernel_size=1, bias=False)
        self.conv3 = nn.Conv2d(in_channels=self.channel, out_channels=reduced,
                               kernel_size=1, bias=False)

        # whatever
        # NOTE(review): the remaining layers are elided in this excerpt;
        # presumably ``self.avg_pool`` (used in ``forward``) is created here.

        # learnable weight: one scalar per branch, drawn from a standard normal
        self.W_1 = nn.Parameter(torch.randn(1), requires_grad=True)
        self.W_2 = nn.Parameter(torch.randn(1), requires_grad=True)
        self.W_3 = nn.Parameter(torch.randn(1), requires_grad=True)

    def forward(self, x):
        """Return ``avg_pool(W_1*conv1(x) + W_2*conv2(x) + W_3*conv3(x))``.

        Args:
            x: Input tensor with ``channel`` channels, as accepted by
                ``nn.Conv2d``.
        """
        # whatever

        ## branch1
        bottleneck_1 = self.conv1(x)

        ## branch2
        bottleneck_2 = self.conv2(x)

        ## branch3
        bottleneck_3 = self.conv3(x)

        ## summation
        output = self.avg_pool(self.W_1 * bottleneck_1 +
                               self.W_2 * bottleneck_2 +
                               self.W_3 * bottleneck_3)
        return output
As you can see, three learnable scalars (W_1, W_2, and W_3) are used to weight the branches. However, this approach does not guarantee that the scalars sum to 1. How can I make my learnable scalars sum to 1 in PyTorch? Thanks.
Keep it simple:
## summation
# Divide every scalar by their total so the three effective weights add up to 1.
# NOTE: the scalars are unconstrained, so WSum can be close to zero or negative;
# this normalisation is only well behaved while WSum stays away from 0.
WSum = self.W_1 + self.W_2 + self.W_3
weighted = (self.W_1 / WSum * bottleneck_1
            + self.W_2 / WSum * bottleneck_2
            + self.W_3 / WSum * bottleneck_3)
output = self.avg_pool(weighted)
Alternatively, one can use the distributive law and divide only once:
# Single-division form: the common factor 1/WSum is pulled out of the weighted
# sum.  Moving it outside self.avg_pool as well is only valid if that layer is
# linear (true for average pooling) -- TODO confirm for the layer actually used.
WSum = self.W_1 + self.W_2 + self.W_3
output = self.avg_pool(self.W_1 * bottleneck_1 +
                       self.W_2 * bottleneck_2 +
                       self.W_3 * bottleneck_3) / WSum