database azure nosql griddb

My GridDB nodes do not want to join into the same cluster


I have successfully started three nodes on three different Azure CentOS instances. Each node points to the default notification address (239.0.0.1), and all three are on the same virtual network on Azure (address space 10.2.0.0/24). Each node was joined using the same cluster name ("temperature" in my specific case).

Based on this, the nodes should all be in the same cluster; the problem is, when I run gs_stat, they're all clearly joined into individual clusters:

-bash-4.2$ gs_stat -u admin/password
{
    "checkpoint": {
        "endTime": 1542823670774, 
        "mode": "NORMAL_CHECKPOINT", 
        "normalCheckpointOperation": 1, 
        "pendingPartition": 0, 
        "requestedCheckpointOperation": 0, 
        "startTime": 1542823670486
    }, 
    "cluster": {
        "activeCount": 1, 
        "clusterName": "temperature", 
        "clusterStatus": "MASTER", 
        "designatedCount": 1, 
        "loadBalancer": "ACTIVE", 
        "master": {
            "address": "10.2.0.5", 
            "port": 10040
        }, 
        "nodeList": [
            {
                "address": "10.2.0.5", 
                "port": 10040
            }
        ], 
        "nodeStatus": "ACTIVE", 
        "notificationMode": "MULTICAST", 
        "partitionStatus": "NORMAL", 
        "startupTime": "2018-11-21T18:06:49Z", 
        "syncCount": 2
    }, 
    "currentTime": "2018-11-21T18:08:33Z", 
    "performance": {
        "backupCount": 0, 
        "batchFree": 0, 
        "checkpointFileAllocateSize": 262144, 
        "checkpointFileSize": 262144, 
        "checkpointFileUsageRate": 0, 
        "checkpointMemory": 0, 
        "checkpointMemoryLimit": 1073741824, 
        "checkpointWriteSize": 0, 
        "checkpointWriteTime": 0, 
        "currentCheckpointWriteBufferSize": 0, 
        "currentTime": 1542823713412, 
        "numBackground": 0, 
        "numConnection": 2, 
        "numNoExpireTxn": 0, 
        "numSession": 0, 
        "numTxn": 0, 
        "ownerCount": 128, 
        "peakProcessMemory": 72777728, 
        "processMemory": 72777728, 
        "recoveryReadSize": 262144, 
        "recoveryReadTime": 32, 
        "storeCompressionMode": "NO_BLOCK_COMPRESSION", 
        "storeDetail": {
            "batchFreeMapData": {
                "storeMemory": 0, 
                "storeUse": 0, 
                "swapRead": 0, 
                "swapWrite": 0
            }, 
            "batchFreeRowData": {
                "storeMemory": 0, 
                "storeUse": 0, 
                "swapRead": 0, 
                "swapWrite": 0
            }, 
            "mapData": {
                "storeMemory": 0, 
                "storeUse": 0, 
                "swapRead": 0, 
                "swapWrite": 0
            }, 
            "metaData": {
                "storeMemory": 0, 
                "storeUse": 0, 
                "swapRead": 0, 
                "swapWrite": 0
            }, 
            "rowData": {
                "storeMemory": 0, 
                "storeUse": 0, 
                "swapRead": 0, 
                "swapWrite": 0
            }
        }, 
        "storeMemory": 0, 
        "storeMemoryLimit": 1073741824, 
        "storeTotalUse": 0, 
        "swapRead": 0, 
        "swapReadSize": 0, 
        "swapReadTime": 0, 
        "swapWrite": 0, 
        "swapWriteSize": 0, 
        "swapWriteTime": 0, 
        "syncReadSize": 0, 
        "syncReadTime": 0, 
        "totalBackupLsn": 0, 
        "totalLockConflictCount": 0, 
        "totalOtherLsn": 0, 
        "totalOwnerLsn": 0, 
        "totalReadOperation": 0, 
        "totalRowRead": 0, 
        "totalRowWrite": 0, 
        "totalWriteOperation": 0
    }, 
    "recovery": {
        "progressRate": 1
    }, 
    "version": "4.0.0-33128 CE"
}

Is there a proper way to troubleshoot this? Is there a reason the nodes can't communicate?


Solution

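  • It looks like you’re using GridDB in multicast mode (your gs_stat output shows "notificationMode": "MULTICAST"). This works on local machines but doesn’t seem to work on Azure (or other cloud services), where the virtual network generally does not deliver multicast traffic. As a result, the nodes never discover each other and each one forms its own single-node cluster, which is why every node reports itself as MASTER with an activeCount of 1. The solution is to change to fixed-list mode, which gives every GridDB node an explicit list of the addresses of the nodes that should form the cluster.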

    More info here: https://griddb.net/en/blog/griddb-using-fixed-list-or-multicast-clustering/