azure, azure-data-factory, azure-bicep, azure-storage-account, azure-rbac

Creating an ADLS Linked Service with Bicep for a Blob Container?


I can't get this ADLS Linked Service (LS) to work.

I tried publicAccess blob, container, and private for the container. I have allowBlobPublicAccess:true for the storage account. I was able to create functional LS for Kaggle with bicep. I'm running this on VS code. I've tried creating a ManagedID and assigning it roles for the storage and container in CLI.

The LS shows up in the data factory, but it fails when I test the connection.

Currently trying this:

// ADF linked service to blob storage, authenticating with an explicit
// service principal (id / key / tenant) instead of the factory's managed
// identity. NOTE(review): the connection test will fail unless this
// service principal holds a data-plane RBAC role (e.g. Storage Blob Data
// Contributor) on the storage account — see the solution below.
resource adlsLinkedService 'Microsoft.DataFactory/factories/linkedservices@2018-06-01' = {
  parent: dataFactory
  name: 'AdlsLinkedService'
  properties: {
    type: 'AzureBlobStorage'
    typeProperties: {
      servicePrincipalId: servicePrincipalId
      // SecureString keeps the key out of plain-text ARM output
      servicePrincipalKey: {
        type: 'SecureString'
        value: servicePrincipalKey
      }
      tenant: tenantId
      serviceEndpoint: 'https://${storageAccountName}.blob.core.windows.net'
      }
  }
}

**This is the deployment of the relevant resources:**

// Deploy the storage account (ADLS Gen2) for storing data
resource storageAccount 'Microsoft.Storage/storageAccounts@2021-08-01' = {
  name: storageAccountName
  location: location
  sku: {
    name: 'Standard_LRS'
  }
  kind: 'StorageV2' 
  properties: {
    accessTier: 'Hot' 
    isHnsEnabled: true 
    allowBlobPublicAccess: true 
  }
}

// Default blob service of the account — required parent for containers.
resource blobService 'Microsoft.Storage/storageAccounts/blobServices@2021-08-01' = {
  parent: storageAccount
  name: 'default'
  properties: {}
}

// Create a container inside the ADLS Gen2 account for storing binary files
resource binaryContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2021-08-01' = {
  parent: blobService
  name: containerName // Name of the container
  properties: {
    publicAccess: 'Container' // Public read access for the container
  }
}

// Data factory with a system-assigned managed identity.
// NOTE(review): the identity itself is not the problem — it still needs
// an RBAC role assignment on the storage account before a linked service
// can connect with it.
resource dataFactory 'Microsoft.DataFactory/factories@2018-06-01' = {
  name: dataFactoryName
  location: location
  identity: {
    type: 'SystemAssigned' // **Is this the problem?**
  }
}

Solution

  • Using the managed identity, you would need to grant it permission to access the storage:

    // storage-account-role-assignment.bicep
    //
    // Reusable module: assigns a built-in RBAC role on a storage account
    // to a principal (defaults to ServicePrincipal, which is also the
    // principal type of a managed identity).

    param storageAccountName string
    param roleId string // GUID of the built-in role definition
    param principalId string // objectId of the identity being granted access
    param principalType string = 'ServicePrincipal'
    
    // reference to storage account (the scope of the role assignment)
    resource storageAccount 'Microsoft.Storage/storageAccounts@2023-01-01' existing = {
      name: storageAccountName
    }
    
    resource roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
      // role assignment names must be GUIDs; guid() makes the name
      // deterministic and unique per (scope, role, principal)
      name: guid(subscription().subscriptionId, resourceGroup().name, storageAccountName, roleId, principalId)
      scope: storageAccount
      properties: {
        // expand the bare role GUID into a full role-definition resource id
        roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', roleId)
        principalId: principalId
        principalType: principalType
      }
    }
    

    Then your main deployment could look like that:

    // main.bicep
    // Deployment parameters — defaults are sample values, override per environment.
    param location string = resourceGroup().location
    param dataFactoryName string = 'df-thomastest-001'
    param storageAccountName string = 'stthomastest001'
    param containerName string = 'binary'
    
    // Data factory with a system-assigned managed identity; its
    // principalId is what gets granted RBAC on the storage account.
    resource dataFactory 'Microsoft.DataFactory/factories@2018-06-01' = {
      name: dataFactoryName
      location: location
      identity: {
        type: 'SystemAssigned'
      }
    }
    
    // Storage account configured as ADLS Gen2 (isHnsEnabled) with
    // hardened defaults: HTTPS-only, TLS 1.2 minimum, and anonymous
    // blob access disabled — all access goes through Azure AD / RBAC.
    resource storageAccount 'Microsoft.Storage/storageAccounts@2021-08-01' = {
      name: storageAccountName
      location: location
      sku: {
        name: 'Standard_LRS'
      }
      kind: 'StorageV2'
      properties: {
        accessTier: 'Hot'
        isHnsEnabled: true
        supportsHttpsTrafficOnly: true
        minimumTlsVersion: 'TLS1_2'
        allowBlobPublicAccess: false
      }
    }
    
    // Associated blob service (required parent for containers)
    resource blobService 'Microsoft.Storage/storageAccounts/blobServices@2021-08-01' = {
      parent: storageAccount
      name: 'default'
      properties: {}
    }
    
    // Create a container inside the ADLS Gen2 account for storing binary files.
    // No anonymous access — readers must authenticate via RBAC.
    resource binaryContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2021-08-01' = {
      parent: blobService
      name: containerName // Name of the container
      properties: {
        publicAccess: 'None'
      }
    }
    
    // Grant the factory's managed identity "Storage Blob Data Contributor"
    // on the storage account so the linked service can authenticate
    // without keys or a separate service principal.
    module dataFactoryStorageRbac 'storage-account-role-assignment.bicep' = {
      name: '${dataFactoryName}-${storageAccountName}-rbac'
      params: {
        storageAccountName: storageAccount.name
        principalId: dataFactory.identity.principalId
        roleId: 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' // Storage Blob Data Contributor
      }
    }
    
    // Create linked service to blob storage. With no credentials in
    // typeProperties, ADF authenticates using its system-assigned
    // managed identity (granted access by the RBAC role assignment).
    resource adlsLinkedService 'Microsoft.DataFactory/factories/linkedservices@2018-06-01' = {
      parent: dataFactory
      name: 'AdlsLinkedService'
      properties: {
        type: 'AzureBlobStorage'
        typeProperties: {
          accountKind: 'StorageV2'
          serviceEndpoint: storageAccount.properties.primaryEndpoints.blob
        }
      }
    }
    

    Here we created a Blob Storage linked service; if you want to create a Data Lake Storage (ADLS Gen2) linked service instead, you can use this:

    // Create linked service to Data Lake Storage Gen2 (type AzureBlobFS),
    // pointing at the account's dfs endpoint; authentication again falls
    // back to the factory's managed identity since no credentials are set.
    resource adlsLinkedService 'Microsoft.DataFactory/factories/linkedservices@2018-06-01' = {
      parent: dataFactory
      name: 'AdlsLinkedService'
      properties: {
        type: 'AzureBlobFS'
        typeProperties: {
          url: storageAccount.properties.primaryEndpoints.dfs
        }
      }
    }
    

    You can also create the linked service from Data Factory Studio (see screenshot).

    Publish, then export the ARM template to see what was generated and which properties are required (see screenshot).