I have a python code snippet to connect to Azure. The functions are to connect, get the containers, and get the blobs of the container.
The get_containers()
can already list the up to the first folder, problem is I cannot get the subfolders.
The directory structure in Azure looks something like (this is also my desired output):
container1
container1/folder1/subfolder2
container1/folder1/subfolder2/subfolder3
container1/folder1/subfolder2/subfolder3/subfolder4
container1/folder2
container1/folder3/subfolder5
container2
container2/folder4
container2/folder5/subfolder6
container2/folder6
container3
container3/folder7/subfolder7
container4
container4/folder8
container4/folder9
and it could possible change. I need to list or print the containers/folders/subfolders in a dynamic way.
Here is the snippet of the functions:
def _authenticate(self):
"""Authenticates using ClientSecretCredential and returns BlobServiceClient."""
try:
credential = ClientSecretCredential(
tenant_id=self.tenant_id,
client_id=self.client_id,
client_secret=self.client_secret
)
blob_service_client = BlobServiceClient(
account_url=self.account_url,
credential=credential
)
print("Successfully connected to Azure Blob Storage!")
return blob_service_client
except Exception as e:
print(f'Failed to authenticate: {str(e)}')
return None
def get_containers(self):
"""Lists containers and their subdirectories in Azure Blob Storage."""
if self.blob_service_client:
try:
container_list = []
containers = self.blob_service_client.list_containers()
for container in containers:
container_name = container['name']
container_list.append(container_name)
# List blobs and subdirectories in each container
container_client = self.blob_service_client.get_container_client(container_name)
blobs = container_client.walk_blobs()
for blob in blobs:
container_list.append(f'{container_name}/{blob.name}')
return container_list
except Exception as e:
print(f'Failed to list containers: {str(e)}')
return []
else:
print("Service Client not initialized")
return []
def get_blobs(self, container_name: str, directory: str = "/") -> list:
"""Lists all blobs in the specified container and directory."""
if self.blob_service_client:
try:
container_client = self.blob_service_client.get_container_client(container_name)
blobs = container_client.walk_blobs(name_starts_with=directory)
blob_list = []
for blob in blobs:
blob_list.append(f'{container_name}/{blob.name}')
return blob_list
except Exception as e:
print(f'Failed to list blobs in {container_name}: {str(e)}')
return []
else:
print("Service Client not initialized")
return []
The output I got is:
container1
container1/folder1
container1/folder2
container1/folder3
container2
container2/folder4
container2/folder5
container2/folder6
container3
container3/folder7
container4
container4/folder8
container4/folder9
is there someething missing from my get_containers()
that I only get up to the first folder and not the subfolders?
You are on the right track. To list all folders and subfolders, you would need to call your get_blobs
method recursively.
Please see the code below:
depth = 0
indent = " "
def list_blobs_hierarchical(self, container_client: ContainerClient, prefix):
for blob in container_client.walk_blobs(name_starts_with=prefix, delimiter='/'):
if isinstance(blob, BlobPrefix):# check if the blob you got is a folder.
# Indentation is only added to show nesting in the output
print(f"{self.indent * self.depth}{blob.name}")
self.depth += 1
self.list_blobs_hierarchical(container_client, prefix=blob.name)
self.depth -= 1
else:
# this is the actual blob in the folder. you can choose to ignore it.
print(f"{self.indent * self.depth}{blob.name}")