python · microsoft-graph-api · onedrive · microsoft-graph-sdks

How to get all the files in OneDrive account using the graph SDK?


I'm trying to use a service account to pull all files from a OneDrive for Business account using the MS Graph Python SDK.

import asyncio
from msgraph import GraphServiceClient
from azure.identity import ClientSecretCredential

microsoft_tenant_id = '123abc'
client_id = '123abc'
client_secret = '123abc'

SCOPES = ['https://graph.microsoft.com/.default']

credential = ClientSecretCredential(microsoft_tenant_id, client_id, client_secret)
graph_client = GraphServiceClient(credential, SCOPES)

user_id = 'myemail@companyname.com'


async def get_drive_count():

# What do I use after .drives?

    response = await graph_client.users.by_user_id(user_id).drives... # not sure what to use next
    

asyncio.run(get_drive_count())

I can't find any examples of how to use the Graph client to pull OneDrive files.

I've tried using .root.children.get() but the SDK doesn't have any of those methods.

Does anyone know how to pull all OneDrive files using the SDK?


Solution

  • You need to get the user's drive ID and then recursively iterate through all the folders in that drive, counting the number of items as you go.

    I'm not familiar with Python, but something like the code below should work.

    Be aware that if a folder has more than 200 items, you need to use paging to retrieve all items.

    import asyncio
    from typing import List
    from msgraph import GraphServiceClient
    from azure.identity import ClientSecretCredential
    
    # App registration details used for client-secret authentication.
    # The values here are placeholders.
    microsoft_tenant_id = '123abc'
    client_id = '123abc'
    client_secret = '123abc'

    # Scope list handed to the Graph client.
    SCOPES = ['https://graph.microsoft.com/.default']

    credential = ClientSecretCredential(
        tenant_id=microsoft_tenant_id,
        client_id=client_id,
        client_secret=client_secret,
    )
    graph_client = GraphServiceClient(credentials=credential, scopes=SCOPES)

    # User whose drive is looked up in get_drive_count().
    user_id = 'myemail@companyname.com'

    # Running total of drive items; printed by get_drive_count().
    items_count = 0
    
    async def get_child_items_count(driveId: str, driveItemId: str) -> None:
        """Add the number of items beneath a drive item to ``items_count``.

        Every page of the item's children is fetched and counted, then the
        function recurses into each child folder that reports at least one
        child of its own.

        Args:
            driveId: ID of the drive that contains the item.
            driveItemId: ID of the folder item to walk (``'root'`` for the
                top of the drive).

        Returns:
            None. The result is accumulated in the module-level
            ``items_count`` counter.
        """
        # ``items_count`` is rebound below; without this declaration Python
        # treats it as a local and raises UnboundLocalError on the first ``+=``.
        global items_count

        children_request = (
            graph_client.drives.by_drive_id(driveId)
            .items.by_drive_item_id(driveItemId)
            .children
        )
        subfolder_ids: List[str] = []

        # One loop handles the first page and every follow-up page alike.
        page = await children_request.get()
        while page is not None:
            # ``value`` may be missing on an empty page.
            children = page.value or []
            items_count += len(children)

            # Remember non-empty folders so they can be walked once this
            # folder's paging is finished. ``child_count`` may be None.
            subfolder_ids.extend(
                child.id
                for child in children
                if child.folder is not None and (child.folder.child_count or 0) > 0
            )

            if not page.odata_next_link:
                break
            page = await children_request.with_url(page.odata_next_link).get()

        # Count the items in each subfolder.
        for subfolder_id in subfolder_ids:
            await get_child_items_count(driveId, subfolder_id)
    
    async def get_drive_count():
        """Count the items in the drive of ``user_id`` and print the total."""
        # Fetch the user's drive; only its id is needed.
        user_drive = await graph_client.users.by_user_id(user_id).drive.get()

        # Walk the drive starting at its root item.
        await get_child_items_count(user_drive.id, 'root')

        print(f"count: {items_count}")
    
    # Script entry point: run the coroutine on an event loop until it completes.
    asyncio.run(get_drive_count())