azure, azure-functions, papermill

Python Papermill - execute_notebook - Azure Python function


I can create an Azure Functions project (V2) with Python 3.11. After publishing, all functions work, and they all work on my local dev laptop as well. However, there is one function that does not work when hosted as an Azure Function.

papermill ..

I have the Python file downloaded, and I can verify that it is in the tmp folder where the Azure Function downloads the file. The metadata file is there as well, the same as when debugging locally.

However, when I call the execute_notebook function in papermill, I get this error immediately:

Kernel died before replying to kernel_info

What does this mean, and how can I make this work when the function is hosted on Azure Functions?


Solution

  • I managed to make Papermill work by installing the kernel each time the function starts up - since I want to keep on using Azure Serverless (consumption plan):

    def RunJupyterNotebookPapermill(inputFile, outputFile):
        """Execute the notebook at inputFile with Papermill.

        Both paths are converted to absolute paths first; the executed
        notebook is written to outputFile using the "python3" kernel.
        """
        absoluteInput = GetAbsoluteFilePath(inputFile)
        absoluteOutput = GetAbsoluteFilePath(outputFile)
        pm.execute_notebook(absoluteInput, absoluteOutput, kernel_name="python3")
    

    Here is a more detailed flow of some of the key elements that allow me to download notebooks from Kaggle, run them in an Azure serverless function, and then push them back to Kaggle. I tested it and it is working ;-)

    Remember to configure KAGGLE_USERNAME and KAGGLE_KEY with your API credentials in local.settings.json, and ALSO in your Azure Functions project configuration in the Portal; otherwise your functions won't show up online until you add those two configuration keys and values.

    import logging 
    import azure.functions as func 
    import os
    
    from kaggle.api.kaggle_api_extended import KaggleApi
    
    import tempfile
    
    import papermill as pm
    import subprocess 
    import json 
    
    
    
    # Blueprint object that the HTTP-triggered functions in this module attach
    # their routes to via the @BlueprintJupyter.route decorator.
    # NOTE(review): presumably registered with the function app elsewhere - confirm.
    BlueprintJupyter = func.Blueprint() 
    
    @BlueprintJupyter.route(route="KaggleRefresh", methods=['POST'])
    def KaggleRefresh(req: func.HttpRequest) -> func.HttpResponse:
        """Pull a Kaggle notebook, run it with Papermill and push it back.

        The steps are: install the Jupyter kernel, pull the notebook into the
        temp folder, execute it, then push the folder back to Kaggle.

        Args:
            req: The incoming HTTP request. Its body is not read.

        Returns:
            A 200 response listing the files found in the temp folder when
            every step succeeds; a 500 response carrying the exception text
            (and whatever file listing was collected so far) when a step fails.
        """
        # Bound before the try block so the error response can always reference
        # it, even when a step fails before the listing has been built.
        files = ""

        try:
            # install jupyter kernel
            InstallJupyterKernel()

            # prepare files
            tempPath = GetTempPath()
            inputFile = GetFilePath(tempPath, "schedule-run-test.ipynb")
            outputFile = GetFilePath(tempPath, "schedule-run-test-output.ipynb")

            # pull
            KagglePull(tempPath)

            # Build the diagnostic listing returned in the response body.
            files = "" + ListFiles(tempPath)
            files = files + "\n"

            files = files + "\n" + "Absolute Paths:"
            files = files + "\n" + GetAbsoluteFilePath(inputFile)
            files = files + "\n" + GetAbsoluteFilePath(outputFile)

            # execute
            RunJupyterNotebookPapermill(inputFile, outputFile)

            # push
            KagglePush(tempPath)

            return func.HttpResponse(
                f"Hello, This HTTP-triggered function "
                f"executed successfully."
                f"Files: {files}")
        except Exception as ex:
            # Top-level boundary: record the traceback, then report the failure
            # to the caller with an error status instead of 200.
            logging.exception("KaggleRefresh failed")
            return func.HttpResponse(
                f"This HTTP-triggered function failed."
                f"{str(ex)}"
                f"Files: {files}",
                status_code=500
            )
        
    def KagglePull(tempPath):
        """Pull the 'myusername/schedule-run-test' kernel into tempPath.

        Authenticates a new KaggleApi client, then downloads the kernel
        together with its metadata (metadata=True).
        """
        client = KaggleApi()
        client.authenticate()
        client.kernels_pull(
            'myusername/schedule-run-test',
            path=tempPath,
            metadata=True,
        )
    
    def KagglePush(tempPath):
        """Push the kernel folder at tempPath to Kaggle.

        Authenticates a new KaggleApi client and calls kernels_push_cli on the
        folder (the plain kernels_push call was tried earlier and replaced).
        """
        client = KaggleApi()
        client.authenticate()
        client.kernels_push_cli(tempPath)
    
    def RunJupyterNotebookPapermill(inputFile, outputFile):
        """Execute the notebook at inputFile with Papermill.

        Both paths are converted to absolute paths first; the executed
        notebook is written to outputFile using the "python3" kernel.
        """
        absoluteInput = GetAbsoluteFilePath(inputFile)
        absoluteOutput = GetAbsoluteFilePath(outputFile)
        pm.execute_notebook(absoluteInput, absoluteOutput, kernel_name="python3")
    
    def GetTempPath():
        """Return the path of the "tmpnotebook" folder under the system temp dir.

        Only the path is computed; the folder itself is not created here.
        """
        return os.path.join(tempfile.gettempdir(), "tmpnotebook")
    
    def GetFilePath(tempPath, fileName):
        """Return fileName joined onto the folder tempPath."""
        joinedPath = os.path.join(tempPath, fileName)
        return joinedPath
    
    def GetAbsoluteFilePath(fileName):
        """Return the absolute, normalised form of the path fileName."""
        absolutePath = os.path.abspath(fileName)
        return absolutePath
    
    def ListFiles(tempPath):
        """Return a printable listing of the regular files directly in tempPath.

        Sub-directories are skipped. The result starts with a newline and the
        full file paths are separated by '; \\n', in os.listdir order.
        """
        candidates = (os.path.join(tempPath, entry) for entry in os.listdir(tempPath))
        regularFiles = [path for path in candidates if os.path.isfile(path)]
        return '\n' + '; \n'.join(regularFiles)
    
    def InstallJupyterKernel():
        """Install ipykernel and register it as the "python3" Jupyter kernel.

        Runs two shell commands in order. Both are always attempted (best
        effort); a non-zero exit status is logged as a warning instead of
        being silently discarded, so a failed install can be diagnosed before
        the notebook run fails.
        """
        # NOTE(review): `pip` and `python` are resolved through PATH, so they
        # may not belong to the interpreter running this function - confirm.
        commands = (
            'pip install ipykernel',
            'python -m ipykernel install --user --name python3',
        )
        for command in commands:
            exitStatus = os.system(command)
            if exitStatus != 0:
                logging.warning(
                    "Command %r exited with status %s", command, exitStatus
                )