I can create an Azure Functions project (V2, Python 3.11). After publishing, all of the functions work, and they all work on my local dev laptop as well. However, there is one function that does not work when hosted as an Azure Function.
The function that fails is the one that uses papermill.
I have the Python file downloaded, and I can verify that it is in the tmp folder where the Azure Function downloads the file. The metadata file is there as well, the same as when debugging locally.
However, when I call the execute_notebook function in papermill, I immediately get this error:
Kernel died before replying to kernel_info
What does this mean, and how can I make this work when the function is hosted on Azure Functions?
I managed to make Papermill work by installing the kernel each time the function starts up, since I want to keep using Azure serverless (Consumption plan):
def RunJupyterNotebookPapermill(inputFile, outputFile):
    """Execute the notebook at inputFile with papermill and write it to outputFile.

    Both paths are converted with GetAbsoluteFilePath before being handed to
    papermill, and the notebook is run on the kernel named "python3".
    """
    absoluteInput = GetAbsoluteFilePath(inputFile)
    absoluteOutput = GetAbsoluteFilePath(outputFile)
    pm.execute_notebook(absoluteInput, absoluteOutput, kernel_name="python3")
Here is a more detailed flow showing some of the key elements, which allow me to download notebooks from Kaggle, run them in an Azure serverless function, and then push them back to Kaggle. I tested it and it is working ;-)
Remember to configure KAGGLE_USERNAME and KAGGLE_KEY with your API credentials in local.settings.json, and ALSO in your Azure Functions project configuration in the Portal; otherwise your functions won't show up online until you add those two configuration keys and values.
import json
import logging
import os
import subprocess
import sys
import tempfile

import azure.functions as func
import papermill as pm
from kaggle.api.kaggle_api_extended import KaggleApi
# Blueprint that the HTTP route defined in this module is attached to.
BlueprintJupyter = func.Blueprint()
@BlueprintJupyter.route(route="KaggleRefresh", methods=['POST'])
def KaggleRefresh(req: func.HttpRequest) -> func.HttpResponse:
    """Pull a Kaggle notebook, execute it with papermill, and push it back.

    Steps, in order: install the Jupyter kernel, pull the notebook into the
    temp working folder, build a listing of the pulled files, run the notebook
    with papermill, and push the folder back to Kaggle.

    Args:
        req: The incoming HTTP request. Its body is not read.

    Returns:
        An HTTP response whose body reports success or failure, followed by
        the listing of files gathered so far (empty when the failure happened
        before the listing was built).
    """
    # Bound before the try block so the error handler can always format it,
    # even when a step fails before the file listing has been built.
    files = ""
    try:
        # install jupyter kernel
        InstallJupyterKernel()

        # prepare files
        tempPath = GetTempPath()
        inputFile = GetFilePath(tempPath, "schedule-run-test.ipynb")
        outputFile = GetFilePath(tempPath, "schedule-run-test-output.ipynb")

        # pull
        KagglePull(tempPath)

        # Diagnostic listing echoed back in the response body.
        files = "\n".join([
            ListFiles(tempPath),
            "",
            "Absolute Paths:",
            GetAbsoluteFilePath(inputFile),
            GetAbsoluteFilePath(outputFile),
        ])

        # execute
        RunJupyterNotebookPapermill(inputFile, outputFile)

        # push
        KagglePush(tempPath)

        return func.HttpResponse(
            f"Hello, This HTTP-triggered function "
            f"executed successfully."
            f"Files: {files}")
    except Exception as ex:
        # Top-level boundary: record the traceback, then report the failure
        # in the response body instead of letting the host swallow it.
        logging.exception("KaggleRefresh failed")
        # NOTE(review): failures are still reported with status 200 to keep
        # the existing response contract; consider 500 if no caller relies on it.
        return func.HttpResponse(
            f"This HTTP-triggered function failed."
            f"{str(ex)}"
            f"Files: {files}",
            status_code=200
        )
def KagglePull(tempPath):
    """Pull the 'myusername/schedule-run-test' Kaggle kernel into tempPath.

    A new KaggleApi client is created and authenticated for the call, and the
    pull is requested with metadata=True.
    """
    kaggleClient = KaggleApi()
    kaggleClient.authenticate()
    kaggleClient.kernels_pull(
        'myusername/schedule-run-test',
        path=tempPath,
        metadata=True,
    )
def KagglePush(tempPath):
    """Push the kernel folder at tempPath back to Kaggle.

    A new KaggleApi client is created and authenticated for the call.
    """
    kaggleClient = KaggleApi()
    kaggleClient.authenticate()
    # The CLI variant is used here; a plain kernels_push(tempPath) call was
    # tried in this spot before and left disabled.
    kaggleClient.kernels_push_cli(tempPath)
def RunJupyterNotebookPapermill(inputFile, outputFile):
    """Execute the notebook at inputFile with papermill and write it to outputFile.

    Both paths are converted with GetAbsoluteFilePath before being handed to
    papermill, and the notebook is run on the kernel named "python3".
    """
    absoluteInput = GetAbsoluteFilePath(inputFile)
    absoluteOutput = GetAbsoluteFilePath(outputFile)
    pm.execute_notebook(absoluteInput, absoluteOutput, kernel_name="python3")
def GetTempPath():
    """Return the path of the "tmpnotebook" folder under the system temp directory.

    Only the path is computed; the folder itself is not created here.
    """
    return os.path.join(tempfile.gettempdir(), "tmpnotebook")
def GetFilePath(tempPath, fileName):
    """Join fileName onto tempPath and return the combined path."""
    combinedPath = os.path.join(tempPath, fileName)
    return combinedPath
def GetAbsoluteFilePath(fileName):
    """Return the absolute form of fileName as computed by os.path.abspath."""
    absolutePath = os.path.abspath(fileName)
    return absolutePath
def ListFiles(tempPath):
    """Return a printable listing of the regular files directly inside tempPath.

    Sub-directories are skipped and the directory is not walked recursively.
    Paths are sorted so the listing is deterministic (os.listdir returns
    entries in arbitrary order).

    Args:
        tempPath: Directory to list.

    Returns:
        A string that starts with a newline, followed by the full file paths
        joined with '; \\n'. For a directory without files this is just '\\n'.

    Raises:
        OSError: If tempPath cannot be listed (for example, it does not exist).
    """
    candidates = (os.path.join(tempPath, entry) for entry in os.listdir(tempPath))
    filePaths = sorted(path for path in candidates if os.path.isfile(path))
    return '\n' + '; \n'.join(filePaths)
def InstallJupyterKernel():
    """Install ipykernel and register it as the "python3" Jupyter kernel.

    Both commands are run through the interpreter that is executing this
    function (sys.executable) rather than whatever 'pip' / 'python' resolve
    to on PATH, so the package and the kernel are set up for the same
    Python that runs the notebook code. Arguments are passed as lists, so no
    shell is involved.

    The installation stays best-effort: a command that cannot be started or
    that exits with a non-zero status is logged as a warning and does not
    raise.
    """
    # sys.executable can be empty when Python cannot determine its own path.
    interpreter = sys.executable or 'python'
    commands = (
        [interpreter, '-m', 'pip', 'install', 'ipykernel'],
        [interpreter, '-m', 'ipykernel', 'install', '--user', '--name', 'python3'],
    )
    for command in commands:
        try:
            completed = subprocess.run(
                command, capture_output=True, text=True, check=False
            )
        except OSError as error:
            logging.warning("Could not start %s: %s", command, error)
            continue
        if completed.returncode != 0:
            logging.warning(
                "Command %s exited with status %d: %s",
                command,
                completed.returncode,
                completed.stderr.strip(),
            )