Tags: python, https, google-cloud-run, large-language-model, model-context-protocol

How to make the LLM call MCP functions hosted on Google Cloud Run with Python


I have hosted a function on Google Cloud Run and am able to call it with the FastMCP Client. Thank you for the help with my earlier question.

This is my MCP server code, deployed as a Docker image on Google Cloud Run.

import asyncio
import os
from fastmcp import FastMCP, Context
mcp = FastMCP("MCP Server on Cloud Run")

@mcp.tool()
async def add(a: int, b: int, ctx: Context) -> int:
    """Call this function when there are 2 numbers to add. Pass the 2 numbers as parameters."""
    await ctx.debug(f"[add] {a}+{b}")
    result = a + b
    await ctx.debug(f"result={result}")
    return result


if __name__ == "__main__":
    asyncio.run(
        mcp.run_async(
            transport="streamable-http", 
            host="0.0.0.0", 
            port=int(os.getenv("PORT", 8080)),
        )
    )

The code below works and I am able to call the MCP tool to add 2 numbers.

from fastmcp import Client
import asyncio
import google.oauth2.id_token
import google.auth.transport.requests
import os
import sys

args = sys.argv
if len(args) != 3:
    sys.stderr.write(f"Usage: python {args[0]} <a> <b>\n")
    sys.exit(1)

a = args[1]
b = args[2]

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'C:\\Path\\to\\file.json'
audience = "https://mcp-server-url-from-cloud-run"

request = google.auth.transport.requests.Request()
token = google.oauth2.id_token.fetch_id_token(request, audience)

config = {
    "mcpServers": {
        "cloud-run":{
            "transport": "streamable-http",
            "url": f"{audience}/mcp/",
            "headers": {
                "Authorization": "Bearer token",
            },
            "auth": token,
        }
    }
}

client = Client(config)


async def run():
    async with client:
        print("Connected")
        aint = int(a)
        bint = int(b)
        result = await client.call_tool(
            name="add",
            arguments={"a": aint, "b": bint},
        )
        print(result)


if __name__ == "__main__":
    asyncio.run(run())

My intention was to expose this tool to my LLM so it can decide when to call the tools at its disposal. For example, if I say "add 5 and 4" in the prompt, the LLM should call the add function and return 9. Just using the call_tool() function does not add much value when unstructured data is involved.
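
For reference, the same FastMCP Client from the working script above can list the tools the Cloud Run server advertises; this is exactly the metadata (name, description, input schema) an LLM agent is handed when deciding which tool to call. Below is a minimal sketch, assuming the authenticated `client` object from that script; `list_remote_tools` is just an illustrative name.

# Sketch: inspect the tools the Cloud Run MCP server advertises.
# Assumes the authenticated `client` built in the script above.
async def list_remote_tools():
    async with client:
        tools = await client.list_tools()
        for tool in tools:
            # Each tool exposes the name, description and JSON schema
            # that an LLM uses to decide when and how to call it.
            print(tool.name, "-", tool.description)
            print(tool.inputSchema)

# asyncio.run(list_remote_tools())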

I could use the code below to make the LLM access the MCP tools when the MCP server was a local .py file.

import asyncio
import json
import os
import sys

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
from langchain_mcp_adapters.tools import load_mcp_tools
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

os.environ["OPENAI_API_KEY"] = "Open_API_Key"
# Instantiate the OpenAI chat model with deterministic output and retry logic
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    #api_key=""
    # base_url="...",
    # organization="...",
    # other params...
)
server_script = sys.argv[1]
# Configure MCP server startup parameters
server_params = StdioServerParameters(
    command="python" if server_script.endswith(".py") else "node",
    args=[server_script],
)    
mcp_client = None

async def run_agent():
        global mcp_client
        async with stdio_client(server_params) as (read, write):
            async with ClientSession(read, write) as session:
                await session.initialize()
                mcp_client = type("MCPClientHolder", (), {"session": session})()
                tools = await load_mcp_tools(session)
                agent = create_react_agent(llm, tools, prompt=system_message_obj)
                print("MCP Client Started! Type 'quit' to exit.")
                while True:
                    query = input("\nQuery: ").strip()
                    if query.lower() == "quit":
                        break
                    # Send user query to agent and print formatted response
                    response = await agent.ainvoke({"messages": query})
                    try:
                        formatted = json.dumps(response, indent=2, cls=CustomEncoder)
                    except Exception:
                        formatted = str(response)
                    print("\\nResponse:")
                    print(formatted)
        return

Is there a way to expose the tools from my Google Cloud Run MCP server (called in my first code) to the Python LLM client using the Cloud Run URL and the service-account JSON key, like it is done in the second code with a local .py file? This might be a basic question but I could not find any answers so far. Any help will be really appreciated. Due to scalability concerns I do not want to use a local Cloud Run proxy for authentication.


Solution

  • I found the solution. The key was using MultiServerMCPClient to access
    the MCP server and providing the auth token as a header.
    
    import asyncio
    import os

    import google.oauth2.id_token
    import google.auth.transport.requests
    from langgraph.prebuilt import create_react_agent
    from langchain_openai import ChatOpenAI
    from langchain_mcp_adapters.client import MultiServerMCPClient
    
    os.environ["OPENAI_API_KEY"] ="OpenAI_API_Key"
    #initialize LLM Client
    llm = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        #api_key=""
        # base_url="...",
        # organization="...",
        # other params...
    )
    # Get auth token from the JSON key referenced in the environment
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'C:\\Path\\To\\file.json'
    audience = "https://mcp-server-url"
    request = google.auth.transport.requests.Request()
    token = google.oauth2.id_token.fetch_id_token(request, audience)
    # Initialize Multi-Server MCP Client with the token
    config = {
            "cloud-run":{
                "transport": "streamable_http",
                "url": f"{audience}/mcp/",
                "headers": {
                    "Authorization": "Bearer "+token,
                }
            }
    }
    client = MultiServerMCPClient(config)
    
    # Function to make the LLM client call tools through a prompt
    async def run():
        # Load the MCP tools into the LLM client
        tools = await client.get_tools()
        query = "What is 4 + 8"
        agent = create_react_agent(llm, tools)
        # Call the LLM agent
        response = await agent.ainvoke({"messages": query})
        final_ai_message = None
        # Fetch the last AIMessage from the response
        for message in response['messages']:
            if "AIMessage" in str(type(message)):
                final_ai_message = message
        print(final_ai_message.content)
    
    if __name__ == "__main__":
        asyncio.run(run())
    
    #The above function call gives the response: 4+8=12
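
    To get the same interactive behaviour as the local stdio version in the
    question, the agent built from the remote tools can be wrapped in the same
    kind of query loop. Below is a minimal sketch, assuming the `llm` and
    `client` objects defined above; `run_interactive` is just a hypothetical
    name for illustration.

    # Sketch: interactive loop against the Cloud Run MCP server,
    # mirroring the stdio-based run_agent() from the question.
    async def run_interactive():
        tools = await client.get_tools()
        agent = create_react_agent(llm, tools)
        print("MCP Client Started! Type 'quit' to exit.")
        while True:
            query = input("\nQuery: ").strip()
            if query.lower() == "quit":
                break
            response = await agent.ainvoke({"messages": query})
            # The last message in the returned state is the agent's final answer
            print(response["messages"][-1].content)

    # asyncio.run(run_interactive())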