Tags: python, https, google-cloud-run, large-language-model, model-context-protocol

How to make the LLM call MCP functions hosted on Google Cloud Run with Python


I have hosted a function on Google Cloud Run and am able to call it with the FastMCP Client. Thank you for the help with my earlier question.

This is my MCP server code, deployed as a Docker image on Google Cloud Run.

import asyncio
import os
from fastmcp import FastMCP, Context
mcp = FastMCP("MCP Server on Cloud Run")

@mcp.tool()
async def add(a: int, b: int, ctx: Context) -> int:
    """Call this function when there are 2 numbers to add. Pass the 2 numbers as parameters."""
    await ctx.debug(f"[add] {a}+{b}")
    result = a + b
    await ctx.debug(f"result={result}")
    return result


if __name__ == "__main__":
    asyncio.run(
        mcp.run_async(
            transport="streamable-http", 
            host="0.0.0.0", 
            port=int(os.getenv("PORT", 8080)),
        )
    )

The code below works and I am able to call the MCP tool to add 2 numbers.

from fastmcp import Client
import asyncio
import google.oauth2.id_token
import google.auth.transport.requests
import os
import sys

args = sys.argv
if len(args) != 3:
    sys.stderr.write(f"Usage: python {args[0]} <a> <b>\n")
    sys.exit(1)

a = args[1]
b = args[2]

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'C:\\Path\\to\\file.json'
audience = "https://mcp-server-url-from-cloud-run"

request = google.auth.transport.requests.Request()
token = google.oauth2.id_token.fetch_id_token(request, audience)

config = {
    "mcpServers": {
        "cloud-run":{
            "transport": "streamable-http",
            "url": f"{audience}/mcp/",
            "headers": {
                "Authorization": "Bearer token",
            },
            "auth": token,
        }
    }
}

client = Client(config)


async def run():
    async with client:
        print("Connected")
        aint = int(a)
        bint = int(b)
        result = await client.call_tool(
            name="add",
            arguments={"a": aint, "b": bint},
        )
        print(result)


if __name__ == "__main__":
    asyncio.run(run())

My intention was to expose this tool to my LLM so it can decide when to call the tools at its disposal. For example, if I say "add 5 and 4" in the prompt, the LLM should call the add function and return 9. Just using the call_tool() function does not add much value when unstructured data is involved.
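
For reference, the same FastMCP Client from the working script above can list the tools the Cloud Run server advertises; this is exactly the metadata (name, description, input schema) an LLM agent is handed when deciding which tool to call. Below is a minimal sketch, assuming the authenticated `client` object from that script; `list_remote_tools` is just an illustrative name.

# Sketch: inspect the tools the Cloud Run MCP server advertises.
# Assumes the authenticated `client` built in the script above.
async def list_remote_tools():
    async with client:
        tools = await client.list_tools()
        for tool in tools:
            # Each tool exposes the name, description and JSON schema
            # that an LLM uses to decide when and how to call it.
            print(tool.name, "-", tool.description)
            print(tool.inputSchema)

# asyncio.run(list_remote_tools())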

I could use the code below to make the LLM access the MCP tools when the MCP server was a local .py file.

import asyncio
import json
import os
import sys

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
from langchain_mcp_adapters.tools import load_mcp_tools
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

os.environ["OPENAI_API_KEY"] = "Open_API_Key"
# Instantiate the OpenAI chat model with deterministic output and retry logic
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    #api_key=""
    # base_url="...",
    # organization="...",
    # other params...
)
server_script = sys.argv[1]
# Configure MCP server startup parameters
server_params = StdioServerParameters(
    command="python" if server_script.endswith(".py") else "node",
    args=[server_script],
)    
mcp_client = None

async def run_agent():
        global mcp_client
        async with stdio_client(server_params) as (read, write):
            async with ClientSession(read, write) as session:
                await session.initialize()
                mcp_client = type("MCPClientHolder", (), {"session": session})()
                tools = await load_mcp_tools(session)
                agent = create_react_agent(llm, tools, prompt=system_message_obj)
                print("MCP Client Started! Type 'quit' to exit.")
                while True:
                    query = input("\nQuery: ").strip()
                    if query.lower() == "quit":
                        break
                    # Send user query to agent and print formatted response
                    response = await agent.ainvoke({"messages": query})
                    try:
                        formatted = json.dumps(response, indent=2, cls=CustomEncoder)
                    except Exception:
                        formatted = str(response)
                    print("\\nResponse:")
                    print(formatted)
        return

Is there a way to expose the tools from my Google Cloud Run MCP server (called in my first code) to the Python LLM client using the Cloud Run URL and the service-account JSON key, like it is done in the second code with a local .py file? This might be a basic question but I could not find any answers so far. Any help will be really appreciated. Due to scalability concerns I do not want to use a local Cloud Run proxy for authentication.


Solution

  • I found the solution. The key was using MultiServerMCPClient to access
    the MCP server and providing the auth token as a header.
    
    import asyncio
    import os

    import google.oauth2.id_token
    import google.auth.transport.requests
    from langgraph.prebuilt import create_react_agent
    from langchain_openai import ChatOpenAI
    from langchain_mcp_adapters.client import MultiServerMCPClient
    
    os.environ["OPENAI_API_KEY"] ="OpenAI_API_Key"
    #initialize LLM Client
    llm = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        #api_key=""
        # base_url="...",
        # organization="...",
        # other params...
    )
    # Get auth token from the JSON key referenced in the environment
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'C:\\Path\\To\\file.json'
    audience = "https://mcp-server-url"
    request = google.auth.transport.requests.Request()
    token = google.oauth2.id_token.fetch_id_token(request, audience)
    # Initialize Multi-Server MCP Client with the token
    config = {
            "cloud-run":{
                "transport": "streamable_http",
                "url": f"{audience}/mcp/",
                "headers": {
                    "Authorization": "Bearer "+token,
                }
            }
    }
    client = MultiServerMCPClient(config)
    
    # Function to make the LLM client call tools through a prompt
    async def run():
        # Load the MCP tools into the LLM client
        tools = await client.get_tools()
        query = "What is 4 + 8"
        agent = create_react_agent(llm, tools)
        # Call the LLM agent
        response = await agent.ainvoke({"messages": query})
        final_ai_message = None
        # Fetch the last AIMessage from the response
        for message in response['messages']:
            if "AIMessage" in str(type(message)):
                final_ai_message = message
        print(final_ai_message.content)
    
    if __name__ == "__main__":
        asyncio.run(run())
    
    #The above function call gives the response: 4+8=12
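
    To get the same interactive behaviour as the local stdio version in the
    question, the agent built from the remote tools can be wrapped in the same
    kind of query loop. Below is a minimal sketch, assuming the `llm` and
    `client` objects defined above; `run_interactive` is just a hypothetical
    name for illustration.

    # Sketch: interactive loop against the Cloud Run MCP server,
    # mirroring the stdio-based run_agent() from the question.
    async def run_interactive():
        tools = await client.get_tools()
        agent = create_react_agent(llm, tools)
        print("MCP Client Started! Type 'quit' to exit.")
        while True:
            query = input("\nQuery: ").strip()
            if query.lower() == "quit":
                break
            response = await agent.ainvoke({"messages": query})
            # The last message in the returned state is the agent's final answer
            print(response["messages"][-1].content)

    # asyncio.run(run_interactive())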