openai-api, agent, azure-openai, litellm

Implementing a custom LiteLlm for the Google Agent Development Kit that supports tool calls


I'm trying to implement a custom LiteLlm wrapper to access an LLM endpoint. This is the code I have:

import os
import litellm
import requests
from dotenv import load_dotenv
from litellm import CustomLLM
from litellm.types.utils import ModelResponse


class MyLiteLlm(CustomLLM):
    def __init__(self):
        super().__init__()
        load_dotenv()
        self._endpoint = os.getenv("ENDPOINT")
        self._key = os.getenv("KEY")
        self._model = os.getenv("MODEL")
        self._api_version = os.getenv("API_VERSION")

    def process_request(self, *args, **kwargs) -> litellm.ModelResponse:
        messages = kwargs.get("messages", [])
        if not messages:
            raise ValueError("The 'messages' parameter is required.")

        # Get the latest user message from the message history.
        latest_user_message = next(
            (msg for msg in reversed(messages) if msg.get("role") == "user"),
            None,
        )
        if latest_user_message:
            user_question = latest_user_message.get("content")
        else:
            user_question = "Tell the user to ask a question."

        headers = ...
        query_params = ...
        body = {
            'messages': [
                {'role': 'user', 'content': user_question}
            ]
        }
        full_path = ...
        response = requests.post(...)
        response.raise_for_status()
        api_response_data = response.json()
        model_response = ModelResponse(
            id=api_response_data.get("id"),
            choices=[
                litellm.Choices(
                    finish_reason=choice.get("finish_reason"),
                    index=choice.get("index"),
                    message=litellm.Message(
                        content=choice["message"]["content"],
                        role=choice["message"]["role"],
                    ),
                )
                for choice in api_response_data.get("choices", [])
            ],
            model=self._model,
            usage=litellm.Usage(
                prompt_tokens=api_response_data.get("usage", {}).get("prompt_tokens"),
                completion_tokens=api_response_data.get("usage", {}).get("completion_tokens"),
                total_tokens=api_response_data.get("usage", {}).get("total_tokens"),
            )
        )
        return model_response

    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
        return self.process_request(*args, **kwargs)

    async def acompletion(self, *args, **kwargs) -> litellm.ModelResponse:
        return self.process_request(*args, **kwargs)

This works just fine with an ADK agent:

my_lite_llm = MyLiteLlm()
litellm.custom_provider_map = [
    {"provider": 'my-lite-llm', "custom_handler": my_lite_llm}
]
# The prefix before "/" must match the provider key registered above.
MODEL = LiteLlm(model="my-lite-llm/eyq-model")

I can pass that MODEL to an ADK (Agent Development Kit) agent, and it calls the wrapper and returns the expected response. However, when the agent needs to call a tool, it simply doesn't work: my tool never gets called. If I use one of the standard LiteLlm wrappers, the tools are called without problem.
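
For reference, I attach the model and a tool to the agent roughly like this (the tool here is just a placeholder for my real one):

from google.adk.agents import Agent


def my_tool(query: str) -> dict:
    """Placeholder for my real tool."""
    return {"result": f"looked up {query}"}


root_agent = Agent(
    name="my_agent",
    model=MODEL,
    instruction="Call my_tool when the user asks for data.",
    tools=[my_tool],
)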

What am I missing???


Solution

  • I ran into the same issue when writing a custom LiteLLM/ADK integration. The problem is that your wrapper only maps plain message.content but doesn’t propagate tool calls back into LiteLLM’s ModelResponse. Without that, the agent never sees the tool_calls array and therefore never triggers your tool.
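
    If you want to stay on the pure LiteLLM CustomLLM route, the minimal fix would be to forward the agent's tool declarations (kwargs["tools"]) in your request body and to keep tool_calls when you rebuild the choices. A rough, untested sketch, assuming the endpoint returns OpenAI-compatible choices:

    import litellm


    def choice_to_litellm(choice: dict) -> litellm.Choices:
        """Rebuild a litellm Choice without dropping tool calls."""
        msg = choice.get("message", {})
        return litellm.Choices(
            finish_reason=choice.get("finish_reason"),
            index=choice.get("index"),
            message=litellm.Message(
                content=msg.get("content"),
                role=msg.get("role", "assistant"),
                # Pass the OpenAI-style tool_calls list through so the agent
                # framework can see which tool the model wants to run.
                tool_calls=msg.get("tool_calls"),
            ),
        )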

    That said, here is the solution I ended up with. I also have a corporate custom API that exposes several LLMs (Gemini, ChatGPT, ...), and instead of going through LiteLLM I implemented ADK's BaseLlm directly; that way I got tool calls working. You have to define how tool calls are passed to and returned from your custom LLM.

    This is my code; you'll have to adapt it to your use case.

    import os
    import json
    import aiohttp
    import logging
    from typing import Any, Dict, AsyncGenerator, Optional
    
    from google.adk.models.base_llm import BaseLlm
    from google.adk.models.llm_request import LlmRequest
    from google.adk.models.llm_response import LlmResponse
    from google.genai import types  # ADK uses this for structured parts
    
    logger = logging.getLogger(__name__)
    
    
    class MyCustomClient:
        """Minimal HTTP client for a custom LLM endpoint."""
    
        def __init__(self, base_url: str, api_key: str):
            self.base_url = base_url.rstrip("/")
            self.api_key = api_key
            self.session: Optional[aiohttp.ClientSession] = None
    
        async def _get_session(self) -> aiohttp.ClientSession:
            if self.session is None or self.session.closed:
                self.session = aiohttp.ClientSession()
            return self.session
    
        async def close(self):
            if self.session and not self.session.closed:
                await self.session.close()
    
        async def generate(self, payload: Dict[str, Any]) -> Dict[str, Any]:
            session = await self._get_session()
            headers = {"Authorization": f"Bearer {self.api_key}"}
            url = f"{self.base_url}/v1/chat/completions"
    
            async with session.post(url, json=payload, headers=headers) as response:
                response.raise_for_status()
                return await response.json()
    
    
    class MyLiteLlm(BaseLlm):
        """Custom LLM implementation that supports tool calls."""
    
        def __init__(self, model: str, base_url: str, api_key: str, **kwargs):
            super().__init__(model=model, **kwargs)
            self._client = MyCustomClient(base_url, api_key)
            self._model = model
    
        async def generate_content_async(
            self, llm_request: LlmRequest, stream: bool = False
        ) -> AsyncGenerator[LlmResponse, None]:
            if not llm_request.contents:
                raise ValueError("LlmRequest must contain contents")
    
            payload = {
                "model": self._model,
                # NOTE: Content.model_dump() yields Gemini-style {role, parts}
                # dicts; map them to OpenAI-style {role, content} messages here
                # if that's what your endpoint expects.
                "messages": [c.model_dump(exclude_none=True) for c in llm_request.contents],
            }

            # Include tool declarations if the agent registered any. ADK keeps
            # them on llm_request.config.tools; adapt the dump to whatever
            # tool schema your endpoint expects.
            if llm_request.config and llm_request.config.tools:
                payload["tools"] = [
                    t.model_dump(exclude_none=True) for t in llm_request.config.tools
                ]
    
            api_response = await self._client.generate(payload)
    
            # 🔑 Convert response into ADK's LlmResponse, preserving tool calls
            parts = []
            for choice in api_response.get("choices", []):
                msg = choice.get("message", {})
                if "content" in msg and msg["content"]:
                    parts.append(types.Part.from_text(text=msg["content"]))
                if "tool_calls" in msg:
                    for tc in msg["tool_calls"]:
                        parts.append(
                            types.Part(
                                function_call=types.FunctionCall(
                                    name=tc.get("function", {}).get("name", ""),
                                    args=tc.get("function", {}).get("arguments", {}),
                                )
                            )
                        )
    
            llm_response = LlmResponse(
                content=types.Content(
                    role=api_response.get("role", "model"),
                    parts=parts,
                ),
                partial=False,
            )
            yield llm_response
    
        async def __aenter__(self):
            return self
    
        async def __aexit__(self, exc_type, exc_val, exc_tb):
            await self._client.close()
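
    With that in place you can hand the model straight to an ADK agent (no litellm.custom_provider_map needed, since this is a BaseLlm subclass), and the agent will see the FunctionCall parts and dispatch to the matching tool. Roughly, with placeholder names and env vars:

    import os

    from google.adk.agents import Agent


    def get_weather(city: str) -> dict:
        """Toy tool so the agent has something to call."""
        return {"city": city, "forecast": "sunny"}


    root_agent = Agent(
        name="weather_agent",
        model=MyLiteLlm(
            model=os.getenv("MODEL", "my-corp-model"),
            base_url=os.getenv("ENDPOINT", "https://llm.example.com"),
            api_key=os.getenv("KEY", ""),
        ),
        instruction="Answer weather questions; call get_weather when needed.",
        tools=[get_weather],
    )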