I'm trying to implement a custom LiteLlm wrapper to access an LLM endpoint. This is the code I have:
import os
import litellm
import requests
from dotenv import load_dotenv
from litellm import CustomLLM
from litellm.types.utils import ModelResponse
class MyLiteLlm(CustomLLM):
    def __init__(self):
        super().__init__()
        load_dotenv()
        self._endpoint = os.getenv("ENDPOINT")
        self._key = os.getenv("KEY")
        self._model = os.getenv("MODEL")
        self._api_version = os.getenv("API_VERSION")

    def process_request(self, *args, **kwargs) -> litellm.ModelResponse:
        messages = kwargs.get("messages", [])
        if not messages:
            raise ValueError("The 'messages' parameter is required.")

        # Get the latest user message from the message history.
        latest_user_message = next(
            (msg for msg in reversed(messages) if msg.get("role") == "user"),
            None,
        )
        if latest_user_message:
            user_question = latest_user_message.get("content")
        else:
            user_question = "Tell the user to ask a question."

        headers = ...
        query_params = ...
        body = {
            'messages': [
                {'role': 'user', 'content': user_question}
            ]
        }
        full_path = ...
        response = requests.post(...)
        response.raise_for_status()
        api_response_data = response.json()
        model_response = ModelResponse(
            id=api_response_data.get("id"),
            choices=[
                litellm.Choices(
                    finish_reason=choice.get("finish_reason"),
                    index=choice.get("index"),
                    message=litellm.Message(
                        content=choice["message"]["content"],
                        role=choice["message"]["role"],
                    ),
                )
                for choice in api_response_data.get("choices", [])
            ],
            model=self._model,
            usage=litellm.Usage(
                prompt_tokens=api_response_data.get("usage", {}).get("prompt_tokens"),
                completion_tokens=api_response_data.get("usage", {}).get("completion_tokens"),
                total_tokens=api_response_data.get("usage", {}).get("total_tokens"),
            )
        )
        return model_response
    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
        return self.process_request(*args, **kwargs)

    async def acompletion(self, *args, **kwargs) -> litellm.ModelResponse:
        return self.process_request(*args, **kwargs)
This works just fine with an ADK agent:
from google.adk.models.lite_llm import LiteLlm

my_lite_llm = MyLiteLlm()
litellm.custom_provider_map = [
    {"provider": "my-lite-llm", "custom_handler": my_lite_llm}
]
MODEL = LiteLlm(model="my-lite-llm/eyq-model")
I can pass that MODEL to an ADK (Agent Development Kit) agent, and it calls the wrapper and gets the expected response. However, if the agent needs to call a tool, it just doesn't work: my tool never gets called. If I use the LiteLlm wrapper with one of the built-in providers instead, the tools are called without problems.
What am I missing?
I ran into the same issue when writing a custom LiteLLM/ADK integration. The problem is that your wrapper only maps plain message.content but doesn’t propagate tool calls back into LiteLLM’s ModelResponse. Without that, the agent never sees the tool_calls array and therefore never triggers your tool.
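In your wrapper that means mapping the tool_calls from your endpoint's response into the litellm.Message you build. Here is a rough sketch of that mapping, assuming your endpoint returns OpenAI-style tool_calls (the helper name to_litellm_message is just for illustration; the ChatCompletionMessageToolCall and Function types come from litellm.types.utils):

import litellm
from litellm.types.utils import ChatCompletionMessageToolCall, Function

def to_litellm_message(api_message: dict) -> litellm.Message:
    """Map one API response message, including tool calls, to a litellm.Message."""
    tool_calls = None
    if api_message.get("tool_calls"):
        # Propagate the tool calls so the agent knows which tool to run and with what arguments.
        tool_calls = [
            ChatCompletionMessageToolCall(
                id=tc.get("id"),
                type="function",
                function=Function(
                    name=tc["function"]["name"],
                    arguments=tc["function"]["arguments"],
                ),
            )
            for tc in api_message["tool_calls"]
        ]
    return litellm.Message(
        content=api_message.get("content"),
        role=api_message.get("role", "assistant"),
        tool_calls=tool_calls,
    )

In process_request you would then build each litellm.Choices with message=to_litellm_message(choice["message"]) instead of the plain content/role mapping, and you also need to forward the tool declarations you receive in the incoming kwargs to your endpoint, otherwise it never knows any tools exist.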
This is my solution. I also have a corporate custom API that exposes many LLMs (Gemini, ChatGPT, ...).
I managed to make it work with tools: you have to define how tool calls are handled for your custom LLM.
Here is my code; you will have to adapt it to your use case.
import os
import json
import aiohttp
import logging
from typing import Any, Dict, AsyncGenerator, Optional
from google.adk.models.base_llm import BaseLlm
from google.adk.models.llm_request import LlmRequest
from google.adk.models.llm_response import LlmResponse
from google.genai import types # ADK uses this for structured parts
logger = logging.getLogger(__name__)
class MyCustomClient:
    """Minimal HTTP client for a custom LLM endpoint."""

    def __init__(self, base_url: str, api_key: str):
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key
        self.session: Optional[aiohttp.ClientSession] = None

    async def _get_session(self) -> aiohttp.ClientSession:
        if self.session is None or self.session.closed:
            self.session = aiohttp.ClientSession()
        return self.session

    async def close(self):
        if self.session and not self.session.closed:
            await self.session.close()

    async def generate(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        session = await self._get_session()
        headers = {"Authorization": f"Bearer {self.api_key}"}
        url = f"{self.base_url}/v1/chat/completions"
        async with session.post(url, json=payload, headers=headers) as response:
            response.raise_for_status()
            return await response.json()
class MyLiteLlm(BaseLlm):
    """Custom LLM implementation that supports tool calls."""

    def __init__(self, model: str, base_url: str, api_key: str, **kwargs):
        super().__init__(model=model, **kwargs)
        self._client = MyCustomClient(base_url, api_key)
        self._model = model

    async def generate_content_async(
        self, llm_request: LlmRequest, stream: bool = False
    ) -> AsyncGenerator[LlmResponse, None]:
        if not llm_request.contents:
            raise ValueError("LlmRequest must contain contents")

        payload = {
            "model": self._model,
            "messages": [c.model_dump(exclude_none=True) for c in llm_request.contents],
        }
        # Optional: include tool declarations if present.
        # (In current ADK versions the declarations live on llm_request.config.tools.)
        if llm_request.config and llm_request.config.tools:
            payload["tools"] = [t.model_dump(exclude_none=True) for t in llm_request.config.tools]

        api_response = await self._client.generate(payload)
        # 🔑 Convert the response into ADK's LlmResponse, preserving tool calls.
        parts = []
        for choice in api_response.get("choices", []):
            msg = choice.get("message", {})
            if msg.get("content"):
                parts.append(types.Part.from_text(text=msg["content"]))
            for tc in msg.get("tool_calls") or []:
                function = tc.get("function", {})
                raw_args = function.get("arguments", {})
                # OpenAI-style APIs return arguments as a JSON string; ADK expects a dict.
                if isinstance(raw_args, str):
                    raw_args = json.loads(raw_args)
                parts.append(
                    types.Part(
                        function_call=types.FunctionCall(
                            name=function.get("name", ""),
                            args=raw_args,
                        )
                    )
                )
        llm_response = LlmResponse(
            content=types.Content(
                role=api_response.get("role", "model"),
                parts=parts,
            ),
            partial=False,
        )
        yield llm_response

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self._client.close()
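For completeness, this is roughly how it plugs into an agent. The tool here is just a plain Python function, and the model name, endpoint and key variable are placeholders you would replace with your own:

import os
from google.adk.agents import Agent

def get_weather(city: str) -> dict:
    """Dummy tool so the model has something to call."""
    return {"city": city, "forecast": "sunny"}

agent = Agent(
    name="my_agent",
    model=MyLiteLlm(
        model="my-corp-model",                # placeholder model name
        base_url="https://my-llm-endpoint",   # placeholder endpoint
        api_key=os.environ["MY_API_KEY"],     # placeholder key variable
    ),
    instruction="Answer the user and call tools when needed.",
    tools=[get_weather],
)

Once the function_call parts are present in the LlmResponse, ADK takes care of executing the matching tool and sending the result back to the model on the next turn.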