Responses API

responses(model, input, temperature=None, max_tokens=None, num_retries=3, retry_strategy='exponential_backoff_retry', cache=None, api_key=None, api_base=None, timeout=600.0, **kwargs)

Make a request using OpenAI's Responses API format.

Compatible with litellm.responses() API.
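
For orientation, each item of input follows the Responses API message shape. A minimal sketch (the "input_text" block type is an assumption borrowed from OpenAI's Responses API; the fallback path in the source below only reads the "text" field of dict blocks):

input = [
    {
        "role": "user",
        "content": [
            {"type": "input_text", "text": "Summarize this document."},
        ],
    }
]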

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| model | str | Model name in format "provider/model-name" | required |
| input | list[Dict[str, Any]] | List of input messages in Responses API format | required |
| temperature | Optional[float] | Sampling temperature | None |
| max_tokens | Optional[int] | Maximum number of tokens to generate | None |
| num_retries | int | Number of retries | 3 |
| retry_strategy | str | Retry strategy; accepted for litellm compatibility but not consulted by the implementation shown below | 'exponential_backoff_retry' |
| cache | Optional[Dict[str, Any]] | Cache control (not used) | None |
| api_key | Optional[str] | API key | None |
| api_base | Optional[str] | API base URL | None |
| timeout | float | Request timeout in seconds | 600.0 |
| **kwargs | Any | Additional parameters passed through to the underlying client | {} |
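
The model string bundles provider and model name. A sketch of the presumed convention (parse_model_name's actual implementation is not shown here; splitting on the first "/" is an assumption):

provider, model_name = "openai/gpt-4o-mini".split("/", 1)
# provider == "openai", model_name == "gpt-4o-mini"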

Returns:

| Type | Description |
|------|-------------|
| ModelResponse | ModelResponse |

Source code in ullm/main.py
def responses(
    model: str,
    input: list[Dict[str, Any]],
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None,
    num_retries: int = 3,
    retry_strategy: str = "exponential_backoff_retry",
    cache: Optional[Dict[str, Any]] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    timeout: float = 600.0,
    **kwargs: Any,
) -> ModelResponse:
    """
    Make a request using OpenAI's Responses API format.

    Compatible with litellm.responses() API.

    Args:
        model: Model name in format "provider/model-name"
        input: List of input messages in Responses API format
        temperature: Sampling temperature
        max_tokens: Maximum tokens to generate
        num_retries: Number of retries
        retry_strategy: Retry strategy
        cache: Cache control (not used)
        api_key: API key
        api_base: API base URL
        timeout: Request timeout
        **kwargs: Additional parameters

    Returns:
        ModelResponse
    """
    provider, model_name = parse_model_name(model)

    client = _get_client(
        provider,
        api_key=api_key,
        api_base=api_base,
        timeout=timeout,
        **kwargs,
    )

    # Only OpenAI client has responses method
    if not hasattr(client, "responses"):
        # Fall back to converting to messages format
        messages = []
        for item in input:
            role = item.get("role", "user")
            content_blocks = item.get("content", [])

            if isinstance(content_blocks, list):
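                # Join text blocks into one string; dict blocks without a
                # "text" key contribute empty strings, so non-text content
                # (e.g. images) is dropped by this fallback.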
                content = " ".join(
                    block.get("text", "") if isinstance(block, dict) else str(block) for block in content_blocks
                )
            else:
                content = str(content_blocks)

            messages.append({"role": role, "content": content})

        return completion(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            num_retries=num_retries,
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            **kwargs,
        )

    # Use native responses method
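    # Note: retry_strategy is accepted for litellm compatibility; retries
    # below always go through _create_retry_decorator regardless of its value.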
    if num_retries > 0:

        @_create_retry_decorator(num_retries)
        def _make_request():
            return client.responses(
                model=model_name, input=input, temperature=temperature, max_tokens=max_tokens, **kwargs
            )

        return _make_request()
    else:
        return client.responses(model=model_name, input=input, temperature=temperature, max_tokens=max_tokens, **kwargs)
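
A minimal synchronous usage sketch, assuming responses is importable from the package's top level and that credentials come from api_key or the provider's environment; the model name and message content are placeholders:

from ullm import responses

response = responses(
    model="openai/gpt-4o-mini",  # hypothetical "provider/model-name" string
    input=[
        {
            "role": "user",
            "content": [{"type": "input_text", "text": "Write a one-line greeting."}],
        }
    ],
    temperature=0.2,
    max_tokens=128,
)
print(response)

If the resolved client has no responses method, the same call is converted to plain chat messages and routed through completion(), as the source above shows.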

aresponses(model, input, temperature=None, max_tokens=None, num_retries=3, retry_strategy='exponential_backoff_retry', cache=None, api_key=None, api_base=None, timeout=600.0, **kwargs) async

Make an async request using OpenAI's Responses API format.

Compatible with litellm.aresponses() API.

Returns:

| Type | Description |
|------|-------------|
| ModelResponse | ModelResponse |

Source code in ullm/main.py
async def aresponses(
    model: str,
    input: list[Dict[str, Any]],
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None,
    num_retries: int = 3,
    retry_strategy: str = "exponential_backoff_retry",
    cache: Optional[Dict[str, Any]] = None,
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    timeout: float = 600.0,
    **kwargs: Any,
) -> ModelResponse:
    """
    Make an async request using OpenAI's Responses API format.

    Compatible with litellm.aresponses() API.

    Args:
        Same as responses()

    Returns:
        ModelResponse
    """
    provider, model_name = parse_model_name(model)

    client = _get_client(
        provider,
        api_key=api_key,
        api_base=api_base,
        timeout=timeout,
        **kwargs,
    )

    # Only OpenAI client has aresponses method
    if not hasattr(client, "aresponses"):
        # Fall back to converting to messages format
        messages = []
        for item in input:
            role = item.get("role", "user")
            content_blocks = item.get("content", [])

            if isinstance(content_blocks, list):
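                # Join text blocks into one string; dict blocks without a
                # "text" key contribute empty strings, so non-text content
                # (e.g. images) is dropped by this fallback.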
                content = " ".join(
                    block.get("text", "") if isinstance(block, dict) else str(block) for block in content_blocks
                )
            else:
                content = str(content_blocks)

            messages.append({"role": role, "content": content})

        return await acompletion(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            num_retries=num_retries,
            api_key=api_key,
            api_base=api_base,
            timeout=timeout,
            **kwargs,
        )

    # Use native aresponses method
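    # Note: retry_strategy is accepted for litellm compatibility; retries
    # below always go through _create_retry_decorator regardless of its value.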
    if num_retries > 0:

        @_create_retry_decorator(num_retries)
        async def _make_request():
            return await client.aresponses(
                model=model_name, input=input, temperature=temperature, max_tokens=max_tokens, **kwargs
            )

        return await _make_request()
    else:
        return await client.aresponses(
            model=model_name, input=input, temperature=temperature, max_tokens=max_tokens, **kwargs
        )
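
An equivalent asynchronous sketch, under the same assumptions as the synchronous example above:

import asyncio

from ullm import aresponses

async def main() -> None:
    response = await aresponses(
        model="openai/gpt-4o-mini",  # hypothetical model name
        input=[{"role": "user", "content": [{"type": "input_text", "text": "Hello!"}]}],
    )
    print(response)

asyncio.run(main())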

(Full API reference coming soon)