Error Handling

This document covers error handling in vLLM Client.

VllmError Enum

All errors in vLLM Client are represented by the VllmError enum:

#![allow(unused)]
fn main() {
use thiserror::Error;

/// The single error type for vLLM Client.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute. Payloads are plain `String`s, which is what allows the enum to
/// derive `Clone`.
#[derive(Debug, Error, Clone)]
pub enum VllmError {
    /// Network errors and connection failures.
    #[error("HTTP request failed: {0}")]
    Http(String),

    /// Serialization or deserialization errors.
    #[error("JSON error: {0}")]
    Json(String),

    /// The server returned an error response.
    #[error("API error (status {status_code}): {message}")]
    ApiError {
        /// HTTP status code of the error response.
        status_code: u16,
        /// Error message; this is the text shown in the `Display` output.
        message: String,
        /// Optional error category. It is not part of the `Display` output.
        /// NOTE(review): presumably supplied by the server — confirm.
        error_type: Option<String>,
    },

    /// Errors that occur during a streaming response.
    #[error("Stream error: {0}")]
    Stream(String),

    /// The request timed out.
    #[error("Connection timeout")]
    Timeout,

    /// The specified model doesn't exist; the payload is shown after
    /// "Model not found:" in the message.
    #[error("Model not found: {0}")]
    ModelNotFound(String),

    /// A required parameter was not provided; the payload is shown after
    /// "Missing required parameter:" in the message.
    #[error("Missing required parameter: {0}")]
    MissingParameter(String),

    /// The response has no content.
    #[error("No response content")]
    NoContent,

    /// The response had an unexpected format.
    #[error("Invalid response format: {0}")]
    InvalidResponse(String),

    /// Miscellaneous errors; the payload is displayed verbatim.
    #[error("{0}")]
    Other(String),
}
}

Error Types

| Variant | When It Occurs |
|---------|----------------|
| `Http` | Network errors, connection failures |
| `Json` | Serialization/deserialization errors |
| `ApiError` | Server returned an error response |
| `Stream` | Errors during streaming response |
| `Timeout` | Request timed out |
| `ModelNotFound` | Specified model doesn't exist |
| `MissingParameter` | Required parameter not provided |
| `NoContent` | Response has no content |
| `InvalidResponse` | Unexpected response format |
| `Other` | Miscellaneous errors |

Basic Error Handling

use vllm_client::{VllmClient, json, VllmError};

/// Sends `prompt` as a single user message and returns the reply text.
///
/// A failed request is handed back to the caller as a `VllmError`. A
/// successful response that carries no content yields an empty string.
async fn chat(prompt: &str) -> Result<String, VllmError> {
    let client = VllmClient::new("http://localhost:8000/v1");
    let messages = json!([{"role": "user", "content": prompt}]);

    let outcome = client
        .chat
        .completions()
        .create()
        .model("Qwen/Qwen2.5-7B-Instruct")
        .messages(messages)
        .send()
        .await;

    outcome.map(|reply| reply.content.unwrap_or_default())
}

/// Runs one chat request and reports the outcome: the reply goes to stdout,
/// a failure goes to stderr.
#[tokio::main]
async fn main() {
    let outcome = chat("Hello!").await;

    match outcome {
        Err(e) => eprintln!("Error: {}", e),
        Ok(text) => println!("Response: {}", text),
    }
}

Detailed Error Handling

Handle specific error types differently:

use vllm_client::{VllmClient, json, VllmError};

/// Sends one chat request and prints a tailored message for each kind of
/// failure, falling back to the error's own `Display` text for the rest.
#[tokio::main]
async fn main() {
    let client = VllmClient::new("http://localhost:8000/v1");
    let messages = json!([{"role": "user", "content": "Hello!"}]);

    let outcome = client
        .chat
        .completions()
        .create()
        .model("Qwen/Qwen2.5-7B-Instruct")
        .messages(messages)
        .send()
        .await;

    // Deal with the success case first so the rest of the function only has
    // to think about failures.
    let failure = match outcome {
        Ok(response) => {
            println!("Success: {}", response.content.unwrap_or_default());
            return;
        }
        Err(failure) => failure,
    };

    match failure {
        VllmError::Timeout => {
            eprintln!("Request timed out. Try increasing timeout.");
        }
        VllmError::Http(msg) => {
            eprintln!("Network error: {}", msg);
        }
        VllmError::ApiError { status_code, message, error_type } => {
            eprintln!("API Error (HTTP {}): {}", status_code, message);
            // The error type is optional; print it only when it is present.
            if let Some(etype) = error_type {
                eprintln!("Error type: {}", etype);
            }
        }
        VllmError::MissingParameter(param) => {
            eprintln!("Missing required parameter: {}", param);
        }
        VllmError::ModelNotFound(model) => {
            eprintln!("Model '{}' not found. Check available models.", model);
        }
        other => {
            eprintln!("Other error: {}", other);
        }
    }
}

HTTP Status Codes

Common API error status codes:

| Code | Meaning | Action |
|------|---------|--------|
| 400 | Bad Request | Check request parameters |
| 401 | Unauthorized | Check API key |
| 403 | Forbidden | Check permissions |
| 404 | Not Found | Check endpoint or model name |
| 429 | Rate Limited | Implement retry with backoff |
| 500 | Server Error | Retry or contact admin |
| 502 | Bad Gateway | Check vLLM server status |
| 503 | Service Unavailable | Wait and retry |
| 504 | Gateway Timeout | Increase timeout or retry |

Retryable Errors

Check if an error is retryable:

#![allow(unused)]
fn main() {
use vllm_client::VllmError;

fn should_retry(error: &VllmError) -> bool {
    error.is_retryable()
}

// Manual check
match error {
    VllmError::Timeout => true,
    VllmError::ApiError { status_code: 429, .. } => true,  // Rate limit
    VllmError::ApiError { status_code: 500..=504, .. } => true,  // Server errors
    _ => false,
}
}

Retry with Exponential Backoff

#![allow(unused)]
fn main() {
use vllm_client::{VllmClient, json, VllmError};
use std::time::Duration;
use tokio::time::sleep;

/// Sends `prompt` as a single user message, retrying retryable failures with
/// capped exponential backoff.
///
/// The wait before retry `k` (1-based) is `100ms * 2^(k - 1)`, limited to
/// 30 seconds. At most `max_retries` retries are made, so the request is sent
/// at most `max_retries + 1` times.
///
/// # Errors
///
/// Returns the last `VllmError` when the failure is not retryable or the
/// retry budget is exhausted.
async fn chat_with_retry(
    client: &VllmClient,
    prompt: &str,
    max_retries: u32,
) -> Result<String, VllmError> {
    // The first retry waits this long; each later retry doubles the wait.
    const BASE_DELAY_MS: u64 = 100;
    // Ceiling for a single wait. Without it a generous `max_retries` turns
    // into sleeps of minutes or hours (retry 10 alone would be ~51 seconds).
    const MAX_DELAY_MS: u64 = 30_000;

    let mut retries = 0;

    loop {
        let result = client
            .chat
            .completions()
            .create()
            .model("Qwen/Qwen2.5-7B-Instruct")
            .messages(json!([{"role": "user", "content": prompt}]))
            .send()
            .await;

        match result {
            Ok(response) => {
                return Ok(response.content.unwrap_or_default());
            }
            Err(e) if e.is_retryable() && retries < max_retries => {
                retries += 1;
                // Saturating arithmetic keeps the computation well-defined
                // for any retry count; the cap then bounds the actual wait.
                let delay_ms = 2u64
                    .saturating_pow(retries - 1)
                    .saturating_mul(BASE_DELAY_MS)
                    .min(MAX_DELAY_MS);
                let delay = Duration::from_millis(delay_ms);
                eprintln!("Retry {} after {:?}: {}", retries, delay, e);
                sleep(delay).await;
            }
            // Not retryable, or out of retries: give the error to the caller.
            Err(e) => return Err(e),
        }
    }
}
}

Streaming Error Handling

Handle errors during streaming:

#![allow(unused)]
fn main() {
use vllm_client::{VllmClient, json, StreamEvent, VllmError};
use futures::StreamExt;

/// Streams a chat completion for `prompt` and returns the concatenated text.
///
/// # Errors
///
/// Returns a `VllmError` when the stream cannot be opened, or when the stream
/// delivers an error event part-way through. In the latter case the text
/// collected so far is dropped.
async fn stream_chat(prompt: &str) -> Result<String, VllmError> {
    let client = VllmClient::new("http://localhost:8000/v1");
    let messages = json!([{"role": "user", "content": prompt}]);

    let mut events = client
        .chat
        .completions()
        .create()
        .model("Qwen/Qwen2.5-7B-Instruct")
        .messages(messages)
        .stream(true)
        .send_stream()
        .await?;

    let mut collected = String::new();

    loop {
        match events.next().await {
            Some(StreamEvent::Content(delta)) => collected.push_str(&delta),
            Some(StreamEvent::Error(e)) => return Err(e),
            // An explicit `Done` event and the end of the stream both finish
            // the read.
            Some(StreamEvent::Done) | None => break,
            // Other event kinds carry nothing this function collects.
            Some(_) => {}
        }
    }

    Ok(collected)
}
}

Error Context

Add context to errors for better debugging:

#![allow(unused)]
fn main() {
use vllm_client::{VllmClient, json, VllmError};

/// Sends `prompt` as a single user message and returns the reply text.
///
/// On failure the underlying error is rendered into a `String` prefixed with
/// a description of the operation that failed.
async fn chat_with_context(prompt: &str) -> Result<String, String> {
    let client = VllmClient::new("http://localhost:8000/v1");
    let messages = json!([{"role": "user", "content": prompt}]);

    let outcome = client
        .chat
        .completions()
        .create()
        .model("Qwen/Qwen2.5-7B-Instruct")
        .messages(messages)
        .send()
        .await;

    match outcome {
        Ok(reply) => Ok(reply.content.unwrap_or_default()),
        Err(e) => Err(format!("Failed to get chat response: {}", e)),
    }
}
}

Using anyhow or eyre

For applications using anyhow or eyre:

#![allow(unused)]
fn main() {
use vllm_client::{VllmClient, json, VllmError};
use anyhow::{Context, Result};

/// Sends `prompt` as a single user message and returns the reply text.
///
/// A failed request is converted into an `anyhow` error carrying the context
/// message "Failed to send chat request".
async fn chat(prompt: &str) -> Result<String> {
    let client = VllmClient::new("http://localhost:8000/v1");
    let messages = json!([{"role": "user", "content": prompt}]);

    let outcome = client
        .chat
        .completions()
        .create()
        .model("Qwen/Qwen2.5-7B-Instruct")
        .messages(messages)
        .send()
        .await;

    outcome
        .context("Failed to send chat request")
        .map(|reply| reply.content.unwrap_or_default())
}
}

Best Practices

1. Always Handle Errors

#![allow(unused)]
fn main() {
// Bad: `unwrap()` panics as soon as the request comes back as an `Err`,
// so a failed request crashes the program.
let response = client.chat.completions().create()
    .send().await.unwrap();

// Good: match on the result so a failure is reported instead of panicking.
match client.chat.completions().create().send().await {
    Ok(r) => { /* handle */ },
    Err(e) => eprintln!("Error: {}", e),
}
}

2. Use Appropriate Timeout

#![allow(unused)]
fn main() {
// Build the client first, then set a timeout sized for the workload.
let client = VllmClient::new("http://localhost:8000/v1");
let client = client.timeout_secs(300); // 5 minutes for long tasks
}

3. Log Errors with Context

#![allow(unused)]
fn main() {
// Fragment: this is the `Err` arm of a `match` on a request result.
// NOTE(review): `model` and `prompt` are presumably the values the
// surrounding code sent with the request — they are not defined here.
Err(e) => {
    // Record the failure itself at error level...
    log::error!("Chat request failed: {}", e);
    // ...and the request details at debug level. Only the prompt's length is
    // logged, not its text.
    log::debug!("Request details: model={}, prompt_len={}", model, prompt.len());
}
}

4. Implement Graceful Degradation

#![allow(unused)]
fn main() {
// Try the primary server first. If it fails, log a warning and send the
// request to the fallback server instead. Whichever response succeeds is
// bound to `response` so the code that follows can use it.
let response = match primary_client.chat.completions().create().send().await {
    Ok(r) => r,
    Err(e) => {
        log::warn!("Primary client failed: {}, trying fallback", e);
        // `?` propagates a fallback failure to the caller: with both servers
        // down there is nothing left to degrade to.
        fallback_client.chat.completions().create().send().await?
    }
};
}

See Also