Skip to content

vllm.entrypoints.serve.cache.api_router

logger module-attribute

logger = init_logger(__name__)

router module-attribute

router = APIRouter()

attach_router

attach_router(app: FastAPI)
Source code in vllm/entrypoints/serve/cache/api_router.py
def attach_router(app: FastAPI):
    """Mount this module's cache-management routes on `app`.

    The routes are only registered when `envs.VLLM_SERVER_DEV_MODE` is
    truthy; otherwise `app` is left untouched.
    """
    if envs.VLLM_SERVER_DEV_MODE:
        app.include_router(router)

engine_client

engine_client(request: Request) -> EngineClient
Source code in vllm/entrypoints/serve/cache/api_router.py
def engine_client(request: Request) -> EngineClient:
    """Return the `engine_client` stored on the application state of `request`."""
    app_state = request.app.state
    return app_state.engine_client

reset_mm_cache async

reset_mm_cache(raw_request: Request)

Reset the multi-modal cache. Note that the API server currently does not verify whether the multi-modal cache was successfully reset.

Source code in vllm/entrypoints/serve/cache/api_router.py
@router.post("/reset_mm_cache")
async def reset_mm_cache(raw_request: Request):
    """
    Ask the engine client to reset the multi-modal cache.

    The API server does not currently verify whether the reset actually
    succeeded; once the engine call returns, an empty 200 response is sent.
    """
    logger.info("Resetting multi-modal cache...")
    client = engine_client(raw_request)
    await client.reset_mm_cache()
    return Response(status_code=200)

reset_prefix_cache async

reset_prefix_cache(
    raw_request: Request,
    reset_running_requests: bool = Query(default=False),
    reset_external: bool = Query(default=False),
)

Reset the local prefix cache.

Optionally, if the query parameter reset_external=true is set, the external (connector-managed) prefix cache is reset as well.

Note that we currently do not check if the prefix cache is successfully reset in the API server.

Example

POST /reset_prefix_cache?reset_external=true

Source code in vllm/entrypoints/serve/cache/api_router.py
@router.post("/reset_prefix_cache")
async def reset_prefix_cache(
    raw_request: Request,
    reset_running_requests: bool = Query(default=False),
    reset_external: bool = Query(default=False),
):
    """
    Reset the local prefix cache.

    When the query parameter `reset_external=true` is supplied, the
    external (connector-managed) prefix cache is reset as well.

    `reset_running_requests` (default false) is forwarded unchanged to the
    engine client's `reset_prefix_cache`.

    The API server does not currently verify whether the prefix cache was
    actually reset; an empty 200 response is sent once the engine call
    returns.

    Example:
       POST /reset_prefix_cache?reset_external=true
    """
    logger.info("Resetting prefix cache...")

    client = engine_client(raw_request)
    # Positional order matters: running-requests flag first, external flag second.
    await client.reset_prefix_cache(reset_running_requests, reset_external)
    return Response(status_code=200)