"""Local LLM endpoints for bundled Qwen3 inference.""" import logging from typing import Optional from fastapi import APIRouter, HTTPException from pydantic import BaseModel from services.local_llm import get_status, download_model, complete logger = logging.getLogger(__name__) router = APIRouter() class CompleteRequest(BaseModel): prompt: str model_id: str = "qwen3-1.7b" system_prompt: Optional[str] = None temperature: float = 0.3 max_tokens: int = 2048 @router.get("/local-llm/status") async def llm_status(): try: return get_status() except Exception as e: logger.error(f"Local LLM status failed: {e}", exc_info=True) raise HTTPException(status_code=500, detail=str(e)) @router.post("/local-llm/download") async def llm_download(model_id: str = "qwen3-1.7b"): try: return download_model(model_id) except Exception as e: logger.error(f"Local LLM download failed: {e}", exc_info=True) raise HTTPException(status_code=500, detail=str(e)) @router.post("/local-llm/complete") async def llm_complete(req: CompleteRequest): try: result = complete( prompt=req.prompt, model_id=req.model_id, system_prompt=req.system_prompt, temperature=req.temperature, max_tokens=req.max_tokens, ) return {"response": result} except Exception as e: logger.error(f"Local LLM completion failed: {e}", exc_info=True) raise HTTPException(status_code=500, detail=str(e))