Add local LLM router and service
Some checks failed
CI / rust (push) Has been cancelled
CI / frontend (push) Has been cancelled
CI / python (push) Has been cancelled
Validate All / validate-all (push) Has been cancelled
Release / build (archlinux, ubuntu-24.04, x86_64-unknown-linux-gnu) (push) Has been cancelled
Release / build (deb, ubuntu-24.04, x86_64-unknown-linux-gnu) (push) Has been cancelled
Release / build (msi, windows-latest, x86_64-pc-windows-msvc) (push) Has been cancelled
Release / build (rpm, ubuntu-24.04, x86_64-unknown-linux-gnu) (push) Has been cancelled
Some checks failed
CI / rust (push) Has been cancelled
CI / frontend (push) Has been cancelled
CI / python (push) Has been cancelled
Validate All / validate-all (push) Has been cancelled
Release / build (archlinux, ubuntu-24.04, x86_64-unknown-linux-gnu) (push) Has been cancelled
Release / build (deb, ubuntu-24.04, x86_64-unknown-linux-gnu) (push) Has been cancelled
Release / build (msi, windows-latest, x86_64-pc-windows-msvc) (push) Has been cancelled
Release / build (rpm, ubuntu-24.04, x86_64-unknown-linux-gnu) (push) Has been cancelled
This commit is contained in:
54
backend/routers/local_llm.py
Normal file
54
backend/routers/local_llm.py
Normal file
@ -0,0 +1,54 @@
|
||||
"""Local LLM endpoints for bundled Qwen3 inference."""

import logging
from typing import Optional

from fastapi import APIRouter, HTTPException
from pydantic import BaseModel

from services.local_llm import complete, download_model, get_status

# Module-level logger and the router the application mounts these routes on.
logger = logging.getLogger(__name__)
router = APIRouter()
class CompleteRequest(BaseModel):
    """Request body for POST /local-llm/complete."""

    # Text to complete; the only required field.
    prompt: str
    # Identifier of the bundled model to run (default matches the shipped Qwen3 build).
    model_id: str = "qwen3-1.7b"
    # Optional system instruction prepended to the conversation.
    system_prompt: Optional[str] = None
    # Sampling temperature; 0.3 favors fairly deterministic output.
    temperature: float = 0.3
    # Upper bound on generated tokens.
    max_tokens: int = 2048
@router.get("/local-llm/status")
async def llm_status():
    """Report the state of the bundled local LLM.

    Returns whatever ``services.local_llm.get_status`` produces (model
    availability, download state, etc. — shape defined by the service).
    Any failure is logged with its traceback and surfaced as HTTP 500
    with the error text in ``detail``.
    """
    try:
        return get_status()
    except Exception as e:
        # logger.exception == error level + traceback; lazy %-formatting
        # avoids building the message when the logger is disabled.
        logger.exception("Local LLM status failed: %s", e)
        # Chain the cause so it survives in debugging/__cause__ output.
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/local-llm/download")
async def llm_download(model_id: str = "qwen3-1.7b"):
    """Trigger a download of the given bundled model.

    ``model_id`` arrives as a query parameter; the default matches the
    shipped Qwen3 build. Returns the service's download result; failures
    are logged with traceback and mapped to HTTP 500.
    """
    try:
        return download_model(model_id)
    except Exception as e:
        # logger.exception attaches the traceback; %-args keep formatting lazy.
        logger.exception("Local LLM download failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/local-llm/complete")
async def llm_complete(req: CompleteRequest):
    """Run a completion against the bundled local model.

    Takes a :class:`CompleteRequest` body and returns
    ``{"response": <generated text>}``. Failures are logged with their
    traceback and surfaced as HTTP 500 with the error text in ``detail``.

    NOTE(review): ``complete`` is called without ``await``, so it appears
    to be a blocking synchronous call inside an ``async def`` — if
    inference takes seconds this stalls the event loop; consider a
    plain ``def`` endpoint or ``run_in_executor``. Confirm against
    ``services.local_llm``.
    """
    try:
        result = complete(
            prompt=req.prompt,
            model_id=req.model_id,
            system_prompt=req.system_prompt,
            temperature=req.temperature,
            max_tokens=req.max_tokens,
        )
    except Exception as e:
        # logger.exception attaches the traceback; %-args keep formatting lazy.
        logger.exception("Local LLM completion failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
    # Success path kept out of the try so only the risky call is guarded.
    return {"response": result}
Reference in New Issue
Block a user