Files
TalkEdit/backend/routers/local_llm.py

55 lines
1.5 KiB
Python
Raw Normal View History

2026-05-07 01:32:19 -06:00
"""Local LLM endpoints for bundled Qwen3 inference."""
import logging
from typing import Optional
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from services.local_llm import get_status, download_model, complete
logger = logging.getLogger(__name__)
router = APIRouter()
class CompleteRequest(BaseModel):
    """Request body for POST /local-llm/complete."""

    # Required prompt text passed through to the local model.
    prompt: str
    # Model identifier; defaults to the bundled Qwen3 1.7B.
    model_id: str = "qwen3-1.7b"
    # Optional system prompt forwarded to the service layer — how it is
    # applied is decided there; TODO confirm against services.local_llm.
    system_prompt: Optional[str] = None
    # Sampling temperature forwarded as-is (default 0.3).
    temperature: float = 0.3
    # Upper bound on generated tokens, forwarded as-is (default 2048).
    max_tokens: int = 2048
@router.get("/local-llm/status")
async def llm_status():
try:
return get_status()
except Exception as e:
logger.error(f"Local LLM status failed: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.post("/local-llm/download")
async def llm_download(model_id: str = "qwen3-1.7b"):
try:
return download_model(model_id)
except Exception as e:
logger.error(f"Local LLM download failed: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
@router.post("/local-llm/complete")
async def llm_complete(req: CompleteRequest):
try:
result = complete(
prompt=req.prompt,
model_id=req.model_id,
system_prompt=req.system_prompt,
temperature=req.temperature,
max_tokens=req.max_tokens,
)
return {"response": result}
except Exception as e:
logger.error(f"Local LLM completion failed: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))