able to re-transcribe
This commit is contained in:
@ -55,6 +55,13 @@ class ExportRequest(BaseModel):
|
||||
deleted_indices: Optional[List[int]] = None
|
||||
|
||||
|
||||
class TranscriptExportRequest(BaseModel):
    # Request body for the transcript-only export (no video rendering).

    # Transcript words in order; a word's position in this list is the index
    # that ``deleted_indices`` refers to.
    words: List[ExportWordModel]

    # Positions in ``words`` to leave out of the export; None keeps every word.
    deleted_indices: Optional[List[int]] = None

    # File the exported transcript is written to.
    output_path: str

    # Output flavour.
    format: str = "txt"  # "txt" or "srt"
|
||||
|
||||
|
||||
def _map_ranges_to_output_timeline(
|
||||
ranges: List[dict],
|
||||
keep_segments: List[dict],
|
||||
@ -234,3 +241,34 @@ async def export_video(req: ExportRequest):
|
||||
except Exception as e:
|
||||
logger.error(f"Export error: {e}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/export/transcript")
async def export_transcript(req: TranscriptExportRequest):
    """Write the transcript to ``req.output_path`` as SRT or plain text.

    No video is rendered. Words whose position appears in
    ``req.deleted_indices`` are left out. A ``format`` of ``"srt"`` delegates
    to ``generate_srt``; any other value produces plain text (the remaining
    words joined by single spaces).

    Returns:
        ``{"status": "ok", "output_path": <path>}`` on success.

    Raises:
        HTTPException: 500 with the error text if anything fails.
    """
    try:
        from services.caption_generator import generate_srt

        skipped = set(req.deleted_indices or [])
        words = [item.model_dump() for item in req.words]

        if req.format != "srt":
            # Plain text: keep only the words that were not deleted.
            content = " ".join(
                entry["word"]
                for position, entry in enumerate(words)
                if position not in skipped
            )
        else:
            content = generate_srt(words, skipped)

        # A bare filename has no directory part; fall back to the CWD.
        target_dir = os.path.dirname(req.output_path) or "."
        os.makedirs(target_dir, exist_ok=True)
        with open(req.output_path, "w", encoding="utf-8") as out_file:
            out_file.write(content)

        logger.info("Transcript exported to %s (format=%s)", req.output_path, req.format)
        return {"status": "ok", "output_path": req.output_path}

    except Exception as e:
        logger.error(f"Transcript export failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@ -51,3 +51,99 @@ async def transcribe(req: TranscribeRequest):
|
||||
except Exception as e:
|
||||
logger.error(f"Transcription failed: {e}", exc_info=True)
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
class ReTranscribeSegmentRequest(BaseModel):
    # Request body for re-transcribing one time range of a media file.

    # Media file handed to FFmpeg as the input.
    file_path: str

    # Range to extract, passed to FFmpeg's -ss / -to. ``start`` is also the
    # offset added back onto the returned timestamps.
    start: float
    end: float

    # Forwarded to ``transcribe_audio`` as ``model_name``.
    model: str = "base"

    # Forwarded to ``transcribe_audio`` unchanged.
    language: Optional[str] = None
|
||||
|
||||
|
||||
def _shift_timestamps(item: dict, offset: float) -> dict:
    """Return a shallow copy of *item* with "start"/"end" moved by *offset*.

    The result is rounded to 3 decimal places. Working on a copy keeps the
    transcriber's own dicts untouched, so a word dict that is reachable both
    from the top-level word list and from a segment is never shifted twice.
    """
    shifted = dict(item)
    shifted["start"] = round(item["start"] + offset, 3)
    shifted["end"] = round(item["end"] + offset, 3)
    return shifted


@router.post("/transcribe/segment")
async def transcribe_segment(req: ReTranscribeSegmentRequest):
    """
    Re-transcribe a specific segment of audio.

    Extracts ``req.start``..``req.end`` to a temporary 16 kHz mono WAV with
    FFmpeg, transcribes it (GPU first, CPU on failure, cache disabled), and
    returns words and segments with timestamps shifted by ``req.start`` so
    they line up with the original file timeline.

    Returns:
        A dict with "words", "segments" and "language" (defaulting to "en"
        when the transcriber reports none).

    Raises:
        HTTPException: 404 if the input file does not exist; 500 for any
            other failure (FFmpeg missing or failing, transcription errors).
    """
    import os
    import shutil
    import subprocess
    import tempfile

    # FFmpeg reports a missing input through its exit code, not through
    # FileNotFoundError, so check explicitly to produce a real 404. This is
    # raised outside the try block so the generic handler below cannot turn
    # it into a 500.
    if not os.path.isfile(req.file_path):
        raise HTTPException(status_code=404, detail=f"File not found: {req.file_path}")

    tmp_dir = None
    try:
        # Extract the segment to a temp file
        tmp_dir = tempfile.mkdtemp(prefix="talkedit_segment_")
        segment_path = os.path.join(tmp_dir, "segment.wav")

        cmd = [
            "ffmpeg", "-y",
            "-i", req.file_path,
            "-ss", str(req.start),
            "-to", str(req.end),
            "-vn",
            "-acodec", "pcm_s16le",
            "-ar", "16000",
            "-ac", "1",
            segment_path,
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
        except FileNotFoundError as err:
            # The input was verified above, so this means the ffmpeg binary
            # itself could not be launched; do not report it as a missing
            # input file.
            raise RuntimeError("ffmpeg executable not found") from err
        if result.returncode != 0:
            raise RuntimeError(f"Segment extraction failed: {result.stderr[-300:]}")

        # Transcribe the segment: try GPU first, fall back to CPU
        try:
            segment_result = transcribe_audio(
                file_path=segment_path,
                model_name=req.model,
                use_gpu=True,
                use_cache=False,
                language=req.language,
            )
        except Exception as gpu_err:
            logger.warning("GPU transcription failed (%s), falling back to CPU", gpu_err)
            segment_result = transcribe_audio(
                file_path=segment_path,
                model_name=req.model,
                use_gpu=False,
                use_cache=False,
                language=req.language,
            )

        # Adjust timestamps to be relative to the original file
        offset = req.start
        adjusted_words = [
            _shift_timestamps(w, offset) for w in segment_result.get("words", [])
        ]

        adjusted_segments = []
        for seg in segment_result.get("segments", []):
            shifted_seg = _shift_timestamps(seg, offset)
            # Also adjust words within each segment
            if "words" in seg:
                shifted_seg["words"] = [
                    _shift_timestamps(w, offset) for w in seg["words"]
                ]
            adjusted_segments.append(shifted_seg)

        return {
            "words": adjusted_words,
            "segments": adjusted_segments,
            "language": segment_result.get("language", "en"),
        }

    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"File not found: {req.file_path}")
    except Exception as e:
        logger.error("Segment transcription failed: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Best-effort cleanup on every path (success or failure) so failed
        # requests do not leave temp directories behind.
        if tmp_dir is not None:
            shutil.rmtree(tmp_dir, ignore_errors=True)
|
||||
|
||||
Reference in New Issue
Block a user