Compare commits
3 Commits
0c7a4c94c2
...
dd4ce58920
| Author | SHA1 | Date | |
|---|---|---|---|
| dd4ce58920 | |||
| 5758401dda | |||
| 90b1999a57 |
10
.github/copilot-instructions.md
vendored
10
.github/copilot-instructions.md
vendored
@ -55,6 +55,16 @@ Use project virtualenvs where available (`.venv312`, `.venv`, or `venv`) for bac
|
|||||||
- Media URL handling between project load paths should remain consistent to avoid format-specific regressions (especially WAV/MP3 behavior).
|
- Media URL handling between project load paths should remain consistent to avoid format-specific regressions (especially WAV/MP3 behavior).
|
||||||
- Export pipeline changes must preserve caption modes (`none`, `sidecar`, `burn-in`) and audio enhancement behavior.
|
- Export pipeline changes must preserve caption modes (`none`, `sidecar`, `burn-in`) and audio enhancement behavior.
|
||||||
|
|
||||||
|
## Recent Changes
|
||||||
|
|
||||||
|
### 2026-05-04 — Word text correction, low-confidence highlighting, audio normalization
|
||||||
|
|
||||||
|
- **Word text correction (#015)**: Double-click any word in the transcript editor to edit its text inline. Press Enter to commit, Escape to cancel. State is updated in both `words[]` and `segments[]` arrays (segment text recomposed from updated words). Pure frontend; no backend changes needed.
|
||||||
|
- **Low-confidence word highlighting (#012)**: Words with `confidence < threshold` (default 0.6, configurable in Settings panel) render with an orange dotted underline. Tooltip shows exact confidence percentage. Threshold is persisted in `localStorage` key `talkedit:confidenceThreshold`.
|
||||||
|
- **Audio normalization (#018)**: New backend endpoint `POST /audio/normalize` in `backend/routers/audio.py`. Two-pass FFmpeg `loudnorm` (measure then apply) implemented in `backend/services/audio_cleaner.py:normalize_audio()`. Falls back to single-pass if measurement fails. Frontend UI in Export panel: target selector (YouTube -14, Spotify -16, Broadcast -23, etc.) with "Normalize" button.
|
||||||
|
- **Store**: New `updateWordText(index, text)` action in `editorStore.ts` updates both `words[]` and recomputes `segments[].text`.
|
||||||
|
- **Settings panel**: New confidence threshold slider (0–1 range).
|
||||||
|
|
||||||
## Update Rules (Important)
|
## Update Rules (Important)
|
||||||
|
|
||||||
When a task changes architecture, app wiring, commands, API shape, project schema, or major conventions, update this file before finishing.
|
When a task changes architecture, app wiring, commands, API shape, project schema, or major conventions, update this file before finishing.
|
||||||
|
|||||||
@ -6,13 +6,13 @@ Features are grouped by priority. Check off items as they are implemented.
|
|||||||
|
|
||||||
## 🔴 Highest Impact Next — Conversion and retention
|
## 🔴 Highest Impact Next — Conversion and retention
|
||||||
|
|
||||||
- [ ] [#015] **Word text correction** — allow editing the transcript text of a word without affecting its timing. Whisper gets homophones/proper nouns wrong constantly. Pure frontend state change; no backend needed.
|
- [x] [#015] **Word text correction** — double-click any word to edit its text in-place. Preserves timing and confidence. Pure frontend state change. (2026-05-04)
|
||||||
|
|
||||||
- [ ] [#013] **Re-transcribe selection** — if Whisper gets a section wrong, let the user select a word range and re-run transcription on just that segment (optionally with a different model or language).
|
- [ ] [#013] **Re-transcribe selection** — if Whisper gets a section wrong, let the user select a word range and re-run transcription on just that segment (optionally with a different model or language).
|
||||||
|
|
||||||
- [ ] [#012] **Low-confidence word highlighting** — WhisperX already returns `confidence` per word. Words below a threshold (e.g. < 0.6) should be visually underlined or tinted so the user knows where to double-check.
|
- [x] [#012] **Low-confidence word highlighting** — words with `confidence < 0.6` (configurable in Settings) get an orange dotted underline. Hover shows exact confidence %. (2026-05-04)
|
||||||
|
|
||||||
- [ ] [#018] **Audio normalization / loudness targeting** — single "Normalize" button that targets a LUFS level (-14 for YouTube, -16 for Spotify). Backend: `ffmpeg -af loudnorm`. Very high value for podcasters, ~2–3 hours of work.
|
- [x] [#018] **Audio normalization / loudness targeting** — "Normalize" button in Export panel with LUFS target selector (-14 YouTube, -16 Spotify, -23 Broadcast). Backend: FFmpeg two-pass `loudnorm`. (2026-05-04)
|
||||||
|
|
||||||
- [ ] [#024] **Export to transcript text / SRT only** — some users just want a clean `.txt` or `.srt` of the edited transcript without rendering video.
|
- [ ] [#024] **Export to transcript text / SRT only** — some users just want a clean `.txt` or `.srt` of the edited transcript without rendering video.
|
||||||
|
|
||||||
|
|||||||
@ -11,7 +11,7 @@ from fastapi import APIRouter, HTTPException, Query, Request
|
|||||||
from fastapi.responses import FileResponse
|
from fastapi.responses import FileResponse
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available
|
from services.audio_cleaner import clean_audio, detect_silence_ranges, is_deepfilter_available, normalize_audio
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
@ -164,3 +164,30 @@ async def get_waveform_audio(request: Request, path: str = Query(...)):
|
|||||||
)
|
)
|
||||||
_waveform_cache[cache_key] = str(out_wav)
|
_waveform_cache[cache_key] = str(out_wav)
|
||||||
return FileResponse(str(out_wav), media_type="audio/wav")
|
return FileResponse(str(out_wav), media_type="audio/wav")
|
||||||
|
|
||||||
|
|
||||||
|
class NormalizeRequest(BaseModel):
    """Request body accepted by the ``POST /audio/normalize`` endpoint."""

    # Path of the file whose loudness should be normalized.
    input_path: str
    # Optional destination path; the endpoint forwards "" to normalize_audio()
    # when this is omitted.
    output_path: Optional[str] = None
    # Target integrated loudness in LUFS (range-checked by the endpoint).
    target_lufs: float = -14.0
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/audio/normalize")
async def normalize_audio_endpoint(req: NormalizeRequest):
    """Normalize audio loudness to a target LUFS level using FFmpeg loudnorm.

    Args:
        req: Request body carrying the input path, an optional output path and
            the target integrated loudness in LUFS.

    Returns:
        A dict with ``status``, the ``output_path`` reported by
        ``normalize_audio`` and the ``target_lufs`` that was applied.

    Raises:
        HTTPException: 400 when ``target_lufs`` is outside the range FFmpeg
            accepts, 500 when normalization itself fails.
    """
    # FFmpeg's loudnorm filter only accepts an integrated-loudness target (I)
    # between -70 and -5 LUFS. Checking that here turns an opaque FFmpeg failure
    # (HTTP 500) into a clear client error. The negated chained comparison also
    # rejects NaN, which would slip through two separate < / > checks.
    if not (-70 <= req.target_lufs <= -5):
        raise HTTPException(status_code=400, detail="target_lufs must be between -70 and -5")

    try:
        output = normalize_audio(
            req.input_path,
            req.output_path or "",
            target_lufs=req.target_lufs,
        )
    except Exception as e:
        logger.error(f"Audio normalization failed: {e}", exc_info=True)
        # Chain the original exception so the root cause stays in the traceback.
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {
        "status": "ok",
        "output_path": output,
        "target_lufs": req.target_lufs,
    }
|
||||||
|
|||||||
@ -48,6 +48,8 @@ class ExportRequest(BaseModel):
|
|||||||
resolution: str = "1080p"
|
resolution: str = "1080p"
|
||||||
format: str = "mp4"
|
format: str = "mp4"
|
||||||
enhanceAudio: bool = False
|
enhanceAudio: bool = False
|
||||||
|
normalize_loudness: bool = False
|
||||||
|
normalize_target_lufs: float = -14.0
|
||||||
captions: str = "none"
|
captions: str = "none"
|
||||||
words: Optional[List[ExportWordModel]] = None
|
words: Optional[List[ExportWordModel]] = None
|
||||||
deleted_indices: Optional[List[int]] = None
|
deleted_indices: Optional[List[int]] = None
|
||||||
@ -166,6 +168,8 @@ async def export_video(req: ExportRequest):
|
|||||||
gain_ranges=mapped_gain_segments,
|
gain_ranges=mapped_gain_segments,
|
||||||
speed_ranges=speed_segments,
|
speed_ranges=speed_segments,
|
||||||
global_gain_db=req.global_gain_db,
|
global_gain_db=req.global_gain_db,
|
||||||
|
normalize_loudness=req.normalize_loudness,
|
||||||
|
normalize_target_lufs=req.normalize_target_lufs,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
output = export_reencode(
|
output = export_reencode(
|
||||||
@ -178,6 +182,8 @@ async def export_video(req: ExportRequest):
|
|||||||
gain_ranges=mapped_gain_segments,
|
gain_ranges=mapped_gain_segments,
|
||||||
speed_ranges=speed_segments,
|
speed_ranges=speed_segments,
|
||||||
global_gain_db=req.global_gain_db,
|
global_gain_db=req.global_gain_db,
|
||||||
|
normalize_loudness=req.normalize_loudness,
|
||||||
|
normalize_target_lufs=req.normalize_target_lufs,
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
if ass_path and os.path.exists(ass_path):
|
if ass_path and os.path.exists(ass_path):
|
||||||
|
|||||||
@ -158,3 +158,125 @@ def detect_silence_ranges(input_path: str, min_silence_ms: int, silence_db: floa
|
|||||||
silence_db,
|
silence_db,
|
||||||
)
|
)
|
||||||
return ranges
|
return ranges
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_audio(
    input_path: str,
    output_path: str = "",
    target_lufs: float = -14.0,
) -> str:
    """
    Normalize audio loudness to a target LUFS level using FFmpeg's loudnorm filter.

    A first FFmpeg pass measures the input; its values are fed into a second,
    linear pass. When the measurement is missing or unusable (for example,
    silent input reports ``-inf``), a single dynamic pass is used instead.

    Args:
        input_path: Path to the input audio/video file.
        output_path: Path for the normalized output. Auto-generated if empty
            (``<stem>_normalized<suffix>`` next to the input).
        target_lufs: Target integrated loudness in LUFS.
            Common targets: -14 (YouTube), -16 (Spotify), -23 (broadcast).

    Returns: path to the normalized audio file.

    Raises:
        RuntimeError: if the FFmpeg pass that writes the output exits non-zero.
    """
    import math

    inp = Path(input_path)
    if not output_path:
        output_path = str(inp.with_stem(inp.stem + "_normalized"))

    # Shared by the measurement pass, the apply pass and the fallback.
    base_filter = f"loudnorm=I={target_lufs}:LRA=7:TP=-1.5"

    # First pass: measure only. -vn keeps FFmpeg from decoding video frames that
    # the null muxer would throw away anyway.
    measure_cmd = [
        "ffmpeg", "-y",
        "-i", str(inp),
        "-vn",
        "-af", f"{base_filter}:print_format=json",
        "-f", "null",
        "-",
    ]
    logger.info("Running loudnorm first pass (measurement): %s", " ".join(measure_cmd))
    measure_result = subprocess.run(measure_cmd, capture_output=True, text=True)

    # loudnorm prints its JSON report on stderr.
    measured = _parse_loudnorm_measurement(measure_result.stderr)

    def _is_finite_number(value) -> bool:
        """Return True when ``value`` converts to a finite float."""
        try:
            return math.isfinite(float(value))
        except (TypeError, ValueError):
            return False

    stats = None
    if not measured:
        logger.warning(
            "loudnorm measurement failed or produced no output; "
            "falling back to single-pass normalization"
        )
    else:
        # Missing keys fall back to neutral defaults.
        stats = {
            "input_i": measured.get("input_i", target_lufs),
            "input_lra": measured.get("input_lra", 7.0),
            "input_tp": measured.get("input_tp", -1.5),
            "input_thresh": measured.get("input_thresh", -30.0),
            "target_offset": measured.get("target_offset", 0.0),
        }
        # Silent or extremely short input yields values such as "-inf", which
        # the second pass rejects outright; degrade to single-pass instead.
        if not all(_is_finite_number(v) for v in stats.values()):
            logger.warning(
                "loudnorm measurement returned non-numeric values (%s); "
                "falling back to single-pass normalization",
                stats,
            )
            stats = None

    if stats is None:
        # Single-pass fallback
        cmd = [
            "ffmpeg", "-y",
            "-i", str(inp),
            "-af", base_filter,
            "-c:v", "copy",
            output_path,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Audio normalization failed: {result.stderr[-300:]}")
        logger.info("Single-pass normalized audio saved to %s", output_path)
        return output_path

    # Second pass: apply normalization using the measured values verbatim.
    input_i = stats["input_i"]
    apply_cmd = [
        "ffmpeg", "-y",
        "-i", str(inp),
        "-af",
        (
            f"{base_filter}:"
            f"measured_I={input_i}:measured_LRA={stats['input_lra']}:"
            f"measured_TP={stats['input_tp']}:measured_thresh={stats['input_thresh']}:"
            f"offset={stats['target_offset']}:linear=true:print_format=summary"
        ),
        "-c:v", "copy",
        output_path,
    ]
    logger.info("Running loudnorm second pass (apply): %s", " ".join(apply_cmd))
    result = subprocess.run(apply_cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"Audio normalization (apply) failed: {result.stderr[-300:]}")

    logger.info(
        "Normalized audio saved to %s (target=%s LUFS, measured_I=%s)",
        output_path,
        target_lufs,
        input_i,
    )
    return output_path
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_loudnorm_measurement(stderr_output: str) -> dict:
|
||||||
|
"""Parse loudnorm JSON measurement output from FFmpeg stderr."""
|
||||||
|
import json
|
||||||
|
|
||||||
|
# loudnorm prints JSON block between "Parsed_loudnorm" lines
|
||||||
|
lines = stderr_output.split("\n")
|
||||||
|
json_lines = []
|
||||||
|
in_json = False
|
||||||
|
for line in lines:
|
||||||
|
if "Parsed_loudnorm" in line and "}" in line:
|
||||||
|
# Single-line JSON
|
||||||
|
try:
|
||||||
|
start = line.index("{")
|
||||||
|
end = line.rindex("}") + 1
|
||||||
|
return json.loads(line[start:end])
|
||||||
|
except (ValueError, json.JSONDecodeError):
|
||||||
|
continue
|
||||||
|
if "{" in line and not in_json:
|
||||||
|
in_json = True
|
||||||
|
if in_json:
|
||||||
|
json_lines.append(line)
|
||||||
|
if in_json and "}" in line:
|
||||||
|
in_json = False
|
||||||
|
break
|
||||||
|
|
||||||
|
if json_lines:
|
||||||
|
try:
|
||||||
|
return json.loads("".join(json_lines))
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
return {}
|
||||||
|
|||||||
@ -13,6 +13,20 @@ from typing import List
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_codec_args(format_hint: str, has_video: bool = True) -> list:
|
||||||
|
"""Return FFmpeg codec arguments for the given format."""
|
||||||
|
if format_hint == "wav":
|
||||||
|
return ["-c:a", "pcm_s16le"]
|
||||||
|
if format_hint == "webm":
|
||||||
|
if has_video:
|
||||||
|
return ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0", "-c:a", "libopus"]
|
||||||
|
return ["-c:a", "libopus", "-b:a", "160k"]
|
||||||
|
# Default: MP4
|
||||||
|
if has_video:
|
||||||
|
return ["-c:v", "libx264", "-preset", "medium", "-crf", "18", "-c:a", "aac", "-b:a", "192k"]
|
||||||
|
return ["-c:a", "aac", "-b:a", "192k"]
|
||||||
|
|
||||||
|
|
||||||
def _input_has_video_stream(ffmpeg_cmd: str, input_path: str) -> bool:
|
def _input_has_video_stream(ffmpeg_cmd: str, input_path: str) -> bool:
|
||||||
"""Return True if the input contains at least one video stream."""
|
"""Return True if the input contains at least one video stream."""
|
||||||
ffprobe = ffmpeg_cmd.replace("ffmpeg", "ffprobe")
|
ffprobe = ffmpeg_cmd.replace("ffmpeg", "ffprobe")
|
||||||
@ -135,7 +149,7 @@ def export_stream_copy(
|
|||||||
output_path on success
|
output_path on success
|
||||||
"""
|
"""
|
||||||
if mute_ranges:
|
if mute_ranges:
|
||||||
# Mute ranges require audio filtering, so fall back to re-encoding
|
# Mute ranges require audio filtering, so fall back to re-encode
|
||||||
return export_reencode(input_path, output_path, keep_segments, "1080p", "mp4", mute_ranges)
|
return export_reencode(input_path, output_path, keep_segments, "1080p", "mp4", mute_ranges)
|
||||||
ffmpeg = _find_ffmpeg()
|
ffmpeg = _find_ffmpeg()
|
||||||
if not _input_has_video_stream(ffmpeg, input_path):
|
if not _input_has_video_stream(ffmpeg, input_path):
|
||||||
@ -209,6 +223,8 @@ def export_reencode(
|
|||||||
gain_ranges: List[dict] = None,
|
gain_ranges: List[dict] = None,
|
||||||
speed_ranges: List[dict] = None,
|
speed_ranges: List[dict] = None,
|
||||||
global_gain_db: float = 0.0,
|
global_gain_db: float = 0.0,
|
||||||
|
normalize_loudness: bool = False,
|
||||||
|
normalize_target_lufs: float = -14.0,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Export video with full re-encode. Slower but supports resolution changes,
|
Export video with full re-encode. Slower but supports resolution changes,
|
||||||
@ -241,6 +257,9 @@ def export_reencode(
|
|||||||
end = mute_range['end']
|
end = mute_range['end']
|
||||||
filters.append(f"volume=0:enable='between(t,{start},{end})'")
|
filters.append(f"volume=0:enable='between(t,{start},{end})'")
|
||||||
|
|
||||||
|
if normalize_loudness:
|
||||||
|
filters.append(f"loudnorm=I={normalize_target_lufs}:LRA=7:TP=-1.5")
|
||||||
|
|
||||||
return ",".join(filters) if filters else "anull"
|
return ",".join(filters) if filters else "anull"
|
||||||
|
|
||||||
has_audio_filters = bool(mute_ranges) or bool(gain_ranges) or abs(float(global_gain_db)) > 1e-6
|
has_audio_filters = bool(mute_ranges) or bool(gain_ranges) or abs(float(global_gain_db)) > 1e-6
|
||||||
@ -278,16 +297,14 @@ def export_reencode(
|
|||||||
|
|
||||||
filter_complex = "".join(filter_parts)
|
filter_complex = "".join(filter_parts)
|
||||||
|
|
||||||
audio_codec_args = ["-c:a", "aac", "-b:a", "192k"]
|
codec_args = _get_codec_args(format_hint, has_video=False)
|
||||||
if format_hint == "webm":
|
|
||||||
audio_codec_args = ["-c:a", "libopus", "-b:a", "160k"]
|
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
ffmpeg, "-y",
|
ffmpeg, "-y",
|
||||||
"-i", input_path,
|
"-i", input_path,
|
||||||
"-filter_complex", filter_complex,
|
"-filter_complex", filter_complex,
|
||||||
"-map", audio_map,
|
"-map", audio_map,
|
||||||
*audio_codec_args,
|
*codec_args,
|
||||||
output_path,
|
output_path,
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -319,9 +336,7 @@ def export_reencode(
|
|||||||
|
|
||||||
filter_complex = f"[0:a]{audio_filter}[a];[0:v]{video_filter}{video_map}"
|
filter_complex = f"[0:a]{audio_filter}[a];[0:v]{video_filter}{video_map}"
|
||||||
|
|
||||||
codec_args = ["-c:v", "libx264", "-preset", "medium", "-crf", "18", "-c:a", "aac", "-b:a", "192k"]
|
codec_args = _get_codec_args(format_hint, has_video)
|
||||||
if format_hint == "webm":
|
|
||||||
codec_args = ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0", "-c:a", "libopus"]
|
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
ffmpeg, "-y",
|
ffmpeg, "-y",
|
||||||
@ -367,6 +382,12 @@ def export_reencode(
|
|||||||
|
|
||||||
filter_complex = "".join(filter_parts)
|
filter_complex = "".join(filter_parts)
|
||||||
|
|
||||||
|
# Add loudnorm to the cutting path audio chain if enabled
|
||||||
|
audio_map_label = "[outa]"
|
||||||
|
if normalize_loudness:
|
||||||
|
filter_complex += f";{audio_map_label}loudnorm=I={normalize_target_lufs}:LRA=7:TP=-1.5[outa_norm]"
|
||||||
|
audio_map_label = "[outa_norm]"
|
||||||
|
|
||||||
scale = scale_map.get(resolution, "")
|
scale = scale_map.get(resolution, "")
|
||||||
if scale:
|
if scale:
|
||||||
filter_complex += f";[outv]{scale}[outv_scaled]"
|
filter_complex += f";[outv]{scale}[outv_scaled]"
|
||||||
@ -374,25 +395,24 @@ def export_reencode(
|
|||||||
else:
|
else:
|
||||||
video_map = "[outv]"
|
video_map = "[outv]"
|
||||||
|
|
||||||
codec_args = ["-c:v", "libx264", "-preset", "medium", "-crf", "18", "-c:a", "aac", "-b:a", "192k"]
|
codec_args = _get_codec_args(format_hint, has_video)
|
||||||
if format_hint == "webm":
|
|
||||||
codec_args = ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0", "-c:a", "libopus"]
|
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
ffmpeg, "-y",
|
ffmpeg, "-y",
|
||||||
"-i", input_path,
|
"-i", input_path,
|
||||||
"-filter_complex", filter_complex,
|
"-filter_complex", filter_complex,
|
||||||
"-map", video_map,
|
"-map", video_map,
|
||||||
"-map", "[outa]",
|
"-map", audio_map_label,
|
||||||
*codec_args,
|
*codec_args,
|
||||||
"-movflags", "+faststart",
|
"-movflags", "+faststart",
|
||||||
output_path,
|
output_path,
|
||||||
]
|
]
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Re-encoding %s segments (speed-adjusted=%s) -> %s (%s)",
|
"Re-encoding %s segments (speed-adjusted=%s, normalize=%s) -> %s (%s)",
|
||||||
n,
|
n,
|
||||||
has_speed,
|
has_speed,
|
||||||
|
normalize_loudness,
|
||||||
output_path,
|
output_path,
|
||||||
resolution,
|
resolution,
|
||||||
)
|
)
|
||||||
@ -415,6 +435,8 @@ def export_reencode_with_subs(
|
|||||||
gain_ranges: List[dict] = None,
|
gain_ranges: List[dict] = None,
|
||||||
speed_ranges: List[dict] = None,
|
speed_ranges: List[dict] = None,
|
||||||
global_gain_db: float = 0.0,
|
global_gain_db: float = 0.0,
|
||||||
|
normalize_loudness: bool = False,
|
||||||
|
normalize_target_lufs: float = -14.0,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Export video with re-encode and burn-in subtitles (ASS format).
|
Export video with re-encode and burn-in subtitles (ASS format).
|
||||||
@ -451,6 +473,9 @@ def export_reencode_with_subs(
|
|||||||
end = mute_range['end']
|
end = mute_range['end']
|
||||||
filters.append(f"volume=0:enable='between(t,{start},{end})'")
|
filters.append(f"volume=0:enable='between(t,{start},{end})'")
|
||||||
|
|
||||||
|
if normalize_loudness:
|
||||||
|
filters.append(f"loudnorm=I={normalize_target_lufs}:LRA=7:TP=-1.5")
|
||||||
|
|
||||||
return ",".join(filters) if filters else "anull"
|
return ",".join(filters) if filters else "anull"
|
||||||
|
|
||||||
has_audio_filters = bool(mute_ranges) or bool(gain_ranges) or abs(float(global_gain_db)) > 1e-6
|
has_audio_filters = bool(mute_ranges) or bool(gain_ranges) or abs(float(global_gain_db)) > 1e-6
|
||||||
@ -472,9 +497,7 @@ def export_reencode_with_subs(
|
|||||||
|
|
||||||
filter_complex = f"[0:a]{audio_filter}[a];[0:v]{video_filter}[v]"
|
filter_complex = f"[0:a]{audio_filter}[a];[0:v]{video_filter}[v]"
|
||||||
|
|
||||||
codec_args = ["-c:v", "libx264", "-preset", "medium", "-crf", "18", "-c:a", "aac", "-b:a", "192k"]
|
codec_args = _get_codec_args(format_hint, has_video=True)
|
||||||
if format_hint == "webm":
|
|
||||||
codec_args = ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0", "-c:a", "libopus"]
|
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
ffmpeg, "-y",
|
ffmpeg, "-y",
|
||||||
@ -530,9 +553,7 @@ def export_reencode_with_subs(
|
|||||||
filter_complex += f";[outv]ass='{escaped_sub}'[outv_final]"
|
filter_complex += f";[outv]ass='{escaped_sub}'[outv_final]"
|
||||||
video_map = "[outv_final]"
|
video_map = "[outv_final]"
|
||||||
|
|
||||||
codec_args = ["-c:v", "libx264", "-preset", "medium", "-crf", "18", "-c:a", "aac", "-b:a", "192k"]
|
codec_args = _get_codec_args(format_hint, has_video=True)
|
||||||
if format_hint == "webm":
|
|
||||||
codec_args = ["-c:v", "libvpx-vp9", "-crf", "30", "-b:v", "0", "-c:a", "libopus"]
|
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
ffmpeg, "-y",
|
ffmpeg, "-y",
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
import { useState, useCallback } from 'react';
|
import { useState, useCallback } from 'react';
|
||||||
import { useEditorStore } from '../store/editorStore';
|
import { useEditorStore } from '../store/editorStore';
|
||||||
import { Download, Loader2, Zap, Cog, Info } from 'lucide-react';
|
import { Download, Loader2, Zap, Cog, Info, Volume2 } from 'lucide-react';
|
||||||
import type { ExportOptions } from '../types/project';
|
import type { ExportOptions } from '../types/project';
|
||||||
|
|
||||||
export default function ExportDialog() {
|
export default function ExportDialog() {
|
||||||
@ -9,25 +9,40 @@ export default function ExportDialog() {
|
|||||||
|
|
||||||
const hasCuts = cutRanges.length > 0;
|
const hasCuts = cutRanges.length > 0;
|
||||||
|
|
||||||
const [options, setOptions] = useState<Omit<ExportOptions, 'outputPath'>>({
|
// Detect if input is audio-only by its extension
|
||||||
mode: 'fast',
|
const audioExtensions = new Set(['.wav', '.mp3', '.flac', '.m4a', '.ogg', '.aac', '.wma']);
|
||||||
|
const inputExt = videoPath ? '.' + videoPath.split('.').pop()?.toLowerCase() : '';
|
||||||
|
const isAudioOnly = videoPath ? audioExtensions.has(inputExt) : false;
|
||||||
|
|
||||||
|
const [options, setOptions] = useState<Omit<ExportOptions, 'outputPath'> & { normalizeAudio: boolean; normalizeTarget: number }>({
|
||||||
|
mode: isAudioOnly ? 'reencode' : 'fast',
|
||||||
resolution: '1080p',
|
resolution: '1080p',
|
||||||
format: 'mp4',
|
format: isAudioOnly ? 'wav' : 'mp4',
|
||||||
enhanceAudio: false,
|
enhanceAudio: false,
|
||||||
captions: 'none',
|
captions: 'none',
|
||||||
|
normalizeAudio: false,
|
||||||
|
normalizeTarget: -14,
|
||||||
});
|
});
|
||||||
const [exportError, setExportError] = useState<string | null>(null);
|
const [exportError, setExportError] = useState<string | null>(null);
|
||||||
|
|
||||||
|
const HANDLE_EXPORT_filters = useCallback(() => {
|
||||||
|
const ext = options.format;
|
||||||
|
const nameMap: Record<string, string> = {
|
||||||
|
mp4: 'MP4',
|
||||||
|
mov: 'MOV',
|
||||||
|
webm: 'WebM',
|
||||||
|
wav: 'WAV Audio',
|
||||||
|
};
|
||||||
|
return [{ name: nameMap[ext] || 'File', extensions: [ext] }];
|
||||||
|
}, [options.format]);
|
||||||
|
|
||||||
const handleExport = useCallback(async () => {
|
const handleExport = useCallback(async () => {
|
||||||
if (!videoPath) return;
|
if (!videoPath) return;
|
||||||
|
|
||||||
|
// Suggest a file name whose extension matches the selected container. The save
// dialog filter only offers that one extension, so hard-coding "mp4" for
// mov/webm exports would propose a name the filter does not accept.
const defaultExt = options.format;
|
||||||
const outputPath = await window.electronAPI?.saveFile({
|
const outputPath = await window.electronAPI?.saveFile({
|
||||||
defaultPath: videoPath.replace(/\.[^.]+$/, '_edited.mp4'),
|
defaultPath: videoPath.replace(/\.[^.]+$/, `_edited.${defaultExt}`),
|
||||||
filters: [
|
filters: HANDLE_EXPORT_filters(),
|
||||||
{ name: 'MP4', extensions: ['mp4'] },
|
|
||||||
{ name: 'MOV', extensions: ['mov'] },
|
|
||||||
{ name: 'WebM', extensions: ['webm'] },
|
|
||||||
],
|
|
||||||
});
|
});
|
||||||
if (!outputPath) return;
|
if (!outputPath) return;
|
||||||
|
|
||||||
@ -44,6 +59,18 @@ export default function ExportDialog() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Map frontend camelCase gain/speed fields to backend snake_case
|
||||||
|
const backendGainRanges = gainRanges.map((r) => ({
|
||||||
|
start: r.start,
|
||||||
|
end: r.end,
|
||||||
|
gain_db: r.gainDb,
|
||||||
|
}));
|
||||||
|
const backendSpeedRanges = speedRanges.map((r) => ({
|
||||||
|
start: r.start,
|
||||||
|
end: r.end,
|
||||||
|
speed: r.speed,
|
||||||
|
}));
|
||||||
|
|
||||||
const res = await fetch(`${backendUrl}/export`, {
|
const res = await fetch(`${backendUrl}/export`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
@ -51,13 +78,19 @@ export default function ExportDialog() {
|
|||||||
input_path: videoPath,
|
input_path: videoPath,
|
||||||
output_path: outputPath,
|
output_path: outputPath,
|
||||||
keep_segments: keepSegments,
|
keep_segments: keepSegments,
|
||||||
mute_ranges: muteRanges,
|
mute_ranges: muteRanges.length > 0 ? muteRanges.map((r) => ({ start: r.start, end: r.end })) : undefined,
|
||||||
gain_ranges: gainRanges,
|
gain_ranges: backendGainRanges.length > 0 ? backendGainRanges : undefined,
|
||||||
speed_ranges: speedRanges,
|
speed_ranges: backendSpeedRanges.length > 0 ? backendSpeedRanges : undefined,
|
||||||
global_gain_db: globalGainDb,
|
global_gain_db: globalGainDb,
|
||||||
words: options.captions !== 'none' ? words : undefined,
|
words: options.captions !== 'none' ? words : undefined,
|
||||||
deleted_indices: options.captions !== 'none' ? [...deletedSet] : undefined,
|
deleted_indices: options.captions !== 'none' ? [...deletedSet] : undefined,
|
||||||
...options,
|
mode: options.mode,
|
||||||
|
resolution: options.resolution,
|
||||||
|
format: options.format,
|
||||||
|
enhanceAudio: options.enhanceAudio,
|
||||||
|
normalize_loudness: options.normalizeAudio,
|
||||||
|
normalize_target_lufs: options.normalizeTarget,
|
||||||
|
captions: options.captions,
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
@ -76,7 +109,7 @@ export default function ExportDialog() {
|
|||||||
setExportError(err instanceof Error ? err.message : 'Export failed');
|
setExportError(err instanceof Error ? err.message : 'Export failed');
|
||||||
setExporting(false);
|
setExporting(false);
|
||||||
}
|
}
|
||||||
}, [videoPath, options, backendUrl, setExporting, getKeepSegments, cutRanges, muteRanges, gainRanges, speedRanges, globalGainDb, words]);
|
}, [videoPath, options, backendUrl, setExporting, getKeepSegments, cutRanges, muteRanges, gainRanges, speedRanges, globalGainDb, words, HANDLE_EXPORT_filters]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="p-4 space-y-5">
|
<div className="p-4 space-y-5">
|
||||||
@ -126,9 +159,44 @@ export default function ExportDialog() {
|
|||||||
{ value: 'mp4', label: 'MP4 (H.264)' },
|
{ value: 'mp4', label: 'MP4 (H.264)' },
|
||||||
{ value: 'mov', label: 'MOV (QuickTime)' },
|
{ value: 'mov', label: 'MOV (QuickTime)' },
|
||||||
{ value: 'webm', label: 'WebM (VP9)' },
|
{ value: 'webm', label: 'WebM (VP9)' },
|
||||||
|
...(isAudioOnly ? [{ value: 'wav' as const, label: 'WAV (Uncompressed)' }] : []),
|
||||||
]}
|
]}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
{/* Audio normalization — integrated into export */}
|
||||||
|
<div className="space-y-2 pt-1 border-t border-editor-border">
|
||||||
|
<label className="flex items-center gap-2 cursor-pointer">
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
checked={options.normalizeAudio}
|
||||||
|
onChange={(e) => setOptions((o) => ({ ...o, normalizeAudio: e.target.checked }))}
|
||||||
|
className="w-4 h-4 rounded bg-editor-surface border-editor-border accent-editor-accent"
|
||||||
|
/>
|
||||||
|
<div>
|
||||||
|
<span className="text-xs font-medium">Normalize loudness</span>
|
||||||
|
<p className="text-[10px] text-editor-text-muted">
|
||||||
|
Apply LUFS normalization during export. Requires re-encode.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</label>
|
||||||
|
{options.normalizeAudio && (
|
||||||
|
<div className="flex items-center gap-2 pl-6">
|
||||||
|
<Volume2 className="w-3 h-3 text-editor-text-muted shrink-0" />
|
||||||
|
<select
|
||||||
|
value={options.normalizeTarget}
|
||||||
|
onChange={(e) => setOptions((o) => ({ ...o, normalizeTarget: Number(e.target.value) }))}
|
||||||
|
className="flex-1 px-2 py-1.5 text-xs bg-editor-surface border border-editor-border rounded focus:outline-none focus:border-editor-accent [color-scheme:dark]"
|
||||||
|
>
|
||||||
|
<option value={-14}>YouTube (-14 LUFS)</option>
|
||||||
|
<option value={-16}>Spotify (-16 LUFS)</option>
|
||||||
|
<option value={-23}>Broadcast (-23 LUFS)</option>
|
||||||
|
<option value={-11}>Loud (-11 LUFS)</option>
|
||||||
|
<option value={-9}>Very Loud (-9 LUFS)</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
{/* Audio enhancement */}
|
{/* Audio enhancement */}
|
||||||
<label className="flex items-center gap-2 cursor-pointer">
|
<label className="flex items-center gap-2 cursor-pointer">
|
||||||
<input
|
<input
|
||||||
@ -241,7 +309,7 @@ function SelectField({
|
|||||||
<select
|
<select
|
||||||
value={value}
|
value={value}
|
||||||
onChange={(e) => onChange(e.target.value)}
|
onChange={(e) => onChange(e.target.value)}
|
||||||
className="w-full px-3 py-2 bg-editor-surface border border-editor-border rounded-lg text-xs text-editor-text focus:outline-none focus:border-editor-accent"
|
className="w-full px-3 py-2 bg-editor-surface border border-editor-border rounded-lg text-xs text-editor-text focus:outline-none focus:border-editor-accent [color-scheme:dark]"
|
||||||
>
|
>
|
||||||
{options.map((opt) => (
|
{options.map((opt) => (
|
||||||
<option key={opt.value} value={opt.value}>
|
<option key={opt.value} value={opt.value}>
|
||||||
|
|||||||
@ -7,6 +7,18 @@ import { Bot, Cloud, Brain, RefreshCw } from 'lucide-react';
|
|||||||
export default function SettingsPanel() {
|
export default function SettingsPanel() {
|
||||||
const { providers, defaultProvider, setProviderConfig, setDefaultProvider } = useAIStore();
|
const { providers, defaultProvider, setProviderConfig, setDefaultProvider } = useAIStore();
|
||||||
const { backendUrl, zonePreviewPaddingSeconds, setZonePreviewPaddingSeconds } = useEditorStore();
|
const { backendUrl, zonePreviewPaddingSeconds, setZonePreviewPaddingSeconds } = useEditorStore();
|
||||||
|
const CONFIDENCE_THRESHOLD_KEY = 'talkedit:confidenceThreshold';
|
||||||
|
// Threshold below which a word counts as "low confidence". Initialised lazily
// from localStorage; falls back to 0.6 when nothing usable is stored.
const [confidenceThreshold, setConfidenceThresholdState] = useState(() => {
  const fallback = 0.6;
  // No `window` (non-browser render): nothing to read, so use the default.
  if (typeof window === 'undefined') return fallback;
  const raw = window.localStorage.getItem(CONFIDENCE_THRESHOLD_KEY);
  // `Number(null)` and `Number('')` are both 0, which is finite — treat a
  // missing or blank entry as "unset" instead of silently using 0.
  if (raw === null || raw.trim() === '') return fallback;
  const stored = Number(raw);
  if (!Number.isFinite(stored)) return fallback;
  // Keep the value inside the [0, 1] range that the setter also enforces.
  return Math.max(0, Math.min(1, stored));
});
|
||||||
|
// Update the threshold in component state and persist it to localStorage.
// `value` is clamped to [0, 1]; NaN is ignored so it can never be stored.
const setConfidenceThreshold = (value: number) => {
  // Math.min/Math.max propagate NaN, so it must be rejected before clamping.
  if (Number.isNaN(value)) return;
  const clamped = Math.max(0, Math.min(1, value));
  setConfidenceThresholdState(clamped);
  if (typeof window !== 'undefined') {
    window.localStorage.setItem(CONFIDENCE_THRESHOLD_KEY, String(clamped));
  }
};
|
||||||
const [ollamaModels, setOllamaModels] = useState<string[]>([]);
|
const [ollamaModels, setOllamaModels] = useState<string[]>([]);
|
||||||
const [loadingModels, setLoadingModels] = useState(false);
|
const [loadingModels, setLoadingModels] = useState(false);
|
||||||
|
|
||||||
@ -66,6 +78,40 @@ export default function SettingsPanel() {
|
|||||||
</div>
|
</div>
|
||||||
</ProviderSection>
|
</ProviderSection>
|
||||||
|
|
||||||
|
{/* Confidence threshold */}
|
||||||
|
<div className="space-y-2">
|
||||||
|
<label className="text-xs text-editor-text-muted font-medium">Low-Confidence Word Threshold</label>
|
||||||
|
<p className="text-[10px] text-editor-text-muted leading-relaxed">
|
||||||
|
Words with confidence below this value are highlighted with an orange dotted underline.
|
||||||
|
Whisper often gets homophones and proper nouns wrong at low confidence.
|
||||||
|
</p>
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<input
|
||||||
|
type="range"
|
||||||
|
min={0}
|
||||||
|
max={1}
|
||||||
|
step={0.05}
|
||||||
|
value={confidenceThreshold}
|
||||||
|
onChange={(e) => setConfidenceThreshold(Number(e.target.value))}
|
||||||
|
className="flex-1 h-1.5"
|
||||||
|
/>
|
||||||
|
<input
|
||||||
|
type="number"
|
||||||
|
min={0}
|
||||||
|
max={1}
|
||||||
|
step={0.05}
|
||||||
|
value={confidenceThreshold}
|
||||||
|
onChange={(e) => setConfidenceThreshold(Math.max(0, Math.min(1, Number(e.target.value) || 0)))}
|
||||||
|
className="w-16 px-2 py-1 bg-editor-bg border border-editor-border rounded-lg text-xs text-editor-text focus:outline-none focus:border-editor-accent"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div className="flex items-center justify-between text-[10px]">
|
||||||
|
<span className="text-editor-text-muted">Show all</span>
|
||||||
|
<span className="font-medium text-editor-text">{confidenceThreshold.toFixed(2)}</span>
|
||||||
|
<span className="text-editor-text-muted">Strict</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
{/* Default provider selector */}
|
{/* Default provider selector */}
|
||||||
<div className="space-y-2">
|
<div className="space-y-2">
|
||||||
<label className="text-xs text-editor-text-muted font-medium">Default AI Provider</label>
|
<label className="text-xs text-editor-text-muted font-medium">Default AI Provider</label>
|
||||||
|
|||||||
@ -49,6 +49,10 @@ export default function TranscriptEditor({
|
|||||||
const [searchQuery, setSearchQuery] = useState('');
|
const [searchQuery, setSearchQuery] = useState('');
|
||||||
const [activeMatchIdx, setActiveMatchIdx] = useState(0);
|
const [activeMatchIdx, setActiveMatchIdx] = useState(0);
|
||||||
const searchInputRef = useRef<HTMLInputElement | null>(null);
|
const searchInputRef = useRef<HTMLInputElement | null>(null);
|
||||||
|
const updateWordText = useEditorStore((s) => s.updateWordText);
|
||||||
|
const [editingWordIndex, setEditingWordIndex] = useState<number | null>(null);
|
||||||
|
const [editText, setEditText] = useState('');
|
||||||
|
const editInputRef = useRef<HTMLInputElement | null>(null);
|
||||||
|
|
||||||
const selectedSet = useMemo(() => new Set(selectedWordIndices), [selectedWordIndices]);
|
const selectedSet = useMemo(() => new Set(selectedWordIndices), [selectedWordIndices]);
|
||||||
const matchIndices = useMemo(() => {
|
const matchIndices = useMemo(() => {
|
||||||
@ -224,6 +228,61 @@ export default function TranscriptEditor({
|
|||||||
[setSelectedWordIndices],
|
[setSelectedWordIndices],
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Enter inline-edit mode for the word at `index`: record which word is being
// edited, seed the draft text with its current value, then focus and select
// the input on the next animation frame. Does nothing if no word exists there.
const startEditing = useCallback(
  (index: number) => {
    const target = words[index];
    if (target) {
      setEditingWordIndex(index);
      setEditText(target.word);
      requestAnimationFrame(() => {
        const input = editInputRef.current;
        input?.focus();
        input?.select();
      });
    }
  },
  [words],
);
|
||||||
|
|
||||||
|
// Finish the current inline edit. The trimmed draft is written to the store
// only when it is non-empty and differs from the word's current text; edit
// mode is left and the draft cleared either way.
const commitEdit = useCallback(() => {
  if (editingWordIndex === null) return;

  const nextText = editText.trim();
  const currentText = words[editingWordIndex]?.word;
  const hasChange = nextText !== '' && nextText !== currentText;
  if (hasChange) {
    updateWordText(editingWordIndex, nextText);
  }

  setEditingWordIndex(null);
  setEditText('');
}, [editingWordIndex, editText, words, updateWordText]);
|
||||||
|
|
||||||
|
// Abandon the current inline edit: discard the draft text and leave edit mode
// without touching the store.
const cancelEdit = useCallback(() => {
  setEditText('');
  setEditingWordIndex(null);
}, []);
|
||||||
|
|
||||||
|
// Double-click handler for a word: starts inline editing unless one of the
// zone modes (cut / mute / gain / speed) is currently active.
const handleWordDoubleClick = useCallback(
  (index: number) => {
    const zoneModeActive = cutMode || muteMode || gainMode || speedMode;
    if (!zoneModeActive) {
      startEditing(index);
    }
  },
  [cutMode, muteMode, gainMode, speedMode, startEditing],
);
|
||||||
|
|
||||||
|
// Whenever the edited word changes and an edit input is mounted, move focus
// into it and select its contents.
useEffect(() => {
  const input = editInputRef.current;
  if (editingWordIndex === null || !input) return;
  input.focus();
  input.select();
}, [editingWordIndex]);
|
||||||
|
|
||||||
|
// Global key handler for edit mode: while a word is being edited, Enter
// commits the edit and Escape cancels it. The listener is re-registered
// whenever the edited word or the commit/cancel callbacks change.
useEffect(() => {
  const onKeyDown = (e: KeyboardEvent) => {
    if (editingWordIndex === null) return;
    // Keys pressed during an IME composition (e.g. Enter to confirm a
    // candidate, Escape to dismiss it) belong to the input method and must
    // not commit or cancel the edit.
    if (e.isComposing) return;
    if (e.key === 'Enter') {
      e.preventDefault();
      commitEdit();
    } else if (e.key === 'Escape') {
      e.preventDefault();
      cancelEdit();
    }
  };
  window.addEventListener('keydown', onKeyDown);
  return () => window.removeEventListener('keydown', onKeyDown);
}, [editingWordIndex, commitEdit, cancelEdit]);
|
||||||
|
|
||||||
const cutSelectedWords = useCallback(() => {
|
const cutSelectedWords = useCallback(() => {
|
||||||
if (selectedWordIndices.length === 0) return;
|
if (selectedWordIndices.length === 0) return;
|
||||||
const sorted = [...selectedWordIndices].sort((a, b) => a - b);
|
const sorted = [...selectedWordIndices].sort((a, b) => a - b);
|
||||||
@ -319,15 +378,25 @@ export default function TranscriptEditor({
|
|||||||
const isSearchMatch = matchSet.has(globalIndex);
|
const isSearchMatch = matchSet.has(globalIndex);
|
||||||
const isActiveSearchMatch = matchIndices.length > 0 && matchIndices[safeActiveMatchIdx] === globalIndex;
|
const isActiveSearchMatch = matchIndices.length > 0 && matchIndices[safeActiveMatchIdx] === globalIndex;
|
||||||
|
|
||||||
|
const isEditing = globalIndex === editingWordIndex;
|
||||||
|
|
||||||
|
// Low-confidence highlighting: resolve the threshold from localStorage,
// falling back to 0.6 when nothing usable is stored.
// NOTE(review): this appears to run for every rendered word — consider
// hoisting the localStorage read out of the per-word path.
const CONFIDENCE_THRESHOLD_KEY = 'talkedit:confidenceThreshold';
const rawThreshold = typeof window !== 'undefined' ? window.localStorage.getItem(CONFIDENCE_THRESHOLD_KEY) : null;
// `Number(null)` and `Number('')` are both 0 (a finite number), which would
// make the threshold 0 and disable highlighting; map "unset" to NaN instead
// so the default below applies.
const storedThreshold = rawThreshold === null || rawThreshold.trim() === '' ? NaN : Number(rawThreshold);
const confidenceThreshold = Number.isFinite(storedThreshold) ? storedThreshold : 0.6;
|
||||||
|
const isLowConfidence = word.confidence > 0 && word.confidence < confidenceThreshold && !cutRange && !muteRange && !gainRange && !speedRange;
|
||||||
|
const confidencePct = word.confidence > 0 ? Math.round(word.confidence * 100) : null;
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<span
|
<span
|
||||||
key={globalIndex}
|
key={globalIndex}
|
||||||
id={`word-${globalIndex}`}
|
id={`word-${globalIndex}`}
|
||||||
data-word-index={globalIndex}
|
data-word-index={globalIndex}
|
||||||
title={`${word.start.toFixed(2)}s — Ctrl+click to seek`}
|
title={`${word.start.toFixed(2)}s — confidence: ${confidencePct !== null ? confidencePct + '%' : 'N/A'}${isLowConfidence ? ' ⚠️ Low confidence' : ''} — Ctrl+click to seek, double-click to edit`}
|
||||||
onMouseDown={(e) => handleWordMouseDown(globalIndex, e)}
|
onMouseDown={(e) => handleWordMouseDown(globalIndex, e)}
|
||||||
onMouseEnter={() => handleWordMouseEnter(globalIndex)}
|
onMouseEnter={() => handleWordMouseEnter(globalIndex)}
|
||||||
onMouseLeave={() => setHoveredWordIndex(null)}
|
onMouseLeave={() => setHoveredWordIndex(null)}
|
||||||
|
onDoubleClick={() => handleWordDoubleClick(globalIndex)}
|
||||||
className={`
|
className={`
|
||||||
relative px-[2px] py-[1px] rounded cursor-pointer transition-colors
|
relative px-[2px] py-[1px] rounded cursor-pointer transition-colors
|
||||||
${cutRange ? 'bg-red-500/20 text-red-100' : ''}
|
${cutRange ? 'bg-red-500/20 text-red-100' : ''}
|
||||||
@ -343,9 +412,21 @@ export default function TranscriptEditor({
|
|||||||
${isSelected && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-word-selected text-white' : ''}
|
${isSelected && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-word-selected text-white' : ''}
|
||||||
${isActive && !isSelected && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-accent/20 text-editor-accent' : ''}
|
${isActive && !isSelected && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-accent/20 text-editor-accent' : ''}
|
||||||
${isHovered && !isSelected && !isActive && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-word-hover' : ''}
|
${isHovered && !isSelected && !isActive && !cutRange && !muteRange && !gainRange && !speedRange ? 'bg-editor-word-hover' : ''}
|
||||||
|
${isLowConfidence ? 'border-b border-dashed border-orange-400/60' : ''}
|
||||||
`}
|
`}
|
||||||
>
|
>
|
||||||
{word.word}{' '}
|
{isEditing ? (
|
||||||
|
<input
|
||||||
|
ref={editInputRef}
|
||||||
|
value={editText}
|
||||||
|
onChange={(e) => setEditText(e.target.value)}
|
||||||
|
onBlur={commitEdit}
|
||||||
|
className="w-24 px-1 py-0 text-xs bg-editor-bg border border-editor-accent rounded text-editor-text focus:outline-none"
|
||||||
|
style={{ minWidth: `${Math.max(word.word.length * 8, 48)}px` }}
|
||||||
|
/>
|
||||||
|
) : (
|
||||||
|
<>{word.word}{' '}</>
|
||||||
|
)}
|
||||||
{(cutRange || muteRange || gainRange || speedRange) && isHovered && (
|
{(cutRange || muteRange || gainRange || speedRange) && isHovered && (
|
||||||
<button
|
<button
|
||||||
onClick={(e) => {
|
onClick={(e) => {
|
||||||
@ -367,7 +448,7 @@ export default function TranscriptEditor({
|
|||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
},
|
},
|
||||||
[segments, selectedSet, matchSet, matchIndices, safeActiveMatchIdx, activeWordIndex, hoveredWordIndex, handleWordMouseDown, handleWordMouseEnter, setHoveredWordIndex, getCutRangeForWord, getMuteRangeForWord, getGainRangeForWord, getSpeedRangeForWord, removeCutRange, removeMuteRange, removeGainRange, removeSpeedRange, zoneDragRange, cutMode, muteMode, gainMode, speedMode],
|
[segments, selectedSet, matchSet, matchIndices, safeActiveMatchIdx, activeWordIndex, hoveredWordIndex, handleWordMouseDown, handleWordMouseEnter, setHoveredWordIndex, getCutRangeForWord, getMuteRangeForWord, getGainRangeForWord, getSpeedRangeForWord, removeCutRange, removeMuteRange, removeGainRange, removeSpeedRange, zoneDragRange, cutMode, muteMode, gainMode, speedMode, editingWordIndex, editText, editInputRef, handleWordDoubleClick, commitEdit, setEditText],
|
||||||
);
|
);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
|||||||
@ -11,7 +11,7 @@ import { invoke } from '@tauri-apps/api/core';
|
|||||||
import { open, save } from '@tauri-apps/plugin-dialog';
|
import { open, save } from '@tauri-apps/plugin-dialog';
|
||||||
import { readTextFile, writeTextFile } from '@tauri-apps/plugin-fs';
|
import { readTextFile, writeTextFile } from '@tauri-apps/plugin-fs';
|
||||||
|
|
||||||
const backendPort = import.meta.env.VITE_BACKEND_PORT || '8642';
|
const backendPort = import.meta.env.VITE_BACKEND_PORT || '8000';
|
||||||
const backendUrl = `http://127.0.0.1:${backendPort}`;
|
const backendUrl = `http://127.0.0.1:${backendPort}`;
|
||||||
|
|
||||||
const VIDEO_FILTERS = [
|
const VIDEO_FILTERS = [
|
||||||
|
|||||||
@ -67,6 +67,7 @@ interface EditorActions {
|
|||||||
setHoveredWordIndex: (index: number | null) => void;
|
setHoveredWordIndex: (index: number | null) => void;
|
||||||
deleteSelectedWords: () => void;
|
deleteSelectedWords: () => void;
|
||||||
deleteWordRange: (startIndex: number, endIndex: number) => void;
|
deleteWordRange: (startIndex: number, endIndex: number) => void;
|
||||||
|
updateWordText: (index: number, text: string) => void;
|
||||||
addCutRange: (start: number, end: number, trimGroupId?: string) => void;
|
addCutRange: (start: number, end: number, trimGroupId?: string) => void;
|
||||||
addMuteRange: (start: number, end: number) => void;
|
addMuteRange: (start: number, end: number) => void;
|
||||||
addGainRange: (start: number, end: number, gainDb: number) => void;
|
addGainRange: (start: number, end: number, gainDb: number) => void;
|
||||||
@ -262,6 +263,33 @@ export const useEditorStore = create<EditorState & EditorActions>()(
|
|||||||
get().addCutRange(words[startIndex].start, words[endIndex].end);
|
get().addCutRange(words[startIndex].start, words[endIndex].end);
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// Replace the text of the word at global `index` with `text`. The flat
// `words` array and the owning segment are both updated; that segment's
// `text` is rebuilt by joining its words with single spaces. An out-of-range
// index leaves the state untouched.
updateWordText: (index, text) => {
  const state = get();
  const inRange = index >= 0 && index < state.words.length;
  if (!inRange) return;

  const newWords = state.words.map((entry, pos) =>
    pos === index ? { ...entry, word: text } : entry
  );

  // Walk the segments while tracking the global index of each segment's
  // first word, so the global index can be mapped to a segment-local one.
  let offset = 0;
  const newSegments = state.segments.map((segment) => {
    const first = offset;
    const count = segment.words.length;
    offset += count;

    if (index < first || index >= first + count) return segment;

    const localIdx = index - first;
    const patchedWords = segment.words.map((entry, pos) =>
      pos === localIdx ? { ...entry, word: text } : entry
    );
    return {
      ...segment,
      words: patchedWords,
      text: patchedWords.map((entry) => entry.word).join(' '),
    };
  });

  set({ words: newWords, segments: newSegments });
},
|
||||||
|
|
||||||
addCutRange: (start, end, trimGroupId) => {
|
addCutRange: (start, end, trimGroupId) => {
|
||||||
const { cutRanges } = get();
|
const { cutRanges } = get();
|
||||||
const newRange: CutRange = {
|
const newRange: CutRange = {
|
||||||
|
|||||||
@ -87,7 +87,7 @@ export interface ExportOptions {
|
|||||||
outputPath: string;
|
outputPath: string;
|
||||||
mode: 'fast' | 'reencode';
|
mode: 'fast' | 'reencode';
|
||||||
resolution: '720p' | '1080p' | '4k';
|
resolution: '720p' | '1080p' | '4k';
|
||||||
format: 'mp4' | 'mov' | 'webm';
|
format: 'mp4' | 'mov' | 'webm' | 'wav';
|
||||||
enhanceAudio: boolean;
|
enhanceAudio: boolean;
|
||||||
captions: 'none' | 'burn-in' | 'sidecar';
|
captions: 'none' | 'burn-in' | 'sidecar';
|
||||||
captionStyle?: CaptionStyle;
|
captionStyle?: CaptionStyle;
|
||||||
|
|||||||
Reference in New Issue
Block a user