diff --git a/backend/routers/export.py b/backend/routers/export.py index f567eb4..03e3e99 100644 --- a/backend/routers/export.py +++ b/backend/routers/export.py @@ -25,6 +25,10 @@ class GainRangeModel(SegmentModel): gain_db: float +class SpeedRangeModel(SegmentModel): + speed: float + + class ExportWordModel(BaseModel): word: str start: float @@ -38,6 +42,7 @@ class ExportRequest(BaseModel): keep_segments: List[SegmentModel] mute_ranges: Optional[List[SegmentModel]] = None gain_ranges: Optional[List[GainRangeModel]] = None + speed_ranges: Optional[List[SpeedRangeModel]] = None global_gain_db: float = 0.0 mode: str = "fast" resolution: str = "1080p" @@ -77,6 +82,8 @@ def _map_ranges_to_output_timeline( } if "gain_db" in src_range: mapped_range["gain_db"] = float(src_range["gain_db"]) + if "speed" in src_range: + mapped_range["speed"] = float(src_range["speed"]) mapped.append(mapped_range) output_cursor += keep_len @@ -109,6 +116,7 @@ async def export_video(req: ExportRequest): segments = [{"start": s.start, "end": s.end} for s in req.keep_segments] mute_segments = [{"start": s.start, "end": s.end} for s in req.mute_ranges] if req.mute_ranges else None gain_segments = [{"start": s.start, "end": s.end, "gain_db": s.gain_db} for s in req.gain_ranges] if req.gain_ranges else None + speed_segments = [{"start": s.start, "end": s.end, "speed": s.speed} for s in req.speed_ranges] if req.speed_ranges else None if not segments and not mute_segments: raise HTTPException(status_code=400, detail="No segments to export") @@ -116,11 +124,19 @@ async def export_video(req: ExportRequest): mapped_gain_segments = _map_ranges_to_output_timeline(gain_segments or [], segments) has_gain = abs(float(req.global_gain_db)) > 1e-6 or bool(gain_segments) - use_stream_copy = req.mode == "fast" and len(segments) == 1 and not mute_segments and not has_gain + has_speed = bool(speed_segments) + + if has_speed and (mute_segments or has_gain): + raise HTTPException( + status_code=400, + detail="Speed zones currently cannot be combined with mute/gain filters in one export", + ) + + use_stream_copy = req.mode == "fast" and len(segments) == 1 and not mute_segments and not has_gain and not has_speed needs_reencode_for_subs = req.captions == "burn-in" # Burn-in captions or audio filters require re-encode - if needs_reencode_for_subs or mute_segments or has_gain: + if needs_reencode_for_subs or mute_segments or has_gain or has_speed: use_stream_copy = False words_dicts = [w.model_dump() for w in req.words] if req.words else [] @@ -148,6 +164,7 @@ async def export_video(req: ExportRequest): format_hint=req.format, mute_ranges=mute_segments, gain_ranges=mapped_gain_segments, + speed_ranges=speed_segments, global_gain_db=req.global_gain_db, ) else: @@ -159,6 +176,7 @@ async def export_video(req: ExportRequest): format_hint=req.format, mute_ranges=mute_segments, gain_ranges=mapped_gain_segments, + speed_ranges=speed_segments, global_gain_db=req.global_gain_db, ) finally: diff --git a/backend/services/video_editor.py b/backend/services/video_editor.py index 948465c..e12e3f8 100644 --- a/backend/services/video_editor.py +++ b/backend/services/video_editor.py @@ -13,6 +13,78 @@ from typing import List logger = logging.getLogger(__name__) +def _clamp_speed(speed: float) -> float: + return max(0.25, min(4.0, float(speed))) + + +def _build_atempo_chain(speed: float) -> str: + """Build an FFmpeg atempo chain since each atempo node only supports 0.5..2.0.""" + s = _clamp_speed(speed) + filters = [] + while s > 2.0: + filters.append("atempo=2.0") + s /= 2.0 + while s < 0.5: + filters.append("atempo=0.5") + s /= 0.5 + filters.append(f"atempo={s:.6f}") + return ",".join(filters) + + +def _split_keep_segments_by_speed( + keep_segments: List[dict], + speed_ranges: List[dict] = None, +) -> List[dict]: + """Split keep segments by speed ranges, attaching speed multiplier per piece.""" + if not keep_segments: + return [] + + normalized_ranges = [] + for r in speed_ranges or []: + start = float(r.get("start", 0.0)) + end = float(r.get("end", 0.0)) + if end <= start: + continue + normalized_ranges.append({ + "start": start, + "end": end, + "speed": _clamp_speed(float(r.get("speed", 1.0))), + }) + normalized_ranges.sort(key=lambda x: x["start"]) + + result = [] + for keep in keep_segments: + k_start = float(keep["start"]) + k_end = float(keep["end"]) + if k_end <= k_start: + continue + + cuts = {k_start, k_end} + for sr in normalized_ranges: + overlap_start = max(k_start, sr["start"]) + overlap_end = min(k_end, sr["end"]) + if overlap_end > overlap_start: + cuts.add(overlap_start) + cuts.add(overlap_end) + + points = sorted(cuts) + for i in range(len(points) - 1): + seg_start = points[i] + seg_end = points[i + 1] + if seg_end - seg_start < 1e-6: + continue + + speed = 1.0 + for sr in normalized_ranges: + if seg_start >= sr["start"] and seg_end <= sr["end"]: + speed = sr["speed"] + break + + result.append({"start": seg_start, "end": seg_end, "speed": speed}) + + return result + + def _find_ffmpeg() -> str: """Locate ffmpeg binary.""" for cmd in ["ffmpeg", "ffmpeg.exe"]: @@ -113,6 +185,7 @@ def export_reencode( format_hint: str = "mp4", mute_ranges: List[dict] = None, gain_ranges: List[dict] = None, + speed_ranges: List[dict] = None, global_gain_db: float = 0.0, ) -> str: """ @@ -150,8 +223,11 @@ def export_reencode( has_audio_filters = bool(mute_ranges) or bool(gain_ranges) or abs(float(global_gain_db)) > 1e-6 - # Handle filtered full-timeline audio case (mute/gain/global gain) - if has_audio_filters: + speed_segments = _split_keep_segments_by_speed(keep_segments, speed_ranges) + has_speed = any(abs(seg.get("speed", 1.0) - 1.0) > 1e-6 for seg in speed_segments) + + # Handle filtered full-timeline audio case (mute/gain/global gain) when no speed warping is needed + if has_audio_filters and not has_speed: audio_filter = build_audio_filter() # Video filter - just scaling if needed @@ -189,18 +265,25 @@ def export_reencode( resolution, ) else: - # Original cutting logic + # Cutting logic with optional per-segment speed changes if not keep_segments: raise ValueError("No segments to export") - filter_parts = [] - for i, seg in enumerate(keep_segments): - filter_parts.append( - f"[0:v]trim=start={seg['start']}:end={seg['end']},setpts=PTS-STARTPTS[v{i}];" - f"[0:a]atrim=start={seg['start']}:end={seg['end']},asetpts=PTS-STARTPTS[a{i}];" - ) + segments_for_concat = speed_segments if speed_segments else _split_keep_segments_by_speed(keep_segments, None) + if not segments_for_concat: + raise ValueError("No segments to export") - n = len(keep_segments) + filter_parts = [] + for i, seg in enumerate(segments_for_concat): + speed = _clamp_speed(seg.get("speed", 1.0)) + v_chain = f"trim=start={seg['start']}:end={seg['end']},setpts=PTS-STARTPTS" + a_chain = f"atrim=start={seg['start']}:end={seg['end']},asetpts=PTS-STARTPTS" + if abs(speed - 1.0) > 1e-6: + v_chain += f",setpts=PTS/{speed:.6f}" + a_chain += f",{_build_atempo_chain(speed)}" + filter_parts.append(f"[0:v]{v_chain}[v{i}];[0:a]{a_chain}[a{i}];") + + n = len(segments_for_concat) concat_inputs = "".join(f"[v{i}][a{i}]" for i in range(n)) filter_parts.append(f"{concat_inputs}concat=n={n}:v=1:a=1[outv][outa]") @@ -228,7 +311,13 @@ def export_reencode( output_path, ] - logger.info(f"Re-encoding {n} segments -> {output_path} ({resolution})") + logger.info( + "Re-encoding %s segments (speed-adjusted=%s) -> %s (%s)", + n, + has_speed, + output_path, + resolution, + ) result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode != 0: @@ -246,6 +335,7 @@ def export_reencode_with_subs( format_hint: str = "mp4", mute_ranges: List[dict] = None, gain_ranges: List[dict] = None, + speed_ranges: List[dict] = None, global_gain_db: float = 0.0, ) -> str: """ @@ -284,8 +374,11 @@ def export_reencode_with_subs( has_audio_filters = bool(mute_ranges) or bool(gain_ranges) or abs(float(global_gain_db)) > 1e-6 - # Handle filtered full-timeline audio case (mute/gain/global gain) - if has_audio_filters: + speed_segments = _split_keep_segments_by_speed(keep_segments, speed_ranges) + has_speed = any(abs(seg.get("speed", 1.0) - 1.0) > 1e-6 for seg in speed_segments) + + # Handle filtered full-timeline audio case (mute/gain/global gain) when no speed warping is needed + if has_audio_filters and not has_speed: audio_filter = build_audio_filter() # Video filter with subtitles @@ -322,18 +415,25 @@ def export_reencode_with_subs( resolution, ) else: - # Original cutting logic with subtitles + # Cutting logic with subtitles and optional speed changes if not keep_segments: raise ValueError("No segments to export") - filter_parts = [] - for i, seg in enumerate(keep_segments): - filter_parts.append( - f"[0:v]trim=start={seg['start']}:end={seg['end']},setpts=PTS-STARTPTS[v{i}];" - f"[0:a]atrim=start={seg['start']}:end={seg['end']},asetpts=PTS-STARTPTS[a{i}];" - ) + segments_for_concat = speed_segments if speed_segments else _split_keep_segments_by_speed(keep_segments, None) + if not segments_for_concat: + raise ValueError("No segments to export") - n = len(keep_segments) + filter_parts = [] + for i, seg in enumerate(segments_for_concat): + speed = _clamp_speed(seg.get("speed", 1.0)) + v_chain = f"trim=start={seg['start']}:end={seg['end']},setpts=PTS-STARTPTS" + a_chain = f"atrim=start={seg['start']}:end={seg['end']},asetpts=PTS-STARTPTS" + if abs(speed - 1.0) > 1e-6: + v_chain += f",setpts=PTS/{speed:.6f}" + a_chain += f",{_build_atempo_chain(speed)}" + filter_parts.append(f"[0:v]{v_chain}[v{i}];[0:a]{a_chain}[a{i}];") + + n = len(segments_for_concat) concat_inputs = "".join(f"[v{i}][a{i}]" for i in range(n)) filter_parts.append(f"{concat_inputs}concat=n={n}:v=1:a=1[outv][outa]") @@ -364,7 +464,13 @@ def export_reencode_with_subs( output_path, ] - logger.info(f"Re-encoding {n} segments with subtitles -> {output_path} ({resolution})") + logger.info( + "Re-encoding %s segments with subtitles (speed-adjusted=%s) -> %s (%s)", + n, + has_speed, + output_path, + resolution, + ) result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode != 0: diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 51f6558..4d4b30a 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -21,6 +21,7 @@ import { Scissors, VolumeX, SlidersHorizontal, + Gauge, FilePlus2, RefreshCw, Grid3x3, @@ -40,6 +41,7 @@ export default function App() { cutRanges, muteRanges, gainRanges, + speedRanges, globalGainDb, silenceTrimGroups, transcriptionModel, @@ -55,14 +57,18 @@ export default function App() { addCutRange, addMuteRange, addGainRange, + addSpeedRange, } = useEditorStore(); const [activePanel, setActivePanel] = useState(null); const [whisperModel, setWhisperModel] = useState('base'); + useEffect(() => { if (transcriptionModel) setWhisperModel(transcriptionModel); }, [transcriptionModel]); const [cutMode, setCutMode] = useState(false); const [muteMode, setMuteMode] = useState(false); const [gainMode, setGainMode] = useState(false); const [gainModeDb, setGainModeDb] = useState(3); + const [speedMode, setSpeedMode] = useState(false); + const [speedModeValue, setSpeedModeValue] = useState(1.25); const [showReprocessConfirm, setShowReprocessConfirm] = useState(false); const [showUnsavedPrompt, setShowUnsavedPrompt] = useState(false); const [pendingProceedAction, setPendingProceedAction] = useState<(() => Promise) | null>(null); @@ -79,6 +85,7 @@ export default function App() { cutRanges, muteRanges, gainRanges, + speedRanges, globalGainDb, silenceTrimGroups, transcriptionModel, @@ -93,6 +100,7 @@ export default function App() { cutRanges, muteRanges, gainRanges, + speedRanges, globalGainDb, silenceTrimGroups, transcriptionModel, @@ -112,6 +120,7 @@ export default function App() { cutRanges: data.cutRanges || [], muteRanges: data.muteRanges || [], gainRanges: data.gainRanges || [], + speedRanges: data.speedRanges || [], globalGainDb: typeof data.globalGainDb === 'number' ? data.globalGainDb : 0, silenceTrimGroups: data.silenceTrimGroups || [], transcriptionModel: data.transcriptionModel ?? null, @@ -138,6 +147,7 @@ export default function App() { setCutMode(false); setMuteMode(false); setGainMode(false); + setSpeedMode(false); } }; @@ -217,6 +227,7 @@ export default function App() { setCutMode(false); setMuteMode(false); setGainMode(false); + setSpeedMode(false); }); }; @@ -315,6 +326,7 @@ export default function App() { setCutMode(!cutMode); setMuteMode(false); // Exit mute mode setGainMode(false); // Exit gain mode + setSpeedMode(false); // Exit speed mode } }; @@ -330,6 +342,7 @@ export default function App() { setMuteMode(!muteMode); setCutMode(false); // Exit cut mode setGainMode(false); // Exit gain mode + setSpeedMode(false); // Exit speed mode } }; @@ -343,6 +356,21 @@ export default function App() { setGainMode(!gainMode); setCutMode(false); setMuteMode(false); + setSpeedMode(false); + } + }; + + const handleSpeed = () => { + if (selectedWordIndices.length > 0) { + const sorted = [...selectedWordIndices].sort((a, b) => a - b); + const startTime = words[sorted[0]].start; + const endTime = words[sorted[sorted.length - 1]].end; + addSpeedRange(startTime, endTime, speedModeValue); + } else { + setSpeedMode(!speedMode); + setCutMode(false); + setMuteMode(false); + setGainMode(false); } }; @@ -413,19 +441,8 @@ export default function App() { return (
{/* Top bar */} -
-
- - - {videoPath.split(/[\\/]/).pop()} - - {transcriptionModel && ( - - Model: {transcriptionModel} - - )} -
-
+
+
} label="New" @@ -477,6 +494,24 @@ export default function App() { title="Gain dB for new gain zones" />
+
+ } + label="Speed Zone" + onClick={handleSpeed} + active={speedMode} + /> + setSpeedModeValue(Math.max(0.25, Math.min(4, Number(e.target.value) || 1)))} + className="w-16 px-1.5 py-1 text-xs bg-editor-surface border border-editor-border rounded text-editor-text focus:outline-none focus:border-editor-accent" + title="Playback rate for new speed zones" + /> +
} label="Zones" @@ -495,7 +530,7 @@ export default function App() { updateSpeedRange(range.id, Number(e.target.value) || 1)} + className="w-16 px-1.5 py-0.5 text-xs bg-editor-surface border border-editor-border rounded focus:border-editor-accent focus:outline-none" + title="Speed multiplier" + /> + +
+ ))} +
+ + )} )} diff --git a/frontend/src/store/editorStore.ts b/frontend/src/store/editorStore.ts index a1c8afb..c3de0d7 100644 --- a/frontend/src/store/editorStore.ts +++ b/frontend/src/store/editorStore.ts @@ -7,6 +7,7 @@ import type { CutRange, MuteRange, GainRange, + SpeedRange, TranscriptionResult, ProjectFile, SilenceDetectionRange, @@ -24,6 +25,7 @@ interface EditorState { cutRanges: CutRange[]; muteRanges: MuteRange[]; gainRanges: GainRange[]; + speedRanges: SpeedRange[]; globalGainDb: number; silenceTrimGroups: SilenceTrimGroup[]; transcriptionModel: string | null; @@ -63,13 +65,17 @@ interface EditorActions { addCutRange: (start: number, end: number, trimGroupId?: string) => void; addMuteRange: (start: number, end: number) => void; addGainRange: (start: number, end: number, gainDb: number) => void; + addSpeedRange: (start: number, end: number, speed: number) => void; updateCutRange: (id: string, start: number, end: number) => void; updateMuteRange: (id: string, start: number, end: number) => void; updateGainRangeBounds: (id: string, start: number, end: number) => void; updateGainRange: (id: string, gainDb: number) => void; + updateSpeedRangeBounds: (id: string, start: number, end: number) => void; + updateSpeedRange: (id: string, speed: number) => void; removeCutRange: (id: string) => void; removeMuteRange: (id: string) => void; removeGainRange: (id: string) => void; + removeSpeedRange: (id: string) => void; setGlobalGainDb: (gainDb: number) => void; applySilenceTrimGroup: (args: { groupId?: string; @@ -95,6 +101,7 @@ const initialState: EditorState = { cutRanges: [], muteRanges: [], gainRanges: [], + speedRanges: [], globalGainDb: 0, silenceTrimGroups: [], transcriptionModel: null, @@ -152,7 +159,7 @@ export const useEditorStore = create()( setTranscriptionModel: (model) => set({ transcriptionModel: model }), saveProject: (): ProjectFile => { - const { videoPath, words, segments, deletedRanges, cutRanges, muteRanges, gainRanges, globalGainDb, silenceTrimGroups, transcriptionModel, language, exportedAudioPath } = get(); + const { videoPath, words, segments, deletedRanges, cutRanges, muteRanges, gainRanges, speedRanges, globalGainDb, silenceTrimGroups, transcriptionModel, language, exportedAudioPath } = get(); if (!videoPath) throw new Error('No video loaded'); const now = new Date().toISOString(); // Strip globalStartIndex (runtime-only field) before persisting. @@ -172,6 +179,7 @@ export const useEditorStore = create()( cutRanges, muteRanges, gainRanges, + speedRanges, globalGainDb, silenceTrimGroups, language, @@ -286,6 +294,17 @@ export const useEditorStore = create()( set({ gainRanges: [...gainRanges, newRange] }); }, + addSpeedRange: (start, end, speed) => { + const { speedRanges } = get(); + const newRange: SpeedRange = { + id: `speed_${nextRangeId++}`, + start, + end, + speed: Math.max(0.25, Math.min(4, speed)), + }; + set({ speedRanges: [...speedRanges, newRange] }); + }, + updateCutRange: (id, start, end) => { const { cutRanges } = get(); set({ @@ -322,6 +341,24 @@ export const useEditorStore = create()( }); }, + updateSpeedRangeBounds: (id, start, end) => { + const { speedRanges } = get(); + set({ + speedRanges: speedRanges.map((r) => + r.id === id ? { ...r, start, end } : r + ), + }); + }, + + updateSpeedRange: (id, speed) => { + const { speedRanges } = get(); + set({ + speedRanges: speedRanges.map((r) => + r.id === id ? { ...r, speed: Math.max(0.25, Math.min(4, speed)) } : r + ), + }); + }, + removeCutRange: (id) => { const { cutRanges } = get(); set({ cutRanges: cutRanges.filter((r) => r.id !== id) }); @@ -337,6 +374,11 @@ export const useEditorStore = create()( set({ gainRanges: gainRanges.filter((r) => r.id !== id) }); }, + removeSpeedRange: (id) => { + const { speedRanges } = get(); + set({ speedRanges: speedRanges.filter((r) => r.id !== id) }); + }, + setGlobalGainDb: (gainDb) => { set({ globalGainDb: Math.max(-24, Math.min(24, gainDb)) }); }, @@ -470,6 +512,7 @@ export const useEditorStore = create()( cutRanges: data.cutRanges || [], muteRanges: data.muteRanges || [], gainRanges: data.gainRanges || [], + speedRanges: data.speedRanges || [], globalGainDb: typeof data.globalGainDb === 'number' ? data.globalGainDb : 0, silenceTrimGroups: data.silenceTrimGroups || [], transcriptionModel: data.transcriptionModel ?? null, diff --git a/frontend/src/types/project.ts b/frontend/src/types/project.ts index e9b5af4..2b42d89 100644 --- a/frontend/src/types/project.ts +++ b/frontend/src/types/project.ts @@ -40,6 +40,11 @@ export interface GainRange extends TimeRange { gainDb: number; } +export interface SpeedRange extends TimeRange { + id: string; + speed: number; +} + export interface SilenceDetectionRange extends TimeRange { duration: number; } @@ -70,6 +75,7 @@ export interface ProjectFile { cutRanges: CutRange[]; muteRanges: MuteRange[]; gainRanges?: GainRange[]; + speedRanges?: SpeedRange[]; globalGainDb?: number; silenceTrimGroups?: SilenceTrimGroup[]; language: string;