clean up of features

This commit is contained in:
2026-05-05 23:31:18 -06:00
parent 4d4dfa7f7c
commit 810957747b
7 changed files with 178 additions and 77 deletions

View File

@ -62,6 +62,42 @@ Features are grouped by priority. Check off items as they are implemented.
- [x] [#042] **Background removal** — MediaPipe Selfie Segmentation + FFmpeg frame processing for person/background separation. Configurable replacement: blur, solid color, or custom image. Applied during export. Falls back to FFmpeg colorkey when MediaPipe unavailable. (2026-05-05) - [x] [#042] **Background removal** — MediaPipe Selfie Segmentation + FFmpeg frame processing for person/background separation. Configurable replacement: blur, solid color, or custom image. Applied during export. Falls back to FFmpeg colorkey when MediaPipe unavailable. (2026-05-05)
## 🔮 Future — AI-powered editing & resource library
All AI features use the existing Ollama/OpenAI/Claude provider config — no new auth or setup needed.
- [ ] [#043] **AI Smart Clean** — one-click chain: filler removal + silence trim + noise reduction + loudness normalization in a single pass. `POST /ai/smart-clean` calls existing services sequentially.
- [ ] [#044] **AI Transcript Summarization** — generate bullet-point summary from transcript. `POST /ai/summarize`. AIPanel new tab.
- [ ] [#045] **AI Sentence Rephrase** — right-click word/sentence in transcript → "Rephrase with AI" → see 3 alternatives → click to replace. `POST /ai/rephrase`. TranscriptEditor context menu.
- [ ] [#046] **AI Smart Speed** — detect slow/low-energy sections → mark as suggested SpeedRange segments. `POST /ai/smart-speed`. Preview in AIPanel.
- [ ] [#047] **AI Auto-Chapters** — detect topic shifts in transcript → create TimelineMarkers automatically. `POST /ai/chapters`.
- [ ] [#048] **AI Show Notes** — generate title, description, soundbites, keywords from transcript + markers. `POST /ai/show-notes`. Copy to clipboard or save to file.
- [ ] [#049] **AI Find Fluff** — AI marks rambles, intros, off-topic chatter for deletion. Extends existing filler detection. `POST /ai/find-fluff`. AIPanel tab showing suggested cut ranges.
- [ ] [#050] **AI Smooth Cuts** — remove jump cuts between deleted segments using crossfade/blend during re-encode. Export option toggle.
- [ ] [#051] **AI B-roll** — generate footage from a text prompt to fill visual gaps in the timeline. Uses local SD or API. New "B-roll" section in AIPanel.
- [ ] [#052] **Smart Layouts** — auto-switch video layout between speakers based on who's talking. Detects active speaker from diarization + volume, applies crop/pad to focus on current speaker during export.
- [ ] [#053] **Per-track audio levels** — individual gain per speaker track. Extend `GainRange` model with `track_id`, apply per-stream via FFmpeg.
- [ ] [#054] **Intro/Outro templates** — save segment ranges as reusable templates, apply with one click on export.
- [ ] [#055] **Built-in free music library** — 510 CC0/royalty-free short loops shipped in `frontend/public/resources/music/`. BackgroundMusicPanel gets a "Built-in" tab with play/preview.
- [ ] [#056] **Stock media browser** — new `MediaLibraryPanel` that browses local `resources/media/` for images, video, audio with thumbnails. Frontend-only via Tauri `readDir`. Drag-to-add for bg removal images, append clips, or music.
- [ ] [#057] **Sample content downloader** — "Get Sample Video" button on empty state downloads a short public-domain test video + pre-made transcription JSON for trying the app without your own media.
---
## 💡 TalkEdit competitive advantages to lean into ## 💡 TalkEdit competitive advantages to lean into
These aren't features to build — they're things to make more visible in the UI and README: These aren't features to build — they're things to make more visible in the UI and README:

View File

@ -175,30 +175,40 @@ def _remove_with_mediapipe(
raise RuntimeError(f"MediaPipe background removal failed: {e}") raise RuntimeError(f"MediaPipe background removal failed: {e}")
def _remove_with_ffmpeg_portrait( def _remove_with_ffmpeg_portrait(
input_path: str, input_path: str,
output_path: str, output_path: str,
replacement: str = "blur", replacement: str = "blur",
replacement_value: str = "", replacement_value: str = "",
) -> str: ) -> str:
"""Fallback: use FFmpeg's colorkey + chromakey for basic background removal. """Fallback: basic FFmpeg-only background blur.
This is a crude approximation. For best results, install mediapipe + opencv-python. Uses a strong gaussian blur as a crude background replacement.
For proper person segmentation (color/image replacement), install:
pip install mediapipe opencv-python
""" """
ffmpeg = "ffmpeg" ffmpeg = "ffmpeg"
# Use a simple chromakey-based approach with a neutral background if replacement == "blur":
# This won't work well for most real videos but provides a fallback filter_complex = "gblur=sigma=30"
if replacement == "color": elif replacement == "color":
color = replacement_value or "00FF00" color = replacement_value or "00FF00"
filter_complex = f"colorkey=0x{color}:0.3:0.1,chromakey=0x{color}:0.3:0.1" filter_complex = (
elif replacement == "blur": f"split[fg][bg];"
filter_complex = "gblur=sigma=20:enable='gt(scene,0.01)'" f"[bg]colorkey=0x{color}:0.3:0.1[bg_key];"
f"[fg][bg_key]overlay"
)
elif replacement == "image" and replacement_value:
escaped = replacement_value.replace("\\", "/").replace(":", "\\:")
filter_complex = (
f"movie='{escaped}':loop=0,scale=iw:ih[bg];"
f"[0:v][bg]overlay=0:0:shortest=1"
)
else: else:
filter_complex = "null" filter_complex = "null"
if filter_complex == "null": if filter_complex == "null":
# No-op, copy input to output
cmd = [ffmpeg, "-y", "-i", input_path, "-c", "copy", output_path] cmd = [ffmpeg, "-y", "-i", input_path, "-c", "copy", output_path]
else: else:
cmd = [ cmd = [
@ -215,5 +225,8 @@ def _remove_with_ffmpeg_portrait(
if result.returncode != 0: if result.returncode != 0:
raise RuntimeError(f"FFmpeg background removal failed: {result.stderr[-500:]}") raise RuntimeError(f"FFmpeg background removal failed: {result.stderr[-500:]}")
logger.info("FFmpeg portait background removal completed -> %s", output_path) logger.warning(
"FFmpeg fallback background removal used (no MediaPipe). "
"Install 'mediapipe' and 'opencv-python' for proper person segmentation."
)
return output_path return output_path

View File

@ -45,6 +45,24 @@ def _input_has_video_stream(ffmpeg_cmd: str, input_path: str) -> bool:
return False return False
def _input_has_audio_stream(ffmpeg_cmd: str, input_path: str) -> bool:
"""Return True if the input contains at least one audio stream."""
ffprobe = ffmpeg_cmd.replace("ffmpeg", "ffprobe")
cmd = [
ffprobe,
"-v", "error",
"-select_streams", "a:0",
"-show_entries", "stream=index",
"-of", "csv=p=0",
str(input_path),
]
try:
result = subprocess.run(cmd, capture_output=True, text=True)
return result.returncode == 0 and bool(result.stdout.strip())
except Exception:
return False
def _clamp_speed(speed: float) -> float: def _clamp_speed(speed: float) -> float:
return max(0.25, min(4.0, float(speed))) return max(0.25, min(4.0, float(speed)))
@ -145,27 +163,53 @@ def mix_background_music(
) -> str: ) -> str:
"""Mix background music into a video with optional ducking. """Mix background music into a video with optional ducking.
Uses FFmpeg amix + sidechaincompress. Output is written to output_path. Uses FFmpeg amix + sidechaincompress. If the input has no audio,
the music track becomes the sole audio track. Output is written to output_path.
""" """
ffmpeg = _find_ffmpeg() ffmpeg = _find_ffmpeg()
escaped_music = music_path.replace("\\", "/").replace(":", "\\:") escaped_music = music_path.replace("\\", "/").replace(":", "\\:")
has_audio_result = _input_has_audio_stream(ffmpeg, video_path)
# Build the filter graph if not has_audio_result:
if ducking_enabled: cmd = [
ffmpeg, "-y",
"-i", video_path,
"-i", music_path,
"-map", "0:v",
"-map", "1:a",
"-c:v", "copy",
"-c:a", "aac", "-b:a", "192k",
"-shortest",
"-movflags", "+faststart",
output_path,
]
elif ducking_enabled:
music_source = f"amovie='{escaped_music}',volume={volume_db}dB[music]"
filter_complex = ( filter_complex = (
f"[0:a]asplit[main][sidechain];" f"[0:a]asplit[main][sidechain];"
f"movie='{escaped_music}':loop=0,volume={volume_db}dB[music];" f"{music_source};"
f"[main][music]amix=inputs=2:duration=first:dropout_transition=2[mixed];" f"[main][music]amix=inputs=2:duration=first:dropout_transition=2[mixed];"
f"[mixed][sidechain]sidechaincompress=" f"[mixed][sidechain]sidechaincompress="
f"threshold=-30dB:ratio=100:attack={ducking_attack_ms}ms:" f"threshold=-30dB:ratio=20:attack={ducking_attack_ms / 1000}:"
f"release={ducking_release_ms}ms:makeup=1:level_sc={ducking_db}[outa]" f"release={ducking_release_ms / 1000}:makeup=1:level_sc={ducking_db}[outa]"
) )
cmd = [
ffmpeg, "-y",
"-i", video_path,
"-filter_complex", filter_complex,
"-map", "0:v",
"-map", "[outa]",
"-c:v", "copy",
"-c:a", "aac", "-b:a", "192k",
"-shortest",
output_path,
]
else: else:
music_source = f"amovie='{escaped_music}',volume={volume_db}dB[music]"
filter_complex = ( filter_complex = (
f"movie='{escaped_music}':loop=0,volume={volume_db}dB[music];" f"{music_source};"
f"[0:a][music]amix=inputs=2:duration=first:dropout_transition=2[outa]" f"[0:a][music]amix=inputs=2:duration=first:dropout_transition=2[outa]"
) )
cmd = [ cmd = [
ffmpeg, "-y", ffmpeg, "-y",
"-i", video_path, "-i", video_path,
@ -198,20 +242,21 @@ def concat_clips(
raise ValueError("No clips to concatenate") raise ValueError("No clips to concatenate")
ffmpeg = _find_ffmpeg() ffmpeg = _find_ffmpeg()
import tempfile resolved_main = str(Path(main_path).resolve())
import os
# If output_path collides with an input, write to temp first
all_inputs = [resolved_main] + [str(Path(p).resolve()) for p in append_paths]
needs_rename = str(Path(output_path).resolve()) in all_inputs
final_output = output_path
if needs_rename:
final_output = output_path + ".concat_tmp.mp4"
temp_dir = tempfile.mkdtemp(prefix="aive_concat_") temp_dir = tempfile.mkdtemp(prefix="aive_concat_")
try: try:
segment_files = [main_path]
segment_files.extend(append_paths)
# Create concat file list
concat_file = os.path.join(temp_dir, "concat.txt") concat_file = os.path.join(temp_dir, "concat.txt")
with open(concat_file, "w") as f: with open(concat_file, "w") as f:
for path in segment_files: for path in all_inputs:
resolved = os.path.abspath(path) f.write(f"file '{path}'\n")
f.write(f"file '{resolved}'\n")
cmd = [ cmd = [
ffmpeg, "-y", ffmpeg, "-y",
@ -220,13 +265,16 @@ def concat_clips(
"-i", concat_file, "-i", concat_file,
"-c", "copy", "-c", "copy",
"-movflags", "+faststart", "-movflags", "+faststart",
output_path, final_output,
] ]
result = subprocess.run(cmd, capture_output=True, text=True) result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0: if result.returncode != 0:
raise RuntimeError(f"Clip concat failed: {result.stderr[-500:]}") raise RuntimeError(f"Clip concat failed: {result.stderr[-500:]}")
if needs_rename:
os.replace(final_output, output_path)
return output_path return output_path
finally: finally:
for f in os.listdir(temp_dir): for f in os.listdir(temp_dir):
@ -570,11 +618,9 @@ def export_reencode(
# Apply zoom post-processing if configured # Apply zoom post-processing if configured
if zoom_config and zoom_config.get("enabled") and has_video: if zoom_config and zoom_config.get("enabled") and has_video:
import tempfile as _tf
import os as _os
zoomed_path = output_path + ".zoomed.mp4" zoomed_path = output_path + ".zoomed.mp4"
_apply_zoom_post(output_path, zoomed_path, zoom_config) _apply_zoom_post(output_path, zoomed_path, zoom_config)
_os.replace(zoomed_path, output_path) os.replace(zoomed_path, output_path)
logger.info("Zoom/punch-in applied to %s (factor=%s)", output_path, zoom_config.get("zoomFactor", 1.0)) logger.info("Zoom/punch-in applied to %s (factor=%s)", output_path, zoom_config.get("zoomFactor", 1.0))
return output_path return output_path
@ -737,11 +783,9 @@ def export_reencode_with_subs(
# Apply zoom post-processing if configured # Apply zoom post-processing if configured
if zoom_config and zoom_config.get("enabled"): if zoom_config and zoom_config.get("enabled"):
import tempfile as _tf
import os as _os
zoomed_path = output_path + ".zoomed.mp4" zoomed_path = output_path + ".zoomed.mp4"
_apply_zoom_post(output_path, zoomed_path, zoom_config) _apply_zoom_post(output_path, zoomed_path, zoom_config)
_os.replace(zoomed_path, output_path) os.replace(zoomed_path, output_path)
logger.info("Zoom/punch-in applied to %s (factor=%s)", output_path, zoom_config.get("zoomFactor", 1.0)) logger.info("Zoom/punch-in applied to %s (factor=%s)", output_path, zoom_config.get("zoomFactor", 1.0))
return output_path return output_path

View File

@ -5,7 +5,12 @@ export default function AppendClipPanel() {
const { additionalClips, addAdditionalClip, removeAdditionalClip, reorderAdditionalClip, videoPath } = useEditorStore(); const { additionalClips, addAdditionalClip, removeAdditionalClip, reorderAdditionalClip, videoPath } = useEditorStore();
const handleAddClip = async () => { const handleAddClip = async () => {
const path = await window.electronAPI?.openFile(); const path = await window.electronAPI?.openFile({
filters: [
{ name: 'Video Files', extensions: ['mp4', 'mkv', 'mov', 'avi', 'webm'] },
{ name: 'All Files', extensions: ['*'] },
],
});
if (path) { if (path) {
addAdditionalClip(path); addAdditionalClip(path);
} }

View File

@ -5,7 +5,12 @@ export default function BackgroundMusicPanel() {
const { backgroundMusic, setBackgroundMusic, updateBackgroundMusic } = useEditorStore(); const { backgroundMusic, setBackgroundMusic, updateBackgroundMusic } = useEditorStore();
const handleLoadMusic = async () => { const handleLoadMusic = async () => {
const path = await window.electronAPI?.openFile(); const path = await window.electronAPI?.openFile({
filters: [
{ name: 'Audio Files', extensions: ['mp3', 'wav', 'm4a', 'flac', 'ogg', 'aac', 'wma'] },
{ name: 'All Files', extensions: ['*'] },
],
});
if (path) { if (path) {
setBackgroundMusic({ setBackgroundMusic({
path, path,

View File

@ -9,6 +9,19 @@ export default function ExportDialog() {
const hasCuts = cutRanges.length > 0; const hasCuts = cutRanges.length > 0;
// Compute set of deleted word indices from cutRanges
const getDeletedSet = useCallback(() => {
const deletedSet = new Set<number>();
for (const range of cutRanges) {
for (let i = 0; i < words.length; i++) {
if (words[i].start >= range.start && words[i].end <= range.end) {
deletedSet.add(i);
}
}
}
return deletedSet;
}, [cutRanges, words]);
// Detect if input is audio-only by its extension // Detect if input is audio-only by its extension
const audioExtensions = new Set(['.wav', '.mp3', '.flac', '.m4a', '.ogg', '.aac', '.wma']); const audioExtensions = new Set(['.wav', '.mp3', '.flac', '.m4a', '.ogg', '.aac', '.wma']);
const inputExt = videoPath ? '.' + videoPath.split('.').pop()?.toLowerCase() : ''; const inputExt = videoPath ? '.' + videoPath.split('.').pop()?.toLowerCase() : '';
@ -46,14 +59,7 @@ export default function ExportDialog() {
setIsTranscribingTranscript(true); setIsTranscribingTranscript(true);
try { try {
// Compute deleted word set // Compute deleted word set
const deletedSet = new Set<number>(); const deletedSet = getDeletedSet();
for (const range of cutRanges) {
for (let i = 0; i < words.length; i++) {
if (words[i].start >= range.start && words[i].end <= range.end) {
deletedSet.add(i);
}
}
}
// Generate content entirely on the frontend — no backend needed // Generate content entirely on the frontend — no backend needed
let content: string; let content: string;
@ -103,7 +109,7 @@ export default function ExportDialog() {
} finally { } finally {
setIsTranscribingTranscript(false); setIsTranscribingTranscript(false);
} }
}, [videoPath, words, cutRanges, transcriptFormat]); }, [videoPath, words, getDeletedSet, transcriptFormat]);
const HANDLE_EXPORT_filters = useCallback(() => { const HANDLE_EXPORT_filters = useCallback(() => {
const ext = options.format; const ext = options.format;
@ -130,14 +136,7 @@ export default function ExportDialog() {
setExportError(null); setExportError(null);
try { try {
const keepSegments = getKeepSegments(); const keepSegments = getKeepSegments();
const deletedSet = getDeletedSet();
const deletedSet = new Set<number>();
for (const range of cutRanges) {
for (let i = 0; i < words.length; i++) {
const w = words[i];
if (w.start >= range.start && w.end <= range.end) deletedSet.add(i);
}
}
// Map frontend camelCase gain/speed fields to backend snake_case // Map frontend camelCase gain/speed fields to backend snake_case
const backendGainRanges = gainRanges.map((r) => ({ const backendGainRanges = gainRanges.map((r) => ({
@ -213,7 +212,7 @@ export default function ExportDialog() {
setExportError(err instanceof Error ? err.message : 'Export failed'); setExportError(err instanceof Error ? err.message : 'Export failed');
setExporting(false); setExporting(false);
} }
}, [videoPath, options, backendUrl, setExporting, getKeepSegments, cutRanges, muteRanges, gainRanges, speedRanges, globalGainDb, words, HANDLE_EXPORT_filters, additionalClips, backgroundMusic]); }, [videoPath, options, backendUrl, setExporting, getKeepSegments, getDeletedSet, muteRanges, gainRanges, speedRanges, globalGainDb, words, HANDLE_EXPORT_filters, additionalClips, backgroundMusic]);
return ( return (
<div className="p-4 space-y-5"> <div className="p-4 space-y-5">

View File

@ -287,10 +287,9 @@ export default function WaveformTimeline({
const [showAdjustedTimeline, setShowAdjustedTimeline] = useState(false); const [showAdjustedTimeline, setShowAdjustedTimeline] = useState(false);
const sourceDuration = duration || waveformDataRef.current?.duration || 0; const sourceDuration = duration || waveformDataRef.current?.duration || 0;
const timelineCutRanges = showAdjustedTimeline ? cutRanges : [];
const { segments: timelineSegments, displayDuration } = useMemo( const { segments: timelineSegments, displayDuration } = useMemo(
() => buildTimelineSegments(sourceDuration, timelineCutRanges), () => buildTimelineSegments(sourceDuration, showAdjustedTimeline ? cutRanges : []),
[sourceDuration, timelineCutRanges], [sourceDuration, cutRanges, showAdjustedTimeline],
); );
useEffect(() => { useEffect(() => {
@ -687,7 +686,6 @@ export default function WaveformTimeline({
gainMode, gainMode,
speedMode, speedMode,
selectedZone, selectedZone,
showAdjustedTimeline,
markInTime, markInTime,
markOutTime, markOutTime,
displayDuration, displayDuration,
@ -696,6 +694,7 @@ export default function WaveformTimeline({
showGainZones, showGainZones,
showSpeedZones, showSpeedZones,
timelineSegments, timelineSegments,
timelineMarkers,
]); ]);
// Keep the ref in sync with the latest drawStaticWaveform closure // Keep the ref in sync with the latest drawStaticWaveform closure