""" Subtitle export utilities for the OBS Recording Transcriber. Supports exporting transcripts to SRT, ASS, and WebVTT subtitle formats. """ from pathlib import Path import re from datetime import timedelta import gzip import zipfile import logging # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def format_timestamp_srt(timestamp_ms): """ Format a timestamp in milliseconds to SRT format (HH:MM:SS,mmm). Args: timestamp_ms (int): Timestamp in milliseconds Returns: str: Formatted timestamp string """ hours, remainder = divmod(timestamp_ms, 3600000) minutes, remainder = divmod(remainder, 60000) seconds, milliseconds = divmod(remainder, 1000) return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d},{int(milliseconds):03d}" def format_timestamp_ass(timestamp_ms): """ Format a timestamp in milliseconds to ASS format (H:MM:SS.cc). Args: timestamp_ms (int): Timestamp in milliseconds Returns: str: Formatted timestamp string """ hours, remainder = divmod(timestamp_ms, 3600000) minutes, remainder = divmod(remainder, 60000) seconds, remainder = divmod(remainder, 1000) centiseconds = remainder // 10 return f"{int(hours)}:{int(minutes):02d}:{int(seconds):02d}.{int(centiseconds):02d}" def format_timestamp_vtt(timestamp_ms): """ Format a timestamp in milliseconds to WebVTT format (HH:MM:SS.mmm). Args: timestamp_ms (int): Timestamp in milliseconds Returns: str: Formatted timestamp string """ hours, remainder = divmod(timestamp_ms, 3600000) minutes, remainder = divmod(remainder, 60000) seconds, milliseconds = divmod(remainder, 1000) return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}.{int(milliseconds):03d}" def export_to_srt(segments, output_path): """ Export transcript segments to SRT format. Args: segments (list): List of transcript segments with start, end, and text output_path (Path): Path to save the SRT file Returns: Path: Path to the saved SRT file """ with open(output_path, 'w', encoding='utf-8') as f: for i, segment in enumerate(segments, 1): start_time = format_timestamp_srt(int(segment['start'] * 1000)) end_time = format_timestamp_srt(int(segment['end'] * 1000)) f.write(f"{i}\n") f.write(f"{start_time} --> {end_time}\n") f.write(f"{segment['text'].strip()}\n\n") return output_path def export_to_ass(segments, output_path, video_width=1920, video_height=1080, style=None): """ Export transcript segments to ASS format with styling. Args: segments (list): List of transcript segments with start, end, and text output_path (Path): Path to save the ASS file video_width (int): Width of the video in pixels video_height (int): Height of the video in pixels style (dict, optional): Custom style parameters Returns: Path: Path to the saved ASS file """ # Default style default_style = { "fontname": "Arial", "fontsize": "48", "primary_color": "&H00FFFFFF", # White "secondary_color": "&H000000FF", # Blue "outline_color": "&H00000000", # Black "back_color": "&H80000000", # Semi-transparent black "bold": "-1", # True "italic": "0", # False "alignment": "2", # Bottom center } # Apply custom style if provided if style: default_style.update(style) # ASS header template ass_header = f"""[Script Info] Title: Transcription ScriptType: v4.00+ WrapStyle: 0 PlayResX: {video_width} PlayResY: {video_height} ScaledBorderAndShadow: yes [V4+ Styles] Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding Style: Default,{default_style['fontname']},{default_style['fontsize']},{default_style['primary_color']},{default_style['secondary_color']},{default_style['outline_color']},{default_style['back_color']},{default_style['bold']},{default_style['italic']},0,0,100,100,0,0,1,2,2,{default_style['alignment']},10,10,10,1 [Events] Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text """ with open(output_path, 'w', encoding='utf-8') as f: f.write(ass_header) for segment in segments: start_time = format_timestamp_ass(int(segment['start'] * 1000)) end_time = format_timestamp_ass(int(segment['end'] * 1000)) text = segment['text'].strip().replace('\n', '\\N') f.write(f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{text}\n") return output_path def export_to_vtt(segments, output_path): """ Export transcript segments to WebVTT format. Args: segments (list): List of transcript segments with start, end, and text output_path (Path): Path to save the WebVTT file Returns: Path: Path to the saved WebVTT file """ with open(output_path, 'w', encoding='utf-8') as f: # WebVTT header f.write("WEBVTT\n\n") for i, segment in enumerate(segments, 1): start_time = format_timestamp_vtt(int(segment['start'] * 1000)) end_time = format_timestamp_vtt(int(segment['end'] * 1000)) # Optional cue identifier f.write(f"{i}\n") f.write(f"{start_time} --> {end_time}\n") f.write(f"{segment['text'].strip()}\n\n") return output_path def transcript_to_segments(transcript, segment_duration=5.0): """ Convert a plain transcript to timed segments for subtitle export. Used when the original segments are not available. Args: transcript (str): Full transcript text segment_duration (float): Duration of each segment in seconds Returns: list: List of segments with start, end, and text """ # Split transcript into sentences sentences = re.split(r'(?<=[.!?])\s+', transcript) segments = [] current_time = 0.0 for sentence in sentences: if not sentence.strip(): continue # Estimate duration based on word count (approx. 2.5 words per second) word_count = len(sentence.split()) duration = max(2.0, word_count / 2.5) segments.append({ 'start': current_time, 'end': current_time + duration, 'text': sentence }) current_time += duration return segments def compress_file(input_path, compression_type='gzip'): """ Compress a file using the specified compression method. Args: input_path (Path): Path to the file to compress compression_type (str): Type of compression ('gzip' or 'zip') Returns: Path: Path to the compressed file """ input_path = Path(input_path) if compression_type == 'gzip': output_path = input_path.with_suffix(input_path.suffix + '.gz') with open(input_path, 'rb') as f_in: with gzip.open(output_path, 'wb') as f_out: f_out.write(f_in.read()) return output_path elif compression_type == 'zip': output_path = input_path.with_suffix('.zip') with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf: zipf.write(input_path, arcname=input_path.name) return output_path else: logger.warning(f"Unsupported compression type: {compression_type}") return input_path def export_transcript(transcript, output_path, format_type='srt', segments=None, compress=False, compression_type='gzip', style=None): """ Export transcript to the specified subtitle format. Args: transcript (str): Full transcript text output_path (Path): Base path for the output file (without extension) format_type (str): 'srt', 'ass', or 'vtt' segments (list, optional): List of transcript segments with timing information compress (bool): Whether to compress the output file compression_type (str): Type of compression ('gzip' or 'zip') style (dict, optional): Custom style parameters for ASS format Returns: Path: Path to the saved subtitle file """ output_path = Path(output_path) # If segments are not provided, create them from the transcript if segments is None: segments = transcript_to_segments(transcript) if format_type.lower() == 'srt': output_file = output_path.with_suffix('.srt') result_path = export_to_srt(segments, output_file) elif format_type.lower() == 'ass': output_file = output_path.with_suffix('.ass') result_path = export_to_ass(segments, output_file, style=style) elif format_type.lower() == 'vtt': output_file = output_path.with_suffix('.vtt') result_path = export_to_vtt(segments, output_file) else: raise ValueError(f"Unsupported format type: {format_type}. Use 'srt', 'ass', or 'vtt'.") # Compress the file if requested if compress: result_path = compress_file(result_path, compression_type) return result_path