summary refs log tree commit diff
path: root/src/subtitle_extraction/whisper.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/subtitle_extraction/whisper.rs')
-rw-r--r--src/subtitle_extraction/whisper.rs143
1 files changed, 0 insertions, 143 deletions
diff --git a/src/subtitle_extraction/whisper.rs b/src/subtitle_extraction/whisper.rs
deleted file mode 100644
index ffa2e47..0000000
--- a/src/subtitle_extraction/whisper.rs
+++ /dev/null
@@ -1,143 +0,0 @@
-use std::{
-    io::{self, BufRead, BufReader},
-    net::{TcpListener, TcpStream},
-    sync::mpsc,
-};
-
-use anyhow::Context;
-use ffmpeg::{filter, frame};
-use serde::Deserialize;
-
-use crate::{subtitle_extraction::*, tracks::StreamIndex};
-
-#[derive(Debug, Deserialize)]
-struct WhisperCue {
-    start: u64,
-    end: u64,
-    text: String,
-}
-
-pub fn generate_whisper_subtitles(
-    // stream index to use when storing generated subtitles, this index
-    // already has to be in TRACKS when this function is called!
-    stream_ix: StreamIndex,
-    context: ffmpeg::codec::Context,
-    time_base: ffmpeg::Rational,
-    packet_rx: mpsc::Receiver<ffmpeg::Packet>,
-    sender: ComponentSender<SubtitleExtractor>,
-) -> anyhow::Result<()> {
-    // FFmpeg's whisper filter will send the generated subtitles to us as JSON
-    // objects over a TCP socket. This is the best solution I could find
-    // because we need to use one of the protocols in
-    // https://ffmpeg.org/ffmpeg-protocols.html, and TCP is the only one on the
-    // list which is portable and supports non-blocking IO in Rust.
-    let tcp_listener = TcpListener::bind("127.0.0.1:0")?;
-
-    let mut decoder = context
-        .decoder()
-        .audio()
-        .with_context(|| format!("error creating subtitle decoder for stream {}", stream_ix))?;
-
-    let mut filter = filter::Graph::new();
-
-    let abuffer_args = format!(
-        "time_base={}:sample_rate={}:sample_fmt={}:channel_layout=0x{:x}",
-        time_base,
-        decoder.rate(),
-        decoder.format().name(),
-        decoder.channel_layout().bits()
-    );
-
-    let whisper_args = format!(
-        "model={}:queue={}:destination=tcp\\\\://127.0.0.1\\\\:{}:format=json",
-        "/Users/malte/repos/lleap/whisper-models/ggml-large-v3.bin",
-        30,
-        tcp_listener.local_addr()?.port()
-    );
-    let filter_spec = format!("[src] whisper={} [sink]", whisper_args);
-
-    filter.add(&filter::find("abuffer").unwrap(), "src", &abuffer_args)?;
-    filter.add(&filter::find("abuffersink").unwrap(), "sink", "")?;
-    filter
-        .output("src", 0)?
-        .input("sink", 0)?
-        .parse(&filter_spec)?;
-    filter.validate()?;
-
-    let mut source_ctx = filter.get("src").unwrap();
-    let mut sink_ctx = filter.get("sink").unwrap();
-
-    let (tcp_stream, _) = tcp_listener.accept()?;
-    tcp_stream.set_nonblocking(true)?;
-
-    let mut transcript_reader = BufReader::new(tcp_stream);
-    let mut line_buf = String::new();
-
-    while let Ok(packet) = packet_rx.recv() {
-        handle_packet(
-            stream_ix,
-            &sender,
-            &mut decoder,
-            source_ctx.source(),
-            sink_ctx.sink(),
-            &mut transcript_reader,
-            &mut line_buf,
-            packet,
-        )
-        .unwrap_or_else(|e| log::error!("error handling audio packet: {}", e))
-    }
-
-    Ok(())
-}
-
-// TODO: can we do this without passing all the arguments? this is kinda ugly
-fn handle_packet(
-    stream_ix: StreamIndex,
-    sender: &ComponentSender<SubtitleExtractor>,
-    decoder: &mut ffmpeg::decoder::Audio,
-    mut source: filter::Source,
-    mut sink: filter::Sink,
-    transcript_reader: &mut BufReader<TcpStream>,
-    line_buf: &mut String,
-    packet: ffmpeg::Packet,
-) -> anyhow::Result<()> {
-    decoder.send_packet(&packet)?;
-
-    let mut decoded = frame::Audio::empty();
-    while decoder.receive_frame(&mut decoded).is_ok() {
-        source.add(&decoded)?;
-    }
-
-    let mut out_frame = frame::Audio::empty();
-    while sink.frame(&mut out_frame).is_ok() {}
-
-    line_buf.clear();
-    match transcript_reader.read_line(line_buf) {
-        Ok(_) => {
-            let whisper_cue: WhisperCue = serde_json::from_str(&line_buf)?;
-
-            let cue = SubtitleCue {
-                start: gst::ClockTime::from_mseconds(whisper_cue.start),
-                end: gst::ClockTime::from_mseconds(whisper_cue.end),
-                text: whisper_cue.text,
-            };
-
-            // TODO deduplicate this vs. the code in embedded.rs
-            SUBTITLE_TRACKS
-                .write()
-                .get_mut(&stream_ix)
-                .unwrap()
-                .cues
-                .push(cue.clone());
-            sender
-                .output(SubtitleExtractorOutput::NewCue(stream_ix, cue))
-                .unwrap();
-
-            Ok(())
-        }
-        Err(e) => match e.kind() {
-            io::ErrorKind::WouldBlock => Ok(()),
-            _ => Err(e)?,
-        },
-    }
-}