~zethra/deepstreamer

ref: c69963fb9065024995f16d69c840019b7d3f5699 deepstreamer/src/sync.rs -rw-r--r-- 3.2 KiB
c69963fb — zethra Set release opti stuff 9 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
use gstreamer as gst;
use gstreamer_base as gst_base;

use crossbeam_channel::{unbounded, Sender};
use deepspeech::Model;
use gst::message::Message;
use gst::structure::Structure;
use gst_base::prelude::*;
use gst_base::BaseTransform;
use serde::Serialize;
use std::path::Path;

/// Messages accepted by the DeepSpeech worker thread (see `deepspeech_thread`).
pub enum Msg {
    /// A chunk of 16-bit PCM samples to feed into the current recognition stream.
    Audio(Vec<i16>),
    /// End of the current utterance: finish the stream and post the
    /// transcription result as an element message via the given element.
    Done(BaseTransform),
}

/// One transcription hypothesis, serialized to JSON inside the
/// "deepspeech" element message payload.
#[derive(Debug, Serialize)]
struct Candidate {
    /// Transcript text of this hypothesis.
    text: String,
    /// DeepSpeech confidence score for this hypothesis.
    confidence: f64,
}

/// JSON-serializable payload carried in the "deepspeech" element message.
#[derive(Debug, Serialize)]
enum Output {
    // Intermediate(String),
    /// Final set of candidate transcriptions for a finished utterance.
    Final(Vec<Candidate>),
}

pub type DSChan = Sender<Msg>;

// TODO error handling
pub fn deepspeech_thread() -> DSChan {
    let (s, r) = unbounded();
    std::thread::spawn(move || {
        println!("\n\n{:?}\n\n", std::env::var("DS_MODELS_DIR"));
        let model_env =
            std::env::var("DS_MODEL_PATH").expect("Invalid model path");
        let mut model = Model::load_from_files(Path::new(&model_env)).unwrap();
        if let Ok(scorer_env) = std::env::var("DS_SCORER_PATH") {
            model.enable_external_scorer(Path::new(&scorer_env));
        }

        'main: loop {
            let mut stream = model.create_stream().unwrap();
            loop {
                match r.recv() {
                    Ok(msg) => match msg {
                        Msg::Audio(buf) => {
                            stream.feed_audio(&buf);
                            // let text = stream.intermediate_decode().unwrap();
                            // if !text.is_empty() {
                            //     let text = serde_json::to_string_pretty(
                            //         &Output::Intermediate(text),
                            //     )
                            //     .unwrap();

                            // }
                        }
                        Msg::Done(element) => {
                            let meta = stream.finish_with_metadata(5).unwrap();
                            let trans = meta.transcripts();
                            // let text = stream.finish().unwrap();
                            if !trans.is_empty() {
                                let candidates = trans
                                    .iter()
                                    .map(|v| Candidate {
                                        text: v.to_string(),
                                        confidence: v.confidence(),
                                    })
                                    .collect();
                                let text = serde_json::to_string_pretty(
                                    &Output::Final(candidates),
                                )
                                .unwrap();
                                let msg = Message::new_element(
                                    Structure::builder("deepspeech")
                                        .field("text", &text)
                                        .build(),
                                )
                                .build();
                                element.post_message(&msg).unwrap();
                            }
                            break;
                        }
                    },
                    Err(_) => {
                        break 'main;
                    }
                }
            }
        }
    });
    s
}