initial commit

2023-03-26 17:31:42 -04:00
commit e3b5b090fb
51 changed files with 4222 additions and 0 deletions
--- a/LibWhisper/CaptureDevice.swift
+++ b/LibWhisper/CaptureDevice.swift
@@ -0,0 +1,43 @@
+/// Errors thrown while enumerating capture devices.
+public enum CaptureDeviceError: Error {
+    /// `SDL_Init` returned the given negative status code.
+    case sdlErrorCode(Int32)
+}
+
+/// An audio capture device as reported by SDL.
+public struct CaptureDevice {
+    /// Index of the device in SDL's capture-device list at enumeration time.
+    public let id: Int32
+    /// Device name as reported by `SDL_GetAudioDeviceName`.
+    public let name: String
+    
+    /// Creates a device description from an SDL index and a display name.
+    public init(id: Int32, name: String) {
+        self.id = id
+        self.name = name
+    }
+    
+    /// The capture devices currently known to SDL.
+    ///
+    /// Initializes the SDL audio subsystem on every access. SDL ref-counts
+    /// subsystem initialization and this getter never releases its reference.
+    ///
+    /// - Throws: `CaptureDeviceError.sdlErrorCode` when `SDL_Init` fails.
+    /// - Returns: One entry per device SDL can name; an empty array when SDL
+    ///   reports no devices or cannot determine the device list.
+    public static var devices: [CaptureDevice] {
+        get throws {
+            let result = SDL_Init(SDL_INIT_AUDIO)
+            if result < 0 {
+                throw CaptureDeviceError.sdlErrorCode(result)
+            }
+            
+            // SDL may report a negative count when it cannot build an explicit
+            // device list; `0..<count` would trap in that case.
+            let count = SDL_GetNumAudioDevices(1)
+            guard count > 0 else {
+                return []
+            }
+            
+            var devices = [CaptureDevice]()
+            devices.reserveCapacity(Int(count))
+            
+            for i in 0..<count {
+                // The name pointer may be NULL (e.g. the device list changed
+                // between the two calls); String(cString:) would trap on it.
+                guard let cName = SDL_GetAudioDeviceName(i, 1) else {
+                    continue
+                }
+                devices.append(CaptureDevice(id: i, name: String(cString: cName)))
+            }
+            
+            return devices
+        }
+    }
+}
+
+extension CaptureDevice: Equatable {
+    /// Devices compare equal when their `id`s match; `name` is not considered.
+    public static func == (lhs: CaptureDevice, rhs: CaptureDevice) -> Bool {
+        lhs.id == rhs.id
+    }
+}
+
+extension CaptureDevice: Hashable {
+    /// Hashes only `id`, matching the fields compared by `==`.
+    public func hash(into hasher: inout Hasher) {
+        id.hash(into: &hasher)
+    }
+}
--- a/LibWhisper/LibWhisper.h
+++ b/LibWhisper/LibWhisper.h
@@ -0,0 +1,15 @@
+#import <Foundation/Foundation.h>
+
+//! Project version number for LibWhisper.
+//! Declaration only; the definition is not part of this header.
+FOUNDATION_EXPORT double LibWhisperVersionNumber;
+
+//! Project version string for LibWhisper.
+//! Declaration only; the definition is not part of this header.
+FOUNDATION_EXPORT const unsigned char LibWhisperVersionString[];
+
+// SDL functions used in CaptureDevice
+//
+// Hand-written redeclarations of the few SDL symbols CaptureDevice.swift calls,
+// so that SDL.h does not have to be imported here. They must stay in sync with
+// the real SDL declarations (SDL_INIT_AUDIO and SDL_Init are also declared in
+// SDL.h with the same value / signature).
+#define SDL_INIT_AUDIO 0x00000010u
+// Returns 0 on success or a negative error code on failure (see SDL.h).
+extern int SDL_Init(uint32_t flags);
+// CaptureDevice passes 1 for `iscapture` to count capture devices.
+extern int SDL_GetNumAudioDevices(int iscapture);
+// CaptureDevice passes 1 for `iscapture`; `index` ranges over the count above.
+extern const char * SDL_GetAudioDeviceName(int index, int iscapture);
+
+#import "stream.h"
--- a/LibWhisper/SDL.h
+++ b/LibWhisper/SDL.h
@@ -0,0 +1,233 @@
+/*
+  Simple DirectMedia Layer
+  Copyright (C) 1997-2023 Sam Lantinga <slouken@libsdl.org>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+/**
+ *  \file SDL.h
+ *
+ *  Main include header for the SDL library
+ */
+
+
+#ifndef SDL_h_
+#define SDL_h_
+
+#include "SDL_main.h"
+#include "SDL_stdinc.h"
+#include "SDL_assert.h"
+#include "SDL_atomic.h"
+#include "SDL_audio.h"
+#include "SDL_clipboard.h"
+#include "SDL_cpuinfo.h"
+#include "SDL_endian.h"
+#include "SDL_error.h"
+#include "SDL_events.h"
+#include "SDL_filesystem.h"
+#include "SDL_gamecontroller.h"
+#include "SDL_guid.h"
+#include "SDL_haptic.h"
+#include "SDL_hidapi.h"
+#include "SDL_hints.h"
+#include "SDL_joystick.h"
+#include "SDL_loadso.h"
+#include "SDL_log.h"
+#include "SDL_messagebox.h"
+#include "SDL_metal.h"
+#include "SDL_mutex.h"
+#include "SDL_power.h"
+#include "SDL_render.h"
+#include "SDL_rwops.h"
+#include "SDL_sensor.h"
+#include "SDL_shape.h"
+#include "SDL_system.h"
+#include "SDL_thread.h"
+#include "SDL_timer.h"
+#include "SDL_version.h"
+#include "SDL_video.h"
+#include "SDL_locale.h"
+#include "SDL_misc.h"
+
+#include "begin_code.h"
+/* Set up for C function definitions, even when using C++ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* As of version 0.5, SDL is loaded dynamically into the application */
+
+/**
+ *  \name SDL_INIT_*
+ *
+ *  These are the flags which may be passed to SDL_Init().  You should
+ *  specify the subsystems which you will be using in your application.
+ */
+/* @{ */
+#define SDL_INIT_TIMER          0x00000001u  /**< timer subsystem */
+#define SDL_INIT_AUDIO          0x00000010u  /**< audio subsystem */
+#define SDL_INIT_VIDEO          0x00000020u  /**< SDL_INIT_VIDEO implies SDL_INIT_EVENTS */
+#define SDL_INIT_JOYSTICK       0x00000200u  /**< SDL_INIT_JOYSTICK implies SDL_INIT_EVENTS */
+#define SDL_INIT_HAPTIC         0x00001000u  /**< haptic (force feedback) subsystem */
+#define SDL_INIT_GAMECONTROLLER 0x00002000u  /**< SDL_INIT_GAMECONTROLLER implies SDL_INIT_JOYSTICK */
+#define SDL_INIT_EVENTS         0x00004000u  /**< events subsystem */
+#define SDL_INIT_SENSOR         0x00008000u  /**< sensor subsystem; part of SDL_INIT_EVERYTHING */
+#define SDL_INIT_NOPARACHUTE    0x00100000u  /**< compatibility; this flag is ignored. */
+#define SDL_INIT_EVERYTHING ( \
+                SDL_INIT_TIMER | SDL_INIT_AUDIO | SDL_INIT_VIDEO | SDL_INIT_EVENTS | \
+                SDL_INIT_JOYSTICK | SDL_INIT_HAPTIC | SDL_INIT_GAMECONTROLLER | SDL_INIT_SENSOR \
+            )
+/* @} */
+
+/**
+ * Initialize the SDL library.
+ *
+ * SDL_Init() simply forwards to calling SDL_InitSubSystem(). Therefore, the
+ * two may be used interchangeably. Though for readability of your code
+ * SDL_InitSubSystem() might be preferred.
+ *
+ * The file I/O (for example: SDL_RWFromFile) and threading (SDL_CreateThread)
+ * subsystems are initialized by default. Message boxes
+ * (SDL_ShowSimpleMessageBox) also attempt to work without initializing the
+ * video subsystem, in hopes of being useful in showing an error dialog when
+ * SDL_Init fails. You must specifically initialize other subsystems if you
+ * use them in your application.
+ *
+ * Logging (such as SDL_Log) works without initialization, too.
+ *
+ * `flags` may be any of the following OR'd together:
+ *
+ * - `SDL_INIT_TIMER`: timer subsystem
+ * - `SDL_INIT_AUDIO`: audio subsystem
+ * - `SDL_INIT_VIDEO`: video subsystem; automatically initializes the events
+ *   subsystem
+ * - `SDL_INIT_JOYSTICK`: joystick subsystem; automatically initializes the
+ *   events subsystem
+ * - `SDL_INIT_HAPTIC`: haptic (force feedback) subsystem
+ * - `SDL_INIT_GAMECONTROLLER`: controller subsystem; automatically
+ *   initializes the joystick subsystem
+ * - `SDL_INIT_EVENTS`: events subsystem
+ * - `SDL_INIT_SENSOR`: sensor subsystem
+ * - `SDL_INIT_EVERYTHING`: all of the above subsystems
+ * - `SDL_INIT_NOPARACHUTE`: compatibility; this flag is ignored
+ *
+ * Subsystem initialization is ref-counted, you must call SDL_QuitSubSystem()
+ * for each SDL_InitSubSystem() to correctly shutdown a subsystem manually (or
+ * call SDL_Quit() to force shutdown). If a subsystem is already loaded then
+ * this call will increase the ref-count and return.
+ *
+ * \param flags subsystem initialization flags
+ * \returns 0 on success or a negative error code on failure; call
+ *          SDL_GetError() for more information.
+ *
+ * \since This function is available since SDL 2.0.0.
+ *
+ * \sa SDL_InitSubSystem
+ * \sa SDL_Quit
+ * \sa SDL_SetMainReady
+ * \sa SDL_WasInit
+ */
+extern DECLSPEC int SDLCALL SDL_Init(Uint32 flags);
+
+/**
+ * Compatibility function to initialize the SDL library.
+ *
+ * In SDL2, this function and SDL_Init() are interchangeable.
+ *
+ * Subsystem initialization is ref-counted (see SDL_Init()): pair every call
+ * with SDL_QuitSubSystem(), or call SDL_Quit() to force shutdown.
+ *
+ * \param flags any of the flags used by SDL_Init(); see SDL_Init for details.
+ * \returns 0 on success or a negative error code on failure; call
+ *          SDL_GetError() for more information.
+ *
+ * \since This function is available since SDL 2.0.0.
+ *
+ * \sa SDL_Init
+ * \sa SDL_Quit
+ * \sa SDL_QuitSubSystem
+ */
+extern DECLSPEC int SDLCALL SDL_InitSubSystem(Uint32 flags);
+
+/**
+ * Shut down specific SDL subsystems.
+ *
+ * Subsystem initialization is ref-counted (see SDL_Init()), so this must be
+ * called once for each SDL_InitSubSystem() call to shut a subsystem down
+ * manually.
+ *
+ * If you start a subsystem using a call to that subsystem's init function
+ * (for example SDL_VideoInit()) instead of SDL_Init() or SDL_InitSubSystem(),
+ * SDL_QuitSubSystem() and SDL_WasInit() will not work. You will need to use
+ * that subsystem's quit function (SDL_VideoQuit()) directly instead. But
+ * generally, you should not be using those functions directly anyhow; use
+ * SDL_Init() instead.
+ *
+ * You still need to call SDL_Quit() even if you close all open subsystems
+ * with SDL_QuitSubSystem().
+ *
+ * \param flags any of the flags used by SDL_Init(); see SDL_Init for details.
+ *
+ * \since This function is available since SDL 2.0.0.
+ *
+ * \sa SDL_InitSubSystem
+ * \sa SDL_Quit
+ */
+extern DECLSPEC void SDLCALL SDL_QuitSubSystem(Uint32 flags);
+
+/**
+ * Get a mask of the specified subsystems which are currently initialized.
+ *
+ * \param flags any of the flags used by SDL_Init(); see SDL_Init for details.
+ *              Pass 0 to query all subsystems.
+ * \returns a mask of all initialized subsystems if `flags` is 0, otherwise it
+ *          returns the initialization status of the specified subsystems.
+ *
+ *          The return value does not include SDL_INIT_NOPARACHUTE.
+ *
+ * \since This function is available since SDL 2.0.0.
+ *
+ * \sa SDL_Init
+ * \sa SDL_InitSubSystem
+ */
+extern DECLSPEC Uint32 SDLCALL SDL_WasInit(Uint32 flags);
+
+/**
+ * Clean up all initialized subsystems.
+ *
+ * You should call this function even if you have already shutdown each
+ * initialized subsystem with SDL_QuitSubSystem(). It is safe to call this
+ * function even in the case of errors in initialization.
+ *
+ * This forces shutdown of subsystems whose ref-counted initialization (see
+ * SDL_Init()) has not been balanced by SDL_QuitSubSystem() calls.
+ *
+ * If you start a subsystem using a call to that subsystem's init function
+ * (for example SDL_VideoInit()) instead of SDL_Init() or SDL_InitSubSystem(),
+ * then you must use that subsystem's quit function (SDL_VideoQuit()) to shut
+ * it down before calling SDL_Quit(). But generally, you should not be using
+ * those functions directly anyhow; use SDL_Init() instead.
+ *
+ * You can use this function with atexit() to ensure that it is run when your
+ * application is shutdown, but it is not wise to do this from a library or
+ * other dynamically loaded code.
+ *
+ * \since This function is available since SDL 2.0.0.
+ *
+ * \sa SDL_Init
+ * \sa SDL_QuitSubSystem
+ */
+extern DECLSPEC void SDLCALL SDL_Quit(void);
+
+/* Ends C function definitions when using C++ */
+#ifdef __cplusplus
+}
+#endif
+#include "close_code.h"
+
+#endif /* SDL_h_ */
+
+/* vi: set ts=4 sw=4 expandtab: */
--- a/LibWhisper/WhisperStream.swift
+++ b/LibWhisper/WhisperStream.swift
@@ -0,0 +1,98 @@
+import AVFoundation
+
+/// One piece of transcribed text together with the time range it covers.
+public struct Segment {
+    /// Transcribed text; empty for a freshly appended placeholder segment.
+    let text: String
+    /// Start (`t0`) and end (`t1`) timestamps as delivered by the stream
+    /// callback. `WhisperStream` compares `t0` differences against
+    /// `window * 1000`, i.e. treats them as milliseconds; placeholder segments
+    /// carry `-1` for both.
+    let t0, t1: Int64
+}
+
+/// Segments in the order in which they were appended.
+public typealias OrderedSegments = [Segment]
+
+public extension OrderedSegments {
+    /// The text of all segments concatenated in order, with no separator.
+    var text: any StringProtocol {
+        let pieces = map(\.text)
+        return pieces.joined()
+    }
+}
+
+/// A thread that captures audio, transcribes it through the C streaming API
+/// (`stream_init` / `stream_run` / `stream_free`) and publishes the resulting
+/// segments.
+///
+/// NOTE(review): `segments` and `alive` are mutated on this thread, not on the
+/// main thread; subscribers that drive UI need to hop queues themselves.
+public class WhisperStream: Thread {
+    /// Entered in `start()` and left at the end of `main()`; backs `join()`.
+    let waiter = DispatchGroup()
+    
+    /// Segments received so far, pruned to roughly the last `window` seconds.
+    @Published public private(set) var segments = OrderedSegments()
+    /// `true` until the streaming task has finished, for whatever reason.
+    @Published public private(set) var alive = true
+    
+    let model: URL
+    let device: CaptureDevice?
+    let window: TimeInterval
+    
+    /// - Parameters:
+    ///   - model: Location of the model file; its path is handed to `stream_init`.
+    ///   - device: Capture device to record from, or `nil` to keep the
+    ///     `capture_id` from `stream_default_params()`.
+    ///   - window: How many seconds of segments to retain in `segments`.
+    public init(model: URL, device: CaptureDevice? = nil, window: TimeInterval = 300) {
+        self.model = model
+        self.device = device
+        self.window = window
+        super.init()
+    }
+    
+    public override func start() {
+        waiter.enter()
+        super.start()
+    }
+    
+    public override func main() {
+        task()
+        waiter.leave()
+    }
+    
+    /// Blocks the caller until `main()` has returned.
+    public func join() {
+        waiter.wait()
+    }
+    
+    /// Thread body: initializes the stream, pumps `stream_run` until the thread
+    /// is cancelled or an error is reported, then releases the stream.
+    func task() {
+        // Mark the stream dead on every exit path, including a failed
+        // stream_init; otherwise observers would wait on a stream that never ran.
+        defer { alive = false }
+        
+        model.path.withCString { modelCStr in
+            var params = stream_default_params()
+            params.model = modelCStr
+            
+            if let device = self.device {
+                params.capture_id = device.id
+            }
+            
+            guard let ctx = stream_init(params) else {
+                return
+            }
+            // Runs before the outer `alive = false`, matching the original order.
+            defer { stream_free(ctx) }
+            
+            while !self.isCancelled {
+                // The closure is converted to a C function pointer and therefore
+                // cannot capture `self`; it is recovered from the opaque context.
+                let status = stream_run(ctx, Unmanaged.passUnretained(self).toOpaque()) {
+                    return Unmanaged<WhisperStream>.fromOpaque($3!).takeUnretainedValue().callback(
+                        text: $0 != nil ? String(cString: $0!) : nil,
+                        t0: $1,
+                        t1: $2
+                    )
+                }
+                if status != 0 {
+                    break
+                }
+            }
+        }
+    }
+    
+    /// Receives one result from `stream_run`.
+    ///
+    /// - Parameters:
+    ///   - text: New text for the current (last) segment, or `nil` to start a
+    ///     fresh, empty segment.
+    ///   - t0: Start timestamp for the segment.
+    ///   - t1: End timestamp for the segment.
+    /// - Returns: Always `0`.
+    func callback(text: String?, t0: Int64, t1: Int64) -> Int32 {
+        // Work on a copy and assign once, so subscribers never observe the
+        // intermediate placeholder / un-pruned states.
+        var updated = segments
+        
+        if updated.isEmpty || text == nil {
+            updated.append(Segment(text: "", t0: -1, t1: -1))
+        }
+        if let text = text {
+            updated[updated.count - 1] = Segment(text: text, t0: t0, t1: t1)
+        }
+        
+        // Drop as many leading segments as there are segments starting more
+        // than `window` seconds (timestamps are compared as milliseconds)
+        // before the newest one.
+        if let last = updated.last {
+            let cutoff = Int64(window * 1000)
+            let expired = updated.filter { last.t0 - $0.t0 > cutoff }.count
+            updated.removeFirst(expired)
+        }
+        
+        segments = updated
+        return 0
+    }
+}
--- a/LibWhisper/stream.cpp
+++ b/LibWhisper/stream.cpp
@@ -0,0 +1,240 @@
+// This code is based on the streaming example provided with whisper.cpp:
+// https://github.com/ggerganov/whisper.cpp/blob/ca21f7ab16694384fb74b1ba4f68b39f16540d23/examples/stream/stream.cpp
+
+#include "common.h"
+#include "common-sdl.h"
+#include "whisper.h"
+#include "stream.h"
+
+#include <algorithm>
+#include <cassert>
+#include <chrono>
+#include <cstdio>
+#include <cstring>
+#include <fstream>
+#include <memory>
+#include <string>
+#include <thread>
+#include <vector>
+
+// Deleter that releases a whisper_context through whisper_free().
+struct whisper_context_deleter {
+    void operator()(whisper_context *p) const { whisper_free(p); }
+};
+
+// Owning handle for a whisper_context.
+using unique_whisper = std::unique_ptr<whisper_context, whisper_context_deleter>;
+
+// State carried between stream_init(), stream_run() and stream_free().
+struct stream_context {
+    using clock_type = std::chrono::high_resolution_clock;
+
+    stream_params params;                       // parameters as adjusted by stream_init()
+    std::unique_ptr<audio_async> audio;         // audio capture source
+    unique_whisper whisper;                     // loaded whisper model/context
+    std::vector<float> pcmf32;                  // samples passed to whisper_full()
+    std::vector<float> pcmf32_old;              // samples carried over from the previous iteration
+    std::vector<float> pcmf32_new;              // samples fetched during the current iteration
+    std::vector<whisper_token> prompt_tokens;   // tokens of the last full-length segment (prompt)
+    std::chrono::time_point<clock_type> t_last; // time of the last processed VAD window
+    std::chrono::time_point<clock_type> t_start; // time stream_init() completed
+    int n_samples_step;                         // step_ms expressed in samples
+    int n_samples_len;                          // length_ms expressed in samples
+    int n_samples_keep;                         // keep_ms expressed in samples
+    bool use_vad;                               // true when n_samples_step <= 0 (sliding window mode)
+    int n_new_line;                             // number of steps between "new line" callbacks
+    int n_iter = 0;                             // completed inference iterations
+};
+
+// Returns the default streaming parameters. `language` and `model` point to
+// string literals; callers typically override `model` (and `capture_id`).
+struct stream_params stream_default_params() {
+    // hardware_concurrency() returns 0 when the value cannot be determined;
+    // clamp to [1, 4] so a zero thread count is never produced.
+    const int32_t hw_threads = (int32_t) std::thread::hardware_concurrency();
+    const int32_t n_threads = std::max(1, std::min(4, hw_threads));
+
+    return stream_params {
+        /* .n_threads     =*/ n_threads,
+        /* .step_ms       =*/ 3000,
+        /* .length_ms     =*/ 10000,
+        /* .keep_ms       =*/ 200,
+        /* .capture_id    =*/ -1,
+        /* .max_tokens    =*/ 32,
+        /* .audio_ctx     =*/ 0,
+
+        /* .vad_thold     =*/ 0.6f,
+        /* .freq_thold    =*/ 100.0f,
+
+        /* .speed_up      =*/ false,
+        /* .translate     =*/ false,
+        /* .print_special =*/ false,
+        /* .no_context    =*/ true,
+        /* .no_timestamps =*/ false,
+
+        /* .language      =*/ "en",
+        /* .model         =*/ "models/ggml-base.en.bin"
+    };
+}
+
+// Creates a streaming context: normalizes `params`, opens and resumes audio
+// capture, validates the language and loads the whisper model.
+//
+// The `language` and `model` pointers are stored as-is in the context, so the
+// strings must stay valid for as long as the context is used.
+//
+// Returns a context to be released with stream_free(), or nullptr on failure.
+stream_context *stream_init(stream_params params) {
+    // Both strings are passed to whisper_lang_id()/whisper_init_from_file()
+    // below; reject NULL up front instead of handing it on.
+    if (params.language == nullptr || params.model == nullptr) {
+        fprintf(stderr, "%s: language and model must not be NULL\n", __func__);
+        return nullptr;
+    }
+
+    auto ctx = std::make_unique<stream_context>();
+
+    params.keep_ms = std::min(params.keep_ms, params.step_ms);
+    params.length_ms = std::max(params.length_ms, params.step_ms);
+
+    ctx->n_samples_step = (1e-3 * params.step_ms) * WHISPER_SAMPLE_RATE;
+    ctx->n_samples_len = (1e-3 * params.length_ms) * WHISPER_SAMPLE_RATE;
+    ctx->n_samples_keep = (1e-3 * params.keep_ms) * WHISPER_SAMPLE_RATE;
+    const int n_samples_30s = (1e-3 * 30000.0) * WHISPER_SAMPLE_RATE;
+
+    ctx->use_vad = ctx->n_samples_step <= 0; // sliding window mode uses VAD
+
+    ctx->n_new_line = !ctx->use_vad ? std::max(1, params.length_ms / params.step_ms - 1) : 1; // number of steps to print new line
+
+    // These caller-supplied values are overridden unconditionally.
+    params.no_timestamps = !ctx->use_vad;
+    params.no_context |= ctx->use_vad;
+    params.max_tokens = 0;
+
+    // init audio
+    ctx->audio = std::make_unique<audio_async>(params.length_ms);
+    if (!ctx->audio->init(params.capture_id, WHISPER_SAMPLE_RATE)) {
+        fprintf(stderr, "%s: audio.init() failed!\n", __func__);
+        return nullptr;
+    }
+
+    ctx->audio->resume();
+
+    // whisper init
+    if (whisper_lang_id(params.language) == -1) {
+        fprintf(stderr, "%s: unknown language '%s'\n", __func__, params.language);
+        return nullptr;
+    }
+
+    ctx->whisper.reset(whisper_init_from_file(params.model));
+    if (!ctx->whisper) {
+        fprintf(stderr, "%s: failed to load model '%s'\n", __func__, params.model);
+        return nullptr;
+    }
+
+    ctx->pcmf32 = std::vector<float>(n_samples_30s, 0.0f);
+    ctx->pcmf32_new = std::vector<float>(n_samples_30s, 0.0f);
+
+    ctx->t_last = std::chrono::high_resolution_clock::now();
+    ctx->t_start = ctx->t_last;
+
+    ctx->params = params;
+
+    // Ownership passes to the caller; stream_free() releases it.
+    return ctx.release();
+}
+
+// Releases a context returned by stream_init(). Passing nullptr is a no-op.
+// The context must not be used after this call.
+void stream_free(stream_context *ctx) {
+    if (ctx == nullptr) {
+        return;
+    }
+
+    // Release audio before the whisper context, as before.
+    ctx->audio.reset();
+    ctx->whisper.reset();
+
+    // stream_init() hands out ownership via release(); without this delete
+    // the context object itself (and its buffers' storage) would leak.
+    delete ctx;
+}
+
+// Runs one iteration of the streaming loop.
+//
+// Fixed-step mode (use_vad == false): blocks until a full step of new audio
+// is available, runs inference and reports each segment through `callback`.
+// Every n_new_line iterations it additionally calls `callback` with a NULL
+// text to mark the start of a new line.
+//
+// VAD mode (use_vad == true): returns 0 without doing any work unless at
+// least 2 s have passed since the last processed window and vad_simple()
+// accepts the most recent audio.
+//
+// The callback's return value is ignored.
+//
+// Returns 0 on success (including "nothing to do"), 6 if whisper_full() fails.
+int stream_run(stream_context *ctx, void *callback_ctx, stream_callback_t callback) {
+    const stream_params &params = ctx->params;
+    auto whisper = ctx->whisper.get();
+
+    const auto t_now = std::chrono::high_resolution_clock::now();
+
+    if (!ctx->use_vad) {
+        // Poll until at least one step worth of samples has been captured.
+        while (true) {
+            ctx->audio->get(params.step_ms, ctx->pcmf32_new);
+
+            if ((int)ctx->pcmf32_new.size() > 2 * ctx->n_samples_step) {
+                fprintf(stderr, "\n\n%s: WARNING: cannot process audio fast enough, dropping audio ...\n\n", __func__);
+                ctx->audio->clear();
+                continue;
+            }
+
+            if ((int)ctx->pcmf32_new.size() >= ctx->n_samples_step) {
+                ctx->audio->clear();
+                break;
+            }
+
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+        }
+
+        const int n_samples_new = ctx->pcmf32_new.size();
+
+        // take up to params.length_ms audio from previous iteration
+        const int n_samples_take = std::min((int)ctx->pcmf32_old.size(), std::max(0, ctx->n_samples_keep + ctx->n_samples_len - n_samples_new));
+
+        // pcmf32 = [tail of previous audio | new audio]
+        ctx->pcmf32.resize(n_samples_new + n_samples_take);
+        std::copy(ctx->pcmf32_old.end() - n_samples_take, ctx->pcmf32_old.end(), ctx->pcmf32.begin());
+        std::copy(ctx->pcmf32_new.begin(), ctx->pcmf32_new.end(), ctx->pcmf32.begin() + n_samples_take);
+
+        ctx->pcmf32_old = ctx->pcmf32;
+    } else {
+        const auto t_diff = std::chrono::duration_cast<std::chrono::milliseconds>(t_now - ctx->t_last).count();
+        if (t_diff < 2000) {
+            std::this_thread::sleep_for(std::chrono::milliseconds(100));
+            return 0;
+        }
+
+        // process new audio
+        ctx->audio->get(2000, ctx->pcmf32_new);
+
+        if (::vad_simple(ctx->pcmf32_new, WHISPER_SAMPLE_RATE, 1000, params.vad_thold, params.freq_thold, false)) {
+            ctx->audio->get(params.length_ms, ctx->pcmf32);
+        } else {
+            std::this_thread::sleep_for(std::chrono::milliseconds(100));
+            return 0;
+        }
+
+        ctx->t_last = t_now;
+    }
+
+    // run the inference
+    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
+
+    wparams.print_progress = false;
+    wparams.print_special = params.print_special;
+    wparams.print_realtime = false;
+    wparams.print_timestamps = !params.no_timestamps;
+    wparams.translate = params.translate;
+    wparams.no_context = true;
+    wparams.single_segment = !ctx->use_vad;
+    wparams.max_tokens = params.max_tokens;
+    wparams.language = params.language;
+    wparams.n_threads = params.n_threads;
+
+    wparams.audio_ctx = params.audio_ctx;
+    wparams.speed_up = params.speed_up;
+
+    // disable temperature fallback
+    wparams.temperature_inc = -1.0f;
+
+    wparams.prompt_tokens = params.no_context ? nullptr : ctx->prompt_tokens.data();
+    wparams.prompt_n_tokens = params.no_context ? 0 : ctx->prompt_tokens.size();
+
+    // Milliseconds since stream_init(), measured at the start of this call.
+    // duration_cast makes the unit explicit instead of assuming that the
+    // clock's native tick is one nanosecond.
+    const int64_t t1 = std::chrono::duration_cast<std::chrono::milliseconds>(t_now - ctx->t_start).count();
+    const int64_t t0 = std::max(0.0, t1 - ctx->pcmf32.size() * 1000.0 / WHISPER_SAMPLE_RATE);
+
+    if (whisper_full(whisper, wparams, ctx->pcmf32.data(), ctx->pcmf32.size()) != 0) {
+        fprintf(stderr, "%s: failed to process audio\n", __func__);
+        return 6;
+    }
+
+    const int n_segments = whisper_full_n_segments(whisper);
+    for (int i = 0; i < n_segments; ++i) {
+        const char *text = whisper_full_get_segment_text(whisper, i);
+
+        const int64_t segment_t0 = whisper_full_get_segment_t0(whisper, i);
+        const int64_t segment_t1 = whisper_full_get_segment_t1(whisper, i);
+
+        // NOTE(review): in VAD mode the timestamps come straight from whisper
+        // and may use a different unit/origin than the millisecond wall-clock
+        // values used otherwise - confirm against whisper.h.
+        callback(text, ctx->use_vad ? segment_t0 : t0, ctx->use_vad ? segment_t1 : t1, callback_ctx);
+    }
+
+    ++ctx->n_iter;
+
+    if (!ctx->use_vad && (ctx->n_iter % ctx->n_new_line) == 0) {
+        // NULL text tells the consumer to start a new line/segment.
+        callback(NULL, 0, 0, callback_ctx);
+
+        // keep part of the audio for next iteration to try to mitigate word boundary issues
+        ctx->pcmf32_old = std::vector<float>(ctx->pcmf32.end() - ctx->n_samples_keep, ctx->pcmf32.end());
+
+        // Add tokens of the last full length segment as the prompt
+        if (!params.no_context) {
+            ctx->prompt_tokens.clear();
+
+            for (int i = 0; i < n_segments; ++i) {
+                const int token_count = whisper_full_n_tokens(whisper, i);
+                for (int j = 0; j < token_count; ++j) {
+                    ctx->prompt_tokens.push_back(whisper_full_get_token_id(whisper, i, j));
+                }
+            }
+        }
+    }
+
+    return 0;
+}
--- a/LibWhisper/stream.h
+++ b/LibWhisper/stream.h
@@ -0,0 +1,42 @@
+#ifndef LIBWHISPER_STREAM_H
+#define LIBWHISPER_STREAM_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Parameters for a transcription stream; start from stream_default_params(). */
+typedef struct stream_params {
+    int32_t n_threads;   /* threads used for inference */
+    int32_t step_ms;     /* audio step per iteration; <= 0 selects VAD (sliding window) mode */
+    int32_t length_ms;   /* audio window length; raised to at least step_ms by stream_init() */
+    int32_t keep_ms;     /* audio kept between lines; limited to step_ms by stream_init() */
+    int32_t capture_id;  /* capture device id passed to the audio backend */
+    int32_t max_tokens;  /* NOTE: overwritten with 0 by stream_init() */
+    int32_t audio_ctx;   /* forwarded to whisper's audio_ctx */
+
+    float vad_thold;     /* VAD threshold passed to vad_simple() */
+    float freq_thold;    /* frequency threshold passed to vad_simple() */
+
+    bool speed_up;       /* forwarded to whisper's speed_up */
+    bool translate;      /* forwarded to whisper's translate */
+    bool print_special;  /* forwarded to whisper's print_special */
+    bool no_context;     /* when false, previous tokens are fed back as prompt */
+    bool no_timestamps;  /* NOTE: overwritten by stream_init() */
+
+    const char *language; /* must stay valid for the lifetime of the context */
+    const char *model;    /* model file path; must stay valid for the lifetime of the context */
+} stream_params_t;
+
+/* Returns the default parameters. */
+stream_params_t stream_default_params(void);
+
+/* Opaque handle to a stream created by stream_init(). */
+typedef struct stream_context *stream_context_t;
+
+/* Creates a stream; returns NULL on failure. Release with stream_free(). */
+stream_context_t stream_init(stream_params_t params);
+
+/* Releases the resources held by ctx; ctx must not be used afterwards. */
+void stream_free(stream_context_t ctx);
+
+/*
+ * Result callback. `text` is the transcribed text, or NULL to signal the start
+ * of a new line; `t0`/`t1` are the associated timestamps; `ctx` is the
+ * callback_ctx given to stream_run(). The return value is currently not
+ * inspected by stream_run().
+ */
+typedef int (*stream_callback_t) (const char *text, int64_t t0, int64_t t1, void *ctx);
+
+/*
+ * Runs one iteration of the stream, invoking `callback` for each result.
+ * Returns 0 on success (including when there was nothing to process) and a
+ * non-zero value if inference failed.
+ */
+int stream_run(stream_context_t ctx, void *callback_ctx, stream_callback_t callback);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBWHISPER_STREAM_H */