|
| 1 | +package main |
| 2 | + |
| 3 | +/* |
| 4 | +#cgo CFLAGS: -I${SRCDIR}/../../include |
| 5 | +
|
| 6 | +// macOS (Darwin) - Universal Framework (assuming it supports both amd64 and arm64) |
| 7 | +#cgo darwin CFLAGS: -I${SRCDIR}/../../lib/macOS/ten_vad.framework/Versions/A/Headers |
| 8 | +#cgo darwin LDFLAGS: -F${SRCDIR}/../../lib/macOS -framework ten_vad -Wl,-rpath,${SRCDIR}/../../lib/macOS |
| 9 | +
|
| 10 | +// Linux AMD64 |
| 11 | +#cgo linux,amd64 LDFLAGS: -L${SRCDIR}/../../lib/Linux/amd64 -lten_vad -Wl,-rpath,'$ORIGIN'/../../lib/Linux/amd64 |
| 12 | +
|
| 13 | +// Linux ARM64 |
| 14 | +#cgo linux,arm64 LDFLAGS: -L${SRCDIR}/../../lib/Linux/arm64 -lten_vad -Wl,-rpath,'$ORIGIN'/../../lib/Linux/arm64 |
| 15 | +
|
| 16 | +// Windows AMD64 |
| 17 | +// For Windows, the .dll needs to be in the PATH or alongside the .exe at runtime. |
| 18 | +// The .lib file is used for linking. |
| 19 | +#cgo windows,amd64 LDFLAGS: -L${SRCDIR}/../../lib/Windows/amd64 -lten_vad |
| 20 | +
|
| 21 | +#include "ten_vad.h" |
| 22 | +#include <stdlib.h> // Required for C.free if ever used directly for strings (not in this API but good practice) |
| 23 | +// Explicitly include headers that define C types we will use, like size_t |
| 24 | +#include <stddef.h> |
| 25 | +#include <stdint.h> |
| 26 | +*/ |
| 27 | +import "C" |
| 28 | +import ( |
| 29 | + "fmt" |
| 30 | + "runtime" |
| 31 | + "unsafe" |
| 32 | +) |
| 33 | + |
// VadMode identifies a VAD aggressiveness level.
type VadMode int

// VadMode values, numbered 0 through 3 in declaration order.
const (
	// VadModeNormal is the normal mode (0).
	VadModeNormal VadMode = iota
	// VadModeLowBitrate is optimized for low bitrate (1).
	VadModeLowBitrate
	// VadModeAggressive is the aggressive mode (2).
	VadModeAggressive
	// VadModeVeryAggressive is the most aggressive mode (3).
	VadModeVeryAggressive
)
| 47 | + |
// VadError is the error type used by this TenVAD wrapper. It pairs a numeric
// code with a human-readable message.
type VadError struct {
	// Code is the numeric error code.
	Code int
	// Message is the human-readable description of the failure.
	Message string
}

// Error implements the error interface. The result has the form
// "ten_vad error (code <Code>): <Message>".
func (e *VadError) Error() string {
	const layout = "ten_vad error (code %d): %s"
	return fmt.Sprintf(layout, e.Code, e.Message)
}
| 57 | + |
| 58 | +var ( |
| 59 | + ErrVadInitFailed = &VadError{Code: -1, Message: "Initialization failed"} |
| 60 | + ErrVadInvalidSampleRate = &VadError{Code: -2, Message: "Invalid sample rate (must be 8000, 16000, 32000, or 48000 Hz)"} |
| 61 | + ErrVadInvalidFrameLength = &VadError{Code: -3, Message: "Invalid frame length (must be 10, 20, or 30 ms)"} |
| 62 | + ErrVadInvalidMode = &VadError{Code: -4, Message: "Invalid mode"} |
| 63 | + ErrVadUninitialized = &VadError{Code: -5, Message: "VAD instance is uninitialized or already closed"} |
| 64 | + ErrVadProcessError = &VadError{Code: -6, Message: "Error during processing"} |
| 65 | + ErrVadInvalidParameter = &VadError{Code: -7, Message: "Invalid parameter for set operations"} |
| 66 | + ErrVadInternalError = &VadError{Code: -100, Message: "Unknown internal error during processing"} |
| 67 | +) |
| 68 | + |
| 69 | +func mapErrorCodeToError(code C.int) error { |
| 70 | + switch int(code) { |
| 71 | + case 0: // Success for some operations or non-error state for process |
| 72 | + return nil |
| 73 | + case 1: // Speech detected (not an error for process) |
| 74 | + return nil |
| 75 | + case -1: |
| 76 | + return ErrVadInitFailed |
| 77 | + case -2: |
| 78 | + return ErrVadInvalidSampleRate |
| 79 | + case -3: |
| 80 | + return ErrVadInvalidFrameLength |
| 81 | + case -4: |
| 82 | + return ErrVadInvalidMode |
| 83 | + case -5: |
| 84 | + return ErrVadUninitialized // Or a more specific error if available from C context |
| 85 | + case -6: |
| 86 | + return ErrVadProcessError |
| 87 | + case -7: |
| 88 | + return ErrVadInvalidParameter |
| 89 | + default: |
| 90 | + if code < 0 { |
| 91 | + return &VadError{Code: int(code), Message: fmt.Sprintf("Unknown C VAD error code: %d", code)} |
| 92 | + } |
| 93 | + return nil // Non-negative codes (like 0 or 1 from process) are not errors |
| 94 | + } |
| 95 | +} |
| 96 | + |
| 97 | +// Vad represents a Voice Activity Detection instance. |
| 98 | +type Vad struct { |
| 99 | + instance C.ten_vad_handle_t |
| 100 | + hopSize int // Number of samples per frame, consistent with ten_vad_create hop_size |
| 101 | +} |
| 102 | + |
| 103 | +// NewVad creates and initializes a new VAD instance. |
| 104 | +// hopSize: The number of samples between the start points of two consecutive analysis frames (e.g., 256). |
| 105 | +// threshold: VAD detection threshold ranging from [0.0, 1.0]. |
| 106 | +func NewVad(hopSize int, threshold float32) (*Vad, error) { |
| 107 | + var inst C.ten_vad_handle_t |
| 108 | + |
| 109 | + cHopSize := C.size_t(hopSize) |
| 110 | + cThreshold := C.float(threshold) |
| 111 | + |
| 112 | + if !(threshold >= 0.0 && threshold <= 1.0) { |
| 113 | + return nil, ErrVadInvalidParameter // Or a more specific error for threshold |
| 114 | + } |
| 115 | + // Basic validation for hopSize, e.g., must be positive |
| 116 | + if hopSize <= 0 { |
| 117 | + return nil, ErrVadInvalidParameter // Or a specific error for hopSize |
| 118 | + } |
| 119 | + |
| 120 | + ret := C.ten_vad_create(&inst, cHopSize, cThreshold) |
| 121 | + if ret != 0 || inst == nil { |
| 122 | + return nil, ErrVadInitFailed |
| 123 | + } |
| 124 | + |
| 125 | + v := &Vad{ |
| 126 | + instance: inst, |
| 127 | + hopSize: hopSize, |
| 128 | + } |
| 129 | + |
| 130 | + runtime.SetFinalizer(v, func(vad *Vad) { |
| 131 | + if vad.instance != nil { |
| 132 | + C.ten_vad_destroy(&vad.instance) |
| 133 | + vad.instance = nil |
| 134 | + } |
| 135 | + }) |
| 136 | + return v, nil |
| 137 | +} |
| 138 | + |
| 139 | +// Close explicitly releases the C VAD instance and its associated resources. |
| 140 | +// It's good practice to call Close when done with the VAD instance, |
| 141 | +// rather than relying solely on the garbage collector. |
| 142 | +func (v *Vad) Close() error { |
| 143 | + if v.instance == nil { |
| 144 | + return ErrVadUninitialized |
| 145 | + } |
| 146 | + C.ten_vad_destroy(&v.instance) |
| 147 | + v.instance = nil |
| 148 | + runtime.SetFinalizer(v, nil) // Remove the finalizer |
| 149 | + return nil |
| 150 | +} |
| 151 | + |
| 152 | +// Process processes a single audio frame to determine if it contains speech. |
| 153 | +// speechFrame: A slice of int16 PCM audio samples. |
| 154 | +// The length of speechFrame should be equal to the hopSize used during initialization. |
| 155 | +// Returns probability of speech, true if speech is detected, false otherwise, and an error if one occurred. |
| 156 | +func (v *Vad) Process(speechFrame []int16) (float32, bool, error) { |
| 157 | + if v.instance == nil { |
| 158 | + return 0.0, false, ErrVadUninitialized |
| 159 | + } |
| 160 | + if len(speechFrame) != v.hopSize { |
| 161 | + return 0.0, false, fmt.Errorf("ten_vad: input audio frame length %d does not match expected hop_size %d", len(speechFrame), v.hopSize) |
| 162 | + } |
| 163 | + |
| 164 | + cSpeechFramePtr := (*C.short)(unsafe.Pointer(&speechFrame[0])) |
| 165 | + cAudioDataLength := C.size_t(v.hopSize) // This is the hop_size |
| 166 | + |
| 167 | + var cOutProbability C.float |
| 168 | + var cOutFlag C.int |
| 169 | + |
| 170 | + result := C.ten_vad_process(v.instance, cSpeechFramePtr, cAudioDataLength, &cOutProbability, &cOutFlag) |
| 171 | + |
| 172 | + if result != 0 { // ten_vad_process returns 0 on success, -1 on error |
| 173 | + return 0.0, false, mapErrorCodeToError(result) // Ensure mapErrorCodeToError handles -1 appropriately for process error |
| 174 | + } |
| 175 | + |
| 176 | + return float32(cOutProbability), cOutFlag == 1, nil |
| 177 | +} |
| 178 | + |
| 179 | +// FrameSize returns the expected number of int16 samples per frame (i.e., hop_size). |
| 180 | +func (v *Vad) FrameSize() int { |
| 181 | + return v.hopSize |
| 182 | +} |
0 commit comments