|
| 1 | +// |
| 2 | +// Copyright © 2025 Agora |
| 3 | +// This file is part of TEN Framework, an open source project. |
| 4 | +// Licensed under the Apache License, Version 2.0, with certain conditions. |
| 5 | +// Refer to the "LICENSE" file in the root directory for more information. |
| 6 | +// |
| 7 | + |
| 8 | + |
| 9 | +// import com.ten.vad.TenVad; // Uncomment when using package structure |
| 10 | +import javax.sound.sampled.*; |
| 11 | +import java.io.*; |
| 12 | +import java.nio.ByteBuffer; |
| 13 | +import java.nio.ByteOrder; |
| 14 | + |
| 15 | +/** |
| 16 | + * Example usage of TEN VAD Java interface. |
| 17 | + * |
| 18 | + * This example demonstrates how to use the TEN VAD library in Java |
| 19 | + * for real-time voice activity detection. |
| 20 | + * |
| 21 | + * Usage: java TestTenVad <input_wav_file> <output_txt_file> |
| 22 | + * |
| 23 | + * @author TEN Framework Team |
| 24 | + * @version 1.0 |
| 25 | + */ |
| 26 | +public class TestTenVad { |
| 27 | + |
| 28 | + private static final int HOP_SIZE = 256; // 16 ms per frame at 16kHz |
| 29 | + private static final float THRESHOLD = 0.5f; |
| 30 | + |
| 31 | + public static void main(String[] args) { |
| 32 | + if (args.length != 2) { |
| 33 | + System.err.println("Usage: java TestTenVad <input_wav_file> <output_txt_file>"); |
| 34 | + System.exit(1); |
| 35 | + } |
| 36 | + |
| 37 | + String inputFile = args[0]; |
| 38 | + String outputFile = args[1]; |
| 39 | + |
| 40 | + try { |
| 41 | + |
| 42 | + // Create VAD instance |
| 43 | + TenVad vad = new TenVad(HOP_SIZE, THRESHOLD); |
| 44 | + System.out.println("TEN VAD initialized with hop_size=" + vad.getHopSize() + |
| 45 | + ", threshold=" + vad.getThreshold()); |
| 46 | + |
| 47 | + // Load and process audio file |
| 48 | + processAudioFile(vad, inputFile, outputFile); |
| 49 | + |
| 50 | + // Clean up |
| 51 | + vad.destroy(); |
| 52 | + System.out.println("Processing completed successfully!"); |
| 53 | + |
| 54 | + } catch (Exception e) { |
| 55 | + System.err.println("Error: " + e.getMessage()); |
| 56 | + e.printStackTrace(); |
| 57 | + System.exit(1); |
| 58 | + } |
| 59 | + } |
| 60 | + |
| 61 | + /** |
| 62 | + * Process audio file and write VAD results to output file. |
| 63 | + */ |
| 64 | + private static void processAudioFile(TenVad vad, String inputFile, String outputFile) |
| 65 | + throws IOException, UnsupportedAudioFileException { |
| 66 | + |
| 67 | + // Load WAV file |
| 68 | + AudioInputStream audioStream = AudioSystem.getAudioInputStream(new File(inputFile)); |
| 69 | + AudioFormat format = audioStream.getFormat(); |
| 70 | + |
| 71 | + // Verify audio format |
| 72 | + if (format.getSampleRate() != 16000) { |
| 73 | + throw new IllegalArgumentException("Audio sample rate must be 16kHz, got: " + |
| 74 | + format.getSampleRate()); |
| 75 | + } |
| 76 | + |
| 77 | + if (format.getSampleSizeInBits() != 16) { |
| 78 | + throw new IllegalArgumentException("Audio sample size must be 16-bit, got: " + |
| 79 | + format.getSampleSizeInBits()); |
| 80 | + } |
| 81 | + |
| 82 | + if (format.getChannels() != 1) { |
| 83 | + throw new IllegalArgumentException("Audio must be mono, got: " + |
| 84 | + format.getChannels() + " channels"); |
| 85 | + } |
| 86 | + |
| 87 | + System.out.println("Audio format: " + format); |
| 88 | + |
| 89 | + // Read audio data |
| 90 | + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); |
| 91 | + byte[] data = new byte[4096]; |
| 92 | + int bytesRead; |
| 93 | + |
| 94 | + while ((bytesRead = audioStream.read(data)) != -1) { |
| 95 | + buffer.write(data, 0, bytesRead); |
| 96 | + } |
| 97 | + |
| 98 | + audioStream.close(); |
| 99 | + byte[] audioBytes = buffer.toByteArray(); |
| 100 | + |
| 101 | + // Convert to short array |
| 102 | + short[] audioSamples = bytesToShorts(audioBytes, format.isBigEndian()); |
| 103 | + System.out.println("Loaded " + audioSamples.length + " audio samples"); |
| 104 | + |
| 105 | + // Process audio in frames |
| 106 | + int numFrames = audioSamples.length / HOP_SIZE; |
| 107 | + System.out.println("Processing " + numFrames + " frames..."); |
| 108 | + |
| 109 | + try (PrintWriter writer = new PrintWriter(new FileWriter(outputFile))) { |
| 110 | + for (int i = 0; i < numFrames; i++) { |
| 111 | + // Extract frame |
| 112 | + short[] frame = new short[HOP_SIZE]; |
| 113 | + System.arraycopy(audioSamples, i * HOP_SIZE, frame, 0, HOP_SIZE); |
| 114 | + |
| 115 | + // Process frame |
| 116 | + TenVad.VadResult result = vad.process(frame); |
| 117 | + |
| 118 | + // Write result |
| 119 | + String line = String.format("[%d] %.6f, %d", i, |
| 120 | + result.getProbability(), result.getFlag()); |
| 121 | + System.out.println(line); |
| 122 | + writer.println(line); |
| 123 | + } |
| 124 | + } |
| 125 | + } |
| 126 | + |
| 127 | + /** |
| 128 | + * Convert byte array to short array. |
| 129 | + */ |
| 130 | + private static short[] bytesToShorts(byte[] bytes, boolean bigEndian) { |
| 131 | + short[] shorts = new short[bytes.length / 2]; |
| 132 | + ByteBuffer.wrap(bytes).order(bigEndian ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN) |
| 133 | + .asShortBuffer().get(shorts); |
| 134 | + return shorts; |
| 135 | + } |
| 136 | + |
| 137 | + /** |
| 138 | + * Example of real-time audio processing from microphone. |
| 139 | + */ |
| 140 | + public static void processMicrophoneInput() { |
| 141 | + try { |
| 142 | + TenVad vad = new TenVad(HOP_SIZE, THRESHOLD); |
| 143 | + |
| 144 | + // Set up audio capture |
| 145 | + AudioFormat format = new AudioFormat(16000, 16, 1, true, false); |
| 146 | + DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); |
| 147 | + |
| 148 | + if (!AudioSystem.isLineSupported(info)) { |
| 149 | + System.err.println("Microphone input not supported"); |
| 150 | + return; |
| 151 | + } |
| 152 | + |
| 153 | + TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info); |
| 154 | + line.open(format); |
| 155 | + line.start(); |
| 156 | + |
| 157 | + System.out.println("Recording from microphone... Press Ctrl+C to stop"); |
| 158 | + |
| 159 | + byte[] buffer = new byte[HOP_SIZE * 2]; // 16-bit samples |
| 160 | + short[] frame = new short[HOP_SIZE]; |
| 161 | + |
| 162 | + while (true) { |
| 163 | + int bytesRead = line.read(buffer, 0, buffer.length); |
| 164 | + if (bytesRead == buffer.length) { |
| 165 | + // Convert to short array |
| 166 | + ByteBuffer.wrap(buffer).order(ByteOrder.LITTLE_ENDIAN) |
| 167 | + .asShortBuffer().get(frame); |
| 168 | + |
| 169 | + // Process frame |
| 170 | + TenVad.VadResult result = vad.process(frame); |
| 171 | + |
| 172 | + // Print result |
| 173 | + System.out.printf("VAD: %.3f, %s%n", |
| 174 | + result.getProbability(), |
| 175 | + result.isVoiceDetected() ? "VOICE" : "SILENCE"); |
| 176 | + } |
| 177 | + } |
| 178 | + |
| 179 | + } catch (Exception e) { |
| 180 | + System.err.println("Error in microphone processing: " + e.getMessage()); |
| 181 | + e.printStackTrace(); |
| 182 | + } |
| 183 | + } |
| 184 | +} |
0 commit comments