Add voice WebSocket support etc.; not yet tested
269
keyBoard/Class/AiTalk/VM/AudioCaptureManager.m
Normal file
@@ -0,0 +1,269 @@
//
//  AudioCaptureManager.m
//  keyBoard
//
//  Created by Mac on 2026/1/15.
//

#import "AudioCaptureManager.h"
#import <AVFoundation/AVFoundation.h>

// Audio capture parameter constants
const double kAudioSampleRate = 16000.0;
const int kAudioChannels = 1;
const NSUInteger kAudioFrameDuration = 20;  // ms
const NSUInteger kAudioFrameSamples = 320;  // 16000 * 0.02
const NSUInteger kAudioFrameBytes = 640;    // 320 * 2 (Int16 samples)

@interface AudioCaptureManager ()

@property(nonatomic, strong) AVAudioEngine *audioEngine;
@property(nonatomic, strong) dispatch_queue_t audioQueue;
@property(nonatomic, assign) BOOL capturing;

// Ring buffer for accumulating samples to form 20 ms frames
@property(nonatomic, strong) NSMutableData *ringBuffer;
@property(nonatomic, assign) NSUInteger ringBufferWriteIndex;

@end

@implementation AudioCaptureManager

- (instancetype)init {
  self = [super init];
  if (self) {
    _audioEngine = [[AVAudioEngine alloc] init];
    _audioQueue = dispatch_queue_create("com.keyboard.aitalk.audiocapture",
                                        DISPATCH_QUEUE_SERIAL);
    _ringBuffer = [[NSMutableData alloc]
        initWithLength:kAudioFrameBytes * 4];  // Room for several frames
    _ringBufferWriteIndex = 0;
    _capturing = NO;
  }
  return self;
}

- (void)dealloc {
  [self stopCapture];
}

#pragma mark - Public Methods

- (BOOL)startCapture:(NSError **)error {
  if (self.capturing) {
    return YES;
  }

  AVAudioInputNode *inputNode = self.audioEngine.inputNode;

  // Hardware input format
  AVAudioFormat *inputFormat = [inputNode outputFormatForBus:0];

  // Target format: 16 kHz, mono, Int16
  AVAudioFormat *targetFormat =
      [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16
                                       sampleRate:kAudioSampleRate
                                         channels:kAudioChannels
                                      interleaved:YES];

  // Create the format converter
  AVAudioConverter *converter =
      [[AVAudioConverter alloc] initFromFormat:inputFormat
                                      toFormat:targetFormat];
  if (!converter) {
    if (error) {
      *error = [NSError errorWithDomain:@"AudioCaptureManager"
                                   code:-1
                               userInfo:@{
                                 NSLocalizedDescriptionKey :
                                     @"Failed to create audio converter"
                               }];
    }
    return NO;
  }

  // Choose a tap buffer size of roughly 20 ms worth of input samples
  AVAudioFrameCount bufferSize =
      (AVAudioFrameCount)(inputFormat.sampleRate * 0.02);

  // Install the tap
  __weak typeof(self) weakSelf = self;
  [inputNode installTapOnBus:0
                  bufferSize:bufferSize
                      format:inputFormat
                       block:^(AVAudioPCMBuffer *_Nonnull buffer,
                               AVAudioTime *_Nonnull when) {
                         [weakSelf processAudioBuffer:buffer
                                        withConverter:converter
                                         targetFormat:targetFormat];
                       }];

  // Start the engine
  NSError *startError = nil;
  [self.audioEngine prepare];

  if (![self.audioEngine startAndReturnError:&startError]) {
    [inputNode removeTapOnBus:0];
    if (error) {
      *error = startError;
    }
    NSLog(@"[AudioCaptureManager] Failed to start engine: %@",
          startError.localizedDescription);
    return NO;
  }

  self.capturing = YES;
  self.ringBufferWriteIndex = 0;

  NSLog(@"[AudioCaptureManager] Started capturing at %.0f Hz",
        inputFormat.sampleRate);
  return YES;
}

- (void)stopCapture {
  if (!self.capturing) {
    return;
  }

  [self.audioEngine.inputNode removeTapOnBus:0];
  [self.audioEngine stop];

  self.capturing = NO;
  self.ringBufferWriteIndex = 0;

  NSLog(@"[AudioCaptureManager] Stopped capturing");
}

#pragma mark - Audio Processing

- (void)processAudioBuffer:(AVAudioPCMBuffer *)buffer
             withConverter:(AVAudioConverter *)converter
              targetFormat:(AVAudioFormat *)targetFormat {

  if (!self.capturing) {
    return;
  }

  // Estimate the number of output frames after resampling
  AVAudioFrameCount outputFrameCapacity =
      (AVAudioFrameCount)(buffer.frameLength *
                          (kAudioSampleRate / buffer.format.sampleRate)) +
      1;

  // Create the output buffer
  AVAudioPCMBuffer *outputBuffer =
      [[AVAudioPCMBuffer alloc] initWithPCMFormat:targetFormat
                                    frameCapacity:outputFrameCapacity];

  // Convert formats. The input block must hand the tap buffer to the
  // converter exactly once; afterwards it reports that no more data is
  // available, otherwise the converter may pull the same buffer again.
  NSError *conversionError = nil;
  __block BOOL inputConsumed = NO;
  AVAudioConverterInputBlock inputBlock = ^AVAudioBuffer *_Nullable(
      AVAudioPacketCount inNumberOfPackets,
      AVAudioConverterInputStatus *_Nonnull outStatus) {
    if (inputConsumed) {
      *outStatus = AVAudioConverterInputStatus_NoDataNow;
      return nil;
    }
    inputConsumed = YES;
    *outStatus = AVAudioConverterInputStatus_HaveData;
    return buffer;
  };

  AVAudioConverterOutputStatus status =
      [converter convertToBuffer:outputBuffer
                           error:&conversionError
              withInputFromBlock:inputBlock];

  if (status == AVAudioConverterOutputStatus_Error) {
    NSLog(@"[AudioCaptureManager] Conversion error: %@",
          conversionError.localizedDescription);
    return;
  }

  // Grab the converted Int16 samples
  int16_t *samples = outputBuffer.int16ChannelData[0];
  NSUInteger sampleCount = outputBuffer.frameLength;
  NSUInteger byteCount = sampleCount * sizeof(int16_t);

  // Compute and report RMS
  [self calculateAndReportRMS:samples sampleCount:sampleCount];

  // Copy the samples before hopping queues: the raw pointer refers to
  // outputBuffer's storage, which the async block would not keep alive.
  NSData *pcmData = [NSData dataWithBytes:samples length:byteCount];
  dispatch_async(self.audioQueue, ^{
    [self appendToRingBuffer:(const int16_t *)pcmData.bytes
                   byteCount:pcmData.length];
  });
}

- (void)appendToRingBuffer:(const int16_t *)samples
                 byteCount:(NSUInteger)byteCount {
  // Append the new data to the ring buffer
  uint8_t *ringBufferBytes = (uint8_t *)self.ringBuffer.mutableBytes;
  NSUInteger ringBufferLength = self.ringBuffer.length;

  NSUInteger bytesToCopy = byteCount;
  NSUInteger sourceOffset = 0;

  while (bytesToCopy > 0) {
    NSUInteger spaceAvailable = ringBufferLength - self.ringBufferWriteIndex;
    NSUInteger copySize = MIN(bytesToCopy, spaceAvailable);

    memcpy(ringBufferBytes + self.ringBufferWriteIndex,
           (const uint8_t *)samples + sourceOffset, copySize);
    self.ringBufferWriteIndex += copySize;
    sourceOffset += copySize;
    bytesToCopy -= copySize;

    // Emit every complete 20 ms frame that has accumulated
    while (self.ringBufferWriteIndex >= kAudioFrameBytes) {
      // Extract one complete frame
      NSData *frame = [NSData dataWithBytes:ringBufferBytes
                                     length:kAudioFrameBytes];

      // Shift the remaining bytes to the front of the buffer
      NSUInteger remaining = self.ringBufferWriteIndex - kAudioFrameBytes;
      if (remaining > 0) {
        memmove(ringBufferBytes, ringBufferBytes + kAudioFrameBytes, remaining);
      }
      self.ringBufferWriteIndex = remaining;

      // Deliver the frame to the delegate
      [self outputPCMFrame:frame];
    }

    // If the ring buffer somehow fills up, wrap back to the start
    if (self.ringBufferWriteIndex >= ringBufferLength) {
      self.ringBufferWriteIndex = 0;
    }
  }
}

- (void)outputPCMFrame:(NSData *)frame {
  if (!self.capturing) {
    return;
  }

  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate respondsToSelector:@selector
                       (audioCaptureManagerDidOutputPCMFrame:)]) {
      [self.delegate audioCaptureManagerDidOutputPCMFrame:frame];
    }
  });
}

- (void)calculateAndReportRMS:(int16_t *)samples
                  sampleCount:(NSUInteger)sampleCount {
  if (sampleCount == 0)
    return;

  // Compute the RMS level
  double sum = 0.0;
  for (NSUInteger i = 0; i < sampleCount; i++) {
    double sample = (double)samples[i] / 32768.0;  // Normalize to -1.0 ~ 1.0
    sum += sample * sample;
  }
  double rms = sqrt(sum / sampleCount);
  float rmsFloat = (float)MIN(rms * 2.0, 1.0);  // Scale and clamp to 0.0 ~ 1.0

  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate
            respondsToSelector:@selector(audioCaptureManagerDidUpdateRMS:)]) {
      [self.delegate audioCaptureManagerDidUpdateRMS:rmsFloat];
    }
  });
}

@end
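
The commit message says this capture path feeds a voice WebSocket, but no WebSocket code appears in this file. Below is a minimal caller sketch of that wiring, assuming the companion header declares an AudioCaptureManagerDelegate protocol with the two callbacks invoked above, a `delegate` property, and `startCapture:`; the VoiceStreamer class and the endpoint URL are illustrative only, not part of this commit.

// VoiceStreamer.m -- illustrative sketch, not part of this commit
#import <AVFoundation/AVFoundation.h>
#import "AudioCaptureManager.h"

@interface VoiceStreamer : NSObject <AudioCaptureManagerDelegate>
@property(nonatomic, strong) AudioCaptureManager *capture;
@property(nonatomic, strong) NSURLSessionWebSocketTask *socket;
@end

@implementation VoiceStreamer

- (void)start {
  // A record-capable audio session must be active before the engine starts.
  AVAudioSession *session = [AVAudioSession sharedInstance];
  [session setCategory:AVAudioSessionCategoryPlayAndRecord error:nil];
  [session setActive:YES error:nil];

  // Hypothetical endpoint; substitute the real voice server URL.
  NSURL *url = [NSURL URLWithString:@"wss://example.com/voice"];
  self.socket = [[NSURLSession sharedSession] webSocketTaskWithURL:url];
  [self.socket resume];

  self.capture = [[AudioCaptureManager alloc] init];
  self.capture.delegate = self;

  NSError *error = nil;
  if (![self.capture startCapture:&error]) {
    NSLog(@"startCapture failed: %@", error.localizedDescription);
  }
}

// Forward each 20 ms / 640-byte PCM frame as a binary WebSocket message.
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)frame {
  NSURLSessionWebSocketMessage *msg =
      [[NSURLSessionWebSocketMessage alloc] initWithData:frame];
  [self.socket sendMessage:msg
         completionHandler:^(NSError *_Nullable sendError) {
           if (sendError) {
             NSLog(@"send failed: %@", sendError.localizedDescription);
           }
         }];
}

// Drive a level meter (or similar UI) from the normalized RMS value.
- (void)audioCaptureManagerDidUpdateRMS:(float)rms {
  NSLog(@"input level: %.2f", rms);
}

@end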