270 lines
8.0 KiB
Objective-C
270 lines
8.0 KiB
Objective-C
//
|
||
// AudioCaptureManager.m
|
||
// keyBoard
|
||
//
|
||
// Created by Mac on 2026/1/15.
|
||
//
|
||
|
||
#import "AudioCaptureManager.h"
|
||
#import <AVFoundation/AVFoundation.h>
|
||
|
||
// Audio capture parameters.
// The per-frame sizes below are derived from the sample rate, the frame
// duration and the Int16 sample width; keep them consistent when changing any.
const double kAudioSampleRate = 16000.0;    // Hz
const int kAudioChannels = 1;               // mono
const NSUInteger kAudioFrameDuration = 20;  // milliseconds per frame
const NSUInteger kAudioFrameSamples = (16000 / 1000) * 20;  // 320 samples per frame
const NSUInteger kAudioFrameBytes = 320 * sizeof(int16_t);  // 640 bytes per frame
|
||
|
||
/// Private state of AudioCaptureManager (class extension).
@interface AudioCaptureManager ()

/// YES while a capture session is running (set by startCapture:, cleared by
/// stopCapture).
@property (nonatomic, assign) BOOL capturing;

/// Engine whose input node is tapped to receive audio buffers.
@property (nonatomic, strong) AVAudioEngine *audioEngine;

/// Serial queue on which converted PCM is appended to the accumulation buffer.
@property (nonatomic, strong) dispatch_queue_t audioQueue;

/// Accumulation buffer used to regroup converted samples into fixed-size
/// 20 ms frames (kAudioFrameBytes each).
@property (nonatomic, strong) NSMutableData *ringBuffer;

/// Number of bytes currently stored at the start of `ringBuffer`.
@property (nonatomic, assign) NSUInteger ringBufferWriteIndex;

@end
|
||
|
||
/// Captures audio from the AVAudioEngine input node, converts it to
/// 16 kHz / mono / Int16 PCM, and delivers it to the delegate as fixed-size
/// 20 ms frames (kAudioFrameBytes bytes each) together with an RMS level.
///
/// Threading: the tap block may be invoked on a thread other than the main
/// thread. Format conversion and RMS run on that thread, frame assembly runs on
/// the private serial `audioQueue`, and every delegate callback is delivered on
/// the main queue. All accesses to `ringBuffer` / `ringBufferWriteIndex` after
/// `init` happen on `audioQueue`.
@implementation AudioCaptureManager

#pragma mark - Lifecycle

- (instancetype)init {
  self = [super init];
  if (self) {
    _audioEngine = [[AVAudioEngine alloc] init];
    _audioQueue = dispatch_queue_create("com.keyboard.aitalk.audiocapture",
                                        DISPATCH_QUEUE_SERIAL);
    // Staging area for several frames; larger inputs are processed in slices.
    _ringBuffer = [[NSMutableData alloc] initWithLength:kAudioFrameBytes * 4];
    _ringBufferWriteIndex = 0;
    _capturing = NO;
  }
  return self;
}

- (void)dealloc {
  // Tear down through ivars instead of calling -stopCapture: that method
  // enqueues a block that captures `self`, which must not happen while the
  // object is being deallocated.
  if (_capturing) {
    [_audioEngine.inputNode removeTapOnBus:0];
    [_audioEngine stop];
    NSLog(@"[AudioCaptureManager] Stopped capturing");
  }
}

#pragma mark - Public Methods

/// Starts capturing from the engine's input node.
///
/// @param error On failure, set to an error describing the problem
///              (domain "AudioCaptureManager", or the engine's start error).
/// @return YES if capture is running (including when it was already running),
///         NO if the input is unusable, the converter could not be created, or
///         the engine failed to start.
- (BOOL)startCapture:(NSError **)error {
  if (self.capturing) {
    return YES;
  }

  AVAudioInputNode *inputNode = self.audioEngine.inputNode;
  AVAudioFormat *inputFormat = [inputNode outputFormatForBus:0];

  // A 0 Hz / 0-channel format means there is no usable input; such a format
  // cannot be used for the tap or for sizing the buffers below.
  if (inputFormat == nil || inputFormat.sampleRate <= 0.0 ||
      inputFormat.channelCount == 0) {
    if (error) {
      *error = [self captureErrorWithCode:-2
                              description:@"Audio input is unavailable"];
    }
    return NO;
  }

  // Target format: 16 kHz, mono, Int16.
  AVAudioFormat *targetFormat =
      [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16
                                       sampleRate:kAudioSampleRate
                                         channels:kAudioChannels
                                      interleaved:YES];

  // One converter per session; it is reused for every tap buffer.
  AVAudioConverter *converter =
      [[AVAudioConverter alloc] initFromFormat:inputFormat
                                      toFormat:targetFormat];
  if (!converter) {
    if (error) {
      *error = [self captureErrorWithCode:-1
                              description:@"Failed to create audio converter"];
    }
    return NO;
  }

  // Start the session with an empty accumulation buffer. The reset runs on
  // audioQueue so it is ordered after any append still pending from a previous
  // session and before any append from this one.
  dispatch_async(self.audioQueue, ^{
    self.ringBufferWriteIndex = 0;
  });

  // Request roughly 20 ms of input per callback. The frame assembly does not
  // rely on the delivered size, so a different size from the engine is fine.
  AVAudioFrameCount bufferSize =
      (AVAudioFrameCount)(inputFormat.sampleRate * 0.02);

  // The tap holds the manager weakly so an installed tap does not keep it alive.
  __weak typeof(self) weakSelf = self;
  [inputNode installTapOnBus:0
                  bufferSize:bufferSize
                      format:inputFormat
                       block:^(AVAudioPCMBuffer *_Nonnull buffer,
                               AVAudioTime *_Nonnull when) {
                         [weakSelf processAudioBuffer:buffer
                                        withConverter:converter
                                         targetFormat:targetFormat];
                       }];

  NSError *startError = nil;
  [self.audioEngine prepare];
  if (![self.audioEngine startAndReturnError:&startError]) {
    [inputNode removeTapOnBus:0];
    if (error) {
      *error = startError;
    }
    NSLog(@"[AudioCaptureManager] Failed to start engine: %@",
          startError.localizedDescription);
    return NO;
  }

  self.capturing = YES;

  NSLog(@"[AudioCaptureManager] Started capturing at %.0f Hz",
        inputFormat.sampleRate);
  return YES;
}

/// Stops capturing: removes the tap, stops the engine and discards any partial
/// frame. Does nothing if capture is not running.
- (void)stopCapture {
  if (!self.capturing) {
    return;
  }

  [self.audioEngine.inputNode removeTapOnBus:0];
  [self.audioEngine stop];
  self.capturing = NO;

  // Discard the partial frame on audioQueue so the reset cannot interleave
  // with an append that is currently running there.
  dispatch_async(self.audioQueue, ^{
    self.ringBufferWriteIndex = 0;
  });

  NSLog(@"[AudioCaptureManager] Stopped capturing");
}

#pragma mark - Audio Processing

/// Converts one tap buffer to the target format, reports its RMS level and
/// hands the converted samples to audioQueue for frame assembly.
///
/// @param buffer       Audio delivered by the input tap.
/// @param converter    Converter from the input format to `targetFormat`.
/// @param targetFormat Format of the converted output (Int16 samples are read
///                     from channel 0).
- (void)processAudioBuffer:(AVAudioPCMBuffer *)buffer
             withConverter:(AVAudioConverter *)converter
              targetFormat:(AVAudioFormat *)targetFormat {
  if (!self.capturing || buffer.frameLength == 0) {
    return;
  }

  // Output capacity: input length scaled by the sample-rate ratio, plus one
  // frame of slack for rounding.
  AVAudioFrameCount outputFrameCapacity =
      (AVAudioFrameCount)(buffer.frameLength *
                          (kAudioSampleRate / buffer.format.sampleRate)) +
      1;

  AVAudioPCMBuffer *outputBuffer =
      [[AVAudioPCMBuffer alloc] initWithPCMFormat:targetFormat
                                    frameCapacity:outputFrameCapacity];
  if (!outputBuffer) {
    NSLog(@"[AudioCaptureManager] Failed to allocate output buffer");
    return;
  }

  // Hand `buffer` to the converter exactly once. If the converter asks for
  // more input during this call, report that none is available right now
  // instead of feeding the same samples again.
  __block BOOL inputConsumed = NO;
  AVAudioConverterInputBlock inputBlock = ^AVAudioBuffer *_Nullable(
      AVAudioPacketCount inNumberOfPackets,
      AVAudioConverterInputStatus *_Nonnull outStatus) {
    if (inputConsumed) {
      *outStatus = AVAudioConverterInputStatus_NoDataNow;
      return nil;
    }
    inputConsumed = YES;
    *outStatus = AVAudioConverterInputStatus_HaveData;
    return buffer;
  };

  NSError *conversionError = nil;
  AVAudioConverterOutputStatus status =
      [converter convertToBuffer:outputBuffer
                           error:&conversionError
              withInputFromBlock:inputBlock];
  if (status == AVAudioConverterOutputStatus_Error) {
    NSLog(@"[AudioCaptureManager] Conversion error: %@",
          conversionError.localizedDescription);
    return;
  }

  NSUInteger sampleCount = outputBuffer.frameLength;
  if (sampleCount == 0 || outputBuffer.int16ChannelData == NULL) {
    return;
  }
  int16_t *samples = outputBuffer.int16ChannelData[0];

  [self calculateAndReportRMS:samples sampleCount:sampleCount];

  // Copy the samples before leaving this method: `samples` points into
  // `outputBuffer`, which is released when this method returns, so the async
  // block must own its own copy of the bytes.
  NSData *pcmData = [NSData dataWithBytes:samples
                                   length:sampleCount * sizeof(int16_t)];
  dispatch_async(self.audioQueue, ^{
    [self appendToRingBuffer:(int16_t *)pcmData.bytes
                   byteCount:pcmData.length];
  });
}

/// Appends converted PCM bytes to the accumulation buffer and emits every
/// complete 20 ms frame through -outputPCMFrame:. Must run on audioQueue.
///
/// @param samples   Start of the Int16 PCM data to append.
/// @param byteCount Number of bytes available at `samples`.
- (void)appendToRingBuffer:(int16_t *)samples byteCount:(NSUInteger)byteCount {
  uint8_t *storage = (uint8_t *)self.ringBuffer.mutableBytes;
  NSUInteger capacity = self.ringBuffer.length;
  const uint8_t *source = (const uint8_t *)samples;

  NSUInteger bytesLeft = byteCount;
  NSUInteger sourceOffset = 0;

  while (bytesLeft > 0) {
    // The stored byte count is always below one frame here, so there is always
    // free space and every pass makes progress.
    NSUInteger writeIndex = self.ringBufferWriteIndex;
    NSUInteger chunk = MIN(bytesLeft, capacity - writeIndex);

    memcpy(storage + writeIndex, source + sourceOffset, chunk);
    writeIndex += chunk;
    sourceOffset += chunk;
    bytesLeft -= chunk;

    // Emit every complete frame currently stored.
    NSUInteger readOffset = 0;
    while (writeIndex - readOffset >= kAudioFrameBytes) {
      NSData *frame = [NSData dataWithBytes:storage + readOffset
                                     length:kAudioFrameBytes];
      readOffset += kAudioFrameBytes;
      [self outputPCMFrame:frame];
    }

    // Move the leftover partial frame to the front with a single compaction.
    NSUInteger remaining = writeIndex - readOffset;
    if (readOffset > 0 && remaining > 0) {
      memmove(storage, storage + readOffset, remaining);
    }
    self.ringBufferWriteIndex = remaining;
  }
}

/// Delivers one complete PCM frame to the delegate on the main queue.
/// Frames are dropped when capture is no longer running.
- (void)outputPCMFrame:(NSData *)frame {
  if (!self.capturing) {
    return;
  }

  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate respondsToSelector:@selector
                       (audioCaptureManagerDidOutputPCMFrame:)]) {
      [self.delegate audioCaptureManagerDidOutputPCMFrame:frame];
    }
  });
}

/// Computes the RMS level of the given Int16 samples and reports it to the
/// delegate on the main queue as a value in 0.0 ... 1.0.
///
/// @param samples     Int16 PCM samples.
/// @param sampleCount Number of samples at `samples`; nothing is reported
///                    when it is 0.
- (void)calculateAndReportRMS:(int16_t *)samples
                  sampleCount:(NSUInteger)sampleCount {
  if (sampleCount == 0) {
    return;
  }

  double sumOfSquares = 0.0;
  for (NSUInteger i = 0; i < sampleCount; i++) {
    double normalized = (double)samples[i] / 32768.0;  // -1.0 ... 1.0
    sumOfSquares += normalized * normalized;
  }
  double rms = sqrt(sumOfSquares / sampleCount);

  // Apply a gain of 2 and clamp to 1.0.
  float level = (float)MIN(rms * 2.0, 1.0);

  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate
            respondsToSelector:@selector(audioCaptureManagerDidUpdateRMS:)]) {
      [self.delegate audioCaptureManagerDidUpdateRMS:level];
    }
  });
}

#pragma mark - Private Helpers

/// Builds an NSError in the "AudioCaptureManager" domain.
- (NSError *)captureErrorWithCode:(NSInteger)code
                      description:(NSString *)description {
  return [NSError errorWithDomain:@"AudioCaptureManager"
                             code:code
                         userInfo:@{NSLocalizedDescriptionKey : description}];
}

@end
|