Files
keyboard/keyBoard/Class/AiTalk/VM/AudioCaptureManager.m
2026-01-22 13:47:34 +08:00

337 lines
9.7 KiB
Objective-C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// AudioCaptureManager.m
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "AudioCaptureManager.h"
#import <AVFoundation/AVFoundation.h>
// Audio capture parameter constants.
const double kAudioSampleRate = 16000.0;    // target output sample rate (Hz)
const int kAudioChannels = 1;               // mono capture
const NSUInteger kAudioFrameDuration = 20;  // ms per emitted frame
const NSUInteger kAudioFrameSamples = 320;  // 16000 * 0.02 (samples per frame)
const NSUInteger kAudioFrameBytes = 640;    // 320 * 2 (Int16 bytes per frame)
// Fixed software gain applied to converted samples (values <= 1.0 disable it).
static const float kAudioSoftwareGain = 2.5f;
@interface AudioCaptureManager ()
// Engine whose input node tap supplies microphone buffers.
@property(nonatomic, strong) AVAudioEngine *audioEngine;
// Serial queue on which ring-buffer writes and frame assembly run.
@property(nonatomic, strong) dispatch_queue_t audioQueue;
// YES between a successful -startCapture: and -stopCapture.
@property(nonatomic, assign) BOOL capturing;
// Ring buffer for accumulating samples to form 20ms frames.
@property(nonatomic, strong) NSMutableData *ringBuffer;
// Next write offset (bytes) into ringBuffer; mutated on audioQueue.
@property(nonatomic, assign) NSUInteger ringBufferWriteIndex;
// Timestamp of the last stats log; throttles logging to once per second.
@property(nonatomic, assign) NSTimeInterval lastStatsLogTime;
@end
@implementation AudioCaptureManager
- (instancetype)init {
    // Designated initializer: sets up the engine, the serial work queue,
    // and a ring buffer large enough for several 20 ms frames.
    if ((self = [super init])) {
        _capturing = NO;
        _lastStatsLogTime = 0;
        _ringBufferWriteIndex = 0;
        _audioEngine = [[AVAudioEngine alloc] init];
        _audioQueue = dispatch_queue_create("com.keyboard.aitalk.audiocapture",
                                            DISPATCH_QUEUE_SERIAL);
        // Room for four complete frames of Int16 PCM.
        _ringBuffer = [[NSMutableData alloc] initWithLength:kAudioFrameBytes * 4];
    }
    return self;
}
- (void)dealloc {
    // Remove the tap and stop the engine if capture is still running.
    [self stopCapture];
}
#pragma mark - Public Methods
/// Starts microphone capture: installs an input-node tap that converts
/// hardware-format buffers to 16 kHz mono Int16 PCM.
/// @param error Out-parameter populated on failure (converter creation or
///              engine start). May be NULL.
/// @return YES if capture is running (or already was), NO on failure.
- (BOOL)startCapture:(NSError **)error {
    if (self.capturing) {
        return YES;
    }

    AVAudioInputNode *inputNode = self.audioEngine.inputNode;
    // Hardware input format; its sample rate depends on the device/route.
    AVAudioFormat *inputFormat = [inputNode outputFormatForBus:0];

    // Target format: 16 kHz, mono, interleaved Int16.
    AVAudioFormat *targetFormat =
        [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16
                                         sampleRate:kAudioSampleRate
                                           channels:kAudioChannels
                                        interleaved:YES];

    AVAudioConverter *converter =
        [[AVAudioConverter alloc] initFromFormat:inputFormat
                                        toFormat:targetFormat];
    if (!converter) {
        if (error) {
            *error = [NSError errorWithDomain:@"AudioCaptureManager"
                                         code:-1
                                     userInfo:@{
                                         NSLocalizedDescriptionKey :
                                             @"Failed to create audio converter"
                                     }];
        }
        return NO;
    }

    // Request roughly 20 ms of input samples per tap callback.
    AVAudioFrameCount bufferSize =
        (AVAudioFrameCount)(inputFormat.sampleRate * 0.02);

    __weak typeof(self) weakSelf = self;
    [inputNode installTapOnBus:0
                    bufferSize:bufferSize
                        format:inputFormat
                         block:^(AVAudioPCMBuffer *_Nonnull buffer,
                                 AVAudioTime *_Nonnull when) {
                           [weakSelf processAudioBuffer:buffer
                                          withConverter:converter
                                           targetFormat:targetFormat];
                         }];

    // FIX: mark capturing and reset the frame accumulator BEFORE starting the
    // engine. Previously this happened after -startAndReturnError:, so the
    // earliest tap callbacks were dropped by the !self.capturing guard in
    // -processAudioBuffer:withConverter:targetFormat:.
    self.capturing = YES;
    self.ringBufferWriteIndex = 0;

    NSError *startError = nil;
    [self.audioEngine prepare];
    if (![self.audioEngine startAndReturnError:&startError]) {
        // Roll back the optimistic state on failure.
        self.capturing = NO;
        [inputNode removeTapOnBus:0];
        if (error) {
            *error = startError;
        }
        NSLog(@"[AudioCaptureManager] Failed to start engine: %@",
              startError.localizedDescription);
        return NO;
    }

    NSLog(@"[AudioCaptureManager] Started capturing at %.0f Hz",
          inputFormat.sampleRate);
    return YES;
}
/// Stops capture: removes the input tap, halts the engine, and clears the
/// frame accumulator. Safe to call when not capturing.
- (void)stopCapture {
    if (!self.capturing) {
        return;
    }
    // FIX: flip the flag first so tap callbacks already in flight bail out
    // at the !self.capturing guard instead of queuing more audio.
    self.capturing = NO;
    [self.audioEngine.inputNode removeTapOnBus:0];
    [self.audioEngine stop];
    // FIX: ringBufferWriteIndex is mutated on audioQueue (see
    // -appendToRingBuffer:byteCount:); reset it there, after any pending
    // append block has drained, instead of racing from the caller's thread.
    dispatch_sync(self.audioQueue, ^{
      self.ringBufferWriteIndex = 0;
    });
    NSLog(@"[AudioCaptureManager] Stopped capturing");
}
#pragma mark - Audio Processing
/// Converts one tap buffer to 16 kHz mono Int16, applies gain, reports level
/// stats, and hands the PCM bytes to the serial queue for frame assembly.
/// Runs on the tap's render thread.
- (void)processAudioBuffer:(AVAudioPCMBuffer *)buffer
             withConverter:(AVAudioConverter *)converter
              targetFormat:(AVAudioFormat *)targetFormat {
    if (!self.capturing) {
        return;
    }

    // Upper bound on output frames after resampling (+1 for rounding).
    AVAudioFrameCount outputFrameCapacity =
        (AVAudioFrameCount)(buffer.frameLength *
                            (kAudioSampleRate / buffer.format.sampleRate)) +
        1;
    AVAudioPCMBuffer *outputBuffer =
        [[AVAudioPCMBuffer alloc] initWithPCMFormat:targetFormat
                                      frameCapacity:outputFrameCapacity];

    // FIX: supply the input buffer to the converter exactly once. The old
    // block returned HaveData with the same buffer on every pull, so if the
    // converter asked for input more than once it re-consumed the same
    // samples and duplicated audio. NoDataNow tells it to stop pulling.
    __block BOOL inputConsumed = NO;
    AVAudioConverterInputBlock inputBlock = ^AVAudioBuffer *_Nullable(
        AVAudioPacketCount inNumberOfPackets,
        AVAudioConverterInputStatus *_Nonnull outStatus) {
      if (inputConsumed) {
          *outStatus = AVAudioConverterInputStatus_NoDataNow;
          return nil;
      }
      inputConsumed = YES;
      *outStatus = AVAudioConverterInputStatus_HaveData;
      return buffer;
    };

    NSError *conversionError = nil;
    AVAudioConverterOutputStatus status =
        [converter convertToBuffer:outputBuffer
                             error:&conversionError
                withInputFromBlock:inputBlock];
    if (status == AVAudioConverterOutputStatus_Error) {
        NSLog(@"[AudioCaptureManager] Conversion error: %@",
              conversionError.localizedDescription);
        return;
    }

    if (!outputBuffer.int16ChannelData) {
        NSLog(@"[AudioCaptureManager] Int16 channel data is null");
        return;
    }
    int16_t *samples = (int16_t *)outputBuffer.int16ChannelData[0];
    NSUInteger sampleCount = outputBuffer.frameLength;
    NSUInteger byteCount = sampleCount * sizeof(int16_t);

    // Gain first so the reported RMS/stats reflect what is actually sent.
    [self applySoftwareGainIfNeeded:samples sampleCount:sampleCount];
    [self calculateAndReportRMS:samples sampleCount:sampleCount];
    [self logAudioStatsIfNeeded:samples sampleCount:sampleCount];

    if (byteCount == 0) {
        return;
    }
    // Copy into an NSData so the bytes outlive this render callback, then
    // accumulate on the serial queue and emit complete 20 ms frames.
    NSData *pcmData = [NSData dataWithBytes:samples length:byteCount];
    dispatch_async(self.audioQueue, ^{
      [self appendToRingBuffer:(const uint8_t *)pcmData.bytes
                     byteCount:pcmData.length];
    });
}
/// Accumulates incoming PCM bytes and emits every complete 20 ms
/// (kAudioFrameBytes) frame via -outputPCMFrame:. Runs on audioQueue.
- (void)appendToRingBuffer:(const uint8_t *)bytes byteCount:(NSUInteger)byteCount {
    uint8_t *storage = (uint8_t *)self.ringBuffer.mutableBytes;
    const NSUInteger capacity = self.ringBuffer.length;
    NSUInteger offset = 0;
    NSUInteger pending = byteCount;

    while (pending > 0) {
        // Copy as much as fits between the write index and the buffer end.
        NSUInteger room = capacity - self.ringBufferWriteIndex;
        NSUInteger chunk = (pending < room) ? pending : room;
        memcpy(storage + self.ringBufferWriteIndex, bytes + offset, chunk);
        self.ringBufferWriteIndex += chunk;
        offset += chunk;
        pending -= chunk;

        // Drain every complete frame currently buffered, compacting the
        // leftover bytes back to the start of the buffer each time.
        while (self.ringBufferWriteIndex >= kAudioFrameBytes) {
            NSData *frame = [NSData dataWithBytes:storage
                                           length:kAudioFrameBytes];
            NSUInteger leftover = self.ringBufferWriteIndex - kAudioFrameBytes;
            if (leftover > 0) {
                memmove(storage, storage + kAudioFrameBytes, leftover);
            }
            self.ringBufferWriteIndex = leftover;
            [self outputPCMFrame:frame];
        }

        // Safety valve: restart from the beginning if the buffer ever fills.
        if (self.ringBufferWriteIndex >= capacity) {
            self.ringBufferWriteIndex = 0;
        }
    }
}
/// Delivers one complete PCM frame to the delegate on the main queue.
/// No-op once capture has been stopped.
- (void)outputPCMFrame:(NSData *)frame {
    if (!self.capturing) {
        return;
    }
    dispatch_async(dispatch_get_main_queue(), ^{
      // Single read of the (weak) delegate for the check and the call.
      id delegate = self.delegate;
      if ([delegate
              respondsToSelector:@selector(audioCaptureManagerDidOutputPCMFrame:)]) {
          [delegate audioCaptureManagerDidOutputPCMFrame:frame];
      }
    });
}
/// Computes the RMS level of the samples, scales it into [0, 1], and reports
/// it to the delegate on the main queue.
- (void)calculateAndReportRMS:(int16_t *)samples
                  sampleCount:(NSUInteger)sampleCount {
    if (sampleCount == 0) {
        return;
    }
    double energy = 0.0;
    for (NSUInteger i = 0; i < sampleCount; i++) {
        // Normalize each Int16 sample to -1.0 .. 1.0 before squaring.
        double normalized = samples[i] / 32768.0;
        energy += normalized * normalized;
    }
    double rms = sqrt(energy / sampleCount);
    // Boost by 2x for display purposes, clamped to 1.0.
    float level = (float)MIN(rms * 2.0, 1.0);
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate
              respondsToSelector:@selector(audioCaptureManagerDidUpdateRMS:)]) {
          [self.delegate audioCaptureManagerDidUpdateRMS:level];
      }
    });
}
/// Multiplies the samples in place by kAudioSoftwareGain, saturating at the
/// Int16 range. Does nothing when the gain is <= 1.0 or there is no data.
- (void)applySoftwareGainIfNeeded:(int16_t *)samples
                      sampleCount:(NSUInteger)sampleCount {
    if (kAudioSoftwareGain <= 1.0f || sampleCount == 0) {
        return;
    }
    for (NSUInteger i = 0; i < sampleCount; i++) {
        float amplified = kAudioSoftwareGain * (float)samples[i];
        // Clamp to the representable Int16 range before narrowing.
        if (amplified > 32767.0f) {
            samples[i] = INT16_MAX;
        } else if (amplified < -32768.0f) {
            samples[i] = INT16_MIN;
        } else {
            samples[i] = (int16_t)amplified;
        }
    }
}
/// Logs peak amplitude and non-zero sample ratio, throttled to once per
/// second. Useful for spotting a silent or clipping input.
- (void)logAudioStatsIfNeeded:(int16_t *)samples
                  sampleCount:(NSUInteger)sampleCount {
    NSTimeInterval now = [[NSDate date] timeIntervalSince1970];
    if (now - self.lastStatsLogTime < 1.0) {
        return;
    }
    self.lastStatsLogTime = now;
    if (sampleCount == 0) {
        return;
    }
    NSUInteger nonZeroCount = 0;
    // FIX: track the peak in a plain int. The old code cast abs(value) back
    // to int16_t, and abs(INT16_MIN) == 32768 does not fit in int16_t, so a
    // full-scale negative sample wrapped to -32768 and corrupted the peak.
    int peak = 0;
    for (NSUInteger i = 0; i < sampleCount; i++) {
        int value = samples[i];
        if (value != 0) {
            nonZeroCount++;
        }
        int magnitude = abs(value);
        if (magnitude > peak) {
            peak = magnitude;
        }
    }
    double nonZeroRatio = (double)nonZeroCount / (double)sampleCount;
    double peakNormalized = (double)peak / 32768.0;
    NSLog(@"[AudioCaptureManager] Stats: peak=%.3f nonZero=%.2f%%",
          peakNormalized, nonZeroRatio * 100.0);
}
@end