//
//  AudioCaptureManager.m
//  keyBoard
//
//  Created by Mac on 2026/1/15.
//

#import "AudioCaptureManager.h"
#import <AVFoundation/AVFoundation.h>

// Audio capture parameter constants
const double kAudioSampleRate = 16000.0;
const int kAudioChannels = 1;
const NSUInteger kAudioFrameDuration = 20;  // ms
const NSUInteger kAudioFrameSamples = 320;  // 16000 * 0.02
const NSUInteger kAudioFrameBytes = 640;    // 320 * 2 (Int16)

static const float kAudioSoftwareGain = 2.5f;

@interface AudioCaptureManager ()

@property(nonatomic, strong) AVAudioEngine *audioEngine;
@property(nonatomic, strong) dispatch_queue_t audioQueue;
@property(nonatomic, assign) BOOL capturing;

// Ring buffer for accumulating samples to form 20ms frames
@property(nonatomic, strong) NSMutableData *ringBuffer;
@property(nonatomic, assign) NSUInteger ringBufferWriteIndex;
@property(nonatomic, assign) NSTimeInterval lastStatsLogTime;

@end

@implementation AudioCaptureManager

- (instancetype)init {
  self = [super init];
  if (self) {
    _audioEngine = [[AVAudioEngine alloc] init];
    _audioQueue = dispatch_queue_create("com.keyboard.aitalk.audiocapture",
                                        DISPATCH_QUEUE_SERIAL);
    _ringBuffer = [[NSMutableData alloc]
        initWithLength:kAudioFrameBytes * 4];  // Buffer for multiple frames
    _ringBufferWriteIndex = 0;
    _capturing = NO;
    _lastStatsLogTime = 0;
  }
  return self;
}

- (void)dealloc {
  [self stopCapture];
}

#pragma mark - Public Methods

- (BOOL)startCapture:(NSError **)error {
  if (self.capturing) {
    return YES;
  }

  AVAudioInputNode *inputNode = self.audioEngine.inputNode;

  // Get the hardware input format
  AVAudioFormat *inputFormat = [inputNode outputFormatForBus:0];

  // Target format: 16 kHz, mono, Int16
  AVAudioFormat *targetFormat =
      [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16
                                       sampleRate:kAudioSampleRate
                                         channels:kAudioChannels
                                      interleaved:YES];

  // Create the format converter
  AVAudioConverter *converter =
      [[AVAudioConverter alloc] initFromFormat:inputFormat
                                      toFormat:targetFormat];
  if (!converter) {
    if (error) {
      *error = [NSError
          errorWithDomain:@"AudioCaptureManager"
                     code:-1
                 userInfo:@{
                   NSLocalizedDescriptionKey : @"Failed to create audio converter"
                 }];
    }
    return NO;
  }

  // Compute a suitable tap buffer size (~20 ms of input samples)
  AVAudioFrameCount bufferSize =
      (AVAudioFrameCount)(inputFormat.sampleRate * 0.02);

  // Install the tap
  __weak typeof(self) weakSelf = self;
  [inputNode installTapOnBus:0
                  bufferSize:bufferSize
                      format:inputFormat
                       block:^(AVAudioPCMBuffer *_Nonnull buffer,
                               AVAudioTime *_Nonnull when) {
                         [weakSelf processAudioBuffer:buffer
                                        withConverter:converter
                                         targetFormat:targetFormat];
                       }];

  // Start the engine
  NSError *startError = nil;
  [self.audioEngine prepare];
  if (![self.audioEngine startAndReturnError:&startError]) {
    [inputNode removeTapOnBus:0];
    if (error) {
      *error = startError;
    }
    NSLog(@"[AudioCaptureManager] Failed to start engine: %@",
          startError.localizedDescription);
    return NO;
  }

  self.capturing = YES;
  self.ringBufferWriteIndex = 0;
  NSLog(@"[AudioCaptureManager] Started capturing at %.0f Hz",
        inputFormat.sampleRate);
  return YES;
}

- (void)stopCapture {
  if (!self.capturing) {
    return;
  }

  [self.audioEngine.inputNode removeTapOnBus:0];
  [self.audioEngine stop];
  self.capturing = NO;
  self.ringBufferWriteIndex = 0;
  NSLog(@"[AudioCaptureManager] Stopped capturing");
}

#pragma mark - Audio Processing

- (void)processAudioBuffer:(AVAudioPCMBuffer *)buffer
             withConverter:(AVAudioConverter *)converter
              targetFormat:(AVAudioFormat *)targetFormat {
  if (!self.capturing) {
    return;
  }

  // Compute the output frame count after resampling
  AVAudioFrameCount outputFrameCapacity =
      (AVAudioFrameCount)(buffer.frameLength *
                          (kAudioSampleRate / buffer.format.sampleRate)) + 1;
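  // Worked example of the capacity math above (illustrative numbers): with
  // 48 kHz hardware input, a ~20 ms tap delivers roughly 960 input frames,
  // and 960 * (16000 / 48000) = 320 output frames; the +1 leaves headroom
  // for resampler rounding.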
  // Create the output buffer
  AVAudioPCMBuffer *outputBuffer =
      [[AVAudioPCMBuffer alloc] initWithPCMFormat:targetFormat
                                    frameCapacity:outputFrameCapacity];

  // Convert the format. The tap buffer must be handed to the converter
  // exactly once per call; returning it again on a later invocation of the
  // input block would duplicate audio, so report NoDataNow afterwards.
  NSError *conversionError = nil;
  __block BOOL inputProvided = NO;
  AVAudioConverterInputBlock inputBlock = ^AVAudioBuffer *_Nullable(
      AVAudioPacketCount inNumberOfPackets,
      AVAudioConverterInputStatus *_Nonnull outStatus) {
    if (inputProvided) {
      *outStatus = AVAudioConverterInputStatus_NoDataNow;
      return nil;
    }
    inputProvided = YES;
    *outStatus = AVAudioConverterInputStatus_HaveData;
    return buffer;
  };

  AVAudioConverterOutputStatus status =
      [converter convertToBuffer:outputBuffer
                           error:&conversionError
              withInputFromBlock:inputBlock];
  if (status == AVAudioConverterOutputStatus_Error) {
    NSLog(@"[AudioCaptureManager] Conversion error: %@",
          conversionError.localizedDescription);
    return;
  }

  // Get the Int16 samples
  if (!outputBuffer.int16ChannelData) {
    NSLog(@"[AudioCaptureManager] Int16 channel data is null");
    return;
  }
  int16_t *samples = (int16_t *)outputBuffer.int16ChannelData[0];
  NSUInteger sampleCount = outputBuffer.frameLength;
  NSUInteger byteCount = sampleCount * sizeof(int16_t);

  [self applySoftwareGainIfNeeded:samples sampleCount:sampleCount];

  // Compute and report the RMS level
  [self calculateAndReportRMS:samples sampleCount:sampleCount];
  [self logAudioStatsIfNeeded:samples sampleCount:sampleCount];

  if (byteCount == 0) {
    return;
  }

  NSData *pcmData = [NSData dataWithBytes:samples length:byteCount];

  // Append the data to the ring buffer and emit any complete frames
  dispatch_async(self.audioQueue, ^{
    [self appendToRingBuffer:(const uint8_t *)pcmData.bytes
                   byteCount:pcmData.length];
  });
}

- (void)appendToRingBuffer:(const uint8_t *)bytes
                 byteCount:(NSUInteger)byteCount {
  // Append the new data to the ring buffer
  uint8_t *ringBufferBytes = (uint8_t *)self.ringBuffer.mutableBytes;
  NSUInteger ringBufferLength = self.ringBuffer.length;

  NSUInteger bytesToCopy = byteCount;
  NSUInteger sourceOffset = 0;

  while (bytesToCopy > 0) {
    NSUInteger spaceAvailable = ringBufferLength - self.ringBufferWriteIndex;
    NSUInteger copySize = MIN(bytesToCopy, spaceAvailable);

    memcpy(ringBufferBytes + self.ringBufferWriteIndex, bytes + sourceOffset,
           copySize);
    self.ringBufferWriteIndex += copySize;
    sourceOffset += copySize;
    bytesToCopy -= copySize;

    // Check whether a complete 20 ms frame is available
    while (self.ringBufferWriteIndex >= kAudioFrameBytes) {
      // Extract one complete frame
      NSData *frame = [NSData dataWithBytes:ringBufferBytes
                                     length:kAudioFrameBytes];

      // Move the remaining data to the front
      NSUInteger remaining = self.ringBufferWriteIndex - kAudioFrameBytes;
      if (remaining > 0) {
        memmove(ringBufferBytes, ringBufferBytes + kAudioFrameBytes,
                remaining);
      }
      self.ringBufferWriteIndex = remaining;

      // Deliver the frame via the delegate callback
      [self outputPCMFrame:frame];
    }

    // If the ring buffer is full, wrap back to the start
    if (self.ringBufferWriteIndex >= ringBufferLength) {
      self.ringBufferWriteIndex = 0;
    }
  }
}

- (void)outputPCMFrame:(NSData *)frame {
  if (!self.capturing) {
    return;
  }
  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate respondsToSelector:@selector
                       (audioCaptureManagerDidOutputPCMFrame:)]) {
      [self.delegate audioCaptureManagerDidOutputPCMFrame:frame];
    }
  });
}

- (void)calculateAndReportRMS:(int16_t *)samples
                  sampleCount:(NSUInteger)sampleCount {
  if (sampleCount == 0) return;

  // Compute the RMS of the frame
  double sum = 0.0;
  for (NSUInteger i = 0; i < sampleCount; i++) {
    double sample = (double)samples[i] / 32768.0;  // Normalize to -1.0 ~ 1.0
    sum += sample * sample;
  }
  double rms = sqrt(sum / sampleCount);
  float rmsFloat = (float)MIN(rms * 2.0, 1.0);  // Scale and clamp to 0.0 ~ 1.0

  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate
            respondsToSelector:@selector(audioCaptureManagerDidUpdateRMS:)]) {
      [self.delegate audioCaptureManagerDidUpdateRMS:rmsFloat];
    }
  });
}
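// Note on the * 2.0 scale in calculateAndReportRMS above: a full-scale sine
// wave has an RMS of ~0.707, so doubling lets moderately loud input reach the
// top of the 0.0 ~ 1.0 meter range, and MIN(..., 1.0) clamps anything louder.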
- (void)applySoftwareGainIfNeeded:(int16_t *)samples
                      sampleCount:(NSUInteger)sampleCount {
  if (kAudioSoftwareGain <= 1.0f || sampleCount == 0) {
    return;
  }
  for (NSUInteger i = 0; i < sampleCount; i++) {
    // Scale in float and clamp to the Int16 range to avoid wrap-around
    float scaled = (float)samples[i] * kAudioSoftwareGain;
    if (scaled > 32767.0f) {
      samples[i] = 32767;
    } else if (scaled < -32768.0f) {
      samples[i] = -32768;
    } else {
      samples[i] = (int16_t)scaled;
    }
  }
}

- (void)logAudioStatsIfNeeded:(int16_t *)samples
                  sampleCount:(NSUInteger)sampleCount {
  NSTimeInterval now = [[NSDate date] timeIntervalSince1970];
  if (now - self.lastStatsLogTime < 1.0) {
    return;
  }
  self.lastStatsLogTime = now;

  if (sampleCount == 0) {
    return;
  }

  NSUInteger nonZeroCount = 0;
  int peak = 0;  // int, not int16_t: abs(-32768) would overflow back to -32768
  for (NSUInteger i = 0; i < sampleCount; i++) {
    int16_t value = samples[i];
    if (value != 0) {
      nonZeroCount++;
    }
    int absValue = abs((int)value);
    if (absValue > peak) {
      peak = absValue;
    }
  }

  double nonZeroRatio = (double)nonZeroCount / (double)sampleCount;
  double peakNormalized = (double)peak / 32768.0;
  NSLog(@"[AudioCaptureManager] Stats: peak=%.3f nonZero=%.2f%%",
        peakNormalized, nonZeroRatio * 100.0);
}

@end
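// Usage sketch (illustrative; assumes the host app has requested microphone
// permission and activated an AVAudioSession with a record-capable category
// before starting capture):
//
//   AudioCaptureManager *capture = [[AudioCaptureManager alloc] init];
//   capture.delegate = self;  // conforms to the delegate protocol declared
//                             // in AudioCaptureManager.h
//   NSError *error = nil;
//   if (![capture startCapture:&error]) {
//     NSLog(@"capture failed: %@", error.localizedDescription);
//   }
//   // Each audioCaptureManagerDidOutputPCMFrame: callback delivers one
//   // 640-byte, 20 ms mono Int16 PCM frame at 16 kHz.
//   [capture stopCapture];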