From 06a572c08aee611c770301ec2eece6eaf7b9b603 Mon Sep 17 00:00:00 2001
From: CodeST <694468528@qq.com>
Date: Wed, 21 Jan 2026 17:59:12 +0800
Subject: [PATCH] 1

---
 keyBoard/Class/AiTalk/VC/KBAiMainVC.m              |  2 +-
 .../Class/AiTalk/VM/AudioCaptureManager.m          | 55 +++++++++++++++++++
 .../Class/AiTalk/VM/AudioSessionManager.m          | 36 +++++++++++-
 .../AiTalk/VM/VoiceChatWebSocketClient.m           |  2 +
 4 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/keyBoard/Class/AiTalk/VC/KBAiMainVC.m b/keyBoard/Class/AiTalk/VC/KBAiMainVC.m
index 71ddccf..cc3b9df 100644
--- a/keyBoard/Class/AiTalk/VC/KBAiMainVC.m
+++ b/keyBoard/Class/AiTalk/VC/KBAiMainVC.m
@@ -402,7 +402,7 @@
   self.statusLabel.text = @"正在连接...";
   self.recordButton.state = KBAiRecordButtonStateRecording;
 
-  [self.streamingManager startWithToken:token language:@"en" voiceId:nil];
+  [self.streamingManager startWithToken:token language:@"en-US" voiceId:nil];
 }
 
 - (void)recordButtonDidEndPress:(KBAiRecordButton *)button {
diff --git a/keyBoard/Class/AiTalk/VM/AudioCaptureManager.m b/keyBoard/Class/AiTalk/VM/AudioCaptureManager.m
index e0a37ed..64d8b5e 100644
--- a/keyBoard/Class/AiTalk/VM/AudioCaptureManager.m
+++ b/keyBoard/Class/AiTalk/VM/AudioCaptureManager.m
@@ -14,6 +14,7 @@ const int kAudioChannels = 1;
 const NSUInteger kAudioFrameDuration = 20; // ms
 const NSUInteger kAudioFrameSamples = 320; // 16000 * 0.02
 const NSUInteger kAudioFrameBytes = 640;   // 320 * 2 (Int16)
+static const float kAudioSoftwareGain = 2.5f;
 
 @interface AudioCaptureManager ()
 
@@ -24,6 +25,7 @@ const NSUInteger kAudioFrameBytes = 640;   // 320 * 2 (Int16)
 // Ring buffer for accumulating samples to form 20ms frames
 @property(nonatomic, strong) NSMutableData *ringBuffer;
 @property(nonatomic, assign) NSUInteger ringBufferWriteIndex;
+@property(nonatomic, assign) NSTimeInterval lastStatsLogTime;
 
 @end
 
@@ -39,6 +41,7 @@ const NSUInteger kAudioFrameBytes = 640;   // 320 * 2 (Int16)
         initWithLength:kAudioFrameBytes * 4]; // Buffer for multiple frames
     _ringBufferWriteIndex = 0;
     _capturing = NO;
+    _lastStatsLogTime = 0;
   }
   return self;
 }
@@ -180,8 +183,11 @@ const NSUInteger kAudioFrameBytes = 640;   // 320 * 2 (Int16)
   NSUInteger sampleCount = outputBuffer.frameLength;
   NSUInteger byteCount = sampleCount * sizeof(int16_t);
 
+  [self applySoftwareGainIfNeeded:samples sampleCount:sampleCount];
+
   // Compute RMS
   [self calculateAndReportRMS:samples sampleCount:sampleCount];
+  [self logAudioStatsIfNeeded:samples sampleCount:sampleCount];
 
   // Append the data to the ring buffer and emit complete frames
   dispatch_async(self.audioQueue, ^{
@@ -266,4 +272,53 @@ const NSUInteger kAudioFrameBytes = 640;   // 320 * 2 (Int16)
   });
 }
 
+- (void)applySoftwareGainIfNeeded:(int16_t *)samples
+                      sampleCount:(NSUInteger)sampleCount {
+  if (kAudioSoftwareGain <= 1.0f || sampleCount == 0) {
+    return;
+  }
+
+  for (NSUInteger i = 0; i < sampleCount; i++) {
+    float scaled = (float)samples[i] * kAudioSoftwareGain;
+    if (scaled > 32767.0f) {
+      samples[i] = 32767;
+    } else if (scaled < -32768.0f) {
+      samples[i] = -32768;
+    } else {
+      samples[i] = (int16_t)scaled;
+    }
+  }
+}
+
+- (void)logAudioStatsIfNeeded:(int16_t *)samples
+                  sampleCount:(NSUInteger)sampleCount {
+  NSTimeInterval now = [[NSDate date] timeIntervalSince1970];
+  if (now - self.lastStatsLogTime < 1.0) {
+    return;
+  }
+  self.lastStatsLogTime = now;
+
+  if (sampleCount == 0) {
+    return;
+  }
+
+  NSUInteger nonZeroCount = 0;
+  int peak = 0;
+  for (NSUInteger i = 0; i < sampleCount; i++) {
+    int16_t value = samples[i];
+    if (value != 0) {
+      nonZeroCount++;
+    }
+    int absValue = abs(value);
+    if (absValue > peak) {
+      peak = absValue;
+    }
+  }
+
+  double nonZeroRatio = (double)nonZeroCount / (double)sampleCount;
+  double peakNormalized = (double)peak / 32768.0;
+  NSLog(@"[AudioCaptureManager] Stats: peak=%.3f nonZero=%.2f%%",
+        peakNormalized, nonZeroRatio * 100.0);
+}
+
 @end
diff --git a/keyBoard/Class/AiTalk/VM/AudioSessionManager.m b/keyBoard/Class/AiTalk/VM/AudioSessionManager.m
index 6cdbe8b..84e9b1c 100644
--- a/keyBoard/Class/AiTalk/VM/AudioSessionManager.m
+++ b/keyBoard/Class/AiTalk/VM/AudioSessionManager.m
@@ -141,7 +141,7 @@
 
   // Configure for recording + playback
   // Category: PlayAndRecord - supports simultaneous recording and playback
-  // Mode: VoiceChat - optimized for voice-call scenarios
+  // Mode: VoiceChat - lets the system apply AGC/noise suppression and raise the input level
   // Options:
   //   - DefaultToSpeaker: route output to the speaker by default
   //   - AllowBluetooth: allow Bluetooth devices
@@ -162,6 +162,31 @@
     return NO;
   }
 
+  // Prefer the sample rate and buffer duration the backend expects, to reduce resampling error
+  NSError *sampleRateError = nil;
+  [session setPreferredSampleRate:16000 error:&sampleRateError];
+  if (sampleRateError) {
+    NSLog(@"[AudioSessionManager] Failed to set sample rate: %@",
+          sampleRateError.localizedDescription);
+  }
+
+  NSError *bufferError = nil;
+  [session setPreferredIOBufferDuration:0.02 error:&bufferError];
+  if (bufferError) {
+    NSLog(@"[AudioSessionManager] Failed to set IO buffer: %@",
+          bufferError.localizedDescription);
+  }
+
+  if ([session respondsToSelector:@selector(setPreferredInputNumberOfChannels:
+                                                error:)]) {
+    NSError *channelsError = nil;
+    [session setPreferredInputNumberOfChannels:1 error:&channelsError];
+    if (channelsError) {
+      NSLog(@"[AudioSessionManager] Failed to set input channels: %@",
+            channelsError.localizedDescription);
+    }
+  }
+
   return YES;
 }
 
@@ -206,6 +231,15 @@
     return NO;
   }
 
+  if (session.isInputGainSettable) {
+    NSError *gainError = nil;
+    [session setInputGain:1.0 error:&gainError];
+    if (gainError) {
+      NSLog(@"[AudioSessionManager] Failed to set input gain: %@",
+            gainError.localizedDescription);
+    }
+  }
+
   self.isSessionActive = YES;
   return YES;
 }
diff --git a/keyBoard/Class/AiTalk/VM/VoiceChatWebSocketClient.m b/keyBoard/Class/AiTalk/VM/VoiceChatWebSocketClient.m
index 36c2a2a..dcb194a 100644
--- a/keyBoard/Class/AiTalk/VM/VoiceChatWebSocketClient.m
+++ b/keyBoard/Class/AiTalk/VM/VoiceChatWebSocketClient.m
@@ -96,6 +96,8 @@ static NSString *const kVoiceChatWebSocketClientErrorDomain =
     message[@"config"] = config;
   }
 
+  NSLog(@"[VoiceChatWebSocketClient] Sending session_start: %@",
+        message);
   [self sendJSON:message];
 }
 
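
A minimal standalone sketch (not part of the patch) of the gain-and-clamp behaviour introduced in AudioCaptureManager.m, assuming a plain Foundation command-line target. The ApplyGain helper and the sample values are illustrative assumptions; only the 2.5f factor and the int16 clamping mirror the diff above.

    #import <Foundation/Foundation.h>

    // Scales int16 PCM samples by a fixed gain and clamps to the int16 range,
    // mirroring the clipping logic added in applySoftwareGainIfNeeded:.
    static void ApplyGain(int16_t *samples, NSUInteger count, float gain) {
      for (NSUInteger i = 0; i < count; i++) {
        float scaled = (float)samples[i] * gain;
        if (scaled > 32767.0f) {
          samples[i] = 32767;
        } else if (scaled < -32768.0f) {
          samples[i] = -32768;
        } else {
          samples[i] = (int16_t)scaled;
        }
      }
    }

    int main(void) {
      @autoreleasepool {
        // Quiet samples are boosted 2.5x; loud samples saturate instead of wrapping.
        int16_t buf[] = {100, -100, 20000, -20000, 32767, -32768};
        NSUInteger count = sizeof(buf) / sizeof(buf[0]);
        ApplyGain(buf, count, 2.5f);
        for (NSUInteger i = 0; i < count; i++) {
          NSLog(@"%d", buf[i]); // expected: 250, -250, 32767, -32768, 32767, -32768
        }
      }
      return 0;
    }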