1
This commit is contained in:
376
keyBoard/Class/AiTalk/VM/VoiceChatStreamingManager.m
Normal file
376
keyBoard/Class/AiTalk/VM/VoiceChatStreamingManager.m
Normal file
@@ -0,0 +1,376 @@
|
||||
//
|
||||
// VoiceChatStreamingManager.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/21.
|
||||
//
|
||||
|
||||
#import "VoiceChatStreamingManager.h"
|
||||
#import "AudioCaptureManager.h"
|
||||
#import "AudioSessionManager.h"
|
||||
#import "VoiceChatWebSocketClient.h"
|
||||
|
||||
static NSString *const kVoiceChatStreamingManagerErrorDomain =
|
||||
@"VoiceChatStreamingManager";
|
||||
|
||||
@interface VoiceChatStreamingManager () <AudioSessionManagerDelegate,
|
||||
AudioCaptureManagerDelegate,
|
||||
VoiceChatWebSocketClientDelegate>
|
||||
|
||||
@property(nonatomic, strong) AudioSessionManager *audioSession;
|
||||
@property(nonatomic, strong) AudioCaptureManager *audioCapture;
|
||||
@property(nonatomic, strong) VoiceChatWebSocketClient *webSocketClient;
|
||||
@property(nonatomic, strong) dispatch_queue_t stateQueue;
|
||||
|
||||
@property(nonatomic, assign) BOOL streaming;
|
||||
@property(nonatomic, copy) NSString *sessionId;
|
||||
|
||||
@property(nonatomic, copy) NSString *pendingToken;
|
||||
@property(nonatomic, copy) NSString *pendingLanguage;
|
||||
@property(nonatomic, copy) NSString *pendingVoiceId;
|
||||
|
||||
@end
|
||||
|
||||
@implementation VoiceChatStreamingManager
|
||||
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self) {
|
||||
_stateQueue = dispatch_queue_create("com.keyboard.aitalk.voicechat.manager",
|
||||
DISPATCH_QUEUE_SERIAL);
|
||||
|
||||
_audioSession = [AudioSessionManager sharedManager];
|
||||
_audioSession.delegate = self;
|
||||
|
||||
_audioCapture = [[AudioCaptureManager alloc] init];
|
||||
_audioCapture.delegate = self;
|
||||
|
||||
_webSocketClient = [[VoiceChatWebSocketClient alloc] init];
|
||||
_webSocketClient.delegate = self;
|
||||
|
||||
_serverURL = @"ws://192.168.2.21:7529/api/ws/chat?token=";
|
||||
_webSocketClient.serverURL = _serverURL;
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)dealloc {
|
||||
[self disconnect];
|
||||
}
|
||||
|
||||
- (void)setServerURL:(NSString *)serverURL {
|
||||
_serverURL = [serverURL copy];
|
||||
self.webSocketClient.serverURL = _serverURL;
|
||||
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
- (void)startWithToken:(NSString *)token
|
||||
language:(nullable NSString *)language
|
||||
voiceId:(nullable NSString *)voiceId {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
self.pendingToken = token ?: @"";
|
||||
self.pendingLanguage = language ?: @"";
|
||||
self.pendingVoiceId = voiceId ?: @"";
|
||||
[self.webSocketClient disableAudioSending];
|
||||
[self startInternal];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)stopAndFinalize {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.streaming) {
|
||||
[self.audioCapture stopCapture];
|
||||
self.streaming = NO;
|
||||
}
|
||||
[self.webSocketClient disableAudioSending];
|
||||
[self.webSocketClient endAudio];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)cancel {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.streaming) {
|
||||
[self.audioCapture stopCapture];
|
||||
self.streaming = NO;
|
||||
}
|
||||
[self.webSocketClient disableAudioSending];
|
||||
[self.webSocketClient cancel];
|
||||
self.sessionId = nil;
|
||||
});
|
||||
}
|
||||
|
||||
- (void)disconnect {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.streaming) {
|
||||
[self.audioCapture stopCapture];
|
||||
self.streaming = NO;
|
||||
}
|
||||
[self.webSocketClient disableAudioSending];
|
||||
[self.webSocketClient disconnect];
|
||||
[self.audioSession deactivateSession];
|
||||
self.sessionId = nil;
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - Private Methods
|
||||
|
||||
- (void)startInternal {
|
||||
if (self.pendingToken.length == 0) {
|
||||
NSLog(@"[VoiceChatStreamingManager] Start failed: token is empty");
|
||||
[self reportErrorWithMessage:@"Token is required"];
|
||||
return;
|
||||
}
|
||||
|
||||
if (![self.audioSession hasMicrophonePermission]) {
|
||||
__weak typeof(self) weakSelf = self;
|
||||
[self.audioSession requestMicrophonePermission:^(BOOL granted) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf) {
|
||||
return;
|
||||
}
|
||||
if (!granted) {
|
||||
[strongSelf reportErrorWithMessage:@"Microphone permission denied"];
|
||||
return;
|
||||
}
|
||||
dispatch_async(strongSelf.stateQueue, ^{
|
||||
[strongSelf startInternal];
|
||||
});
|
||||
}];
|
||||
return;
|
||||
}
|
||||
|
||||
NSError *error = nil;
|
||||
if (![self.audioSession configureForConversation:&error]) {
|
||||
[self reportError:error];
|
||||
return;
|
||||
}
|
||||
|
||||
if (![self.audioSession activateSession:&error]) {
|
||||
[self reportError:error];
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.serverURL.length == 0) {
|
||||
NSLog(@"[VoiceChatStreamingManager] Start failed: server URL is empty");
|
||||
[self reportErrorWithMessage:@"Server URL is required"];
|
||||
return;
|
||||
}
|
||||
|
||||
NSLog(@"[VoiceChatStreamingManager] Start streaming, server: %@",
|
||||
self.serverURL);
|
||||
self.webSocketClient.serverURL = self.serverURL;
|
||||
[self.webSocketClient connectWithToken:self.pendingToken];
|
||||
}
|
||||
|
||||
- (void)reportError:(NSError *)error {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatStreamingManagerDidFail:)]) {
|
||||
[self.delegate voiceChatStreamingManagerDidFail:error];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)reportErrorWithMessage:(NSString *)message {
|
||||
NSError *error = [NSError errorWithDomain:kVoiceChatStreamingManagerErrorDomain
|
||||
code:-1
|
||||
userInfo:@{
|
||||
NSLocalizedDescriptionKey : message ?: @""
|
||||
}];
|
||||
[self reportError:error];
|
||||
}
|
||||
|
||||
#pragma mark - AudioCaptureManagerDelegate
|
||||
|
||||
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame {
|
||||
if (!self.streaming) {
|
||||
return;
|
||||
}
|
||||
[self.webSocketClient sendAudioPCMFrame:pcmFrame];
|
||||
}
|
||||
|
||||
- (void)audioCaptureManagerDidUpdateRMS:(float)rms {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatStreamingManagerDidUpdateRMS:)]) {
|
||||
[self.delegate voiceChatStreamingManagerDidUpdateRMS:rms];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - AudioSessionManagerDelegate
|
||||
|
||||
- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type {
|
||||
if (type == KBAudioSessionInterruptionTypeBegan) {
|
||||
[self cancel];
|
||||
}
|
||||
}
|
||||
|
||||
- (void)audioSessionManagerMicrophonePermissionDenied {
|
||||
[self reportErrorWithMessage:@"Microphone permission denied"];
|
||||
}
|
||||
|
||||
#pragma mark - VoiceChatWebSocketClientDelegate
|
||||
|
||||
- (void)voiceChatClientDidConnect {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
[self.webSocketClient startSessionWithLanguage:self.pendingLanguage
|
||||
voiceId:self.pendingVoiceId];
|
||||
});
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatStreamingManagerDidConnect)]) {
|
||||
[self.delegate voiceChatStreamingManagerDidConnect];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)voiceChatClientDidDisconnect:(NSError *_Nullable)error {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.streaming) {
|
||||
[self.audioCapture stopCapture];
|
||||
self.streaming = NO;
|
||||
}
|
||||
[self.audioSession deactivateSession];
|
||||
self.sessionId = nil;
|
||||
});
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatStreamingManagerDidDisconnect:)]) {
|
||||
[self.delegate voiceChatStreamingManagerDidDisconnect:error];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)voiceChatClientDidStartSession:(NSString *)sessionId {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
self.sessionId = sessionId;
|
||||
|
||||
NSError *error = nil;
|
||||
if (![self.audioCapture startCapture:&error]) {
|
||||
[self reportError:error];
|
||||
[self.webSocketClient cancel];
|
||||
return;
|
||||
}
|
||||
|
||||
self.streaming = YES;
|
||||
[self.webSocketClient enableAudioSending];
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatStreamingManagerDidStartSession:)]) {
|
||||
[self.delegate voiceChatStreamingManagerDidStartSession:sessionId];
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
- (void)voiceChatClientDidStartTurn:(NSInteger)turnIndex {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatStreamingManagerDidStartTurn:)]) {
|
||||
[self.delegate voiceChatStreamingManagerDidStartTurn:turnIndex];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:(NSString *)text
|
||||
confidence:(double)confidence {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector
|
||||
(voiceChatStreamingManagerDidReceiveEagerEndOfTurnWithTranscript:
|
||||
confidence:)]) {
|
||||
[self.delegate
|
||||
voiceChatStreamingManagerDidReceiveEagerEndOfTurnWithTranscript:text
|
||||
confidence:confidence];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)voiceChatClientDidResumeTurn {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatStreamingManagerDidResumeTurn)]) {
|
||||
[self.delegate voiceChatStreamingManagerDidResumeTurn];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)voiceChatClientDidReceiveInterimTranscript:(NSString *)text {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatStreamingManagerDidReceiveInterimTranscript:)]) {
|
||||
[self.delegate voiceChatStreamingManagerDidReceiveInterimTranscript:text];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)voiceChatClientDidReceiveFinalTranscript:(NSString *)text {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatStreamingManagerDidReceiveFinalTranscript:)]) {
|
||||
[self.delegate voiceChatStreamingManagerDidReceiveFinalTranscript:text];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)voiceChatClientDidReceiveLLMStart {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatStreamingManagerDidReceiveLLMStart)]) {
|
||||
[self.delegate voiceChatStreamingManagerDidReceiveLLMStart];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)voiceChatClientDidReceiveLLMToken:(NSString *)token {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatStreamingManagerDidReceiveLLMToken:)]) {
|
||||
[self.delegate voiceChatStreamingManagerDidReceiveLLMToken:token];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)voiceChatClientDidReceiveAudioChunk:(NSData *)audioData {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatStreamingManagerDidReceiveAudioChunk:)]) {
|
||||
[self.delegate voiceChatStreamingManagerDidReceiveAudioChunk:audioData];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)voiceChatClientDidCompleteWithTranscript:(NSString *)transcript
|
||||
aiResponse:(NSString *)aiResponse {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatStreamingManagerDidCompleteWithTranscript:
|
||||
aiResponse:)]) {
|
||||
[self.delegate voiceChatStreamingManagerDidCompleteWithTranscript:transcript
|
||||
aiResponse:aiResponse];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)voiceChatClientDidReceiveErrorCode:(NSString *)code
|
||||
message:(NSString *)message {
|
||||
NSString *desc = message.length > 0 ? message : @"Server error";
|
||||
NSError *error = [NSError errorWithDomain:kVoiceChatStreamingManagerErrorDomain
|
||||
code:-2
|
||||
userInfo:@{
|
||||
NSLocalizedDescriptionKey : desc,
|
||||
@"code" : code ?: @""
|
||||
}];
|
||||
[self reportError:error];
|
||||
}
|
||||
|
||||
- (void)voiceChatClientDidFail:(NSError *)error {
|
||||
[self reportError:error];
|
||||
}
|
||||
|
||||
@end
|
||||
Reference in New Issue
Block a user