This commit is contained in:
2026-01-21 17:25:38 +08:00
parent d1d47336c2
commit 36c0b0b210
10 changed files with 1877 additions and 10 deletions

View File

@@ -90,6 +90,11 @@
// TTS Client
self.ttsClient = [[TTSServiceClient alloc] init];
self.ttsClient.delegate = self;
// ElevenLabs
self.ttsClient.voiceId = @"JBFqnCBsd6RMkjVDRZzb"; // George
self.ttsClient.languageCode = @"zh"; //
self.ttsClient.expectedPayloadType =
TTSPayloadTypeURL; // 使 URL
// Playback Pipeline
self.playbackPipeline = [[TTSPlaybackPipeline alloc] init];

View File

@@ -41,6 +41,12 @@ typedef NS_ENUM(NSInteger, TTSPayloadType) {
/// TTS 服务器 URL
@property(nonatomic, copy) NSString *serverURL;
/// 语音 ID (ElevenLabs voice ID)
@property(nonatomic, copy) NSString *voiceId;
/// 语言代码(如 "zh", "en")
@property(nonatomic, copy) NSString *languageCode;
/// 当前期望的返回类型(由服务端配置决定)
@property(nonatomic, assign) TTSPayloadType expectedPayloadType;

View File

@@ -94,6 +94,8 @@
NSDictionary *body = @{
@"text" : text,
@"segmentId" : segmentId,
@"voiceId" : self.voiceId ?: @"JBFqnCBsd6RMkjVDRZzb",
@"languageCode" : self.languageCode ?: @"zh",
@"format" : @"mp3" // m4a
};
@@ -184,6 +186,8 @@
NSDictionary *requestDict = @{
@"text" : text,
@"segmentId" : segmentId,
@"voiceId" : self.voiceId ?: @"JBFqnCBsd6RMkjVDRZzb",
@"languageCode" : self.languageCode ?: @"zh",
@"format" : [self formatStringForPayloadType:self.expectedPayloadType]
};

View File

@@ -0,0 +1,53 @@
//
// VoiceChatStreamingManager.h
// keyBoard
//
// Created by Mac on 2026/1/21.
//
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Events emitted by VoiceChatStreamingManager. Every method is optional.
/// The manager invokes these callbacks from blocks dispatched to the main queue.
@protocol VoiceChatStreamingManagerDelegate <NSObject>
@optional
/// The underlying WebSocket client reported that the connection is open.
- (void)voiceChatStreamingManagerDidConnect;
/// The connection ended; `error` is nil when no error was reported.
- (void)voiceChatStreamingManagerDidDisconnect:(NSError *_Nullable)error;
/// The session started and microphone capture was started successfully.
- (void)voiceChatStreamingManagerDidStartSession:(NSString *)sessionId;
/// A new turn with the given index started.
- (void)voiceChatStreamingManagerDidStartTurn:(NSInteger)turnIndex;
/// An eager end-of-turn was received together with its transcript and confidence.
- (void)voiceChatStreamingManagerDidReceiveEagerEndOfTurnWithTranscript:(NSString *)text
confidence:(double)confidence;
/// The current turn was resumed.
- (void)voiceChatStreamingManagerDidResumeTurn;
/// Forwards the RMS value reported by the audio capture manager.
- (void)voiceChatStreamingManagerDidUpdateRMS:(float)rms;
/// An interim transcript was received.
- (void)voiceChatStreamingManagerDidReceiveInterimTranscript:(NSString *)text;
/// A final transcript was received.
- (void)voiceChatStreamingManagerDidReceiveFinalTranscript:(NSString *)text;
/// The LLM response started.
- (void)voiceChatStreamingManagerDidReceiveLLMStart;
/// One LLM token was received.
- (void)voiceChatStreamingManagerDidReceiveLLMToken:(NSString *)token;
/// A binary audio chunk was received from the server.
- (void)voiceChatStreamingManagerDidReceiveAudioChunk:(NSData *)audioData;
/// The exchange completed with the given transcript and AI response text.
- (void)voiceChatStreamingManagerDidCompleteWithTranscript:(NSString *)transcript
aiResponse:(NSString *)aiResponse;
/// A local failure or a server-reported error occurred.
- (void)voiceChatStreamingManagerDidFail:(NSError *)error;
@end
/// Manager for realtime recording and streaming.
/// Manager for realtime recording and streaming. Ties together the audio
/// session, microphone capture and the voice-chat WebSocket client.
@interface VoiceChatStreamingManager : NSObject

/// Receiver of streaming events. Held weakly, so it is explicitly nullable.
@property(nonatomic, weak, nullable) id<VoiceChatStreamingManagerDelegate> delegate;

/// Base WebSocket URL, e.g. wss://api.yourdomain.com/api/ws/chat
@property(nonatomic, copy) NSString *serverURL;

/// YES while microphone capture is running for a started session.
@property(nonatomic, assign, readonly, getter=isStreaming) BOOL streaming;

/// Identifier of the current session, or nil when no session is active.
@property(nonatomic, copy, readonly, nullable) NSString *sessionId;

/// Asynchronously starts a streaming session.
/// @param token    Authentication token; an empty token is reported as an error.
/// @param language Optional language passed to the server when the session starts.
/// @param voiceId  Optional voice identifier passed to the server when the session starts.
- (void)startWithToken:(NSString *)token
              language:(nullable NSString *)language
               voiceId:(nullable NSString *)voiceId;

/// Stops microphone capture and tells the server that the audio has ended.
- (void)stopAndFinalize;

/// Stops microphone capture, sends a cancel message and clears the session id.
- (void)cancel;

/// Stops capture, closes the WebSocket, deactivates the audio session and
/// clears the session id.
- (void)disconnect;

@end
NS_ASSUME_NONNULL_END

View File

@@ -0,0 +1,376 @@
//
// VoiceChatStreamingManager.m
// keyBoard
//
// Created by Mac on 2026/1/21.
//
#import "VoiceChatStreamingManager.h"
#import "AudioCaptureManager.h"
#import "AudioSessionManager.h"
#import "VoiceChatWebSocketClient.h"
/// Error domain used for NSErrors created by this manager.
static NSString *const kVoiceChatStreamingManagerErrorDomain =
@"VoiceChatStreamingManager";
/// Private state and protocol conformances.
@interface VoiceChatStreamingManager () <AudioSessionManagerDelegate,
AudioCaptureManagerDelegate,
VoiceChatWebSocketClientDelegate>
/// Shared audio session manager (assigned from +sharedManager in -init).
@property(nonatomic, strong) AudioSessionManager *audioSession;
/// Microphone capture; its PCM frames are forwarded to the WebSocket client.
@property(nonatomic, strong) AudioCaptureManager *audioCapture;
/// WebSocket client used for the realtime conversation.
@property(nonatomic, strong) VoiceChatWebSocketClient *webSocketClient;
/// Serial queue on which the public methods perform their state changes.
@property(nonatomic, strong) dispatch_queue_t stateQueue;
/// Read-write redeclaration of the public read-only property.
@property(nonatomic, assign) BOOL streaming;
/// Read-write redeclaration of the public read-only property.
@property(nonatomic, copy) NSString *sessionId;
/// Token supplied to the most recent start request.
@property(nonatomic, copy) NSString *pendingToken;
/// Language supplied to the most recent start request (empty when none was given).
@property(nonatomic, copy) NSString *pendingLanguage;
/// Voice id supplied to the most recent start request (empty when none was given).
@property(nonatomic, copy) NSString *pendingVoiceId;
@end
@implementation VoiceChatStreamingManager
/// Creates the manager, wires itself up as delegate of the audio session,
/// the capture manager and the WebSocket client, and applies the default
/// server URL.
- (instancetype)init {
  self = [super init];
  if (!self) {
    return nil;
  }

  _stateQueue = dispatch_queue_create("com.keyboard.aitalk.voicechat.manager",
                                      DISPATCH_QUEUE_SERIAL);

  AudioSessionManager *sharedSession = [AudioSessionManager sharedManager];
  sharedSession.delegate = self;
  _audioSession = sharedSession;

  AudioCaptureManager *capture = [[AudioCaptureManager alloc] init];
  capture.delegate = self;
  _audioCapture = capture;

  VoiceChatWebSocketClient *client = [[VoiceChatWebSocketClient alloc] init];
  client.delegate = self;
  _webSocketClient = client;

  // NOTE(review): this default is a cleartext ws:// URL on a private LAN
  // address; callers presumably override it via serverURL before shipping.
  _serverURL = @"ws://192.168.2.21:7529/api/ws/chat?token=";
  client.serverURL = _serverURL;

  return self;
}
/// Tears down capture, socket and audio session synchronously.
///
/// -disconnect must not be called here: it dispatches a block that captures
/// `self`, and an object that is already deallocating cannot be kept alive by
/// that capture, so the block would later run against freed memory.
- (void)dealloc {
  if (_streaming) {
    [_audioCapture stopCapture];
  }
  // The client is a separate object; its own asynchronous work retains the
  // client, not this manager, so these calls are safe during deallocation.
  [_webSocketClient disableAudioSending];
  [_webSocketClient disconnect];
  [_audioSession deactivateSession];
}
/// Stores a copy of the URL and propagates it to the WebSocket client.
- (void)setServerURL:(NSString *)serverURL {
  NSString *urlCopy = [serverURL copy];
  _serverURL = urlCopy;
  self.webSocketClient.serverURL = urlCopy;
}
#pragma mark - Public Methods
/// Records the start parameters on the state queue (nil values become empty
/// strings), disables audio sending, and runs the start sequence.
- (void)startWithToken:(NSString *)token
              language:(nullable NSString *)language
               voiceId:(nullable NSString *)voiceId {
  dispatch_queue_t queue = self.stateQueue;
  dispatch_async(queue, ^{
    self.pendingToken = (token != nil) ? token : @"";
    self.pendingLanguage = (language != nil) ? language : @"";
    self.pendingVoiceId = (voiceId != nil) ? voiceId : @"";

    // No audio may be sent until the server has confirmed the session.
    [self.webSocketClient disableAudioSending];
    [self startInternal];
  });
}
/// Stops capture if it is running, then disables audio sending and asks the
/// WebSocket client to end the audio stream.
- (void)stopAndFinalize {
  dispatch_async(self.stateQueue, ^{
    BOOL wasStreaming = self.streaming;
    if (wasStreaming) {
      [self.audioCapture stopCapture];
      self.streaming = NO;
    }

    VoiceChatWebSocketClient *client = self.webSocketClient;
    [client disableAudioSending];
    [client endAudio];
  });
}
/// Stops capture if it is running, sends a cancel request through the
/// WebSocket client and forgets the current session id.
- (void)cancel {
  dispatch_async(self.stateQueue, ^{
    BOOL wasStreaming = self.streaming;
    if (wasStreaming) {
      [self.audioCapture stopCapture];
      self.streaming = NO;
    }

    VoiceChatWebSocketClient *client = self.webSocketClient;
    [client disableAudioSending];
    [client cancel];

    self.sessionId = nil;
  });
}
/// Stops capture if it is running, closes the WebSocket, deactivates the
/// audio session and forgets the current session id.
- (void)disconnect {
  dispatch_async(self.stateQueue, ^{
    BOOL wasStreaming = self.streaming;
    if (wasStreaming) {
      [self.audioCapture stopCapture];
      self.streaming = NO;
    }

    VoiceChatWebSocketClient *client = self.webSocketClient;
    [client disableAudioSending];
    [client disconnect];

    [self.audioSession deactivateSession];
    self.sessionId = nil;
  });
}
#pragma mark - Private Methods
/// Runs the start sequence: validates the token and server URL, obtains
/// microphone permission, configures and activates the audio session, and
/// finally opens the WebSocket connection.
///
/// Both cheap validations run before the audio session is touched, so a
/// missing server URL no longer leaves an activated audio session behind.
/// When permission has to be requested, the method re-enters itself on the
/// state queue once permission is granted.
- (void)startInternal {
  if (self.pendingToken.length == 0) {
    NSLog(@"[VoiceChatStreamingManager] Start failed: token is empty");
    [self reportErrorWithMessage:@"Token is required"];
    return;
  }

  if (self.serverURL.length == 0) {
    NSLog(@"[VoiceChatStreamingManager] Start failed: server URL is empty");
    [self reportErrorWithMessage:@"Server URL is required"];
    return;
  }

  if (![self.audioSession hasMicrophonePermission]) {
    // The audio session manager stores this block; use weak self so the
    // pending request does not keep the manager alive.
    __weak typeof(self) weakSelf = self;
    [self.audioSession requestMicrophonePermission:^(BOOL granted) {
      __strong typeof(weakSelf) strongSelf = weakSelf;
      if (!strongSelf) {
        return;
      }
      if (!granted) {
        [strongSelf reportErrorWithMessage:@"Microphone permission denied"];
        return;
      }
      dispatch_async(strongSelf.stateQueue, ^{
        [strongSelf startInternal];
      });
    }];
    return;
  }

  NSError *error = nil;
  if (![self.audioSession configureForConversation:&error]) {
    [self reportError:error];
    return;
  }
  if (![self.audioSession activateSession:&error]) {
    [self reportError:error];
    return;
  }

  NSLog(@"[VoiceChatStreamingManager] Start streaming, server: %@",
        self.serverURL);
  self.webSocketClient.serverURL = self.serverURL;
  [self.webSocketClient connectWithToken:self.pendingToken];
}
/// Delivers `error` to the delegate's failure callback on the main queue.
- (void)reportError:(NSError *)error {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector(voiceChatStreamingManagerDidFail:)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidFail:error];
  });
}
/// Wraps `message` in an NSError (manager domain, code -1) and reports it.
/// A nil message is replaced by an empty description.
- (void)reportErrorWithMessage:(NSString *)message {
  NSDictionary *info = @{NSLocalizedDescriptionKey : message ?: @""};
  [self reportError:[NSError errorWithDomain:kVoiceChatStreamingManagerErrorDomain
                                        code:-1
                                    userInfo:info]];
}
#pragma mark - AudioCaptureManagerDelegate
/// Forwards a captured PCM frame to the WebSocket client while streaming;
/// frames arriving outside of streaming are dropped.
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame {
  if (self.streaming) {
    [self.webSocketClient sendAudioPCMFrame:pcmFrame];
  }
}
/// Relays the capture manager's RMS value to the delegate on the main queue.
- (void)audioCaptureManagerDidUpdateRMS:(float)rms {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector(voiceChatStreamingManagerDidUpdateRMS:)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidUpdateRMS:rms];
  });
}
#pragma mark - AudioSessionManagerDelegate
/// Cancels the current exchange when an audio interruption begins; every
/// other interruption type is ignored.
- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type {
  if (type != KBAudioSessionInterruptionTypeBegan) {
    return;
  }
  [self cancel];
}
/// Reports a denied microphone permission to the delegate as an error.
- (void)audioSessionManagerMicrophonePermissionDenied {
  NSString *reason = @"Microphone permission denied";
  [self reportErrorWithMessage:reason];
}
#pragma mark - VoiceChatWebSocketClientDelegate
/// Once the socket is open, starts the session with the pending language and
/// voice id (on the state queue) and notifies the delegate (on the main queue).
- (void)voiceChatClientDidConnect {
  dispatch_async(self.stateQueue, ^{
    NSString *language = self.pendingLanguage;
    NSString *voiceId = self.pendingVoiceId;
    [self.webSocketClient startSessionWithLanguage:language voiceId:voiceId];
  });

  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector(voiceChatStreamingManagerDidConnect)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidConnect];
  });
}
/// Cleans up after the socket closed: stops capture if needed, deactivates
/// the audio session, clears the session id, then notifies the delegate.
- (void)voiceChatClientDidDisconnect:(NSError *_Nullable)error {
  dispatch_async(self.stateQueue, ^{
    BOOL wasStreaming = self.streaming;
    if (wasStreaming) {
      [self.audioCapture stopCapture];
      self.streaming = NO;
    }
    [self.audioSession deactivateSession];
    self.sessionId = nil;
  });

  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector(voiceChatStreamingManagerDidDisconnect:)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidDisconnect:error];
  });
}
/// Handles the server's session confirmation: stores the session id, starts
/// microphone capture, enables audio sending and notifies the delegate.
///
/// If capture cannot be started, the session id is cleared again (matching
/// what -cancel does), the failure is reported and the exchange is cancelled.
/// A fallback error is created when the capture manager returns NO without
/// filling in an NSError, so the delegate never receives a nil error.
- (void)voiceChatClientDidStartSession:(NSString *)sessionId {
  dispatch_async(self.stateQueue, ^{
    self.sessionId = sessionId;

    NSError *captureError = nil;
    if (![self.audioCapture startCapture:&captureError]) {
      self.sessionId = nil;
      if (captureError) {
        [self reportError:captureError];
      } else {
        [self reportErrorWithMessage:@"Failed to start audio capture"];
      }
      [self.webSocketClient cancel];
      return;
    }

    self.streaming = YES;
    [self.webSocketClient enableAudioSending];

    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatStreamingManagerDidStartSession:)]) {
        [self.delegate voiceChatStreamingManagerDidStartSession:sessionId];
      }
    });
  });
}
/// Relays the turn-start event to the delegate on the main queue.
- (void)voiceChatClientDidStartTurn:(NSInteger)turnIndex {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector(voiceChatStreamingManagerDidStartTurn:)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidStartTurn:turnIndex];
  });
}
/// Relays the eager end-of-turn event to the delegate on the main queue.
- (void)voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:(NSString *)text
                                                   confidence:(double)confidence {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    SEL callback = @selector
        (voiceChatStreamingManagerDidReceiveEagerEndOfTurnWithTranscript:confidence:);
    if (![receiver respondsToSelector:callback]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidReceiveEagerEndOfTurnWithTranscript:text
                                                                  confidence:confidence];
  });
}
/// Relays the turn-resumed event to the delegate on the main queue.
- (void)voiceChatClientDidResumeTurn {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector(voiceChatStreamingManagerDidResumeTurn)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidResumeTurn];
  });
}
/// Relays an interim transcript to the delegate on the main queue.
- (void)voiceChatClientDidReceiveInterimTranscript:(NSString *)text {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    SEL callback = @selector(voiceChatStreamingManagerDidReceiveInterimTranscript:);
    if (![receiver respondsToSelector:callback]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidReceiveInterimTranscript:text];
  });
}
/// Relays a final transcript to the delegate on the main queue.
- (void)voiceChatClientDidReceiveFinalTranscript:(NSString *)text {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    SEL callback = @selector(voiceChatStreamingManagerDidReceiveFinalTranscript:);
    if (![receiver respondsToSelector:callback]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidReceiveFinalTranscript:text];
  });
}
/// Relays the LLM-start event to the delegate on the main queue.
- (void)voiceChatClientDidReceiveLLMStart {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector(voiceChatStreamingManagerDidReceiveLLMStart)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidReceiveLLMStart];
  });
}
/// Relays one LLM token to the delegate on the main queue.
- (void)voiceChatClientDidReceiveLLMToken:(NSString *)token {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector(voiceChatStreamingManagerDidReceiveLLMToken:)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidReceiveLLMToken:token];
  });
}
/// Relays a received audio chunk to the delegate on the main queue.
- (void)voiceChatClientDidReceiveAudioChunk:(NSData *)audioData {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector(voiceChatStreamingManagerDidReceiveAudioChunk:)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidReceiveAudioChunk:audioData];
  });
}
/// Relays the completion event (transcript plus AI response) to the delegate
/// on the main queue.
- (void)voiceChatClientDidCompleteWithTranscript:(NSString *)transcript
                                      aiResponse:(NSString *)aiResponse {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    SEL callback = @selector(voiceChatStreamingManagerDidCompleteWithTranscript:aiResponse:);
    if (![receiver respondsToSelector:callback]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidCompleteWithTranscript:transcript
                                                      aiResponse:aiResponse];
  });
}
/// Converts a server-reported error into an NSError (manager domain, code -2)
/// and reports it. An empty message falls back to "Server error"; the server
/// code is carried in userInfo under the "code" key.
- (void)voiceChatClientDidReceiveErrorCode:(NSString *)code
                                   message:(NSString *)message {
  NSString *description = @"Server error";
  if (message.length > 0) {
    description = message;
  }
  NSDictionary *info = @{
    NSLocalizedDescriptionKey : description,
    @"code" : code ?: @""
  };
  [self reportError:[NSError errorWithDomain:kVoiceChatStreamingManagerErrorDomain
                                        code:-2
                                    userInfo:info]];
}
/// Passes a WebSocket client failure straight through to the delegate.
- (void)voiceChatClientDidFail:(NSError *)error {
  NSError *clientError = error;
  [self reportError:clientError];
}
@end

View File

@@ -0,0 +1,57 @@
//
// VoiceChatWebSocketClient.h
// keyBoard
//
// Created by Mac on 2026/1/21.
//
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Events emitted by VoiceChatWebSocketClient. Every method is optional.
/// The client invokes these callbacks from blocks dispatched to the main queue.
@protocol VoiceChatWebSocketClientDelegate <NSObject>
@optional
/// The WebSocket connection was opened.
- (void)voiceChatClientDidConnect;
/// The connection ended; `error` is nil when no error was reported.
- (void)voiceChatClientDidDisconnect:(NSError *_Nullable)error;
/// The server sent "session_started" with the given session id.
- (void)voiceChatClientDidStartSession:(NSString *)sessionId;
/// The server sent "turn_start" with the given turn index.
- (void)voiceChatClientDidStartTurn:(NSInteger)turnIndex;
/// The server sent "eager_eot" with a transcript and a confidence value.
- (void)voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:(NSString *)text
confidence:(double)confidence;
/// The server sent "turn_resumed".
- (void)voiceChatClientDidResumeTurn;
/// The server sent "transcript_interim".
- (void)voiceChatClientDidReceiveInterimTranscript:(NSString *)text;
/// The server sent "transcript_final".
- (void)voiceChatClientDidReceiveFinalTranscript:(NSString *)text;
/// The server sent "llm_start".
- (void)voiceChatClientDidReceiveLLMStart;
/// The server sent "llm_token" with one token.
- (void)voiceChatClientDidReceiveLLMToken:(NSString *)token;
/// A non-empty binary message was received.
- (void)voiceChatClientDidReceiveAudioChunk:(NSData *)audioData;
/// The server sent "complete" with the transcript and the AI response.
- (void)voiceChatClientDidCompleteWithTranscript:(NSString *)transcript
aiResponse:(NSString *)aiResponse;
/// The server sent "error" with a code and a message.
- (void)voiceChatClientDidReceiveErrorCode:(NSString *)code
message:(NSString *)message;
/// A local failure occurred (for example a send or JSON encoding error).
- (void)voiceChatClientDidFail:(NSError *)error;
@end
/// WebSocket client for realtime voice chat.
/// WebSocket client for realtime voice chat. Sends JSON control messages and
/// binary PCM audio frames, and turns incoming messages into delegate events.
@interface VoiceChatWebSocketClient : NSObject

/// Receiver of client events. Held weakly, so it is explicitly nullable.
@property(nonatomic, weak, nullable) id<VoiceChatWebSocketClientDelegate> delegate;

/// Base WebSocket URL, e.g. wss://api.yourdomain.com/api/ws/chat
@property(nonatomic, copy) NSString *serverURL;

/// YES between the socket opening and the connection being torn down.
@property(nonatomic, assign, readonly, getter=isConnected) BOOL connected;

/// Session id from the server's "session_started" message, or nil.
@property(nonatomic, copy, readonly, nullable) NSString *sessionId;

/// Drops any existing connection and connects to `serverURL`, with `token`
/// added (or replaced) as the "token" query item when it is non-empty.
- (void)connectWithToken:(NSString *)token;

/// Closes the connection; the delegate is told only if a task existed.
- (void)disconnect;

/// Sends "session_start"; non-empty language and voice id are included in
/// the message's "config" object.
- (void)startSessionWithLanguage:(nullable NSString *)language
                         voiceId:(nullable NSString *)voiceId;

/// Allows subsequent PCM frames to be sent.
- (void)enableAudioSending;

/// Causes subsequent PCM frames to be dropped instead of sent.
- (void)disableAudioSending;

/// Sends one PCM frame as a binary message when connected and audio sending
/// is enabled; empty frames are ignored.
- (void)sendAudioPCMFrame:(NSData *)pcmFrame;

/// Sends the "audio_end" control message.
- (void)endAudio;

/// Sends the "cancel" control message.
- (void)cancel;

@end
NS_ASSUME_NONNULL_END

View File

@@ -0,0 +1,457 @@
//
// VoiceChatWebSocketClient.m
// keyBoard
//
// Created by Mac on 2026/1/21.
//
#import "VoiceChatWebSocketClient.h"
/// Error domain used for NSErrors created by this client.
static NSString *const kVoiceChatWebSocketClientErrorDomain =
@"VoiceChatWebSocketClient";
/// Private state and protocol conformance.
@interface VoiceChatWebSocketClient () <NSURLSessionWebSocketDelegate>
/// Session that owns the current WebSocket task; recreated on every connect.
@property(nonatomic, strong) NSURLSession *urlSession;
/// Current WebSocket task, or nil when there is no connection.
@property(nonatomic, strong) NSURLSessionWebSocketTask *webSocketTask;
/// Serial queue on which connect, disconnect and sends are performed.
@property(nonatomic, strong) dispatch_queue_t networkQueue;
/// Read-write redeclaration of the public read-only property.
@property(nonatomic, assign) BOOL connected;
/// Read-write redeclaration of the public read-only property.
@property(nonatomic, copy) NSString *sessionId;
/// When NO, PCM frames handed to -sendAudioPCMFrame: are dropped.
@property(nonatomic, assign) BOOL audioSendingEnabled;
@end
@implementation VoiceChatWebSocketClient
/// Creates the client with its serial network queue, the placeholder server
/// URL and audio sending disabled.
- (instancetype)init {
  self = [super init];
  if (!self) {
    return nil;
  }

  _audioSendingEnabled = NO;
  _serverURL = @"wss://api.yourdomain.com/api/ws/chat";
  _networkQueue = dispatch_queue_create("com.keyboard.aitalk.voicechat.ws",
                                        DISPATCH_QUEUE_SERIAL);
  return self;
}
/// Cancels the task and invalidates the session synchronously.
///
/// -disconnect must not be called here: it dispatches a block that captures
/// `self`, and an object that is already deallocating cannot be kept alive by
/// that capture, so the block would later run against freed memory. Messaging
/// nil ivars is a no-op, so no guards are needed.
- (void)dealloc {
  [_webSocketTask cancelWithCloseCode:NSURLSessionWebSocketCloseCodeNormalClosure
                               reason:nil];
  [_urlSession invalidateAndCancel];
}
#pragma mark - Public Methods
/// Tears down any existing connection, then opens a new WebSocket task to the
/// server URL with the token applied, and starts the receive loop.
///
/// The log line omits the query string because it carries the auth token.
- (void)connectWithToken:(NSString *)token {
  dispatch_async(self.networkQueue, ^{
    [self disconnectInternal];

    NSURL *url = [self buildURLWithToken:token];
    if (!url) {
      [self reportErrorWithMessage:@"Invalid server URL"];
      return;
    }

    // Never write credentials to the log: strip the query before logging.
    NSURLComponents *loggable = [NSURLComponents componentsWithURL:url
                                           resolvingAgainstBaseURL:NO];
    loggable.query = nil;
    NSLog(@"[VoiceChatWebSocketClient] Connecting: %@", loggable.string);

    NSURLSessionConfiguration *config =
        [NSURLSessionConfiguration defaultSessionConfiguration];
    config.timeoutIntervalForRequest = 30;
    config.timeoutIntervalForResource = 300;

    // The session keeps a strong reference to its delegate until it is
    // invalidated; -disconnectInternal performs that invalidation.
    self.urlSession = [NSURLSession sessionWithConfiguration:config
                                                    delegate:self
                                               delegateQueue:nil];
    self.webSocketTask = [self.urlSession webSocketTaskWithURL:url];
    [self.webSocketTask resume];
    [self receiveMessage];
  });
}
/// Closes the connection on the network queue. The delegate is notified only
/// when a WebSocket task existed at the time of the call.
- (void)disconnect {
  dispatch_async(self.networkQueue, ^{
    if (self.webSocketTask == nil) {
      // Nothing was open: reset state silently.
      [self disconnectInternal];
      return;
    }

    NSLog(@"[VoiceChatWebSocketClient] Disconnect requested");
    [self disconnectInternal];
    [self notifyDisconnect:nil];
  });
}
/// Sends the "session_start" message. Non-empty language and voice id go into
/// a "config" object, which is omitted entirely when both are empty.
- (void)startSessionWithLanguage:(nullable NSString *)language
                         voiceId:(nullable NSString *)voiceId {
  NSMutableDictionary *sessionConfig = [NSMutableDictionary dictionary];
  if (language.length > 0) {
    sessionConfig[@"language"] = language;
  }
  if (voiceId.length > 0) {
    sessionConfig[@"voice_id"] = voiceId;
  }

  NSMutableDictionary *payload = [NSMutableDictionary dictionary];
  payload[@"type"] = @"session_start";
  if (sessionConfig.count > 0) {
    payload[@"config"] = sessionConfig;
  }

  [self sendJSON:payload];
}
/// Turns audio sending on; the flag is changed on the network queue.
- (void)enableAudioSending {
  dispatch_queue_t queue = self.networkQueue;
  dispatch_async(queue, ^{
    self.audioSendingEnabled = YES;
  });
}
/// Turns audio sending off; the flag is changed on the network queue.
- (void)disableAudioSending {
  dispatch_queue_t queue = self.networkQueue;
  dispatch_async(queue, ^{
    self.audioSendingEnabled = NO;
  });
}
/// Sends one PCM frame as a binary WebSocket message.
///
/// The frame is dropped when it is empty, when there is no open connection,
/// or when audio sending is disabled. Connection state is checked once up
/// front and again on the network queue, since it may change in between.
- (void)sendAudioPCMFrame:(NSData *)pcmFrame {
  BOOL worthQueueing =
      self.connected && self.webSocketTask != nil && pcmFrame.length > 0;
  if (!worthQueueing) {
    return;
  }

  dispatch_async(self.networkQueue, ^{
    if (!self.audioSendingEnabled) {
      return;
    }
    if (!(self.connected && self.webSocketTask != nil)) {
      return;
    }

    NSURLSessionWebSocketMessage *frameMessage =
        [[NSURLSessionWebSocketMessage alloc] initWithData:pcmFrame];
    [self.webSocketTask sendMessage:frameMessage
                  completionHandler:^(NSError *_Nullable sendError) {
                    if (!sendError) {
                      NSLog(@"[VoiceChatWebSocketClient] Sent audio frame: %lu bytes",
                            (unsigned long)pcmFrame.length);
                      return;
                    }
                    [self reportError:sendError];
                  }];
  });
}
/// Sends the "audio_end" control message.
- (void)endAudio {
  NSLog(@"[VoiceChatWebSocketClient] Sending audio_end");
  NSDictionary *payload = @{@"type" : @"audio_end"};
  [self sendJSON:payload];
}
/// Sends the "cancel" control message.
- (void)cancel {
  NSLog(@"[VoiceChatWebSocketClient] Sending cancel");
  NSDictionary *payload = @{@"type" : @"cancel"};
  [self sendJSON:payload];
}
#pragma mark - Private Methods
/// Builds the connection URL from `serverURL`.
///
/// A non-empty token replaces the first existing "token" query item, or is
/// appended when none exists; an empty token leaves the query untouched.
/// @return The URL, or nil when `serverURL` is empty or cannot be parsed.
- (NSURL *)buildURLWithToken:(NSString *)token {
  if (self.serverURL.length == 0) {
    return nil;
  }

  NSURLComponents *components =
      [NSURLComponents componentsWithString:self.serverURL];
  if (!components) {
    return nil;
  }
  if (token.length == 0) {
    return components.URL;
  }

  NSMutableArray<NSURLQueryItem *> *queryItems =
      [components.queryItems mutableCopy] ?: [NSMutableArray array];
  NSUInteger existingIndex = [queryItems
      indexOfObjectPassingTest:^BOOL(NSURLQueryItem *candidate, NSUInteger idx,
                                     BOOL *stop) {
        return [candidate.name isEqualToString:@"token"];
      }];

  if (existingIndex == NSNotFound) {
    [queryItems addObject:[NSURLQueryItem queryItemWithName:@"token"
                                                      value:token]];
  } else {
    queryItems[existingIndex] = [NSURLQueryItem queryItemWithName:@"token"
                                                            value:token];
  }

  components.queryItems = queryItems;
  return components.URL;
}
/// Serializes `dict` to JSON and sends it as a text message on the network
/// queue. Does nothing when there is no WebSocket task.
///
/// Failures are reported through the delegate's failure callback. The
/// serializer's return value (not the error pointer) decides success, and the
/// object is validated first because NSJSONSerialization raises an exception
/// for objects that cannot be represented as JSON.
- (void)sendJSON:(NSDictionary *)dict {
  if (!self.webSocketTask) {
    return;
  }

  if (![NSJSONSerialization isValidJSONObject:dict]) {
    [self reportErrorWithMessage:@"Failed to encode JSON message"];
    return;
  }

  NSError *jsonError = nil;
  NSData *jsonData = [NSJSONSerialization dataWithJSONObject:dict
                                                     options:0
                                                       error:&jsonError];
  if (!jsonData) {
    if (jsonError) {
      [self reportError:jsonError];
    } else {
      [self reportErrorWithMessage:@"Failed to encode JSON message"];
    }
    return;
  }

  NSString *jsonString = [[NSString alloc] initWithData:jsonData
                                               encoding:NSUTF8StringEncoding];
  if (!jsonString) {
    [self reportErrorWithMessage:@"Failed to encode JSON message"];
    return;
  }

  dispatch_async(self.networkQueue, ^{
    NSURLSessionWebSocketMessage *message =
        [[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
    // If the task was torn down in the meantime this messages nil (no-op).
    [self.webSocketTask sendMessage:message
                  completionHandler:^(NSError *_Nullable error) {
                    if (error) {
                      [self reportError:error];
                    }
                  }];
  });
}
/// Arms one receive on the current task and re-arms itself after every
/// successfully received message, forming the receive loop.
///
/// On error the loop stops. Errors with code NSURLErrorCancelled or 57 are
/// treated as silent; any other error notifies the delegate of the
/// disconnect and tears the connection down.
- (void)receiveMessage {
  NSURLSessionWebSocketTask *task = self.webSocketTask;
  if (task == nil) {
    return;
  }

  __weak typeof(self) weakSelf = self;
  [task receiveMessageWithCompletionHandler:^(
            NSURLSessionWebSocketMessage *_Nullable incoming,
            NSError *_Nullable receiveError) {
    __strong typeof(weakSelf) strongSelf = weakSelf;
    if (strongSelf == nil) {
      return;
    }

    if (receiveError != nil) {
      // NOTE(review): 57 looks like the POSIX ENOTCONN value on Darwin - confirm.
      BOOL isSilent = (receiveError.code == NSURLErrorCancelled ||
                       receiveError.code == 57);
      if (!isSilent) {
        [strongSelf notifyDisconnect:receiveError];
        [strongSelf disconnectInternal];
      }
      return;
    }

    switch (incoming.type) {
      case NSURLSessionWebSocketMessageTypeString:
        NSLog(@"[VoiceChatWebSocketClient] Received text: %@", incoming.string);
        [strongSelf handleTextMessage:incoming.string];
        break;
      case NSURLSessionWebSocketMessageTypeData:
        NSLog(@"[VoiceChatWebSocketClient] Received binary: %lu bytes",
              (unsigned long)incoming.data.length);
        [strongSelf handleBinaryMessage:incoming.data];
        break;
      default:
        break;
    }

    [strongSelf receiveMessage];
  }];
}
/// Returns `value` when it is an NSString; otherwise returns an empty string.
/// Protects against NSNull, numbers or containers where text is expected.
static NSString *VoiceChatStringOrEmpty(id value) {
  return [value isKindOfClass:[NSString class]] ? (NSString *)value : @"";
}

/// Parses one JSON text message from the server and dispatches the matching
/// delegate callback on the main queue.
///
/// The payload comes from the network and is validated before use: the top
/// level must be a JSON object, string fields that are missing or of another
/// type become empty strings, and numeric fields that cannot be converted
/// become 0. Messages without a "type", or with an unknown one, are ignored.
/// Parse failures are reported through the delegate's failure callback.
- (void)handleTextMessage:(NSString *)text {
  if (text.length == 0) {
    return;
  }
  NSData *data = [text dataUsingEncoding:NSUTF8StringEncoding];
  if (!data) {
    return;
  }

  NSError *jsonError = nil;
  id parsed = [NSJSONSerialization JSONObjectWithData:data
                                              options:0
                                                error:&jsonError];
  if (!parsed) {
    // Success is decided by the return value, not by the error pointer.
    if (jsonError) {
      [self reportError:jsonError];
    } else {
      [self reportErrorWithMessage:@"Failed to decode JSON message"];
    }
    return;
  }
  if (![parsed isKindOfClass:[NSDictionary class]]) {
    // A top-level array would otherwise crash on keyed subscripting.
    [self reportErrorWithMessage:@"Unexpected JSON message format"];
    return;
  }

  NSDictionary *json = (NSDictionary *)parsed;
  NSString *type = VoiceChatStringOrEmpty(json[@"type"]);
  if (type.length == 0) {
    return;
  }

  if ([type isEqualToString:@"session_started"]) {
    NSString *sessionId = VoiceChatStringOrEmpty(json[@"session_id"]);
    self.sessionId = sessionId;
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidStartSession:)]) {
        [self.delegate voiceChatClientDidStartSession:sessionId];
      }
    });
  } else if ([type isEqualToString:@"transcript_interim"]) {
    NSString *transcript = VoiceChatStringOrEmpty(json[@"text"]);
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidReceiveInterimTranscript:)]) {
        [self.delegate voiceChatClientDidReceiveInterimTranscript:transcript];
      }
    });
  } else if ([type isEqualToString:@"transcript_final"]) {
    NSString *transcript = VoiceChatStringOrEmpty(json[@"text"]);
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidReceiveFinalTranscript:)]) {
        [self.delegate voiceChatClientDidReceiveFinalTranscript:transcript];
      }
    });
  } else if ([type isEqualToString:@"turn_start"]) {
    id rawIndex = json[@"turn_index"];
    NSInteger turnIndex = [rawIndex respondsToSelector:@selector(integerValue)]
                              ? [rawIndex integerValue]
                              : 0;
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidStartTurn:)]) {
        [self.delegate voiceChatClientDidStartTurn:turnIndex];
      }
    });
  } else if ([type isEqualToString:@"eager_eot"]) {
    NSString *transcript = VoiceChatStringOrEmpty(json[@"transcript"]);
    id rawConfidence = json[@"confidence"];
    double confidence = [rawConfidence respondsToSelector:@selector(doubleValue)]
                            ? [rawConfidence doubleValue]
                            : 0.0;
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:
                                                                     confidence:)]) {
        [self.delegate
            voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:transcript
                                                       confidence:confidence];
      }
    });
  } else if ([type isEqualToString:@"turn_resumed"]) {
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidResumeTurn)]) {
        [self.delegate voiceChatClientDidResumeTurn];
      }
    });
  } else if ([type isEqualToString:@"llm_start"]) {
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate
              respondsToSelector:@selector(voiceChatClientDidReceiveLLMStart)]) {
        [self.delegate voiceChatClientDidReceiveLLMStart];
      }
    });
  } else if ([type isEqualToString:@"llm_token"]) {
    NSString *token = VoiceChatStringOrEmpty(json[@"token"]);
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate
              respondsToSelector:@selector(voiceChatClientDidReceiveLLMToken:)]) {
        [self.delegate voiceChatClientDidReceiveLLMToken:token];
      }
    });
  } else if ([type isEqualToString:@"complete"]) {
    NSString *transcript = VoiceChatStringOrEmpty(json[@"transcript"]);
    NSString *aiResponse = VoiceChatStringOrEmpty(json[@"ai_response"]);
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidCompleteWithTranscript:
                                                        aiResponse:)]) {
        [self.delegate voiceChatClientDidCompleteWithTranscript:transcript
                                                     aiResponse:aiResponse];
      }
    });
  } else if ([type isEqualToString:@"error"]) {
    NSString *code = VoiceChatStringOrEmpty(json[@"code"]);
    NSString *message = VoiceChatStringOrEmpty(json[@"message"]);
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidReceiveErrorCode:message:)]) {
        [self.delegate voiceChatClientDidReceiveErrorCode:code
                                                  message:message];
      }
    });
  }
}
/// Hands a non-empty binary message to the delegate as an audio chunk on the
/// main queue; empty payloads are ignored.
- (void)handleBinaryMessage:(NSData *)data {
  BOOL hasPayload = data.length > 0;
  if (!hasPayload) {
    return;
  }

  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatWebSocketClientDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector(voiceChatClientDidReceiveAudioChunk:)]) {
      return;
    }
    [receiver voiceChatClientDidReceiveAudioChunk:data];
  });
}
/// Resets connection state, cancels the WebSocket task with a normal-closure
/// code and invalidates the URL session. Does not notify the delegate.
- (void)disconnectInternal {
  self.connected = NO;
  self.sessionId = nil;
  self.audioSendingEnabled = NO;

  // Messaging nil is a no-op, so no explicit nil checks are required.
  [self.webSocketTask cancelWithCloseCode:NSURLSessionWebSocketCloseCodeNormalClosure
                                   reason:nil];
  self.webSocketTask = nil;

  [self.urlSession invalidateAndCancel];
  self.urlSession = nil;
}
/// Delivers `error` to the delegate's failure callback on the main queue.
- (void)reportError:(NSError *)error {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatWebSocketClientDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector(voiceChatClientDidFail:)]) {
      return;
    }
    [receiver voiceChatClientDidFail:error];
  });
}
/// Wraps `message` in an NSError (client domain, code -1) and reports it.
/// A nil message is replaced by an empty description.
- (void)reportErrorWithMessage:(NSString *)message {
  NSDictionary *info = @{NSLocalizedDescriptionKey : message ?: @""};
  [self reportError:[NSError errorWithDomain:kVoiceChatWebSocketClientErrorDomain
                                        code:-1
                                    userInfo:info]];
}
/// Marks the client as disconnected and tells the delegate on the main
/// queue, passing along the optional error.
- (void)notifyDisconnect:(NSError *_Nullable)error {
  self.connected = NO;

  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatWebSocketClientDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector(voiceChatClientDidDisconnect:)]) {
      return;
    }
    [receiver voiceChatClientDidDisconnect:error];
  });
}
#pragma mark - NSURLSessionWebSocketDelegate
/// NSURLSessionWebSocketDelegate: the handshake completed. Marks the client
/// as connected and notifies the delegate on the main queue.
///
/// Callbacks for any task other than the current one are ignored, so a task
/// left over from an earlier connection cannot mark a new, not-yet-open
/// connection as connected.
- (void)URLSession:(NSURLSession *)session
          webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
    didOpenWithProtocol:(NSString *)protocol {
  if (webSocketTask != self.webSocketTask) {
    return;
  }

  self.connected = YES;
  NSLog(@"[VoiceChatWebSocketClient] Connected");
  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate respondsToSelector:@selector(voiceChatClientDidConnect)]) {
      [self.delegate voiceChatClientDidConnect];
    }
  });
}
/// NSURLSessionWebSocketDelegate: the peer closed the connection. Notifies
/// the delegate and tears the connection down.
///
/// The callback is ignored unless it belongs to the current task. This covers
/// the original "already disconnected" case (current task is nil) and also
/// prevents a close event from a previous connection from tearing down a
/// connection that was opened afterwards.
- (void)URLSession:(NSURLSession *)session
       webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
    didCloseWithCode:(NSURLSessionWebSocketCloseCode)closeCode
              reason:(NSData *)reason {
  if (webSocketTask != self.webSocketTask) {
    return;
  }

  NSLog(@"[VoiceChatWebSocketClient] Closed with code: %ld",
        (long)closeCode);
  [self notifyDisconnect:nil];
  [self disconnectInternal];
}
@end