Files
keyboard/keyBoard/Class/AiTalk/VM/VoiceChatWebSocketClient.m
2026-01-21 17:59:12 +08:00

460 lines
14 KiB
Objective-C

//
// VoiceChatWebSocketClient.m
// keyBoard
//
// Created by Mac on 2026/1/21.
//
#import "VoiceChatWebSocketClient.h"
// Error domain used for the NSError objects this file creates itself
// (see -reportErrorWithMessage:).
static NSString *const kVoiceChatWebSocketClientErrorDomain =
@"VoiceChatWebSocketClient";
/// Private state of the client. The extension also declares conformance to
/// NSURLSessionWebSocketDelegate for the open/close callbacks implemented in
/// this file.
@interface VoiceChatWebSocketClient () <NSURLSessionWebSocketDelegate>

#pragma mark Transport

/// Serial queue created in -init; connect, disconnect and send work is
/// dispatched onto it.
@property (nonatomic, strong) dispatch_queue_t networkQueue;
/// Session created in -connectWithToken: with this object as its delegate.
@property (nonatomic, strong) NSURLSession *urlSession;
/// Task for the current connection; nil whenever no connection is open.
@property (nonatomic, strong) NSURLSessionWebSocketTask *webSocketTask;

#pragma mark Connection state

/// Set to YES by the did-open delegate callback and reset to NO on disconnect.
@property (nonatomic, assign) BOOL connected;
/// Value of "session_id" from the last "session_started" message.
@property (nonatomic, copy) NSString *sessionId;
/// Gate checked by -sendAudioPCMFrame: before a frame is sent.
@property (nonatomic, assign) BOOL audioSendingEnabled;

@end
@implementation VoiceChatWebSocketClient
/// Creates a client with its private serial network queue and the default
/// server URL. Audio sending starts disabled.
- (instancetype)init {
  if (!(self = [super init])) {
    return nil;
  }
  _audioSendingEnabled = NO;
  _serverURL = @"wss://api.yourdomain.com/api/ws/chat";
  _networkQueue = dispatch_queue_create("com.keyboard.aitalk.voicechat.ws",
                                        DISPATCH_QUEUE_SERIAL);
  return self;
}
/// Releases whatever network resources are still held at deallocation time.
///
/// Teardown is done synchronously through the ivars. Calling -disconnect here
/// would enqueue a block that captures `self`; that block would run after
/// deallocation has finished and message a dangling pointer. No delegate
/// notification is sent from here for the same reason.
- (void)dealloc {
  // Messaging nil is a no-op, so no guards are needed.
  [_webSocketTask
      cancelWithCloseCode:NSURLSessionWebSocketCloseCodeNormalClosure
                   reason:nil];
  [_urlSession invalidateAndCancel];
}
#pragma mark - Public Methods
/// Opens a new WebSocket connection to `serverURL`, authenticating with
/// `token` as a query item. Any existing connection is torn down first.
///
/// The work runs asynchronously on the network queue. If no URL can be built,
/// the delegate is told via an "Invalid server URL" error.
///
/// @param token Auth token appended to the URL; may be empty, in which case
///              the URL is used unchanged.
- (void)connectWithToken:(NSString *)token {
  dispatch_async(self.networkQueue, ^{
    [self disconnectInternal];
    NSURL *url = [self buildURLWithToken:token];
    if (!url) {
      [self reportErrorWithMessage:@"Invalid server URL"];
      return;
    }
    // Log the endpoint without its query string: the query carries the auth
    // token, which must not end up in device logs.
    NSURLComponents *loggable = [NSURLComponents componentsWithURL:url
                                           resolvingAgainstBaseURL:NO];
    loggable.query = nil;
    NSLog(@"[VoiceChatWebSocketClient] Connecting: %@",
          loggable.string ?: @"");
    NSURLSessionConfiguration *config =
        [NSURLSessionConfiguration defaultSessionConfiguration];
    config.timeoutIntervalForRequest = 30;
    config.timeoutIntervalForResource = 300;
    self.urlSession = [NSURLSession sessionWithConfiguration:config
                                                    delegate:self
                                               delegateQueue:nil];
    self.webSocketTask = [self.urlSession webSocketTaskWithURL:url];
    [self.webSocketTask resume];
    // Start the receive loop; each completed receive re-arms itself.
    [self receiveMessage];
  });
}
/// Closes the current connection asynchronously on the network queue.
/// The delegate receives a disconnect callback (with a nil error) only if a
/// task actually existed when the request was processed.
- (void)disconnect {
  dispatch_async(self.networkQueue, ^{
    if (!self.webSocketTask) {
      // Nothing was open: still reset internal state, but stay silent.
      [self disconnectInternal];
      return;
    }
    NSLog(@"[VoiceChatWebSocketClient] Disconnect requested");
    [self disconnectInternal];
    [self notifyDisconnect:nil];
  });
}
/// Sends a "session_start" message. A "config" object is attached only when
/// at least one of the optional values is non-empty.
///
/// @param language Sent as config "language" when non-empty.
/// @param voiceId  Sent as config "voice_id" when non-empty.
- (void)startSessionWithLanguage:(nullable NSString *)language
                         voiceId:(nullable NSString *)voiceId {
  NSMutableDictionary *options = [NSMutableDictionary dictionary];
  if (language.length > 0) {
    options[@"language"] = language;
  }
  if (voiceId.length > 0) {
    options[@"voice_id"] = voiceId;
  }

  NSMutableDictionary *payload =
      [NSMutableDictionary dictionaryWithObject:@"session_start"
                                         forKey:@"type"];
  if (options.count > 0) {
    payload[@"config"] = options;
  }

  NSLog(@"[VoiceChatWebSocketClient] Sending session_start: %@",
        payload);
  [self sendJSON:payload];
}
/// Opens the audio gate. The flag is flipped on the network queue, so it is
/// ordered with the frame sends that are dispatched to the same queue.
- (void)enableAudioSending {
  dispatch_queue_t queue = self.networkQueue;
  dispatch_async(queue, ^{
    [self setAudioSendingEnabled:YES];
  });
}
/// Closes the audio gate. The flag is flipped on the network queue, so it is
/// ordered with the frame sends that are dispatched to the same queue.
- (void)disableAudioSending {
  dispatch_queue_t queue = self.networkQueue;
  dispatch_async(queue, ^{
    [self setAudioSendingEnabled:NO];
  });
}
/// Sends one audio frame as a binary WebSocket message.
///
/// The frame is dropped silently when it is empty, when there is no open
/// connection, or when audio sending is disabled. Send failures are reported
/// to the delegate through -reportError:.
///
/// @param pcmFrame Frame bytes. The data is snapshotted before this method
///                 returns, so the caller may reuse its buffer right away.
- (void)sendAudioPCMFrame:(NSData *)pcmFrame {
  // Cheap early-out on the caller's thread; re-checked on the network queue.
  if (!self.connected || !self.webSocketTask || pcmFrame.length == 0) {
    return;
  }
  // The send happens asynchronously. Copy so that a caller-owned
  // NSMutableData cannot change under us before the frame goes out
  // (for immutable data -copy is just a retain).
  NSData *frame = [pcmFrame copy];
  dispatch_async(self.networkQueue, ^{
    if (!self.audioSendingEnabled) {
      return;
    }
    if (!self.connected || !self.webSocketTask) {
      return;
    }
    NSURLSessionWebSocketMessage *message =
        [[NSURLSessionWebSocketMessage alloc] initWithData:frame];
    [self.webSocketTask
              sendMessage:message
        completionHandler:^(NSError *_Nullable error) {
          if (error) {
            [self reportError:error];
          } else {
            NSLog(@"[VoiceChatWebSocketClient] Sent audio frame: %lu bytes",
                  (unsigned long)frame.length);
          }
        }];
  });
}
/// Sends an "audio_end" control message.
- (void)endAudio {
  NSDictionary *payload = @{@"type" : @"audio_end"};
  NSLog(@"[VoiceChatWebSocketClient] Sending audio_end");
  [self sendJSON:payload];
}
/// Sends a "cancel" control message.
- (void)cancel {
  NSDictionary *payload = @{@"type" : @"cancel"};
  NSLog(@"[VoiceChatWebSocketClient] Sending cancel");
  [self sendJSON:payload];
}
#pragma mark - Private Methods
/// Builds the connection URL from `serverURL`, setting the `token` query item.
///
/// @param token Auth token. When empty, the server URL is returned without
///              touching its query. Otherwise the first existing `token`
///              item is replaced, or a new one is appended.
/// @return The resulting URL, or nil when `serverURL` is empty or cannot be
///         parsed.
- (NSURL *)buildURLWithToken:(NSString *)token {
  NSURLComponents *parts =
      self.serverURL.length > 0
          ? [NSURLComponents componentsWithString:self.serverURL]
          : nil;
  if (!parts) {
    return nil;
  }
  if (token.length == 0) {
    return parts.URL;
  }

  NSMutableArray<NSURLQueryItem *> *query =
      parts.queryItems.mutableCopy ?: [NSMutableArray array];
  NSURLQueryItem *tokenItem = [NSURLQueryItem queryItemWithName:@"token"
                                                          value:token];
  NSUInteger existing = [query
      indexOfObjectPassingTest:^BOOL(NSURLQueryItem *candidate, NSUInteger idx,
                                     BOOL *stop) {
        return [candidate.name isEqualToString:@"token"];
      }];
  if (existing == NSNotFound) {
    [query addObject:tokenItem];
  } else {
    query[existing] = tokenItem;
  }
  parts.queryItems = query;
  return parts.URL;
}
/// Serializes `dict` to JSON and sends it as a text WebSocket message on the
/// network queue.
///
/// Nothing is sent when there is no current task. Encoding and send failures
/// are reported to the delegate through -reportError:.
///
/// @param dict The message payload; must be a valid JSON object.
- (void)sendJSON:(NSDictionary *)dict {
  if (!self.webSocketTask) {
    return;
  }
  // +dataWithJSONObject: raises an exception for objects that are not valid
  // JSON, so validate first and fail through the normal error path.
  if (![NSJSONSerialization isValidJSONObject:dict]) {
    [self reportErrorWithMessage:@"Failed to encode JSON message"];
    return;
  }
  NSError *jsonError = nil;
  NSData *jsonData = [NSJSONSerialization dataWithJSONObject:dict
                                                     options:0
                                                       error:&jsonError];
  // Failure is signalled by the nil return value, not by the error pointer.
  if (!jsonData) {
    if (jsonError) {
      [self reportError:jsonError];
    } else {
      [self reportErrorWithMessage:@"Failed to encode JSON message"];
    }
    return;
  }
  NSString *jsonString =
      [[NSString alloc] initWithData:jsonData
                            encoding:NSUTF8StringEncoding];
  if (!jsonString) {
    [self reportErrorWithMessage:@"Failed to encode JSON message"];
    return;
  }
  dispatch_async(self.networkQueue, ^{
    NSURLSessionWebSocketMessage *message =
        [[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
    // If the task was torn down in the meantime this messages nil and the
    // frame is dropped.
    [self.webSocketTask
              sendMessage:message
        completionHandler:^(NSError *_Nullable error) {
          if (error) {
            [self reportError:error];
          }
        }];
  });
}
/// Arms one receive on the current task. After each successfully received
/// message the handler dispatches it and re-arms itself, forming the receive
/// loop.
///
/// Callbacks that belong to a task that is no longer the current one (the
/// client disconnected or reconnected in the meantime) are ignored, so a late
/// event from an old connection can neither tear down a newer connection nor
/// start a second receive loop on it.
- (void)receiveMessage {
  NSURLSessionWebSocketTask *task = self.webSocketTask;
  if (!task) {
    return;
  }
  __weak typeof(self) weakSelf = self;
  [task receiveMessageWithCompletionHandler:^(
            NSURLSessionWebSocketMessage *_Nullable message,
            NSError *_Nullable error) {
    __strong typeof(weakSelf) strongSelf = weakSelf;
    if (!strongSelf) {
      return;
    }
    // Stale callback: this task has been replaced or torn down.
    if (strongSelf.webSocketTask != task) {
      return;
    }
    if (error) {
      // Cancellation is the result of our own teardown. Code 57 is presumably
      // POSIX ENOTCONN ("socket is not connected") - the error domain is not
      // checked here; TODO confirm.
      if (error.code != NSURLErrorCancelled && error.code != 57) {
        [strongSelf notifyDisconnect:error];
        [strongSelf disconnectInternal];
      }
      return;
    }
    if (message.type == NSURLSessionWebSocketMessageTypeString) {
      NSLog(@"[VoiceChatWebSocketClient] Received text: %@", message.string);
      [strongSelf handleTextMessage:message.string];
    } else if (message.type == NSURLSessionWebSocketMessageTypeData) {
      NSLog(@"[VoiceChatWebSocketClient] Received binary: %lu bytes",
            (unsigned long)message.data.length);
      [strongSelf handleBinaryMessage:message.data];
    }
    [strongSelf receiveMessage];
  }];
}
/// Coerces a decoded JSON value to a string for delegate callbacks:
/// NSString is returned as-is, NSNumber via -stringValue, and anything else
/// (nil, NSNull, arrays, dictionaries) becomes @"".
static NSString *VoiceChatWSStringValue(id value) {
  if ([value isKindOfClass:[NSString class]]) {
    return value;
  }
  if ([value isKindOfClass:[NSNumber class]]) {
    return [value stringValue];
  }
  return @"";
}

/// Returns `value` when it is an NSNumber or NSString (both answer
/// -integerValue and -doubleValue), otherwise nil so that the numeric
/// accessors yield 0.
static id VoiceChatWSNumericValue(id value) {
  BOOL isNumeric = [value isKindOfClass:[NSNumber class]] ||
                   [value isKindOfClass:[NSString class]];
  return isNumeric ? value : nil;
}

/// Parses a text frame as a JSON object and forwards it to the delegate on
/// the main queue, selecting the callback from the message's "type" field.
///
/// The payload comes from the network, so nothing is assumed about its shape:
/// a frame that is not a JSON object, or that has no non-empty string "type",
/// is ignored; unknown types are ignored; fields of an unexpected kind fall
/// back to @"" / 0 instead of raising. Malformed JSON is reported through
/// -reportError:.
///
/// @param text The text frame received from the server.
- (void)handleTextMessage:(NSString *)text {
  if (text.length == 0) {
    return;
  }
  NSData *data = [text dataUsingEncoding:NSUTF8StringEncoding];
  if (!data) {
    return;
  }
  NSError *jsonError = nil;
  id parsed = [NSJSONSerialization JSONObjectWithData:data
                                              options:0
                                                error:&jsonError];
  // Failure is signalled by the nil return value, not by the error pointer.
  if (!parsed) {
    if (jsonError) {
      [self reportError:jsonError];
    }
    return;
  }
  // A top-level array is valid JSON but not a message this client knows.
  if (![parsed isKindOfClass:[NSDictionary class]]) {
    return;
  }
  NSDictionary *json = parsed;
  id rawType = json[@"type"];
  NSString *type = [rawType isKindOfClass:[NSString class]] ? rawType : nil;
  if (type.length == 0) {
    return;
  }
  if ([type isEqualToString:@"session_started"]) {
    NSString *sessionId = VoiceChatWSStringValue(json[@"session_id"]);
    self.sessionId = sessionId;
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidStartSession:)]) {
        [self.delegate voiceChatClientDidStartSession:sessionId];
      }
    });
  } else if ([type isEqualToString:@"transcript_interim"]) {
    NSString *transcript = VoiceChatWSStringValue(json[@"text"]);
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidReceiveInterimTranscript:)]) {
        [self.delegate voiceChatClientDidReceiveInterimTranscript:transcript];
      }
    });
  } else if ([type isEqualToString:@"transcript_final"]) {
    NSString *transcript = VoiceChatWSStringValue(json[@"text"]);
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidReceiveFinalTranscript:)]) {
        [self.delegate voiceChatClientDidReceiveFinalTranscript:transcript];
      }
    });
  } else if ([type isEqualToString:@"turn_start"]) {
    NSInteger turnIndex =
        [VoiceChatWSNumericValue(json[@"turn_index"]) integerValue];
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidStartTurn:)]) {
        [self.delegate voiceChatClientDidStartTurn:turnIndex];
      }
    });
  } else if ([type isEqualToString:@"eager_eot"]) {
    NSString *transcript = VoiceChatWSStringValue(json[@"transcript"]);
    double confidence =
        [VoiceChatWSNumericValue(json[@"confidence"]) doubleValue];
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:
                                                                     confidence:)]) {
        [self.delegate
            voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:transcript
                                                       confidence:confidence];
      }
    });
  } else if ([type isEqualToString:@"turn_resumed"]) {
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidResumeTurn)]) {
        [self.delegate voiceChatClientDidResumeTurn];
      }
    });
  } else if ([type isEqualToString:@"llm_start"]) {
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate
              respondsToSelector:@selector(voiceChatClientDidReceiveLLMStart)]) {
        [self.delegate voiceChatClientDidReceiveLLMStart];
      }
    });
  } else if ([type isEqualToString:@"llm_token"]) {
    NSString *token = VoiceChatWSStringValue(json[@"token"]);
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate
              respondsToSelector:@selector(voiceChatClientDidReceiveLLMToken:)]) {
        [self.delegate voiceChatClientDidReceiveLLMToken:token];
      }
    });
  } else if ([type isEqualToString:@"complete"]) {
    NSString *transcript = VoiceChatWSStringValue(json[@"transcript"]);
    NSString *aiResponse = VoiceChatWSStringValue(json[@"ai_response"]);
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidCompleteWithTranscript:
                                                        aiResponse:)]) {
        [self.delegate voiceChatClientDidCompleteWithTranscript:transcript
                                                     aiResponse:aiResponse];
      }
    });
  } else if ([type isEqualToString:@"error"]) {
    // "code" may arrive as a number; it is passed on in string form.
    NSString *code = VoiceChatWSStringValue(json[@"code"]);
    NSString *message = VoiceChatWSStringValue(json[@"message"]);
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidReceiveErrorCode:message:)]) {
        [self.delegate voiceChatClientDidReceiveErrorCode:code
                                                  message:message];
      }
    });
  }
}
/// Forwards a non-empty binary frame to the delegate as an audio chunk, on
/// the main queue. Empty frames are dropped.
- (void)handleBinaryMessage:(NSData *)data {
  BOOL hasPayload = data.length > 0;
  if (hasPayload) {
    dispatch_async(dispatch_get_main_queue(), ^{
      SEL callback = @selector(voiceChatClientDidReceiveAudioChunk:);
      if (![self.delegate respondsToSelector:callback]) {
        return;
      }
      [self.delegate voiceChatClientDidReceiveAudioChunk:data];
    });
  }
}
/// Resets connection state and releases the task and session without
/// notifying the delegate. Safe to call when nothing is connected.
- (void)disconnectInternal {
  self.audioSendingEnabled = NO;
  self.sessionId = nil;
  self.connected = NO;

  // Messaging nil is a no-op, so neither teardown step needs a guard.
  [self.webSocketTask
      cancelWithCloseCode:NSURLSessionWebSocketCloseCodeNormalClosure
                   reason:nil];
  self.webSocketTask = nil;

  [self.urlSession invalidateAndCancel];
  self.urlSession = nil;
}
/// Delivers `error` to the delegate's failure callback on the main queue, if
/// the delegate implements it.
- (void)reportError:(NSError *)error {
  dispatch_async(dispatch_get_main_queue(), ^{
    if (![self.delegate respondsToSelector:@selector(voiceChatClientDidFail:)]) {
      return;
    }
    [self.delegate voiceChatClientDidFail:error];
  });
}
/// Wraps `message` in an NSError (this file's error domain, code -1) and
/// reports it. A nil message becomes an empty description.
- (void)reportErrorWithMessage:(NSString *)message {
  NSString *description = message ?: @"";
  NSDictionary *info = @{NSLocalizedDescriptionKey : description};
  [self reportError:[NSError
                        errorWithDomain:kVoiceChatWebSocketClientErrorDomain
                                   code:-1
                               userInfo:info]];
}
/// Marks the client as not connected and tells the delegate, on the main
/// queue, that the connection ended.
///
/// @param error The cause of the disconnect, or nil when none is supplied.
- (void)notifyDisconnect:(NSError *_Nullable)error {
  [self setConnected:NO];
  dispatch_async(dispatch_get_main_queue(), ^{
    SEL callback = @selector(voiceChatClientDidDisconnect:);
    if (![self.delegate respondsToSelector:callback]) {
      return;
    }
    [self.delegate voiceChatClientDidDisconnect:error];
  });
}
#pragma mark - NSURLSessionWebSocketDelegate
/// NSURLSessionWebSocketDelegate callback for a completed handshake: marks
/// the client connected and informs the delegate on the main queue.
- (void)URLSession:(NSURLSession *)session
          webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
    didOpenWithProtocol:(NSString *)protocol {
  [self setConnected:YES];
  NSLog(@"[VoiceChatWebSocketClient] Connected");
  dispatch_async(dispatch_get_main_queue(), ^{
    if (![self.delegate
            respondsToSelector:@selector(voiceChatClientDidConnect)]) {
      return;
    }
    [self.delegate voiceChatClientDidConnect];
  });
}
/// NSURLSessionWebSocketDelegate callback for a closed connection: notifies
/// the delegate and tears down local state.
///
/// The callback is ignored when there is no current task (we already
/// disconnected locally) or when it refers to a task other than the current
/// one, so a late close from a superseded connection cannot tear down a newer
/// connection.
- (void)URLSession:(NSURLSession *)session
       webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
    didCloseWithCode:(NSURLSessionWebSocketCloseCode)closeCode
              reason:(NSData *)reason {
  NSURLSessionWebSocketTask *currentTask = self.webSocketTask;
  if (!currentTask || currentTask != webSocketTask) {
    return;
  }
  NSLog(@"[VoiceChatWebSocketClient] Closed with code: %ld",
        (long)closeCode);
  [self notifyDisconnect:nil];
  [self disconnectInternal];
}
@end