460 lines
14 KiB
Objective-C
460 lines
14 KiB
Objective-C
//
// VoiceChatWebSocketClient.m
// keyBoard
//
// Created by Mac on 2026/1/21.
//
|
|
#import "VoiceChatWebSocketClient.h"
|
|
|
|
/// Error domain used for NSError values this file builds itself
/// (see -reportErrorWithMessage:).
static NSString *const kVoiceChatWebSocketClientErrorDomain = @"VoiceChatWebSocketClient";
|
|
|
|
/// Private state and NSURLSessionWebSocketDelegate conformance.
@interface VoiceChatWebSocketClient () <NSURLSessionWebSocketDelegate>

/// Session created in -connectWithToken: and invalidated in -disconnectInternal.
@property (nonatomic, strong) NSURLSession *urlSession;

/// The live WebSocket task, or nil when there is no connection attempt in flight.
@property (nonatomic, strong) NSURLSessionWebSocketTask *webSocketTask;

/// Serial queue on which connect, disconnect and send work is scheduled.
@property (nonatomic, strong) dispatch_queue_t networkQueue;

/// YES between the didOpen delegate callback and the next disconnect.
@property (nonatomic) BOOL connected;

/// Identifier taken from the server's "session_started" message; nil when disconnected.
@property (nonatomic, copy) NSString *sessionId;

/// Gate checked by -sendAudioPCMFrame: before any frame is written to the socket.
@property (nonatomic) BOOL audioSendingEnabled;

@end
|
|
|
|
@implementation VoiceChatWebSocketClient
|
|
|
|
/// Creates a client with its private serial queue, the default server URL,
/// and audio sending switched off.
- (instancetype)init {
  self = [super init];
  if (!self) {
    return nil;
  }

  // NOTE(review): this host looks like a placeholder; presumably callers
  // overwrite serverURL before connecting -- confirm.
  _serverURL = @"wss://api.yourdomain.com/api/ws/chat";
  _networkQueue =
      dispatch_queue_create("com.keyboard.aitalk.voicechat.ws", DISPATCH_QUEUE_SERIAL);
  _audioSendingEnabled = NO;
  return self;
}
|
|
|
|
/// Releases the socket and session synchronously.
///
/// This deliberately does NOT call -disconnect: that method dispatches a block
/// which captures `self`, and a block created during -dealloc would run after
/// the object's memory has been freed. No delegate callback is sent from here
/// for the same reason.
- (void)dealloc {
  // Direct ivar access: property accessors should not be used on a
  // deallocating object. Messaging nil is a no-op when nothing is connected.
  [_webSocketTask cancelWithCloseCode:NSURLSessionWebSocketCloseCodeNormalClosure
                               reason:nil];
  [_urlSession invalidateAndCancel];
}
|
|
|
|
#pragma mark - Public Methods
|
|
|
|
/// Opens a new WebSocket connection to `serverURL`, replacing any existing one.
///
/// The work runs asynchronously on `networkQueue`. If no valid URL can be
/// built, the delegate is told via -reportErrorWithMessage: and nothing else
/// happens.
///
/// @param token Auth token appended to the URL as the `token` query item;
///              when empty the URL is used unchanged.
- (void)connectWithToken:(NSString *)token {
  dispatch_async(self.networkQueue, ^{
    // Drop any previous task/session so only one connection is ever live.
    [self disconnectInternal];

    NSURL *url = [self buildURLWithToken:token];
    if (!url) {
      [self reportErrorWithMessage:@"Invalid server URL"];
      return;
    }

    // The query string carries the auth token, so strip it before logging
    // to keep the credential out of device logs.
    NSURLComponents *loggable = [NSURLComponents componentsWithURL:url
                                           resolvingAgainstBaseURL:NO];
    loggable.query = nil;
    NSLog(@"[VoiceChatWebSocketClient] Connecting: %@", loggable.string);

    NSURLSessionConfiguration *config =
        [NSURLSessionConfiguration defaultSessionConfiguration];
    config.timeoutIntervalForRequest = 30;
    config.timeoutIntervalForResource = 300;

    // delegateQueue:nil means delegate callbacks arrive on a queue owned by
    // the session, not on networkQueue.
    self.urlSession = [NSURLSession sessionWithConfiguration:config
                                                    delegate:self
                                               delegateQueue:nil];

    self.webSocketTask = [self.urlSession webSocketTaskWithURL:url];
    [self.webSocketTask resume];
    [self receiveMessage];
  });
}
|
|
|
|
/// Closes the connection asynchronously on `networkQueue`.
///
/// The delegate receives voiceChatClientDidDisconnect: (with a nil error) only
/// when a WebSocket task actually existed at the time the request is processed.
- (void)disconnect {
  dispatch_async(self.networkQueue, ^{
    if (self.webSocketTask == nil) {
      // Nothing was connected: reset state quietly, no log and no callback.
      [self disconnectInternal];
      return;
    }

    NSLog(@"[VoiceChatWebSocketClient] Disconnect requested");
    [self disconnectInternal];
    [self notifyDisconnect:nil];
  });
}
|
|
|
|
/// Sends a "session_start" message to the server.
///
/// @param language Optional language value; included as config.language when non-empty.
/// @param voiceId  Optional voice identifier; included as config.voice_id when non-empty.
///
/// The "config" object is omitted entirely when neither option is supplied.
- (void)startSessionWithLanguage:(nullable NSString *)language
                         voiceId:(nullable NSString *)voiceId {
  NSMutableDictionary *message = [NSMutableDictionary dictionary];
  [message setObject:@"session_start" forKey:@"type"];

  NSMutableDictionary *options = [NSMutableDictionary dictionary];
  if (language.length != 0) {
    [options setObject:language forKey:@"language"];
  }
  if (voiceId.length != 0) {
    [options setObject:voiceId forKey:@"voice_id"];
  }
  if (options.count != 0) {
    [message setObject:options forKey:@"config"];
  }

  NSLog(@"[VoiceChatWebSocketClient] Sending session_start: %@", message);
  [self sendJSON:message];
}
|
|
|
|
/// Allows -sendAudioPCMFrame: to write frames to the socket.
/// The flag is flipped on `networkQueue`, so it is ordered with queued sends.
- (void)enableAudioSending {
  dispatch_queue_t queue = self.networkQueue;
  dispatch_async(queue, ^{
    [self setAudioSendingEnabled:YES];
  });
}
|
|
|
|
/// Stops -sendAudioPCMFrame: from writing frames to the socket.
/// The flag is flipped on `networkQueue`, so it is ordered with queued sends.
- (void)disableAudioSending {
  dispatch_queue_t queue = self.networkQueue;
  dispatch_async(queue, ^{
    [self setAudioSendingEnabled:NO];
  });
}
|
|
|
|
/// Sends one binary audio frame over the socket.
///
/// The frame is dropped silently when it is empty, when there is no open
/// connection, or when audio sending has not been enabled. Send failures are
/// forwarded to the delegate through -reportError:.
///
/// @param pcmFrame Raw bytes sent as a single binary WebSocket message.
- (void)sendAudioPCMFrame:(NSData *)pcmFrame {
  // Cheap pre-check on the caller's thread to avoid queueing pointless work.
  BOOL worthQueueing =
      self.connected && self.webSocketTask != nil && pcmFrame.length > 0;
  if (!worthQueueing) {
    return;
  }

  dispatch_async(self.networkQueue, ^{
    // State may have changed since the pre-check, so test again on the queue.
    if (!(self.audioSendingEnabled && self.connected && self.webSocketTask)) {
      return;
    }

    NSURLSessionWebSocketMessage *frameMessage =
        [[NSURLSessionWebSocketMessage alloc] initWithData:pcmFrame];
    void (^onSent)(NSError *_Nullable) = ^(NSError *_Nullable sendError) {
      if (sendError) {
        [self reportError:sendError];
        return;
      }
      NSLog(@"[VoiceChatWebSocketClient] Sent audio frame: %lu bytes",
            (unsigned long)pcmFrame.length);
    };
    [self.webSocketTask sendMessage:frameMessage completionHandler:onSent];
  });
}
|
|
|
|
/// Sends an "audio_end" message to the server.
- (void)endAudio {
  NSDictionary *payload = @{@"type" : @"audio_end"};
  NSLog(@"[VoiceChatWebSocketClient] Sending audio_end");
  [self sendJSON:payload];
}
|
|
|
|
/// Sends a "cancel" message to the server.
- (void)cancel {
  NSDictionary *payload = @{@"type" : @"cancel"};
  NSLog(@"[VoiceChatWebSocketClient] Sending cancel");
  [self sendJSON:payload];
}
|
|
|
|
#pragma mark - Private Methods
|
|
|
|
/// Builds the connection URL from `serverURL`, adding or replacing the
/// `token` query item.
///
/// @param token Auth token; when empty the server URL is returned without changes.
/// @return The resulting URL, or nil when `serverURL` is empty or cannot be parsed.
- (NSURL *)buildURLWithToken:(NSString *)token {
  NSString *base = self.serverURL;
  if (base.length == 0) {
    return nil;
  }

  NSURLComponents *components = [NSURLComponents componentsWithString:base];
  if (components == nil) {
    return nil;
  }

  if (token.length == 0) {
    return components.URL;
  }

  NSURLQueryItem *tokenItem = [NSURLQueryItem queryItemWithName:@"token" value:token];
  NSMutableArray<NSURLQueryItem *> *queryItems =
      [components.queryItems mutableCopy] ?: [NSMutableArray array];

  // Locate the first existing "token" item, if any.
  NSUInteger matchIndex = NSNotFound;
  NSUInteger position = 0;
  for (NSURLQueryItem *candidate in queryItems) {
    if ([candidate.name isEqualToString:@"token"]) {
      matchIndex = position;
      break;
    }
    position++;
  }

  if (matchIndex == NSNotFound) {
    [queryItems addObject:tokenItem];
  } else {
    // Replace in place so the item keeps its position in the query string.
    [queryItems replaceObjectAtIndex:matchIndex withObject:tokenItem];
  }

  components.queryItems = queryItems;
  return components.URL;
}
|
|
|
|
/// Serializes `dict` to JSON and sends it as a text WebSocket message.
///
/// Nothing is sent when there is no WebSocket task. Encoding and send
/// failures are forwarded to the delegate through -reportError: or
/// -reportErrorWithMessage:.
///
/// @param dict Message payload; must be a valid JSON object.
- (void)sendJSON:(NSDictionary *)dict {
  if (!self.webSocketTask) {
    return;
  }

  // dataWithJSONObject: raises an exception for objects that are not
  // JSON-representable, so validate up front and report instead of crashing.
  if (![NSJSONSerialization isValidJSONObject:dict]) {
    [self reportErrorWithMessage:@"Message is not a valid JSON object"];
    return;
  }

  NSError *jsonError = nil;
  NSData *jsonData = [NSJSONSerialization dataWithJSONObject:dict
                                                     options:0
                                                       error:&jsonError];
  // Cocoa convention: failure is signalled by the return value, not by the
  // error pointer.
  if (!jsonData) {
    if (jsonError) {
      [self reportError:jsonError];
    } else {
      [self reportErrorWithMessage:@"Failed to encode JSON message"];
    }
    return;
  }

  NSString *jsonString = [[NSString alloc] initWithData:jsonData
                                               encoding:NSUTF8StringEncoding];
  if (!jsonString) {
    [self reportErrorWithMessage:@"Failed to encode JSON message"];
    return;
  }

  dispatch_async(self.networkQueue, ^{
    NSURLSessionWebSocketMessage *message =
        [[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
    // If the task was torn down in the meantime this messages nil and is a no-op.
    [self.webSocketTask sendMessage:message
                  completionHandler:^(NSError *_Nullable error) {
                    if (error) {
                      [self reportError:error];
                    }
                  }];
  });
}
|
|
|
|
/// Arms a single receive on the current WebSocket task and re-arms itself
/// after every delivered message, forming the receive loop.
///
/// Text messages go to -handleTextMessage:, binary messages to
/// -handleBinaryMessage:. A receive error other than "cancelled" or code 57
/// notifies the delegate of the disconnect and tears the connection down.
- (void)receiveMessage {
  NSURLSessionWebSocketTask *task = self.webSocketTask;
  if (!task) {
    return;
  }

  __weak typeof(self) weakSelf = self;
  // Weak so the handler does not keep a cancelled task alive.
  __weak NSURLSessionWebSocketTask *weakTask = task;
  [task receiveMessageWithCompletionHandler:^(
            NSURLSessionWebSocketMessage *_Nullable message,
            NSError *_Nullable error) {
    __strong typeof(weakSelf) strongSelf = weakSelf;
    if (!strongSelf) {
      return;
    }

    // Ignore callbacks from a task that has been replaced or torn down.
    // Otherwise a late callback from an old connection could disconnect the
    // new one, or start a second receive loop on it.
    NSURLSessionWebSocketTask *currentTask = strongSelf.webSocketTask;
    if (!currentTask || currentTask != weakTask) {
      return;
    }

    if (error) {
      // 57 is ENOTCONN ("Socket is not connected") on Darwin; like
      // NSURLErrorCancelled it is treated as an expected shutdown here.
      if (error.code != NSURLErrorCancelled && error.code != 57) {
        [strongSelf notifyDisconnect:error];
        [strongSelf disconnectInternal];
      }
      return;
    }

    if (message.type == NSURLSessionWebSocketMessageTypeString) {
      NSLog(@"[VoiceChatWebSocketClient] Received text: %@", message.string);
      [strongSelf handleTextMessage:message.string];
    } else if (message.type == NSURLSessionWebSocketMessageTypeData) {
      NSLog(@"[VoiceChatWebSocketClient] Received binary: %lu bytes",
            (unsigned long)message.data.length);
      [strongSelf handleBinaryMessage:message.data];
    }

    // Each receive call delivers one message only, so re-arm for the next.
    [strongSelf receiveMessage];
  }];
}
|
|
|
|
/// Returns the value stored under `key` as a string: NSString values as-is,
/// NSNumber values via -stringValue, anything else (missing, NSNull, nested
/// container) as @"".
static NSString *VoiceChatStringForKey(NSDictionary *json, NSString *key) {
  id value = json[key];
  if ([value isKindOfClass:[NSString class]]) {
    return value;
  }
  if ([value isKindOfClass:[NSNumber class]]) {
    return [value stringValue];
  }
  return @"";
}

/// Returns the value stored under `key` when it can answer -integerValue and
/// -doubleValue (NSNumber or NSString); otherwise nil, which yields 0 when
/// messaged.
static id VoiceChatNumericForKey(NSDictionary *json, NSString *key) {
  id value = json[key];
  BOOL isNumeric = [value isKindOfClass:[NSNumber class]] ||
                   [value isKindOfClass:[NSString class]];
  return isNumeric ? value : nil;
}

/// Parses one JSON text message from the server and forwards it to the
/// delegate on the main queue, based on the message's "type" field.
///
/// Malformed JSON and non-object payloads are reported through the error
/// callbacks. Messages with a missing or unknown "type" are ignored. Field
/// values are type-checked, so an unexpected payload (NSNull, array, number in
/// place of a string) cannot crash the client or reach the delegate with the
/// wrong type.
///
/// @param text The raw text payload of the WebSocket message.
- (void)handleTextMessage:(NSString *)text {
  if (text.length == 0) {
    return;
  }

  NSData *data = [text dataUsingEncoding:NSUTF8StringEncoding];
  if (!data) {
    return;
  }

  NSError *jsonError = nil;
  id parsed = [NSJSONSerialization JSONObjectWithData:data
                                              options:0
                                                error:&jsonError];
  // Failure is signalled by a nil return, not by the error pointer.
  if (!parsed) {
    if (jsonError) {
      [self reportError:jsonError];
    }
    return;
  }

  // The top-level value may legally be an array; keyed subscripting on it
  // would raise an unrecognized-selector exception.
  if (![parsed isKindOfClass:[NSDictionary class]]) {
    [self reportErrorWithMessage:@"Unexpected JSON message format"];
    return;
  }
  NSDictionary *json = parsed;

  NSString *type = VoiceChatStringForKey(json, @"type");
  if (type.length == 0) {
    return;
  }

  if ([type isEqualToString:@"session_started"]) {
    NSString *sessionId = VoiceChatStringForKey(json, @"session_id");
    self.sessionId = sessionId;
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector(voiceChatClientDidStartSession:)]) {
        [self.delegate voiceChatClientDidStartSession:sessionId];
      }
    });
  } else if ([type isEqualToString:@"transcript_interim"]) {
    NSString *transcript = VoiceChatStringForKey(json, @"text");
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate
              respondsToSelector:@selector(voiceChatClientDidReceiveInterimTranscript:)]) {
        [self.delegate voiceChatClientDidReceiveInterimTranscript:transcript];
      }
    });
  } else if ([type isEqualToString:@"transcript_final"]) {
    NSString *transcript = VoiceChatStringForKey(json, @"text");
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate
              respondsToSelector:@selector(voiceChatClientDidReceiveFinalTranscript:)]) {
        [self.delegate voiceChatClientDidReceiveFinalTranscript:transcript];
      }
    });
  } else if ([type isEqualToString:@"turn_start"]) {
    NSInteger turnIndex = [VoiceChatNumericForKey(json, @"turn_index") integerValue];
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector(voiceChatClientDidStartTurn:)]) {
        [self.delegate voiceChatClientDidStartTurn:turnIndex];
      }
    });
  } else if ([type isEqualToString:@"eager_eot"]) {
    NSString *transcript = VoiceChatStringForKey(json, @"transcript");
    double confidence = [VoiceChatNumericForKey(json, @"confidence") doubleValue];
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate
              respondsToSelector:@selector
              (voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:confidence:)]) {
        [self.delegate voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:transcript
                                                                  confidence:confidence];
      }
    });
  } else if ([type isEqualToString:@"turn_resumed"]) {
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector(voiceChatClientDidResumeTurn)]) {
        [self.delegate voiceChatClientDidResumeTurn];
      }
    });
  } else if ([type isEqualToString:@"llm_start"]) {
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector(voiceChatClientDidReceiveLLMStart)]) {
        [self.delegate voiceChatClientDidReceiveLLMStart];
      }
    });
  } else if ([type isEqualToString:@"llm_token"]) {
    NSString *token = VoiceChatStringForKey(json, @"token");
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector(voiceChatClientDidReceiveLLMToken:)]) {
        [self.delegate voiceChatClientDidReceiveLLMToken:token];
      }
    });
  } else if ([type isEqualToString:@"complete"]) {
    NSString *transcript = VoiceChatStringForKey(json, @"transcript");
    NSString *aiResponse = VoiceChatStringForKey(json, @"ai_response");
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate
              respondsToSelector:@selector(voiceChatClientDidCompleteWithTranscript:
                                                                         aiResponse:)]) {
        [self.delegate voiceChatClientDidCompleteWithTranscript:transcript
                                                     aiResponse:aiResponse];
      }
    });
  } else if ([type isEqualToString:@"error"]) {
    // A numeric "code" is converted to its string form rather than dropped.
    NSString *code = VoiceChatStringForKey(json, @"code");
    NSString *message = VoiceChatStringForKey(json, @"message");
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate
              respondsToSelector:@selector(voiceChatClientDidReceiveErrorCode:message:)]) {
        [self.delegate voiceChatClientDidReceiveErrorCode:code message:message];
      }
    });
  }
}
|
|
|
|
/// Forwards a non-empty binary message to the delegate as an audio chunk,
/// on the main queue. Empty payloads are ignored.
///
/// @param data The binary payload of the WebSocket message.
- (void)handleBinaryMessage:(NSData *)data {
  if (data.length == 0) {
    return;
  }

  dispatch_async(dispatch_get_main_queue(), ^{
    SEL audioCallback = @selector(voiceChatClientDidReceiveAudioChunk:);
    if (![self.delegate respondsToSelector:audioCallback]) {
      return;
    }
    [self.delegate voiceChatClientDidReceiveAudioChunk:data];
  });
}
|
|
|
|
/// Resets connection state and releases the socket task and URL session.
/// Does not notify the delegate; callers do that themselves when appropriate.
- (void)disconnectInternal {
  self.audioSendingEnabled = NO;
  self.sessionId = nil;
  self.connected = NO;

  NSURLSessionWebSocketTask *task = self.webSocketTask;
  if (task != nil) {
    [task cancelWithCloseCode:NSURLSessionWebSocketCloseCodeNormalClosure reason:nil];
    self.webSocketTask = nil;
  }

  NSURLSession *session = self.urlSession;
  if (session != nil) {
    [session invalidateAndCancel];
    self.urlSession = nil;
  }
}
|
|
|
|
/// Delivers `error` to the delegate's voiceChatClientDidFail: on the main
/// queue, if the delegate implements it.
- (void)reportError:(NSError *)error {
  dispatch_async(dispatch_get_main_queue(), ^{
    BOOL wantsFailures =
        [self.delegate respondsToSelector:@selector(voiceChatClientDidFail:)];
    if (!wantsFailures) {
      return;
    }
    [self.delegate voiceChatClientDidFail:error];
  });
}
|
|
|
|
/// Wraps `message` in an NSError (this file's error domain, code -1) and
/// reports it through -reportError:.
///
/// @param message Localized description for the error; nil is treated as @"".
- (void)reportErrorWithMessage:(NSString *)message {
  NSString *description = message ?: @"";
  NSDictionary *userInfo = @{NSLocalizedDescriptionKey : description};
  NSError *wrapped = [NSError errorWithDomain:kVoiceChatWebSocketClientErrorDomain
                                         code:-1
                                     userInfo:userInfo];
  [self reportError:wrapped];
}
|
|
|
|
/// Marks the client as disconnected and tells the delegate on the main queue.
///
/// @param error The failure that caused the disconnect, or nil for a clean close.
- (void)notifyDisconnect:(NSError *_Nullable)error {
  self.connected = NO;

  dispatch_async(dispatch_get_main_queue(), ^{
    SEL disconnectCallback = @selector(voiceChatClientDidDisconnect:);
    if (![self.delegate respondsToSelector:disconnectCallback]) {
      return;
    }
    [self.delegate voiceChatClientDidDisconnect:error];
  });
}
|
|
|
|
#pragma mark - NSURLSessionWebSocketDelegate
|
|
|
|
/// NSURLSessionWebSocketDelegate: the WebSocket handshake completed.
/// Marks the client connected and informs the delegate on the main queue.
- (void)URLSession:(NSURLSession *)session
          webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
    didOpenWithProtocol:(NSString *)protocol {
  self.connected = YES;
  NSLog(@"[VoiceChatWebSocketClient] Connected");

  dispatch_async(dispatch_get_main_queue(), ^{
    if (![self.delegate respondsToSelector:@selector(voiceChatClientDidConnect)]) {
      return;
    }
    [self.delegate voiceChatClientDidConnect];
  });
}
|
|
|
|
/// NSURLSessionWebSocketDelegate: the socket was closed.
/// Ignored when the client has already torn the connection down (no current
/// task); otherwise the delegate is notified and local state is reset.
- (void)URLSession:(NSURLSession *)session
       webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
    didCloseWithCode:(NSURLSessionWebSocketCloseCode)closeCode
              reason:(NSData *)reason {
  BOOL alreadyTornDown = (self.webSocketTask == nil);
  if (alreadyTornDown) {
    return;
  }

  NSLog(@"[VoiceChatWebSocketClient] Closed with code: %ld", (long)closeCode);
  [self notifyDisconnect:nil];
  [self disconnectInternal];
}
|
|
|
|
@end
|