1
This commit is contained in:
457
keyBoard/Class/AiTalk/VM/VoiceChatWebSocketClient.m
Normal file
457
keyBoard/Class/AiTalk/VM/VoiceChatWebSocketClient.m
Normal file
@@ -0,0 +1,457 @@
|
||||
//
|
||||
// VoiceChatWebSocketClient.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/21.
|
||||
//
|
||||
|
||||
#import "VoiceChatWebSocketClient.h"
|
||||
|
||||
static NSString *const kVoiceChatWebSocketClientErrorDomain =
|
||||
@"VoiceChatWebSocketClient";
|
||||
|
||||
@interface VoiceChatWebSocketClient () <NSURLSessionWebSocketDelegate>
|
||||
|
||||
@property(nonatomic, strong) NSURLSession *urlSession;
|
||||
@property(nonatomic, strong) NSURLSessionWebSocketTask *webSocketTask;
|
||||
@property(nonatomic, strong) dispatch_queue_t networkQueue;
|
||||
@property(nonatomic, assign) BOOL connected;
|
||||
@property(nonatomic, copy) NSString *sessionId;
|
||||
@property(nonatomic, assign) BOOL audioSendingEnabled;
|
||||
|
||||
@end
|
||||
|
||||
@implementation VoiceChatWebSocketClient
|
||||
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self) {
|
||||
_networkQueue = dispatch_queue_create("com.keyboard.aitalk.voicechat.ws",
|
||||
DISPATCH_QUEUE_SERIAL);
|
||||
_serverURL = @"wss://api.yourdomain.com/api/ws/chat";
|
||||
_audioSendingEnabled = NO;
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)dealloc {
|
||||
[self disconnect];
|
||||
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
- (void)connectWithToken:(NSString *)token {
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
[self disconnectInternal];
|
||||
|
||||
NSURL *url = [self buildURLWithToken:token];
|
||||
if (!url) {
|
||||
[self reportErrorWithMessage:@"Invalid server URL"];
|
||||
return;
|
||||
}
|
||||
|
||||
NSLog(@"[VoiceChatWebSocketClient] Connecting: %@", url.absoluteString);
|
||||
|
||||
NSURLSessionConfiguration *config =
|
||||
[NSURLSessionConfiguration defaultSessionConfiguration];
|
||||
config.timeoutIntervalForRequest = 30;
|
||||
config.timeoutIntervalForResource = 300;
|
||||
|
||||
self.urlSession = [NSURLSession sessionWithConfiguration:config
|
||||
delegate:self
|
||||
delegateQueue:nil];
|
||||
|
||||
self.webSocketTask = [self.urlSession webSocketTaskWithURL:url];
|
||||
[self.webSocketTask resume];
|
||||
[self receiveMessage];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)disconnect {
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
BOOL shouldNotify = self.webSocketTask != nil;
|
||||
if (shouldNotify) {
|
||||
NSLog(@"[VoiceChatWebSocketClient] Disconnect requested");
|
||||
}
|
||||
[self disconnectInternal];
|
||||
if (shouldNotify) {
|
||||
[self notifyDisconnect:nil];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)startSessionWithLanguage:(nullable NSString *)language
|
||||
voiceId:(nullable NSString *)voiceId {
|
||||
NSMutableDictionary *message = [NSMutableDictionary dictionary];
|
||||
message[@"type"] = @"session_start";
|
||||
|
||||
NSMutableDictionary *config = [NSMutableDictionary dictionary];
|
||||
if (language.length > 0) {
|
||||
config[@"language"] = language;
|
||||
}
|
||||
if (voiceId.length > 0) {
|
||||
config[@"voice_id"] = voiceId;
|
||||
}
|
||||
if (config.count > 0) {
|
||||
message[@"config"] = config;
|
||||
}
|
||||
|
||||
[self sendJSON:message];
|
||||
}
|
||||
|
||||
- (void)enableAudioSending {
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
self.audioSendingEnabled = YES;
|
||||
});
|
||||
}
|
||||
|
||||
- (void)disableAudioSending {
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
self.audioSendingEnabled = NO;
|
||||
});
|
||||
}
|
||||
|
||||
- (void)sendAudioPCMFrame:(NSData *)pcmFrame {
|
||||
if (!self.connected || !self.webSocketTask || pcmFrame.length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
if (!self.audioSendingEnabled) {
|
||||
return;
|
||||
}
|
||||
if (!self.connected || !self.webSocketTask) {
|
||||
return;
|
||||
}
|
||||
NSURLSessionWebSocketMessage *message =
|
||||
[[NSURLSessionWebSocketMessage alloc] initWithData:pcmFrame];
|
||||
[self.webSocketTask
|
||||
sendMessage:message
|
||||
completionHandler:^(NSError *_Nullable error) {
|
||||
if (error) {
|
||||
[self reportError:error];
|
||||
} else {
|
||||
NSLog(@"[VoiceChatWebSocketClient] Sent audio frame: %lu bytes",
|
||||
(unsigned long)pcmFrame.length);
|
||||
}
|
||||
}];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)endAudio {
|
||||
NSLog(@"[VoiceChatWebSocketClient] Sending audio_end");
|
||||
[self sendJSON:@{ @"type" : @"audio_end" }];
|
||||
}
|
||||
|
||||
- (void)cancel {
|
||||
NSLog(@"[VoiceChatWebSocketClient] Sending cancel");
|
||||
[self sendJSON:@{ @"type" : @"cancel" }];
|
||||
}
|
||||
|
||||
#pragma mark - Private Methods
|
||||
|
||||
- (NSURL *)buildURLWithToken:(NSString *)token {
|
||||
if (self.serverURL.length == 0) {
|
||||
return nil;
|
||||
}
|
||||
|
||||
NSURLComponents *components =
|
||||
[NSURLComponents componentsWithString:self.serverURL];
|
||||
if (!components) {
|
||||
return nil;
|
||||
}
|
||||
|
||||
if (token.length > 0) {
|
||||
NSMutableArray<NSURLQueryItem *> *items =
|
||||
components.queryItems.mutableCopy ?: [NSMutableArray array];
|
||||
BOOL didReplace = NO;
|
||||
for (NSUInteger i = 0; i < items.count; i++) {
|
||||
NSURLQueryItem *item = items[i];
|
||||
if ([item.name isEqualToString:@"token"]) {
|
||||
items[i] = [NSURLQueryItem queryItemWithName:@"token" value:token];
|
||||
didReplace = YES;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!didReplace) {
|
||||
[items addObject:[NSURLQueryItem queryItemWithName:@"token"
|
||||
value:token]];
|
||||
}
|
||||
components.queryItems = items;
|
||||
}
|
||||
|
||||
return components.URL;
|
||||
}
|
||||
|
||||
- (void)sendJSON:(NSDictionary *)dict {
|
||||
if (!self.webSocketTask) {
|
||||
return;
|
||||
}
|
||||
|
||||
NSError *jsonError = nil;
|
||||
NSData *jsonData = [NSJSONSerialization dataWithJSONObject:dict
|
||||
options:0
|
||||
error:&jsonError];
|
||||
if (jsonError) {
|
||||
[self reportError:jsonError];
|
||||
return;
|
||||
}
|
||||
|
||||
NSString *jsonString =
|
||||
[[NSString alloc] initWithData:jsonData
|
||||
encoding:NSUTF8StringEncoding];
|
||||
if (!jsonString) {
|
||||
[self reportErrorWithMessage:@"Failed to encode JSON message"];
|
||||
return;
|
||||
}
|
||||
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
NSURLSessionWebSocketMessage *message =
|
||||
[[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
|
||||
[self.webSocketTask
|
||||
sendMessage:message
|
||||
completionHandler:^(NSError *_Nullable error) {
|
||||
if (error) {
|
||||
[self reportError:error];
|
||||
}
|
||||
}];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)receiveMessage {
|
||||
if (!self.webSocketTask) {
|
||||
return;
|
||||
}
|
||||
|
||||
__weak typeof(self) weakSelf = self;
|
||||
[self.webSocketTask receiveMessageWithCompletionHandler:^(
|
||||
NSURLSessionWebSocketMessage *_Nullable message,
|
||||
NSError *_Nullable error) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (error) {
|
||||
if (error.code != NSURLErrorCancelled && error.code != 57) {
|
||||
[strongSelf notifyDisconnect:error];
|
||||
[strongSelf disconnectInternal];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (message.type == NSURLSessionWebSocketMessageTypeString) {
|
||||
NSLog(@"[VoiceChatWebSocketClient] Received text: %@", message.string);
|
||||
[strongSelf handleTextMessage:message.string];
|
||||
} else if (message.type == NSURLSessionWebSocketMessageTypeData) {
|
||||
NSLog(@"[VoiceChatWebSocketClient] Received binary: %lu bytes",
|
||||
(unsigned long)message.data.length);
|
||||
[strongSelf handleBinaryMessage:message.data];
|
||||
}
|
||||
|
||||
[strongSelf receiveMessage];
|
||||
}];
|
||||
}
|
||||
|
||||
- (void)handleTextMessage:(NSString *)text {
|
||||
if (text.length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
NSData *data = [text dataUsingEncoding:NSUTF8StringEncoding];
|
||||
if (!data) {
|
||||
return;
|
||||
}
|
||||
|
||||
NSError *jsonError = nil;
|
||||
NSDictionary *json = [NSJSONSerialization JSONObjectWithData:data
|
||||
options:0
|
||||
error:&jsonError];
|
||||
if (jsonError) {
|
||||
[self reportError:jsonError];
|
||||
return;
|
||||
}
|
||||
|
||||
NSString *type = json[@"type"];
|
||||
if (type.length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if ([type isEqualToString:@"session_started"]) {
|
||||
NSString *sessionId = json[@"session_id"] ?: @"";
|
||||
self.sessionId = sessionId;
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatClientDidStartSession:)]) {
|
||||
[self.delegate voiceChatClientDidStartSession:sessionId];
|
||||
}
|
||||
});
|
||||
} else if ([type isEqualToString:@"transcript_interim"]) {
|
||||
NSString *transcript = json[@"text"] ?: @"";
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatClientDidReceiveInterimTranscript:)]) {
|
||||
[self.delegate voiceChatClientDidReceiveInterimTranscript:transcript];
|
||||
}
|
||||
});
|
||||
} else if ([type isEqualToString:@"transcript_final"]) {
|
||||
NSString *transcript = json[@"text"] ?: @"";
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatClientDidReceiveFinalTranscript:)]) {
|
||||
[self.delegate voiceChatClientDidReceiveFinalTranscript:transcript];
|
||||
}
|
||||
});
|
||||
} else if ([type isEqualToString:@"turn_start"]) {
|
||||
NSInteger turnIndex = [json[@"turn_index"] integerValue];
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatClientDidStartTurn:)]) {
|
||||
[self.delegate voiceChatClientDidStartTurn:turnIndex];
|
||||
}
|
||||
});
|
||||
} else if ([type isEqualToString:@"eager_eot"]) {
|
||||
NSString *transcript = json[@"transcript"] ?: @"";
|
||||
double confidence = [json[@"confidence"] doubleValue];
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:
|
||||
confidence:)]) {
|
||||
[self.delegate
|
||||
voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:transcript
|
||||
confidence:confidence];
|
||||
}
|
||||
});
|
||||
} else if ([type isEqualToString:@"turn_resumed"]) {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatClientDidResumeTurn)]) {
|
||||
[self.delegate voiceChatClientDidResumeTurn];
|
||||
}
|
||||
});
|
||||
} else if ([type isEqualToString:@"llm_start"]) {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(voiceChatClientDidReceiveLLMStart)]) {
|
||||
[self.delegate voiceChatClientDidReceiveLLMStart];
|
||||
}
|
||||
});
|
||||
} else if ([type isEqualToString:@"llm_token"]) {
|
||||
NSString *token = json[@"token"] ?: @"";
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(voiceChatClientDidReceiveLLMToken:)]) {
|
||||
[self.delegate voiceChatClientDidReceiveLLMToken:token];
|
||||
}
|
||||
});
|
||||
} else if ([type isEqualToString:@"complete"]) {
|
||||
NSString *transcript = json[@"transcript"] ?: @"";
|
||||
NSString *aiResponse = json[@"ai_response"] ?: @"";
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatClientDidCompleteWithTranscript:
|
||||
aiResponse:)]) {
|
||||
[self.delegate voiceChatClientDidCompleteWithTranscript:transcript
|
||||
aiResponse:aiResponse];
|
||||
}
|
||||
});
|
||||
} else if ([type isEqualToString:@"error"]) {
|
||||
NSString *code = json[@"code"] ?: @"";
|
||||
NSString *message = json[@"message"] ?: @"";
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatClientDidReceiveErrorCode:message:)]) {
|
||||
[self.delegate voiceChatClientDidReceiveErrorCode:code
|
||||
message:message];
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
- (void)handleBinaryMessage:(NSData *)data {
|
||||
if (data.length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(voiceChatClientDidReceiveAudioChunk:)]) {
|
||||
[self.delegate voiceChatClientDidReceiveAudioChunk:data];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)disconnectInternal {
|
||||
self.connected = NO;
|
||||
self.sessionId = nil;
|
||||
self.audioSendingEnabled = NO;
|
||||
|
||||
if (self.webSocketTask) {
|
||||
[self.webSocketTask
|
||||
cancelWithCloseCode:NSURLSessionWebSocketCloseCodeNormalClosure
|
||||
reason:nil];
|
||||
self.webSocketTask = nil;
|
||||
}
|
||||
|
||||
if (self.urlSession) {
|
||||
[self.urlSession invalidateAndCancel];
|
||||
self.urlSession = nil;
|
||||
}
|
||||
}
|
||||
|
||||
- (void)reportError:(NSError *)error {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector(voiceChatClientDidFail:)]) {
|
||||
[self.delegate voiceChatClientDidFail:error];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)reportErrorWithMessage:(NSString *)message {
|
||||
NSError *error = [NSError errorWithDomain:kVoiceChatWebSocketClientErrorDomain
|
||||
code:-1
|
||||
userInfo:@{
|
||||
NSLocalizedDescriptionKey : message ?: @""
|
||||
}];
|
||||
[self reportError:error];
|
||||
}
|
||||
|
||||
- (void)notifyDisconnect:(NSError *_Nullable)error {
|
||||
self.connected = NO;
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceChatClientDidDisconnect:)]) {
|
||||
[self.delegate voiceChatClientDidDisconnect:error];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - NSURLSessionWebSocketDelegate
|
||||
|
||||
- (void)URLSession:(NSURLSession *)session
|
||||
webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
|
||||
didOpenWithProtocol:(NSString *)protocol {
|
||||
self.connected = YES;
|
||||
NSLog(@"[VoiceChatWebSocketClient] Connected");
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector(voiceChatClientDidConnect)]) {
|
||||
[self.delegate voiceChatClientDidConnect];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)URLSession:(NSURLSession *)session
|
||||
webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
|
||||
didCloseWithCode:(NSURLSessionWebSocketCloseCode)closeCode
|
||||
reason:(NSData *)reason {
|
||||
if (!self.webSocketTask) {
|
||||
return;
|
||||
}
|
||||
NSLog(@"[VoiceChatWebSocketClient] Closed with code: %ld",
|
||||
(long)closeCode);
|
||||
[self notifyDisconnect:nil];
|
||||
[self disconnectInternal];
|
||||
}
|
||||
|
||||
@end
|
||||
Reference in New Issue
Block a user