//
//  Segmenter.m
//  keyBoard
//
//  Created by Mac on 2026/1/15.
//

#import "Segmenter.h"

@interface Segmenter ()

/// Text received through -appendToken: that has not yet been emitted as a segment.
@property(nonatomic, strong) NSMutableString *buffer;

/// Completed segments waiting to be handed out by -popReadySegments.
@property(nonatomic, strong) NSMutableArray *readySegments;

@end

@implementation Segmenter

/// Creates a segmenter with an empty buffer and a default threshold of 30.
- (instancetype)init {
    self = [super init];
    if (self) {
        _buffer = [[NSMutableString alloc] init];
        _readySegments = [[NSMutableArray alloc] init];
        _maxCharacterThreshold = 30;
    }
    return self;
}

#pragma mark - Public Methods

/// Appends a token to the internal buffer and moves every segment that is
/// now complete into the ready queue. A nil or empty token is ignored.
- (void)appendToken:(NSString *)token {
    if (token.length == 0) {
        return;
    }

    [self.buffer appendString:token];

    // Check whether the buffer can be split now.
    [self checkAndSplit];
}

/// Returns an immutable snapshot of the segments that are ready and clears
/// the ready queue. The result is an empty array when nothing is ready.
- (NSArray *)popReadySegments {
    NSArray *segments = [self.readySegments copy];
    [self.readySegments removeAllObjects];
    return segments;
}

/// Empties the buffer and returns its former content with leading and
/// trailing whitespace/newlines removed, or nil if nothing but whitespace
/// was left.
- (NSString *)flushRemainingSegment {
    NSString *remaining = [self.buffer
        stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
    [self.buffer setString:@""];
    return remaining.length > 0 ? remaining : nil;
}

/// Discards both the buffered text and any segments that were not popped yet.
- (void)reset {
    [self.buffer setString:@""];
    [self.readySegments removeAllObjects];
}

#pragma mark - Private Methods

/// Repeatedly cuts segments off the front of the buffer until no split
/// condition holds any more.
///
/// Priority of the cut positions:
///   1. right after the first sentence-ending character;
///   2. once the buffer length reaches maxCharacterThreshold, right after the
///      break character chosen by -findBestBreakPoint:;
///   3. otherwise a hard cut at maxCharacterThreshold.
- (void)checkAndSplit {
    // Sentence-ending punctuation.
    NSCharacterSet *sentenceEnders =
        [NSCharacterSet characterSetWithCharactersInString:@"。!?\n"];

    // A non-positive threshold disables forced splitting. With a threshold of
    // 0 the length check below would always pass and the hard cut would
    // remove zero characters, so the loop would never terminate.
    NSInteger threshold = (NSInteger)self.maxCharacterThreshold;

    while (YES) {
        // Look for the first sentence-ending character.
        NSRange enderRange = [self.buffer rangeOfCharacterFromSet:sentenceEnders];
        if (enderRange.location != NSNotFound) {
            [self consumePrefixOfLength:enderRange.location + 1];
            continue;
        }

        if (threshold <= 0 || self.buffer.length < (NSUInteger)threshold) {
            // No split condition is met.
            break;
        }

        // No sentence ender, but the threshold is reached: force a split,
        // preferably at a space or a comma-like character.
        NSRange breakRange = [self findBestBreakPoint:self.buffer];
        if (breakRange.location != NSNotFound) {
            [self consumePrefixOfLength:breakRange.location + 1];
        } else {
            // No suitable break point; cut at the threshold.
            [self consumePrefixOfLength:[self hardCutLengthForLimit:(NSUInteger)threshold]];
        }
    }
}

/// Removes the first `length` UTF-16 units from the buffer and, when the
/// removed text is not empty after trimming whitespace/newlines, appends the
/// trimmed text to the ready queue.
- (void)consumePrefixOfLength:(NSUInteger)length {
    NSString *segment = [[self.buffer substringToIndex:length]
        stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
    if (segment.length > 0) {
        [self.readySegments addObject:segment];
    }

    // Drop the part that has just been split off.
    [self.buffer deleteCharactersInRange:NSMakeRange(0, length)];
}

/// Returns how many UTF-16 units a hard cut at `limit` should remove from
/// the buffer. The result is adjusted so that a surrogate pair or composed
/// character sequence is never split in two, and is always greater than zero
/// for a non-empty buffer and a non-zero limit.
- (NSUInteger)hardCutLengthForLimit:(NSUInteger)limit {
    NSUInteger length = self.buffer.length;
    if (limit >= length) {
        return length;
    }

    NSRange sequence = [self.buffer rangeOfComposedCharacterSequenceAtIndex:limit];
    if (sequence.location == limit) {
        // The limit already sits on a character boundary.
        return limit;
    }

    // The limit falls inside a composed sequence: cut in front of it, unless
    // that would remove nothing, in which case take the whole sequence.
    return sequence.location > 0 ? sequence.location : NSMaxRange(sequence);
}

/// Searches `text` backwards, from its last character down to index
/// maxCharacterThreshold / 2, for a comma-like character or a space.
///
/// @return A range of length 1 at the break character, or
///         {NSNotFound, 0} when none exists in the searched part.
- (NSRange)findBestBreakPoint:(NSString *)text {
    // Prefer breaking at commas, semicolons and similar characters.
    // NOTE(review): several characters appear twice in this literal. Duplicates
    // are harmless in a character set, but confirm that no full-width variants
    // were lost in an encoding round-trip.
    NSCharacterSet *breakChars =
        [NSCharacterSet characterSetWithCharactersInString:@",,、;;:: "];

    // Keep the comparison signed: the declared type of maxCharacterThreshold
    // is not visible in this file, and an unsigned comparison would let the
    // index wrap around below zero.
    NSInteger lowerBound = ((NSInteger)self.maxCharacterThreshold) / 2;
    if (lowerBound < 0) {
        lowerBound = 0;
    }

    // Search from the end so that the segment contains as much as possible.
    for (NSInteger i = (NSInteger)text.length - 1; i >= lowerBound; i--) {
        unichar c = [text characterAtIndex:(NSUInteger)i];
        if ([breakChars characterIsMember:c]) {
            return NSMakeRange((NSUInteger)i, 1);
        }
    }

    return NSMakeRange(NSNotFound, 0);
}

@end