20250904-初步功能已完成
This commit is contained in:
319
Utils/AiUtils.py
319
Utils/AiUtils.py
@@ -1,6 +1,5 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
@@ -9,6 +8,7 @@ import unicodedata
|
||||
import wda
|
||||
from Utils.LogManager import LogManager
|
||||
import xml.etree.ElementTree as ET
|
||||
import re, html
|
||||
from lxml import etree
|
||||
from wda import Client
|
||||
|
||||
@@ -298,108 +298,313 @@ class AiUtils(object):
|
||||
print(f"btn:{btn}")
|
||||
return cls.findNumber(btn.label)
|
||||
|
||||
# @classmethod
|
||||
# def extract_messages_from_xml(cls, xml: str):
|
||||
# """
|
||||
# 仅返回当前屏幕中“可见的”聊天内容(含时间分隔)
|
||||
# """
|
||||
# from lxml import etree
|
||||
# root = etree.fromstring(xml.encode("utf-8"))
|
||||
# items = []
|
||||
#
|
||||
# # 屏幕宽度
|
||||
# app = root.xpath('/XCUIElementTypeApplication')
|
||||
# screen_w = cls.parse_float(app[0], 'width', 414.0) if app else 414.0
|
||||
#
|
||||
# # 找 Table 的可见范围
|
||||
# table = root.xpath('//XCUIElementTypeTable')
|
||||
# if table:
|
||||
# table = table[0]
|
||||
# table_top = cls.parse_float(table, 'y', 0.0)
|
||||
# table_h = cls.parse_float(table, 'height', 0.0)
|
||||
# table_bottom = table_top + table_h
|
||||
# else:
|
||||
# table_top, table_bottom = 0.0, cls.parse_float(app[0], 'height', 736.0) if app else 736.0
|
||||
#
|
||||
# def in_view(el) -> bool:
|
||||
# """元素在聊天区内并且可见"""
|
||||
# if el.get('visible') != 'true':
|
||||
# return False
|
||||
# y = cls.parse_float(el, 'y', -1e9)
|
||||
# h = cls.parse_float(el, 'height', 0.0)
|
||||
# by = y + h
|
||||
# return not (by <= table_top or y >= table_bottom)
|
||||
#
|
||||
# # 时间分隔
|
||||
# for t in root.xpath('//XCUIElementTypeStaticText[contains(@traits, "Header")]'):
|
||||
# if not in_view(t):
|
||||
# continue
|
||||
# txt = (t.get('label') or t.get('name') or t.get('value') or '').strip()
|
||||
# if txt:
|
||||
# items.append({'type': 'time', 'text': txt, 'y': cls.parse_float(t, 'y')})
|
||||
#
|
||||
# # 消息气泡
|
||||
# EXCLUDES = {'Heart', 'Lol', 'ThumbsUp', '分享发布内容', '视频贴纸标签页', '双击发送表情'}
|
||||
#
|
||||
# # —— 新增:系统横幅/提示卡片过滤(只文本判断,最小改动)——
|
||||
# SYSTEM_BANNER_PATTERNS = [
|
||||
# r"回复时接收通知", r"开启私信通知", r"开启通知",
|
||||
# r"Turn on (DM|message|direct message)?\s*notifications",
|
||||
# r"Enable notifications",
|
||||
# r"Get notified when .* replies",
|
||||
# ]
|
||||
# SYSTEM_BANNER_REGEX = re.compile("|".join(SYSTEM_BANNER_PATTERNS), re.IGNORECASE)
|
||||
#
|
||||
# msg_nodes = table.xpath(
|
||||
# './/XCUIElementTypeCell[@visible="true"]'
|
||||
# '//XCUIElementTypeOther[@visible="true" and (@name or @label) and not(ancestor::XCUIElementTypeCollectionView)]'
|
||||
# ) if table is not None else []
|
||||
#
|
||||
# for o in msg_nodes:
|
||||
# # 这里补上 value,避免少数节点只在 value 上有文本时漏读
|
||||
# text = (o.get('label') or o.get('name') or o.get('value') or '').strip()
|
||||
# if not text or text in EXCLUDES:
|
||||
# continue
|
||||
# # 命中 TikTok 自带的“开启通知/回复时接收通知”类提示 → 直接剔除
|
||||
# if SYSTEM_BANNER_REGEX.search(text):
|
||||
# continue
|
||||
# if not in_view(o):
|
||||
# continue
|
||||
#
|
||||
# # 找所在 Cell
|
||||
# cell = o.getparent()
|
||||
# while cell is not None and cell.get('type') != 'XCUIElementTypeCell':
|
||||
# cell = cell.getparent()
|
||||
#
|
||||
# x = cls.parse_float(o, 'x')
|
||||
# y = cls.parse_float(o, 'y')
|
||||
# w = cls.parse_float(o, 'width')
|
||||
# right_edge = x + w
|
||||
#
|
||||
# direction = None
|
||||
# # 头像位置判定
|
||||
# if cell is not None:
|
||||
# avatar_btns = cell.xpath(
|
||||
# './/XCUIElementTypeButton[@visible="true" and (@name="图片头像" or @label="图片头像")]')
|
||||
# if avatar_btns:
|
||||
# ax = cls.parse_float(avatar_btns[0], 'x')
|
||||
# direction = 'in' if ax < (screen_w / 2) else 'out'
|
||||
# # 右对齐兜底
|
||||
# if direction is None:
|
||||
# direction = 'out' if right_edge > (screen_w - 20) else 'in'
|
||||
#
|
||||
# items.append({'type': 'msg', 'dir': direction, 'text': text, 'y': y})
|
||||
#
|
||||
# # 排序 & 清理
|
||||
# items.sort(key=lambda i: i['y'])
|
||||
# for it in items:
|
||||
# it.pop('y', None)
|
||||
# return items
|
||||
#
|
||||
# @classmethod
|
||||
# def parse_float(cls, el, attr, default=0.0):
|
||||
# try:
|
||||
# return float(el.get(attr, default))
|
||||
# except Exception:
|
||||
# return default
|
||||
|
||||
@classmethod
def extract_messages_from_xml(cls, xml: str):
    """Extract the chat content currently visible on screen from a TikTok chat page source.

    The page source is searched for a Table, CollectionView or ScrollView
    that looks like the chat list. Time separators and message texts that
    overlap that area are collected, system banners and the bottom toolbar
    are filtered out, and every message is tagged with a direction.

    Args:
        xml: Page-source XML as a string.

    Returns:
        A list ordered from top to bottom of the screen. Each entry is either
        ``{"type": "time", "text": ...}`` or
        ``{"type": "msg", "dir": "in" | "out", "text": ...}``.
        An empty list is returned when ``xml`` is not a non-blank string or
        cannot be parsed.
    """
    # Validate BEFORE parsing: None / blank input must yield [] instead of raising.
    if not isinstance(xml, str) or not xml.strip():
        return []

    # Function-scope import, as in the original block.
    from lxml import etree

    try:
        root = etree.fromstring(xml.encode("utf-8"))
    except Exception:
        # Best effort by design: an unparsable page source means "no messages".
        return []

    # ---------- small helpers ----------
    def get_text(el):
        """Return the first non-empty of label/name/value, stripped and HTML-unescaped."""
        raw = el.get('label') or el.get('name') or el.get('value') or ''
        return html.unescape(raw.strip())

    def is_visible(el):
        """A missing ``visible`` attribute counts as visible; otherwise it must be 'true'."""
        flag = el.get('visible')
        return flag is None or flag.lower() == 'true'

    # ---------- screen size (defaults apply when the application node is absent) ----------
    app = root.xpath('/XCUIElementTypeApplication')
    screen_w = cls.parse_float(app[0], 'width', 414.0) if app else 414.0
    screen_h = cls.parse_float(app[0], 'height', 736.0) if app else 736.0

    # ---------- main container detection ----------
    def pick_container():
        """Score every visible Table/CollectionView/ScrollView and return the best one.

        More descendant cells and a vertical centre closer to the middle of
        the screen give a higher score. Returns ``(node, kind)`` or
        ``(None, None)`` when there is no candidate.
        """
        candidates = []
        for xp, ctype in (
            ('//XCUIElementTypeTable', 'table'),
            ('//XCUIElementTypeCollectionView', 'collection'),
            ('//XCUIElementTypeScrollView', 'scroll'),
        ):
            for node in root.xpath(xp):
                if not is_visible(node):
                    continue
                y = cls.parse_float(node, 'y', 0.0)
                h = cls.parse_float(node, 'height', screen_h)
                cells = node.xpath('.//XCUIElementTypeCell')
                score = len(cells) * 10 - abs((y + h / 2) - screen_h / 2)
                candidates.append((score, node, ctype))
        if not candidates:
            return None, None
        # max() keeps the first candidate on ties, like a stable descending sort.
        _, best_node, best_type = max(candidates, key=lambda t: t[0])
        return best_node, best_type

    container, _ = pick_container()

    # ---------- visible area (area_top, area_bot) ----------
    if container is not None:
        area_top = cls.parse_float(container, 'y', 0.0)
        area_h = cls.parse_float(container, 'height', screen_h)
        area_bot = area_top + area_h
    else:
        # Upper bound: bottom edge of the top-most visible block at least 200 wide.
        blocks = [
            n for n in root.xpath('//XCUIElementTypeOther[@y and @height and @width>="200"]')
            if is_visible(n)
        ]
        area_top = 0.0
        if blocks:
            top_block = min(blocks, key=lambda n: cls.parse_float(n, 'y', 0.0))
            area_top = (cls.parse_float(top_block, 'y', 0.0)
                        + cls.parse_float(top_block, 'height', 0.0))
        # Lower bound: top edge of the lowest visible TextView (the input box).
        tvs = [n for n in root.xpath('//XCUIElementTypeTextView') if is_visible(n)]
        if tvs:
            tvs.sort(key=lambda n: cls.parse_float(n, 'y', 0.0))
            area_bot = cls.parse_float(tvs[-1], 'y', screen_h)
        else:
            area_bot = screen_h
        # Implausibly small area: fall back to the whole screen.
        if area_bot - area_top < 100:
            area_top, area_bot = 0.0, screen_h

    def in_view(el) -> bool:
        """True when the element is visible and vertically overlaps the chat area."""
        if not is_visible(el):
            return False
        y = cls.parse_float(el, 'y', -1e9)
        h = cls.parse_float(el, 'height', 0.0)
        by = y + h
        tol = 8.0  # tolerance so elements barely touching an edge are not counted
        return not (by <= area_top + tol or y >= area_bot - tol)

    # ---------- time separators (Header) ----------
    items = []
    for t in root.xpath('//XCUIElementTypeStaticText[contains(@traits, "Header")]'):
        if not in_view(t):
            continue
        txt = get_text(t)
        if txt:
            items.append({'type': 'time', 'text': txt, 'y': cls.parse_float(t, 'y', 0.0)})

    # ---------- system hint / banner filtering ----------
    EXCLUDES_LITERAL = {
        'Heart', 'Lol', 'ThumbsUp',
        '分享发布内容', '视频贴纸标签页', '双击发送表情', '贴纸',
    }
    SYSTEM_PATTERNS = [
        r"回复时接收通知", r"开启(私信)?通知", r"开启通知",
        r"你打开了这个与 .* 的聊天。.*隐私",
        r"在此用户接受你的消息请求之前,你最多只能发送 ?\d+ 条消息。?",
        r"聊天消息条数已达上限,你将无法向该用户发送消息。?",
        r"未发送$",
        r"Turn on (DM|message|direct message)?\s*notifications",
        r"Enable notifications",
        r"Get notified when .* replies",
        r"You opened this chat .* privacy",
        r"Only \d+ message can be sent .* accepts .* request",
    ]
    SYSTEM_RE = re.compile("|".join(SYSTEM_PATTERNS), re.IGNORECASE)

    def is_toolbar_like(o) -> bool:
        """True for known toolbar labels or a short element sitting at the area's bottom edge."""
        if get_text(o) in EXCLUDES_LITERAL:
            return True
        y = cls.parse_float(o, 'y', 0.0)
        h = cls.parse_float(o, 'height', 0.0)
        near_bottom = (area_bot - (y + h)) < 48
        is_short = h <= 40
        return near_bottom and is_short

    def is_message(o) -> bool:
        """Shared candidate filter: in view, not toolbar-like, non-empty, not a system hint."""
        if not in_view(o) or is_toolbar_like(o):
            return False
        txt = get_text(o)
        return bool(txt) and not SYSTEM_RE.search(txt)

    # ---------- collect message candidates ----------
    if container is not None:
        # Inside the container: text-bearing nodes below a Cell.
        cand = container.xpath(
            './/XCUIElementTypeCell//*[self::XCUIElementTypeOther or self::XCUIElementTypeStaticText or self::XCUIElementTypeTextView]'
            '[@y and (@name or @label or @value)]'
        )
        msg_nodes = [o for o in cand if is_message(o)]
    else:
        # Global fallback: skip nodes hanging directly under a CollectionView.
        cand = root.xpath(
            '//XCUIElementTypeOther[@y and (@name or @label or @value)]'
            ' | //XCUIElementTypeStaticText[@y and (@name or @label or @value)]'
            ' | //XCUIElementTypeTextView[@y and (@name or @label or @value)]'
        )
        msg_nodes = []
        for o in cand:
            parent = o.getparent()
            if parent is not None and parent.get('type') == 'XCUIElementTypeCollectionView':
                continue
            if is_message(o):
                msg_nodes.append(o)

    # ---------- direction & assembly ----------
    for o in msg_nodes:
        txt = get_text(o)
        if not txt or txt in EXCLUDES_LITERAL:
            continue

        # Walk up to the enclosing Cell; it is used to look for the avatar button.
        cell = o.getparent()
        while cell is not None and cell.get('type') != 'XCUIElementTypeCell':
            cell = cell.getparent()

        x = cls.parse_float(o, 'x', 0.0)
        y = cls.parse_float(o, 'y', 0.0)
        w = cls.parse_float(o, 'width', 0.0)
        right_edge = x + w

        direction = None
        # Primary rule: an avatar in the left half of the screen means incoming.
        if cell is not None:
            avatars = cell.xpath(
                './/XCUIElementTypeButton[@visible="true" and (@name="图片头像" or @label="图片头像")]'
            )
            if avatars:
                ax = cls.parse_float(avatars[0], 'x', 0.0)
                direction = 'in' if ax < (screen_w / 2) else 'out'
        # Fallback rule: right edge beyond 75% of the screen width means outgoing.
        if direction is None:
            direction = 'out' if right_edge > (screen_w * 0.75) else 'in'

        items.append({'type': 'msg', 'dir': direction, 'text': txt, 'y': y})

    # ---------- order top to bottom, then drop the helper coordinate ----------
    items.sort(key=lambda i: i.get('y', 0.0))
    for it in items:
        it.pop('y', None)
    return items
|
||||
|
||||
@classmethod
def parse_float(cls, el, attr, default=0.0):
    """Read attribute ``attr`` of ``el`` as a float.

    Args:
        el: Any object with a ``get(name)`` method; the callers in this
            file pass lxml elements.
        attr: Name of the attribute to read.
        default: Value returned unchanged when the attribute is missing or
            cannot be converted.

    Returns:
        The attribute converted with ``float()``, or ``default``.
    """
    try:
        raw = el.get(attr)
        if raw is None:
            # Missing attribute: hand back the caller's default untouched
            # (no float() coercion, so a None default stays None).
            return default
        return float(raw)
    except (AttributeError, TypeError, ValueError):
        # el has no .get(), or the value is not numeric.
        return default
|
||||
|
||||
@@ -486,7 +691,6 @@ class AiUtils(object):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
@classmethod
|
||||
def _read_json_list(cls, file_path: Path) -> list:
|
||||
"""读取为 list;读取失败或不是 list 则返回空数组"""
|
||||
@@ -609,4 +813,3 @@ class AiUtils(object):
|
||||
except Exception as e:
|
||||
LogManager.error(f"[delete_anchors_by_ids] 写入失败: {e}")
|
||||
return deleted
|
||||
|
||||
|
||||
Reference in New Issue
Block a user