删除多余文件

This commit is contained in:
2025-11-25 18:13:02 +08:00
parent 3b2b6ce741
commit e56a309825
2177 changed files with 293 additions and 889419 deletions

View File

@@ -9,304 +9,262 @@ import threading
import time
from pathlib import Path
from typing import Optional, Union, Dict, List
from Utils.LogManager import LogManager
class FlaskSubprocessManager:
    """Process-wide singleton that supervises the Flask child process.

    What this class does:

    - spawns the child (the frozen executable itself, or ``Module/Main.py``
      in development) with ``--role=flask`` and ``FLASK_COMM_PORT`` set in
      its environment;
    - runs a single watchdog thread that restarts the child when it exits,
      or when neither ``comm_port`` nor ``comm_port + 1`` accepts a TCP
      connection for several consecutive probes;
    - re-sends the ``DeviceInfo`` snapshot after a successful restart.
    """

    _instance: Optional["FlaskSubprocessManager"] = None
    # Re-entrant because the watchdog's restart path calls start() and
    # _terminate_process(), both of which take the lock themselves.
    _lock = threading.RLock()

    # Seconds between two watchdog probes.
    _WATCHDOG_INTERVAL_SEC = 1.2
    # How long a restart waits for the child to listen before giving up on
    # pushing the device snapshot.
    _SNAPSHOT_WAIT_SEC = 10.0

    def __new__(cls):
        with cls._lock:
            if cls._instance is None:
                # Publish the instance only after it is fully initialised, so
                # a failing _initialize() cannot leave a half-built singleton.
                instance = super().__new__(cls)
                instance._initialize()
                cls._instance = instance
            return cls._instance

    # ========================= Initialisation =========================
    def _initialize(self):
        """Set up instance state; runs once, when the singleton is created."""
        self.process: Optional[subprocess.Popen] = None
        self.comm_port = 34566
        self._watchdog_running = False
        self._stop_event = threading.Event()

        self._restart_cooldown = 5         # minimum seconds between restart attempts
        self._restart_fail_threshold = 3   # consecutive failed port probes before restarting
        self._restart_fail_count = 0
        self._restart_window = 600         # rate-limit window, in seconds (10 minutes)
        self._restart_limit = 5            # max restart attempts inside the window
        self._restart_record: List[float] = []

        # On Windows, start helper/child processes with a hidden window.
        self._si = None
        if os.name == "nt":
            si = subprocess.STARTUPINFO()
            si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            si.wShowWindow = 0
            self._si = si

        atexit.register(self.stop)
        self._kill_orphans()
        self._log("info", "FlaskSubprocessManager 初始化完成")

    # ========================= Helpers =========================
    def _log(self, level, msg):
        """Write ``msg`` to the console and to ``LogManager`` (udid ``flask``).

        ``level`` is ``"info"``, ``"warn"``/``"warning"``; anything else is
        logged as an error. Logging is best-effort: this method is called
        from the watchdog and stdout-reader threads, and a console or logger
        failure must not terminate them.
        """
        try:
            print(msg)
        except Exception:
            pass
        try:
            if level == "info":
                LogManager.info(msg, udid="flask")
            elif level in ("warn", "warning"):
                LogManager.warning(msg, udid="flask")
            else:
                LogManager.error(msg, udid="flask")
        except Exception:
            pass

    def _kill_orphans(self):
        """Kill a leftover process still listening on ``comm_port`` (Windows only).

        Best-effort: any failure (``netstat`` missing, unparsable output,
        access denied) is ignored so that initialisation can continue.
        """
        if os.name != "nt":
            return
        try:
            out = subprocess.check_output(
                ["netstat", "-ano"], text=True, startupinfo=self._si
            )
            needle = f"127.0.0.1:{self.comm_port}"
            for line in out.splitlines():
                if needle not in line or "LISTENING" not in line:
                    continue
                # The owning PID is the last column of `netstat -ano` output.
                pid = int(line.strip().split()[-1])
                if pid == os.getpid():
                    continue
                subprocess.run(
                    ["taskkill", "/F", "/PID", str(pid)],
                    capture_output=True,
                    startupinfo=self._si,
                )
                self._log("warn", f"[FlaskMgr] 杀死残留 Flask 实例 PID={pid}")
        except Exception:
            pass

    def _port_alive(self):
        """Return True if ``comm_port`` or ``comm_port + 1`` accepts a TCP connection."""
        def _check(p):
            try:
                with socket.create_connection(("127.0.0.1", p), timeout=0.4):
                    return True
            except Exception:
                return False

        return _check(self.comm_port) or _check(self.comm_port + 1)

    def _wait_port_open(self, timeout: float) -> bool:
        """Poll ``_port_alive()`` for up to ``timeout`` seconds.

        Returns True as soon as a port answers, False on timeout or when a
        stop is requested while waiting.
        """
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if self._port_alive():
                return True
            if self._stop_event.wait(0.2):
                return False
        return False

    # ========================= Start =========================
    def start(self):
        """Spawn the Flask child process and make sure the watchdog is running.

        Does nothing (apart from a warning) if the child is already running.
        """
        with self._lock:
            if self.process and self.process.poll() is None:
                self._log("warn", "[FlaskMgr] Flask 已在运行,跳过")
                return

            # The child reads its communication port from the environment.
            env = os.environ.copy()
            env["FLASK_COMM_PORT"] = str(self.comm_port)

            # NOTE(review): Nuitka's manual describes `__compiled__`, not
            # `sys.frozen`, as its run-time marker - confirm that sys.frozen
            # is really set in the shipped executable.
            is_frozen = bool(getattr(sys, "frozen", False))

            if is_frozen:
                # Packaged mode: re-invoke this executable in the flask role.
                exe = Path(sys.executable).resolve()
                cmd = [str(exe), "--role=flask"]
                cwd = str(exe.parent)
            else:
                # Development mode: run Module/Main.py with the current interpreter.
                project_root = Path(__file__).resolve().parents[1]
                main_py = project_root / "Module" / "Main.py"
                cmd = [sys.executable, "-u", str(main_py), "--role=flask"]
                cwd = str(project_root)

            self._log("info", f"[FlaskMgr] 启动 Flask: {cmd}")

            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                # One undecodable byte must not kill the reader thread; if it
                # died the pipe would fill up and block the child.
                errors="replace",
                env=env,
                cwd=cwd,
                bufsize=1,
                startupinfo=self._si,
                start_new_session=True,
            )
            self.process = proc

            # Drain the child's stdout in the background and mirror it to the log.
            threading.Thread(
                target=self._read_stdout, args=(proc,), daemon=True
            ).start()

            # A start request cancels any earlier stop request; the watchdog
            # itself is only ever started once at a time.
            self._stop_event.clear()
            if not self._watchdog_running:
                self._watchdog_running = True
                threading.Thread(target=self._watchdog_loop, daemon=True).start()

            self._log("info", f"[FlaskMgr] Flask 子进程已启动 PID={proc.pid}")

    def _read_stdout(self, proc=None):
        """Forward every stdout line of ``proc`` to the log until EOF.

        ``proc`` defaults to the current ``self.process``. The reader keeps
        its own handle so that ``stop()`` clearing ``self.process`` cannot
        break it mid-read.
        """
        if proc is None:
            proc = self.process
        if proc is None or proc.stdout is None:
            return
        with proc.stdout as stream:
            for line in iter(stream.readline, ""):
                if line:
                    self._log("info", f"[Flask] {line.rstrip()}")

    # ========================= Stop =========================
    def stop(self):
        """Stop the child process and tell the watchdog to exit.

        The stop request is raised first; otherwise the watchdog would see
        the missing child and start it again.
        """
        self._stop_event.set()
        self._terminate_process()

    def _terminate_process(self):
        """Terminate the child (escalating to kill after 3 s) and forget it."""
        with self._lock:
            proc = self.process
            if proc is None:
                return
            try:
                proc.terminate()
            except OSError:
                pass
            try:
                proc.wait(timeout=3)
            except (subprocess.TimeoutExpired, OSError):
                pass
            if proc.poll() is None:
                try:
                    proc.kill()
                except OSError:
                    pass
            self._log("warn", "[FlaskMgr] 已停止 Flask 子进程")
            self.process = None

    # ========================= Watchdog =========================
    def _watchdog_loop(self):
        """Probe the child periodically and restart it when it is unhealthy.

        Runs until ``stop()`` raises the stop event.
        """
        self._log("info", "[FlaskWD] 看门狗已启动")
        while True:
            if self._stop_event.wait(self._WATCHDOG_INTERVAL_SEC):
                with self._lock:
                    # Re-check under the lock: start() may have cancelled the
                    # stop request and be relying on this thread to continue.
                    if self._stop_event.is_set():
                        self._watchdog_running = False
                        return
                continue

            # Local handle: stop() may set self.process to None concurrently.
            proc = self.process

            # 1) The child has exited (or was never started).
            if proc is None or proc.poll() is not None:
                self._log("error", "[FlaskWD] Flask 子进程退出,准备重启")
                self._restart()
                continue

            # 2) The child is running but its ports do not answer.
            if not self._port_alive():
                self._restart_fail_count += 1
                self._log(
                    "warn",
                    f"[FlaskWD] 端口检测失败 {self._restart_fail_count}/"
                    f"{self._restart_fail_threshold}",
                )
                if self._restart_fail_count >= self._restart_fail_threshold:
                    self._restart()
                continue

            # 3) Healthy.
            self._restart_fail_count = 0

    # ========================= Restart =========================
    def _restart(self):
        """Restart the child, subject to a rate limit and a cooldown.

        Returns True if the child was restarted, False if the attempt was
        skipped (rate limit, cooldown, stop requested) or failed.
        """
        now = time.time()

        # Rate limit: at most _restart_limit attempts per _restart_window.
        self._restart_record = [
            t for t in self._restart_record if now - t < self._restart_window
        ]
        if len(self._restart_record) >= self._restart_limit:
            self._log("error", "[FlaskWD] 10 分钟内重启次数太多,暂停监控")
            return False

        # Cooldown between two consecutive attempts.
        if self._restart_record and now - self._restart_record[-1] < self._restart_cooldown:
            self._log("warn", "[FlaskWD] 冷却中,暂不重启")
            return False

        self._log("warn", "[FlaskWD] >>> 重启 Flask 子进程 <<<")

        # Record the attempt before trying, so that attempts which fail are
        # throttled as well instead of being retried on every probe.
        self._restart_record.append(now)

        try:
            # Not stop(): that would also shut down this watchdog.
            self._terminate_process()
            if self._stop_event.wait(1):
                # A stop was requested while the old child was going down.
                return False
            self.start()
        except Exception as e:
            self._log("error", f"[FlaskWD] 重启失败: {e}")
            return False

        self._restart_fail_count = 0

        # Push the snapshot only once the new child actually listens;
        # sending right after spawn would just hit a closed port.
        if self._wait_port_open(self._SNAPSHOT_WAIT_SEC):
            self._push_snapshot()
        return True

    # ========================= Device snapshot =========================
    def _push_snapshot(self):
        """Re-send every ``DeviceInfo`` model to the freshly started child.

        Best-effort: a failure is logged and otherwise ignored.
        """
        try:
            # Imported lazily, as in the original code.
            from Module.DeviceInfo import DeviceInfo

            info = DeviceInfo()
            with info._lock:
                for m in info._models.values():
                    self.send(m.toDict())
        except Exception as e:
            self._log("error", f"[FlaskWD] 推送设备快照失败: {e}")

    # ========================= Sending data =========================
    def send(self, data: Union[str, Dict, List]) -> bool:
        """Send one newline-terminated UTF-8 message to ``comm_port``.

        ``dict`` and ``list`` payloads are serialised to JSON first. Returns
        True when the message was handed to the socket, False on any
        connection or send failure.
        """
        if isinstance(data, (dict, list)):
            data = json.dumps(data, ensure_ascii=False)
        try:
            with socket.create_connection(("127.0.0.1", self.comm_port), timeout=2) as s:
                s.sendall((data + "\n").encode("utf-8"))
            return True
        except Exception:
            return False

    @classmethod
    def get_instance(cls) -> "FlaskSubprocessManager":
        """Return the process-wide singleton, creating it on first use."""
        return cls()