montana/Русский/Логистика/monitor_scrapers.py

255 lines
7.7 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Continuous monitor for mt_detail_sweep.py and mt_green_fleet.py
Runs for 12 hours, checks every 5 minutes, auto-restarts dead processes.
NOTE: On this Windows+Git-Bash system:
bash /tmp = C:\\Users\\0\\AppData\\Local\\Temp
Python must use Windows paths for os.path.getmtime etc.
"""
import subprocess
import time
import os
import sys
from datetime import datetime
# --- Locations ---------------------------------------------------------------
# Directory the scrapers are launched from.
WORK_DIR = r"c:\project\Cursor\Логистика"

# Git-Bash's /tmp is this Windows directory; Python-side file checks must use
# the Windows spelling, while the bash redirection uses the /tmp spelling.
WINDOWS_TEMP = r"C:\Users\0\AppData\Local\Temp"

# Each scraper log is addressed twice: once for bash, once for Python.
LOG_DETAIL_BASH = "/tmp/mt_detail_live.txt"
LOG_DETAIL_WIN = os.path.join(WINDOWS_TEMP, "mt_detail_live.txt")
LOG_GREEN_BASH = "/tmp/green_fleet_live.txt"
LOG_GREEN_WIN = os.path.join(WINDOWS_TEMP, "green_fleet_live.txt")

# --- Timing ------------------------------------------------------------------
DURATION_HOURS = 12        # total monitoring window, in hours
CHECK_INTERVAL = 5 * 60    # seconds between health checks
STALE_THRESHOLD = 10 * 60  # seconds without a log write before a restart
RESTART_WAIT = 2 * 60      # seconds to pause after a restart

# --- Run state ---------------------------------------------------------------
check_count = 0            # number of regular checks performed so far
START_TIME = time.time()   # reference point for elapsed/remaining time
def ts():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    current = datetime.now()
    return f"{current:%Y-%m-%d %H:%M:%S}"
def log(msg):
    """Print *msg* to stdout prefixed with a bracketed timestamp.

    Output is flushed on every call so the line is visible immediately.
    """
    print("[{}] {}".format(ts(), msg), flush=True)
def log_age_seconds(win_path):
    """Return how many seconds ago the file at *win_path* was last modified.

    Args:
        win_path: Path to the log file, in a form Python can open.

    Returns:
        The non-negative age in seconds, or ``-1`` if the file's
        modification time cannot be read (for example, the file is missing).
    """
    try:
        modified_at = os.path.getmtime(win_path)
    except OSError:
        return -1
    # Callers treat any negative value as "file missing". A modification time
    # slightly in the future (clock adjustment, remote filesystem) must not be
    # mistaken for that sentinel, so the age is clamped at zero.
    return max(0.0, time.time() - modified_at)
def log_tail(win_path, n=3):
    """Return the last *n* non-blank lines of a log file.

    The file is streamed line by line and only the most recent *n* non-blank
    lines are kept, so the whole log is never held in memory.

    Args:
        win_path: Path to the log file, in a form Python can open.
        n: Maximum number of lines to return. Values <= 0 yield no lines.

    Returns:
        A list of right-stripped lines, oldest first.
        ``["<empty log>"]`` if the file has no non-blank lines, or
        ``["<log not found>"]`` if the file cannot be opened or read.
    """
    from collections import deque

    # A plain ``lines[-n:]`` slice returns the *whole* list when n == 0;
    # a bounded deque gives the expected "at most n" behaviour for every n.
    recent = deque(maxlen=max(n, 0))
    saw_content = False
    try:
        with open(win_path, "r", encoding="utf-8", errors="replace") as f:
            for raw in f:
                if raw.strip():
                    saw_content = True
                    recent.append(raw.rstrip())
    except OSError:
        return ["<log not found>"]
    if not saw_content:
        return ["<empty log>"]
    return list(recent)
def _pids_from_wmic_output(stdout, script_name):
    """Extract process IDs from wmic rows whose text mentions *script_name*."""
    found = []
    for row in stdout.splitlines():
        if script_name not in row:
            continue
        fields = row.split()
        # NOTE(review): wmic is expected to print the requested columns in
        # alphabetical order (CommandLine, ProcessId), which puts the PID last
        # on the row — confirm on the target machine. Only that trailing field
        # is read: treating every numeric token as a PID would turn a numeric
        # command-line argument (e.g. "--port 8080") into a kill target.
        if fields and fields[-1].isdigit():
            pid = int(fields[-1])
            if pid not in found:
                found.append(pid)
    return found


def find_pids(script_name):
    """Return the PIDs of ``python.exe`` processes running *script_name*.

    Queries ``wmic`` for the command line and process ID of every process
    named ``python.exe`` and keeps the rows whose text contains *script_name*.

    Args:
        script_name: Substring to look for in the process command line.

    Returns:
        A list of integer PIDs without duplicates, in the order wmic reported
        them. Returns an empty list if the query fails for any reason; the
        failure is logged as a warning rather than raised.
    """
    try:
        result = subprocess.run(
            ["wmic", "process", "where", "name='python.exe'",
             "get", "processid,commandline"],
            capture_output=True, text=True, timeout=15,
            encoding="utf-8", errors="replace"
        )
        return _pids_from_wmic_output(result.stdout, script_name)
    except Exception as e:
        # Best-effort: a failed lookup must not bring the monitor down.
        log(f" [WARN] find_pids({script_name}) error: {e}")
        return []
def kill_pid(pid):
    """Force-stop the process with the given PID via PowerShell.

    PowerShell is told to stay silent on errors, and the command's exit
    status is not inspected. Any exception raised while running the command
    is logged as a warning instead of propagating.
    """
    try:
        stop_command = f"Stop-Process -Id {pid} -Force -ErrorAction SilentlyContinue"
        argv = ["powershell", "-Command", stop_command]
        subprocess.run(argv, capture_output=True, timeout=10)
        log(f" Killed PID {pid}")
    except Exception as exc:
        log(f" [WARN] kill_pid({pid}): {exc}")
def start_scraper(script, log_bash_path):
    """Launch *script* in the background through a bash wrapper.

    The wrapper changes into WORK_DIR and runs the script with unbuffered
    Python, appending both stdout and stderr to *log_bash_path* (a bash-style
    path). The wrapper's own output streams are discarded.

    Returns:
        The ``subprocess.Popen`` object for the bash wrapper, or ``None`` if
        the launch raised an exception (which is logged).
    """
    log(f" Launching: {script} -> {log_bash_path}")
    try:
        shell_line = f'cd "{WORK_DIR}" && python -u {script} >> "{log_bash_path}" 2>&1'
        discard_output = {
            "stdout": subprocess.DEVNULL,
            "stderr": subprocess.DEVNULL,
        }
        wrapper = subprocess.Popen(
            ["bash", "-c", shell_line],
            cwd=WORK_DIR,
            **discard_output,
        )
        log(f" Launched bash wrapper PID={wrapper.pid} for {script}")
        return wrapper
    except Exception as exc:
        log(f" [ERROR] Failed to start {script}: {exc}")
        return None
def check_and_maybe_restart(label, script, log_bash, log_win):
    """Check one scraper's health and restart it when it looks dead.

    A restart happens when either:
      - no python.exe process is found for *script*, or
      - the log exists but has not been modified for more than
        STALE_THRESHOLD seconds.

    A live process whose log file is missing is only reported; it is assumed
    to be starting up and is not restarted. When no restart is needed and
    several instances are running, every PID except the first one found is
    killed.

    Args:
        label: Short name used in log messages.
        script: Script file name to look for and to launch.
        log_bash: Bash-style log path handed to the launcher.
        log_win: Windows-style path to the same log, used for the age check.

    Returns:
        True if the scraper was restarted, False otherwise.
    """
    running = find_pids(script)
    age_sec = log_age_seconds(log_win)
    log_present = age_sec >= 0
    age_min = age_sec / 60 if log_present else -1

    proc_status = f"RUNNING PIDs={running}" if running else "NO PROCESS"
    log_status = f"log_age={age_min:.1f}m" if log_present else "log=MISSING"
    log(f" [{label}] {proc_status} | {log_status}")

    # A non-None reason means a restart is required.
    reason = None
    if not running:
        reason = "no process found"
    elif not log_present:
        # Process exists but has produced no log file: report and move on.
        log(f" [{label}] Process alive but log missing — may be starting up, skipping")
    elif age_sec > STALE_THRESHOLD:
        reason = f"log stale {age_min:.1f}m > {STALE_THRESHOLD//60}m"

    if reason is not None:
        log(f" !! [{label}] RESTARTING — {reason}")
        for stale_pid in running:
            kill_pid(stale_pid)
        # Short pause after the kill requests before relaunching.
        time.sleep(3)
        start_scraper(script, log_bash)
        return True

    # Healthy (or still starting): keep the first instance, drop the rest.
    if len(running) > 1:
        log(f" [WARN] [{label}] {len(running)} instances — killing duplicates")
        for extra_pid in running[1:]:
            kill_pid(extra_pid)
    return False
def print_report():
    """Log a status summary: elapsed/remaining time and each log's tail.

    For both scraper logs this prints the log's age (or ``MISSING``) followed
    by its last four non-blank lines.
    """
    rule = "=" * 65
    elapsed_h = (time.time() - START_TIME) / 3600
    remaining_h = max(0, DURATION_HOURS - elapsed_h)

    log(rule)
    log(f"30-MIN REPORT | elapsed={elapsed_h:.1f}h | remaining={remaining_h:.1f}h")
    log(rule)

    sections = (
        ("mt_detail_sweep", LOG_DETAIL_WIN),
        ("mt_green_fleet ", LOG_GREEN_WIN),
    )
    for label, win_path in sections:
        age = log_age_seconds(win_path)
        if age >= 0:
            age_str = f"{age/60:.1f}m"
        else:
            age_str = "MISSING"
        log(f" [{label}] log_age={age_str}")
        for line in log_tail(win_path, 4):
            log(f" {line}")

    log(rule)
def main():
    """Run the monitoring loop for DURATION_HOURS hours.

    Prints a banner and an initial report, performs an initial health check
    of both scrapers, then repeats the check every CHECK_INTERVAL seconds
    until the time budget is used up. After any check that restarted a
    scraper, it pauses for RESTART_WAIT seconds. A full report is printed
    after a check whenever at least 30 minutes have passed since the last
    one.
    """
    global check_count

    banner = "=" * 65
    targets = (
        ("detail", "mt_detail_sweep.py", LOG_DETAIL_BASH, LOG_DETAIL_WIN),
        ("green", "mt_green_fleet.py", LOG_GREEN_BASH, LOG_GREEN_WIN),
    )

    def check_all():
        # Build a list (not a generator) so every scraper is checked even
        # when an earlier one has already been restarted.
        outcomes = [check_and_maybe_restart(*target) for target in targets]
        return any(outcomes)

    log(banner)
    log("SCRAPER MONITOR STARTED")
    log(f"Duration={DURATION_HOURS}h | Check={CHECK_INTERVAL//60}m | Stale>{STALE_THRESHOLD//60}m = restart")
    log(f"Work dir: {WORK_DIR}")
    log(f"Windows TEMP: {WINDOWS_TEMP}")
    log(banner)

    end_time = START_TIME + DURATION_HOURS * 3600
    # Back-dated by 30 minutes so the first regular check also prints a report.
    last_report_time = START_TIME - 1800

    # Status snapshot before any check is made.
    print_report()

    log("--- Initial health check ---")
    if check_all():
        log(f"Initial restarts done. Waiting {RESTART_WAIT}s before first regular check...")
        time.sleep(RESTART_WAIT)
    last_check_time = time.time()

    while time.time() < end_time:
        # Wait for the next check in 10-second slices so the end of the
        # monitoring window is noticed promptly.
        due_at = last_check_time + CHECK_INTERVAL
        while time.time() < due_at and time.time() < end_time:
            time.sleep(10)
        if time.time() >= end_time:
            break

        check_count += 1
        elapsed_h = (time.time() - START_TIME) / 3600
        log(f"--- Check #{check_count} | {elapsed_h:.1f}h elapsed ---")
        if check_all():
            log(f"Restart(s) performed. Waiting {RESTART_WAIT}s...")
            time.sleep(RESTART_WAIT)
        last_check_time = time.time()

        # Full report at most once per 30 minutes.
        now = time.time()
        if now - last_report_time >= 1800:
            print_report()
            last_report_time = now

    log(banner)
    log(f"MONITOR FINISHED — {DURATION_HOURS}h complete")
    log(banner)
if __name__ == "__main__":
    # Script entry point: Ctrl+C is an intentional stop, so report it and
    # exit with status 0 instead of showing a traceback.
    try:
        main()
    except KeyboardInterrupt:
        log("Monitor stopped by user (Ctrl+C).")
        raise SystemExit(0)