#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Continuous monitor for mt_detail_sweep.py and mt_green_fleet.py
Runs for 12 hours, checks every 5 minutes, auto-restarts dead processes.

NOTE: On this Windows+Git-Bash system:
  bash /tmp  = C:\\Users\\0\\AppData\\Local\\Temp
  Python must use Windows paths for os.path.getmtime etc.
"""

import os
import subprocess
import sys
import time
from collections import deque
from datetime import datetime
from typing import List, Optional

WORK_DIR = r"c:\project\Cursor\Логистика"

# Bash /tmp maps to Windows AppData\Local\Temp
WINDOWS_TEMP = r"C:\Users\0\AppData\Local\Temp"
LOG_DETAIL_BASH = "/tmp/mt_detail_live.txt"
LOG_GREEN_BASH = "/tmp/green_fleet_live.txt"
LOG_DETAIL_WIN = os.path.join(WINDOWS_TEMP, "mt_detail_live.txt")
LOG_GREEN_WIN = os.path.join(WINDOWS_TEMP, "green_fleet_live.txt")

START_TIME = time.time()
DURATION_HOURS = 12
CHECK_INTERVAL = 300    # 5 minutes between checks
STALE_THRESHOLD = 600   # 10 min stale = process considered dead
RESTART_WAIT = 120      # seconds to wait after restart before next check
REPORT_INTERVAL = 1800  # seconds between full status reports

# (label, script, bash-side log path, Windows-side log path) per scraper.
SCRAPERS = (
    ("detail", "mt_detail_sweep.py", LOG_DETAIL_BASH, LOG_DETAIL_WIN),
    ("green", "mt_green_fleet.py", LOG_GREEN_BASH, LOG_GREEN_WIN),
)

check_count = 0


def ts() -> str:
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def log(msg) -> None:
    """Print *msg* prefixed with a timestamp, flushing immediately."""
    print(f"[{ts()}] {msg}", flush=True)


def log_age_seconds(win_path) -> float:
    """Return seconds since the log was last modified, or -1 if unreadable."""
    try:
        mtime = os.path.getmtime(win_path)
    except OSError:
        return -1
    return time.time() - mtime


def log_tail(win_path, n=3) -> List[str]:
    """Return the last *n* non-blank lines of the log, right-stripped.

    The file is streamed through a bounded deque so a log that has been
    growing for hours is never held in memory as a whole.  Returns ``[""]``
    when the file is missing, unreadable, has no non-blank lines, or when
    *n* is not positive.
    """
    if n <= 0:
        return [""]
    try:
        with open(win_path, "r", encoding="utf-8", errors="replace") as f:
            tail = deque((line.rstrip() for line in f if line.strip()), maxlen=n)
    except OSError:
        return [""]
    return list(tail) if tail else [""]


def parse_wmic_pids(wmic_output, script_name) -> List[int]:
    """Extract PIDs from ``wmic ... get processid,commandline`` output.

    Only lines containing *script_name* are considered.  wmic prints the
    requested columns in alphabetical order (CommandLine, then ProcessId),
    so the PID is the last whitespace-separated token on the line.  Numeric
    tokens inside the command line itself (e.g. ``--pages 50``) are NOT
    PIDs and are ignored.
    """
    pids = []
    for line in wmic_output.splitlines():
        if script_name not in line:
            continue
        fields = line.split()
        if fields and fields[-1].isdecimal():
            pids.append(int(fields[-1]))
    return pids


def _query_pids(script_name) -> Optional[List[int]]:
    """Return PIDs of python.exe processes running *script_name*.

    Returns ``None`` (not an empty list) when the process list could not be
    obtained at all, so callers can tell "nothing is running" apart from
    "we could not look".
    """
    try:
        result = subprocess.run(
            ["wmic", "process", "where", "name='python.exe'",
             "get", "processid,commandline"],
            capture_output=True, text=True, timeout=15,
            encoding="utf-8", errors="replace"
        )
    except (OSError, subprocess.SubprocessError) as e:
        log(f" [WARN] find_pids({script_name}) error: {e}")
        return None
    return parse_wmic_pids(result.stdout, script_name)


def find_pids(script_name) -> List[int]:
    """Return list of PIDs running the given script (Windows python.exe).

    Returns an empty list when nothing matches or when the query fails.
    """
    pids = _query_pids(script_name)
    return [] if pids is None else pids


def kill_pid(pid) -> None:
    """Kill a process by PID using PowerShell (best effort)."""
    try:
        subprocess.run(
            ["powershell", "-Command",
             f"Stop-Process -Id {pid} -Force -ErrorAction SilentlyContinue"],
            capture_output=True, timeout=10
        )
        log(f" Killed PID {pid}")
    except (OSError, subprocess.SubprocessError) as e:
        log(f" [WARN] kill_pid({pid}): {e}")


def start_scraper(script, log_bash_path):
    """Start a scraper in background using bash, appending to log file.

    Returns the ``subprocess.Popen`` object of the bash wrapper, or ``None``
    if the wrapper could not be launched.
    """
    log(f" Launching: {script} -> {log_bash_path}")
    try:
        cmd = f'cd "{WORK_DIR}" && python -u {script} >> "{log_bash_path}" 2>&1'
        proc = subprocess.Popen(
            ["bash", "-c", cmd],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            cwd=WORK_DIR,
        )
    except (OSError, ValueError) as e:
        log(f" [ERROR] Failed to start {script}: {e}")
        return None
    log(f" Launched bash wrapper PID={proc.pid} for {script}")
    return proc


def check_and_maybe_restart(label, script, log_bash, log_win) -> bool:
    """
    Check if scraper is healthy. Restart if:
    - No python.exe process found for it, OR
    - Log not updated in STALE_THRESHOLD seconds

    A running process whose log file is missing is left alone (it may still
    be starting up).  When the process list itself cannot be obtained, the
    decision falls back to log freshness only, so a broken process query
    never spawns a duplicate of a scraper that is visibly still writing.

    Returns True if restarted.
    """
    pids = _query_pids(script)
    age = log_age_seconds(log_win)
    age_min = age / 60 if age >= 0 else -1

    if pids is None:
        proc_status = "PROCESS LIST UNAVAILABLE"
    elif pids:
        proc_status = f"RUNNING PIDs={pids}"
    else:
        proc_status = "NO PROCESS"
    log_status = f"log_age={age_min:.1f}m" if age >= 0 else "log=MISSING"
    log(f" [{label}] {proc_status} | {log_status}")

    # Decide if restart needed; an empty reason means "leave it alone".
    reason = ""
    if pids is None:
        if 0 <= age <= STALE_THRESHOLD:
            log(f" [{label}] Process list unavailable but log is fresh, skipping")
            return False
        if age < 0:
            reason = "process list unavailable and log missing"
        else:
            reason = (f"log stale {age_min:.1f}m > {STALE_THRESHOLD//60}m"
                      " (process list unavailable)")
        pids = []  # nothing we can kill
    elif not pids:
        reason = "no process found"
    elif age < 0:
        # Process running but log missing: it might still be starting up.
        log(f" [{label}] Process alive but log missing — may be starting up, skipping")
    elif age > STALE_THRESHOLD:
        reason = f"log stale {age_min:.1f}m > {STALE_THRESHOLD//60}m"

    if reason:
        log(f" !! [{label}] RESTARTING — {reason}")
        for pid in pids:
            kill_pid(pid)
        time.sleep(3)
        start_scraper(script, log_bash)
        return True

    # Kill duplicates if more than one instance
    if len(pids) > 1:
        log(f" [WARN] [{label}] {len(pids)} instances — killing duplicates")
        for pid in pids[1:]:
            kill_pid(pid)

    return False


def print_report() -> None:
    """Log elapsed/remaining time plus age and tail of each scraper log."""
    elapsed_h = (time.time() - START_TIME) / 3600
    remaining_h = max(0, DURATION_HOURS - elapsed_h)
    log("=" * 65)
    log(f"30-MIN REPORT | elapsed={elapsed_h:.1f}h | remaining={remaining_h:.1f}h")
    log("=" * 65)
    for label, win_path in [("mt_detail_sweep", LOG_DETAIL_WIN),
                            ("mt_green_fleet ", LOG_GREEN_WIN)]:
        age = log_age_seconds(win_path)
        age_str = f"{age/60:.1f}m" if age >= 0 else "MISSING"
        log(f" [{label}] log_age={age_str}")
        for line in log_tail(win_path, 4):
            log(f" {line}")
    log("=" * 65)


def _check_all() -> bool:
    """Health-check every scraper; return True if any was restarted."""
    # Build the full list first so every scraper is checked (no short-circuit).
    restarted = [check_and_maybe_restart(*scraper) for scraper in SCRAPERS]
    return any(restarted)


def main() -> None:
    """Run the monitor loop for DURATION_HOURS, then exit."""
    global check_count

    log("=" * 65)
    log("SCRAPER MONITOR STARTED")
    log(f"Duration={DURATION_HOURS}h | Check={CHECK_INTERVAL//60}m | Stale>{STALE_THRESHOLD//60}m = restart")
    log(f"Work dir: {WORK_DIR}")
    log(f"Windows TEMP: {WINDOWS_TEMP}")
    log("=" * 65)

    end_time = START_TIME + DURATION_HOURS * 3600

    # Initial status report; the periodic report timer starts from here so
    # the next full report comes one interval later, not at the first check.
    print_report()
    last_report_time = time.time()

    # Initial check
    log("--- Initial health check ---")
    if _check_all():
        log(f"Initial restarts done. Waiting {RESTART_WAIT}s before first regular check...")
        time.sleep(RESTART_WAIT)

    last_check_time = time.time()

    while time.time() < end_time:
        # Sleep until next check interval (in 10s chunks for responsiveness)
        next_check = last_check_time + CHECK_INTERVAL
        while time.time() < next_check and time.time() < end_time:
            time.sleep(10)

        if time.time() >= end_time:
            break

        check_count += 1
        log(f"--- Check #{check_count} | {(time.time()-START_TIME)/3600:.1f}h elapsed ---")

        if _check_all():
            log(f"Restart(s) performed. Waiting {RESTART_WAIT}s...")
            time.sleep(RESTART_WAIT)

        last_check_time = time.time()

        # Full report every 30 minutes
        now = time.time()
        if now - last_report_time >= REPORT_INTERVAL:
            print_report()
            last_report_time = now

    log("=" * 65)
    log(f"MONITOR FINISHED — {DURATION_HOURS}h complete")
    log("=" * 65)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        log("Monitor stopped by user (Ctrl+C).")
        sys.exit(0)