#!/usr/bin/env python3 """ MT Global Tile Scanner — Fetch ALL cargo+tanker vessels worldwide via tile API. Strategy: 1. Login + load map once via page.goto() 2. Move map via window.mtMap.setView() (no page reload, 1.7x faster) 3. Capture tile responses via page.on('response') ~1957 navigation points × ~3.5s = ~114 minutes for full global scan. Usage: python mt_global_tiles.py # Full global scan python mt_global_tiles.py --probe # Test first 15 navigations python mt_global_tiles.py --start 200 # Resume from navigation #200 """ import asyncio, json, sys, os, time, struct, hmac, hashlib, base64, argparse import threading import psycopg2 # ---- WATCHDOG ---- _heartbeat_time = time.time() _WATCHDOG_TIMEOUT = 300 # 5 min def _update_heartbeat(): global _heartbeat_time _heartbeat_time = time.time() def _watchdog_thread(): while True: time.sleep(10) if time.time() - _heartbeat_time > _WATCHDOG_TIMEOUT: print(f"\n[WATCHDOG] No heartbeat for {time.time() - _heartbeat_time:.0f}s -- exiting!", flush=True) os._exit(2) _wd = threading.Thread(target=_watchdog_thread, daemon=True) _wd.start() os.chdir(os.path.dirname(os.path.abspath(__file__))) if hasattr(sys.stdout, 'reconfigure'): sys.stdout.reconfigure(encoding='utf-8', errors='replace', line_buffering=True) if hasattr(sys.stderr, 'reconfigure'): sys.stderr.reconfigure(encoding='utf-8', errors='replace', line_buffering=True) EMAIL = "operation@mrlogisticcorp.com" PASSWORD = "NKh9i8Z!7fU9jfi" TOTP_SECRET = "MNWTEPTFJZBUC32GJFEWY6LVKQ2GGYKH" DB_URL = 'postgresql://seafare:SF_m0ntana_2026@127.0.0.1:15432/seafare_db' ZOOM = 10 SETVIEW_WAIT = 3.0 # seconds to wait after setView for tiles to load NAV_DELAY = 0.5 # seconds between navigations # GT_SHIPTYPE codes for cargo + tanker vessels CARGO_GT_TYPES = { '6', '9', '11', '12', '21', '70', '122', # Cargo (green) '4', '17', '18', '19', '56', '71', '88', # Tanker (red) } CARGO_TYPE_KEYWORDS = { 'bulk', 'cargo', 'container', 'tanker', 'chemical', 'oil', 'lpg', 'lng', 'reefer', 'ro-ro', 'roro', 'multipurpose', 'general', 'ore', 'cement', 'vehicle', 'barge', 'heavy load', 'fpso', 'fso', 'bitumen', 'asphalt', 'product', } def generate_global_grid(): """Generate ~1950 navigation points covering all world shipping routes.""" points = [] def grid(lat_min, lat_max, lon_min, lon_max, step_lat, step_lon, label=""): lat = lat_min while lat <= lat_max: lon = lon_min while lon <= lon_max: points.append((round(lat, 1), round(lon, 1), label)) lon += step_lon lat += step_lat # === EUROPE === grid(30, 46, -6, 36, 1.5, 2.0, "Mediterranean") grid(40, 47, 27, 42, 1.5, 2.0, "Black Sea") grid(53, 66, 10, 30, 1.5, 2.0, "Baltic") grid(48, 62, -5, 10, 2.0, 2.5, "North Sea") grid(36, 48, -10, -1, 2.5, 2.5, "Biscay/Iberia") grid(58, 71, 5, 20, 2.5, 3.0, "Norway") # === MIDDLE EAST === grid(12, 30, 32, 44, 1.5, 2.0, "Red Sea") grid(23, 30, 47, 57, 1.5, 2.0, "Persian Gulf") grid(10, 25, 55, 70, 2.5, 3.0, "Arabian Sea") # === CASPIAN === grid(36, 47, 46, 55, 2.0, 2.0, "Caspian") # === SOUTH ASIA / INDIAN OCEAN === grid(8, 23, 68, 78, 2.5, 2.5, "India West") grid(5, 22, 78, 92, 2.5, 2.5, "Bay of Bengal") grid(0, 10, 72, 82, 3.0, 3.0, "Sri Lanka") grid(-15, 5, 38, 52, 3.0, 3.0, "East Africa") grid(-26, -10, 32, 48, 3.0, 4.0, "Mozambique") grid(-5, 15, 55, 95, 4.0, 5.0, "Indian Ocean lanes") # === SOUTHEAST ASIA === grid(-2, 8, 95, 106, 1.5, 2.0, "Malacca") grid(0, 22, 105, 120, 2.5, 2.5, "South China Sea") grid(-8, 0, 105, 135, 2.5, 3.0, "Indonesia") grid(5, 18, 118, 128, 2.5, 2.5, "Philippines") # === EAST ASIA === grid(20, 40, 110, 125, 1.5, 2.0, "China coast") grid(30, 42, 125, 142, 2.5, 2.5, "Korea/Japan") grid(30, 45, 135, 146, 2.5, 3.0, "Japan East") grid(22, 26, 117, 122, 1.5, 1.5, "Taiwan Strait") # === OCEANIA === grid(-38, -10, 140, 155, 3.0, 3.0, "Australia East") grid(-35, -15, 112, 125, 3.0, 3.0, "Australia West") grid(-15, -8, 125, 145, 3.0, 4.0, "Australia North") grid(-47, -34, 166, 178, 3.0, 4.0, "New Zealand") # === NORTH ATLANTIC === grid(25, 45, -82, -65, 2.5, 2.5, "US East Coast") grid(18, 30, -98, -80, 2.5, 3.0, "Gulf of Mexico") grid(10, 22, -85, -60, 3.0, 4.0, "Caribbean") grid(43, 52, -70, -50, 3.0, 4.0, "Canada East") grid(40, 55, -50, -10, 4.0, 5.0, "North Atlantic lanes") # === SOUTH ATLANTIC === grid(-30, 0, -50, -30, 3.0, 4.0, "Brazil") grid(-5, 15, -20, 5, 3.0, 4.0, "West Africa") grid(-35, -25, 15, 35, 3.0, 4.0, "South Africa") grid(-35, -5, -30, 10, 5.0, 6.0, "South Atlantic lanes") # === NORTH PACIFIC === grid(30, 50, -130, -115, 2.5, 3.0, "US West Coast") grid(5, 15, -85, -75, 2.5, 2.5, "Panama") grid(30, 50, -170, -130, 5.0, 6.0, "North Pacific lanes") grid(20, 35, 140, 180, 4.0, 5.0, "West Pacific") # === SOUTH PACIFIC === grid(-40, -5, -80, -70, 4.0, 3.0, "South America West") # === ARCTIC / SUB-ARCTIC === grid(65, 72, 20, 70, 4.0, 5.0, "NSR approach") # Deduplicate: remove points within ~0.5 degrees of each other unique = [] seen = set() for lat, lon, label in points: key = (round(lat * 2) / 2, round(lon * 2) / 2) if key not in seen: seen.add(key) unique.append((lat, lon, label)) return unique def is_cargo_vessel(row): gt = str(row.get('GT_SHIPTYPE', '') or '') if gt in CARGO_GT_TYPES: return True st = str(row.get('SHIPTYPE', '') or '').lower() for kw in CARGO_TYPE_KEYWORDS: if kw in st: return True return False def classify_vessel(row): gt = str(row.get('GT_SHIPTYPE', '') or '') if gt == '6': return 'bulk' elif gt == '11': return 'container' elif gt == '12': return 'roro' elif gt in ('4', '17', '18', '19', '56', '71', '88'): return 'tanker' elif gt in ('9', '70', '122', '21'): return 'general' st = str(row.get('SHIPTYPE', '') or '').lower() if 'bulk' in st: return 'bulk' elif 'container' in st: return 'container' elif any(k in st for k in ('tanker', 'oil', 'chemical', 'lpg', 'lng')): return 'tanker' elif any(k in st for k in ('ro-ro', 'roro', 'vehicle')): return 'roro' return 'general' def totp(secret): s = secret.upper().replace(' ', '') pad = (-len(s)) % 8 key = base64.b32decode(s + '=' * pad) counter = int(time.time()) // 30 msg = struct.pack('>Q', counter) h = hmac.new(key, msg, hashlib.sha1).digest() offset = h[-1] & 0x0f code = struct.unpack('>I', h[offset:offset+4])[0] & 0x7fffffff return str(code % 1000000).zfill(6) def totp_wait_for_fresh(min_remaining=8): while True: remaining = 30 - (int(time.time()) % 30) if remaining >= min_remaining: break time.sleep(remaining + 1) code = totp(TOTP_SECRET) remaining = 30 - (int(time.time()) % 30) print(f" [TOTP] Code={code} | {remaining}s remaining", flush=True) return code def db_connect(): return psycopg2.connect( DB_URL, connect_timeout=15, keepalives=1, keepalives_idle=30, keepalives_interval=10, keepalives_count=5 ) def _restart_ssh_tunnel(): import subprocess try: subprocess.run(['taskkill', '/F', '/IM', 'ssh.exe'], capture_output=True, timeout=5) except Exception: pass time.sleep(2) try: subprocess.Popen( ['ssh', '-o', 'ServerAliveInterval=5', '-o', 'ServerAliveCountMax=120', '-o', 'TCPKeepAlive=yes', '-o', 'StrictHostKeyChecking=no', '-L', '15432:127.0.0.1:5432', '-N', 'root@89.19.208.158'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) print(f" [SSH] Tunnel restarted, waiting 5s...", flush=True) time.sleep(5) except Exception as e: print(f" [SSH] Failed: {e}", flush=True) def db_reconnect(conn): try: conn.close() except Exception: pass for attempt in range(5): try: time.sleep(3) c = db_connect() print(f" [DB] Reconnected (attempt {attempt+1})", flush=True) return c, c.cursor() except Exception as e: print(f" [DB] Attempt {attempt+1} failed: {e}", flush=True) print(f" [DB] Restarting SSH tunnel...", flush=True) _restart_ssh_tunnel() for attempt in range(10): try: time.sleep(5) c = db_connect() print(f" [DB] Reconnected after tunnel restart (attempt {attempt+1})", flush=True) return c, c.cursor() except Exception as e: print(f" [DB] Post-restart attempt {attempt+1} failed: {e}", flush=True) if attempt == 4: _restart_ssh_tunnel() raise Exception("DB reconnect failed") def db_safe_execute(conn, cur, query, params=None): try: cur.execute(query, params) return conn, cur except (psycopg2.InterfaceError, psycopg2.OperationalError) as e: print(f" [DB] Lost on execute ({e}), reconnecting...", flush=True) conn, cur = db_reconnect(conn) cur.execute(query, params) return conn, cur except psycopg2.Error as e: try: conn.rollback() except Exception: pass raise async def do_login(page, max_retries=3): for attempt in range(max_retries): _update_heartbeat() print(f"LOGIN (attempt {attempt+1}/{max_retries})...", flush=True) try: await asyncio.wait_for( page.goto('https://www.marinetraffic.com/en/users/login', wait_until='domcontentloaded', timeout=30000), timeout=35.0) except Exception: pass await asyncio.sleep(3) _update_heartbeat() try: await page.wait_for_selector('input[name="username"]', timeout=20000) await page.fill('input[name="username"]', EMAIL, timeout=15000) await page.evaluate('() => { const b = document.querySelector("button[type=submit]"); if(b) b.click(); }') try: await page.wait_for_url('**/login/password**', timeout=20000) except Exception: await asyncio.sleep(5) except Exception as e: print(f" email step error: {e}", flush=True) await asyncio.sleep(5) try: await page.wait_for_selector('input[name="password"]', timeout=20000) await page.fill('input[name="password"]', PASSWORD, timeout=15000) await page.evaluate('() => { const b = document.querySelector("button[type=submit]"); if(b) b.click(); }') except Exception as e: print(f" password step error: {e}", flush=True) try: await page.wait_for_url('**/mfa**', timeout=25000) except Exception: await asyncio.sleep(5) url = page.url if 'auth.kpler' in url: if 'mfa-login-options' in url: try: await page.evaluate("""() => { for (const b of document.querySelectorAll('button')) { if (b.textContent.includes('Authenticator')) { b.click(); return; } } }""") await page.wait_for_url('**/mfa-otp-challenge**', timeout=20000) except Exception: await asyncio.sleep(5) if 'mfa-otp-challenge' in page.url or 'auth.kpler' in page.url: _update_heartbeat() otp = totp_wait_for_fresh(8) try: await page.wait_for_selector('input[name="code"]', timeout=15000) await page.fill('input[name="code"]', otp, timeout=15000) await page.evaluate('() => { const b = document.querySelector("button[type=submit]"); if(b) b.click(); }') except Exception: pass _update_heartbeat() try: await page.wait_for_url('**marinetraffic.com**', timeout=25000) except Exception: await asyncio.sleep(15) _update_heartbeat() ok = 'marinetraffic.com' in page.url and 'auth.kpler' not in page.url if ok: print(f" Login: OK | {page.url[:60]}", flush=True) return True print(f" Login failed | {page.url[:80]}", flush=True) if attempt < max_retries - 1: try: await page.context.clear_cookies() except Exception: pass await asyncio.sleep(10) return False async def main(): parser = argparse.ArgumentParser() parser.add_argument('--probe', action='store_true', help='Test first 15 navigations') parser.add_argument('--start', type=int, default=0, help='Start from navigation index N') parser.add_argument('--wait', type=float, default=SETVIEW_WAIT, help='Wait after setView (default 3.0)') parser.add_argument('--delay', type=float, default=NAV_DELAY, help='Delay between navs (default 0.5)') args = parser.parse_args() grid = generate_global_grid() total_navs = len(grid) print(f"Generated {total_navs} navigation points (zoom={ZOOM})") if args.probe: grid = grid[:15] total_navs = 15 print(f"PROBE MODE: testing first {total_navs} navigations") # DB connection try: conn = db_connect() cur = conn.cursor() cur.execute('SELECT count(*) FROM mt_bulk_staging') existing_count = cur.fetchone()[0] cur.execute('SELECT ship_id FROM mt_bulk_staging WHERE ship_id IS NOT NULL') existing_ids = set(str(r[0]) for r in cur.fetchall()) print(f"DB: {existing_count} existing vessels | {len(existing_ids)} ship_ids") except Exception as e: print(f"DB ERROR: {e}") print("Start SSH tunnel: ssh -L 15432:127.0.0.1:5432 -N root@89.19.208.158") return from playwright.async_api import async_playwright async with async_playwright() as p: browser = await p.chromium.launch( headless=False, args=['--no-sandbox', '--disable-blink-features=AutomationControlled', '--disable-dev-shm-usage'] ) ctx = await browser.new_context( viewport={'width': 1200, 'height': 800}, user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' '(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', ) page = await ctx.new_page() COOKIE_FILE = 'mt_session_cookies.json' # Try cookies for resume cookie_loaded = False if args.start > 0 and os.path.exists(COOKIE_FILE): try: with open(COOKIE_FILE) as f: saved_cookies = json.load(f) await ctx.add_cookies(saved_cookies) cookie_loaded = True print("Loaded saved cookies for resume") except Exception as e: print(f"Cookie load failed: {e}") if not cookie_loaded: if not await do_login(page): print("LOGIN FAILED!") await browser.close() conn.close() return try: await page.wait_for_url('**/ais/home**', timeout=15000) except Exception: await asyncio.sleep(5) try: cookies = await ctx.cookies() with open(COOKIE_FILE, 'w') as f: json.dump(cookies, f) print(f"Saved {len(cookies)} cookies") except Exception: pass # Load map once (page.goto) — all subsequent moves via setView _update_heartbeat() print("\nLoading map (initial page.goto)...", flush=True) # Capture tile responses captured_rows = [] async def on_tile_response(response): if 'getData/get_data_json_4' not in response.url: return try: body = await response.body() parsed = json.loads(body) rows = parsed.get('data', {}).get('rows', []) captured_rows.extend(rows) except Exception: pass page.on('response', on_tile_response) try: await asyncio.wait_for( page.goto(f'https://www.marinetraffic.com/en/ais/home/centerx:29/centery:40/zoom:{ZOOM}', wait_until='load', timeout=30000), timeout=35) except Exception: pass await asyncio.sleep(8) test_cargo = sum(1 for r in captured_rows if is_cargo_vessel(r)) if len(captured_rows) == 0: print("WARNING: No tile data from initial load. Retrying...", flush=True) try: await asyncio.wait_for( page.goto(f'https://www.marinetraffic.com/en/ais/home/centerx:4/centery:52/zoom:{ZOOM}', wait_until='load', timeout=30000), timeout=35) except Exception: pass await asyncio.sleep(10) test_cargo = sum(1 for r in captured_rows if is_cargo_vessel(r)) if len(captured_rows) == 0: print("FATAL: Cannot capture tile data. Exiting.", flush=True) await browser.close() conn.close() return print(f"Map loaded! Test: {len(captured_rows)} vessels, {test_cargo} cargo/tanker", flush=True) # Verify mtMap.setView is available has_setview = await page.evaluate("() => typeof window.mtMap !== 'undefined' && typeof window.mtMap.setView === 'function'") if not has_setview: print("ERROR: window.mtMap.setView not available! Falling back to page.goto.", flush=True) use_setview = False else: print("mtMap.setView() available -- using fast mode!", flush=True) use_setview = True # === MAIN SCAN LOOP === total_new = 0 total_cargo = 0 total_navs_done = 0 total_vessels_seen = 0 empty_navs = 0 t0 = time.time() checkpoint_file = 'mt_global_tiles_progress.json' # Load checkpoint if args.start > 0 and os.path.exists(checkpoint_file): try: with open(checkpoint_file) as f: cp = json.load(f) total_new = cp.get('total_new', 0) total_cargo = cp.get('total_cargo', 0) print(f"Resuming from nav {args.start} | new={total_new}") except Exception: pass print(f"\n{'='*65}") print(f"GLOBAL TILE SCAN: {total_navs} navigations, zoom={ZOOM}") print(f"Wait: {args.wait}s | Delay: {args.delay}s | Start: {args.start}") print(f"Mode: {'setView (fast)' if use_setview else 'page.goto (slow)'}") print(f"{'='*65}\n") consecutive_errors = 0 last_status_time = time.time() for idx in range(args.start, total_navs): lat, lon, label = grid[idx] _update_heartbeat() captured_rows.clear() if use_setview: # Fast: just move the map via JS try: await page.evaluate(f"() => {{ window.mtMap.setView([{lat}, {lon}], {ZOOM}); }}") except Exception as e: err = str(e)[:60] if 'crash' in err.lower() or 'target closed' in err.lower(): print(f" Nav {idx}: BROWSER CRASH -- exiting for restart", flush=True) break # setView failed — try page.goto as fallback print(f" Nav {idx}: setView failed ({err}), fallback to goto", flush=True) try: await asyncio.wait_for( page.goto(f'https://www.marinetraffic.com/en/ais/home/centerx:{lon}/centery:{lat}/zoom:{ZOOM}', wait_until='load', timeout=35000), timeout=40) except Exception: consecutive_errors += 1 if consecutive_errors >= 10: print("Too many errors. Exiting.", flush=True) break continue await asyncio.sleep(5) else: # Slow fallback: full page reload try: await asyncio.wait_for( page.goto(f'https://www.marinetraffic.com/en/ais/home/centerx:{lon}/centery:{lat}/zoom:{ZOOM}', wait_until='load', timeout=35000), timeout=40) except Exception as e: if 'crash' in str(e).lower(): print(f" Nav {idx}: CRASH -- exiting", flush=True) break await asyncio.sleep(3) consecutive_errors += 1 if consecutive_errors >= 10: break continue await asyncio.sleep(args.wait) _update_heartbeat() # Process captured vessels nav_new = 0 nav_cargo = 0 total_vessels_seen += len(captured_rows) if len(captured_rows) == 0: empty_navs += 1 else: consecutive_errors = 0 for row in captured_rows: if not is_cargo_vessel(row): continue nav_cargo += 1 sid = str(row.get('SHIP_ID', '')) if not sid or sid in existing_ids: continue existing_ids.add(sid) nav_new += 1 total_new += 1 cat = classify_vessel(row) gt = str(row.get('GT_SHIPTYPE', '') or '') try: conn, cur = db_safe_execute(conn, cur, """ INSERT INTO mt_bulk_staging (ship_id, name, flag, dwt, shiptype, gt_shiptype, type_category, lat, lon, speed, course, destination, scraped_at) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW()) ON CONFLICT (ship_id) DO UPDATE SET lat = EXCLUDED.lat, lon = EXCLUDED.lon, speed = EXCLUDED.speed, course = EXCLUDED.course, type_category = COALESCE(EXCLUDED.type_category, mt_bulk_staging.type_category), scraped_at = NOW() """, ( sid, row.get('SHIPNAME', ''), row.get('FLAG', ''), int(row.get('DWT', 0) or 0), str(row.get('SHIPTYPE', '')), gt, cat, row.get('LAT'), row.get('LON'), row.get('SPEED'), row.get('COURSE'), row.get('DESTINATION', ''), )) conn.commit() except Exception as e: print(f" DB error: {str(e)[:80]}", flush=True) total_cargo += nav_cargo total_navs_done += 1 # Progress every 10 navs or when new vessels found now = time.time() if total_navs_done % 10 == 0 or nav_new > 0 or (now - last_status_time > 60): elapsed = now - t0 rate = total_navs_done / max(elapsed, 1) * 60 remaining_navs = total_navs - idx - 1 eta_min = remaining_navs / max(rate / 60, 0.01) / 60 print(f" [{idx+1}/{total_navs}] {label} ({lat:.1f},{lon:.1f}) | " f"seen={len(captured_rows)} cargo={nav_cargo} NEW={nav_new} | " f"total_new={total_new} DB={len(existing_ids)} | " f"{elapsed/60:.1f}m | ETA: {eta_min:.0f}m", flush=True) last_status_time = now # Checkpoint every 50 navs if total_navs_done % 50 == 0: with open(checkpoint_file, 'w') as f: json.dump({ 'last_nav': idx, 'total_new': total_new, 'total_cargo': total_cargo, 'total_navs': total_navs_done, 'total_vessels_seen': total_vessels_seen, 'db_count': len(existing_ids), }, f) # GC every 30 navs if total_navs_done % 30 == 0: try: await page.evaluate('() => { if (typeof gc === "function") gc(); }') except Exception: pass await asyncio.sleep(args.delay) # Final stats elapsed = time.time() - t0 try: cur.execute('SELECT count(*) FROM mt_bulk_staging') final_count = cur.fetchone()[0] except Exception: final_count = len(existing_ids) page.remove_listener('response', on_tile_response) print(f"\n{'='*65}") print(f"GLOBAL TILE SCAN COMPLETE -- {elapsed/60:.1f} minutes") print(f" Navigations: {total_navs_done}") print(f" Empty navs: {empty_navs}") print(f" Vessels seen: {total_vessels_seen}") print(f" Cargo found: {total_cargo}") print(f" New vessels: {total_new}") print(f" DB total: {final_count}") print(f"{'='*65}") with open(checkpoint_file, 'w') as f: json.dump({ 'last_nav': total_navs - 1, 'total_new': total_new, 'total_cargo': total_cargo, 'total_navs': total_navs_done, 'total_vessels_seen': total_vessels_seen, 'db_count': final_count, 'completed': True, 'elapsed_min': elapsed / 60, }, f, indent=2) await browser.close() conn.close() if __name__ == '__main__': asyncio.run(main())