706 lines
26 KiB
Python
706 lines
26 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
MT Global Tile Scanner — Fetch ALL cargo+tanker vessels worldwide via tile API.
|
|||
|
|
|
|||
|
|
Strategy:
|
|||
|
|
1. Login + load map once via page.goto()
|
|||
|
|
2. Move map via window.mtMap.setView() (no page reload, 1.7x faster)
|
|||
|
|
3. Capture tile responses via page.on('response')
|
|||
|
|
|
|||
|
|
~1957 navigation points × ~3.5s = ~114 minutes for full global scan.
|
|||
|
|
|
|||
|
|
Usage:
|
|||
|
|
python mt_global_tiles.py # Full global scan
|
|||
|
|
python mt_global_tiles.py --probe # Test first 15 navigations
|
|||
|
|
python mt_global_tiles.py --start 200 # Resume from navigation #200
|
|||
|
|
"""
|
|||
|
|
import asyncio, json, sys, os, time, struct, hmac, hashlib, base64, argparse
|
|||
|
|
import threading
|
|||
|
|
import psycopg2
|
|||
|
|
|
|||
|
|
# ---- WATCHDOG ----
# A background daemon thread hard-kills the process if the main (asyncio)
# loop stops reporting progress -- e.g. a Playwright call hanging forever.
_heartbeat_time = time.time()   # last time any loop reported progress
_WATCHDOG_TIMEOUT = 300  # 5 min


def _update_heartbeat():
    """Record that the main loop is alive; polled by _watchdog_thread."""
    global _heartbeat_time
    _heartbeat_time = time.time()
|
|||
|
|
|
|||
|
|
def _watchdog_thread():
    """Poll the heartbeat every 10s; hard-exit (status 2) after _WATCHDOG_TIMEOUT.

    os._exit is deliberate: a hung Playwright/browser call cannot be unwound
    cleanly, so the process is killed outright and an external supervisor is
    presumably expected to restart the script -- TODO confirm.
    """
    while True:
        time.sleep(10)
        if time.time() - _heartbeat_time > _WATCHDOG_TIMEOUT:
            print(f"\n[WATCHDOG] No heartbeat for {time.time() - _heartbeat_time:.0f}s -- exiting!", flush=True)
            os._exit(2)


# daemon=True so the watchdog never blocks normal interpreter shutdown
_wd = threading.Thread(target=_watchdog_thread, daemon=True)
_wd.start()
|
|||
|
|
|
|||
|
|
# Run relative to this script's directory so cookie/checkpoint files land here.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
# Force UTF-8 with replacement + line buffering so log output survives
# consoles/pipes that are not UTF-8 (reconfigure exists on Python 3.7+).
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8', errors='replace', line_buffering=True)
if hasattr(sys.stderr, 'reconfigure'):
    sys.stderr.reconfigure(encoding='utf-8', errors='replace', line_buffering=True)
|
|||
|
|
|
|||
|
|
# NOTE(review): live account credentials, a TOTP seed and a DB password are
# hard-coded in source. Move them to environment variables or a secrets store
# and rotate them -- anyone with this file has full access.
EMAIL = "operation@mrlogisticcorp.com"
PASSWORD = "NKh9i8Z!7fU9jfi"
TOTP_SECRET = "MNWTEPTFJZBUC32GJFEWY6LVKQ2GGYKH"
# 127.0.0.1:15432 is the local end of the SSH tunnel (see _restart_ssh_tunnel).
DB_URL = 'postgresql://seafare:SF_m0ntana_2026@127.0.0.1:15432/seafare_db'
ZOOM = 10               # map zoom level used for every navigation
SETVIEW_WAIT = 3.0  # seconds to wait after setView for tiles to load
NAV_DELAY = 0.5  # seconds between navigations

# GT_SHIPTYPE codes for cargo + tanker vessels
CARGO_GT_TYPES = {
    '6', '9', '11', '12', '21', '70', '122',  # Cargo (green)
    '4', '17', '18', '19', '56', '71', '88',  # Tanker (red)
}

# Substrings matched case-insensitively against the free-text SHIPTYPE field
# when no GT_SHIPTYPE code is recognised (see is_cargo_vessel).
CARGO_TYPE_KEYWORDS = {
    'bulk', 'cargo', 'container', 'tanker', 'chemical', 'oil',
    'lpg', 'lng', 'reefer', 'ro-ro', 'roro', 'multipurpose',
    'general', 'ore', 'cement', 'vehicle', 'barge', 'heavy load',
    'fpso', 'fso', 'bitumen', 'asphalt', 'product',
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
def generate_global_grid():
|
|||
|
|
"""Generate ~1950 navigation points covering all world shipping routes."""
|
|||
|
|
points = []
|
|||
|
|
|
|||
|
|
def grid(lat_min, lat_max, lon_min, lon_max, step_lat, step_lon, label=""):
|
|||
|
|
lat = lat_min
|
|||
|
|
while lat <= lat_max:
|
|||
|
|
lon = lon_min
|
|||
|
|
while lon <= lon_max:
|
|||
|
|
points.append((round(lat, 1), round(lon, 1), label))
|
|||
|
|
lon += step_lon
|
|||
|
|
lat += step_lat
|
|||
|
|
|
|||
|
|
# === EUROPE ===
|
|||
|
|
grid(30, 46, -6, 36, 1.5, 2.0, "Mediterranean")
|
|||
|
|
grid(40, 47, 27, 42, 1.5, 2.0, "Black Sea")
|
|||
|
|
grid(53, 66, 10, 30, 1.5, 2.0, "Baltic")
|
|||
|
|
grid(48, 62, -5, 10, 2.0, 2.5, "North Sea")
|
|||
|
|
grid(36, 48, -10, -1, 2.5, 2.5, "Biscay/Iberia")
|
|||
|
|
grid(58, 71, 5, 20, 2.5, 3.0, "Norway")
|
|||
|
|
|
|||
|
|
# === MIDDLE EAST ===
|
|||
|
|
grid(12, 30, 32, 44, 1.5, 2.0, "Red Sea")
|
|||
|
|
grid(23, 30, 47, 57, 1.5, 2.0, "Persian Gulf")
|
|||
|
|
grid(10, 25, 55, 70, 2.5, 3.0, "Arabian Sea")
|
|||
|
|
|
|||
|
|
# === CASPIAN ===
|
|||
|
|
grid(36, 47, 46, 55, 2.0, 2.0, "Caspian")
|
|||
|
|
|
|||
|
|
# === SOUTH ASIA / INDIAN OCEAN ===
|
|||
|
|
grid(8, 23, 68, 78, 2.5, 2.5, "India West")
|
|||
|
|
grid(5, 22, 78, 92, 2.5, 2.5, "Bay of Bengal")
|
|||
|
|
grid(0, 10, 72, 82, 3.0, 3.0, "Sri Lanka")
|
|||
|
|
grid(-15, 5, 38, 52, 3.0, 3.0, "East Africa")
|
|||
|
|
grid(-26, -10, 32, 48, 3.0, 4.0, "Mozambique")
|
|||
|
|
grid(-5, 15, 55, 95, 4.0, 5.0, "Indian Ocean lanes")
|
|||
|
|
|
|||
|
|
# === SOUTHEAST ASIA ===
|
|||
|
|
grid(-2, 8, 95, 106, 1.5, 2.0, "Malacca")
|
|||
|
|
grid(0, 22, 105, 120, 2.5, 2.5, "South China Sea")
|
|||
|
|
grid(-8, 0, 105, 135, 2.5, 3.0, "Indonesia")
|
|||
|
|
grid(5, 18, 118, 128, 2.5, 2.5, "Philippines")
|
|||
|
|
|
|||
|
|
# === EAST ASIA ===
|
|||
|
|
grid(20, 40, 110, 125, 1.5, 2.0, "China coast")
|
|||
|
|
grid(30, 42, 125, 142, 2.5, 2.5, "Korea/Japan")
|
|||
|
|
grid(30, 45, 135, 146, 2.5, 3.0, "Japan East")
|
|||
|
|
grid(22, 26, 117, 122, 1.5, 1.5, "Taiwan Strait")
|
|||
|
|
|
|||
|
|
# === OCEANIA ===
|
|||
|
|
grid(-38, -10, 140, 155, 3.0, 3.0, "Australia East")
|
|||
|
|
grid(-35, -15, 112, 125, 3.0, 3.0, "Australia West")
|
|||
|
|
grid(-15, -8, 125, 145, 3.0, 4.0, "Australia North")
|
|||
|
|
grid(-47, -34, 166, 178, 3.0, 4.0, "New Zealand")
|
|||
|
|
|
|||
|
|
# === NORTH ATLANTIC ===
|
|||
|
|
grid(25, 45, -82, -65, 2.5, 2.5, "US East Coast")
|
|||
|
|
grid(18, 30, -98, -80, 2.5, 3.0, "Gulf of Mexico")
|
|||
|
|
grid(10, 22, -85, -60, 3.0, 4.0, "Caribbean")
|
|||
|
|
grid(43, 52, -70, -50, 3.0, 4.0, "Canada East")
|
|||
|
|
grid(40, 55, -50, -10, 4.0, 5.0, "North Atlantic lanes")
|
|||
|
|
|
|||
|
|
# === SOUTH ATLANTIC ===
|
|||
|
|
grid(-30, 0, -50, -30, 3.0, 4.0, "Brazil")
|
|||
|
|
grid(-5, 15, -20, 5, 3.0, 4.0, "West Africa")
|
|||
|
|
grid(-35, -25, 15, 35, 3.0, 4.0, "South Africa")
|
|||
|
|
grid(-35, -5, -30, 10, 5.0, 6.0, "South Atlantic lanes")
|
|||
|
|
|
|||
|
|
# === NORTH PACIFIC ===
|
|||
|
|
grid(30, 50, -130, -115, 2.5, 3.0, "US West Coast")
|
|||
|
|
grid(5, 15, -85, -75, 2.5, 2.5, "Panama")
|
|||
|
|
grid(30, 50, -170, -130, 5.0, 6.0, "North Pacific lanes")
|
|||
|
|
grid(20, 35, 140, 180, 4.0, 5.0, "West Pacific")
|
|||
|
|
|
|||
|
|
# === SOUTH PACIFIC ===
|
|||
|
|
grid(-40, -5, -80, -70, 4.0, 3.0, "South America West")
|
|||
|
|
|
|||
|
|
# === ARCTIC / SUB-ARCTIC ===
|
|||
|
|
grid(65, 72, 20, 70, 4.0, 5.0, "NSR approach")
|
|||
|
|
|
|||
|
|
# Deduplicate: remove points within ~0.5 degrees of each other
|
|||
|
|
unique = []
|
|||
|
|
seen = set()
|
|||
|
|
for lat, lon, label in points:
|
|||
|
|
key = (round(lat * 2) / 2, round(lon * 2) / 2)
|
|||
|
|
if key not in seen:
|
|||
|
|
seen.add(key)
|
|||
|
|
unique.append((lat, lon, label))
|
|||
|
|
|
|||
|
|
return unique
|
|||
|
|
|
|||
|
|
|
|||
|
|
def is_cargo_vessel(row):
    """Return True when a tile row looks like a cargo or tanker vessel.

    First checks the numeric GT_SHIPTYPE code against CARGO_GT_TYPES; if that
    fails, falls back to keyword matching on the free-text SHIPTYPE field.
    """
    gt_code = str(row.get('GT_SHIPTYPE', '') or '')
    if gt_code in CARGO_GT_TYPES:
        return True
    shiptype_text = str(row.get('SHIPTYPE', '') or '').lower()
    return any(keyword in shiptype_text for keyword in CARGO_TYPE_KEYWORDS)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def classify_vessel(row):
    """Map a tile row to one of: 'bulk', 'container', 'roro', 'tanker', 'general'.

    The numeric GT_SHIPTYPE code wins when recognised; otherwise the free-text
    SHIPTYPE field is scanned for category keywords. Defaults to 'general'.
    """
    gt_code = str(row.get('GT_SHIPTYPE', '') or '')
    by_gt_code = {
        '6': 'bulk',
        '11': 'container',
        '12': 'roro',
        '4': 'tanker', '17': 'tanker', '18': 'tanker', '19': 'tanker',
        '56': 'tanker', '71': 'tanker', '88': 'tanker',
        '9': 'general', '70': 'general', '122': 'general', '21': 'general',
    }
    if gt_code in by_gt_code:
        return by_gt_code[gt_code]

    # Unrecognised code: fall back to the free-text type description.
    text = str(row.get('SHIPTYPE', '') or '').lower()
    if 'bulk' in text:
        return 'bulk'
    if 'container' in text:
        return 'container'
    if any(word in text for word in ('tanker', 'oil', 'chemical', 'lpg', 'lng')):
        return 'tanker'
    if any(word in text for word in ('ro-ro', 'roro', 'vehicle')):
        return 'roro'
    return 'general'
|
|||
|
|
|
|||
|
|
|
|||
|
|
def totp(secret):
|
|||
|
|
s = secret.upper().replace(' ', '')
|
|||
|
|
pad = (-len(s)) % 8
|
|||
|
|
key = base64.b32decode(s + '=' * pad)
|
|||
|
|
counter = int(time.time()) // 30
|
|||
|
|
msg = struct.pack('>Q', counter)
|
|||
|
|
h = hmac.new(key, msg, hashlib.sha1).digest()
|
|||
|
|
offset = h[-1] & 0x0f
|
|||
|
|
code = struct.unpack('>I', h[offset:offset+4])[0] & 0x7fffffff
|
|||
|
|
return str(code % 1000000).zfill(6)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def totp_wait_for_fresh(min_remaining=8):
    """Return a TOTP code with at least *min_remaining* seconds of validity.

    If the current 30-second TOTP window is about to roll over, sleeps into
    the next window before generating, so the code is not stale by the time
    it is typed into the login form.
    """
    secs_left = 30 - (int(time.time()) % 30)
    while secs_left < min_remaining:
        # Too close to the rollover -- sleep past it and re-check.
        time.sleep(secs_left + 1)
        secs_left = 30 - (int(time.time()) % 30)
    code = totp(TOTP_SECRET)
    secs_left = 30 - (int(time.time()) % 30)
    print(f" [TOTP] Code={code} | {secs_left}s remaining", flush=True)
    return code
|
|||
|
|
|
|||
|
|
|
|||
|
|
def db_connect():
    """Open a psycopg2 connection to the staging DB.

    Uses aggressive TCP keepalives so a silently-dropped SSH tunnel is
    detected quickly instead of hanging a query indefinitely.
    """
    keepalive_opts = {
        'keepalives': 1,
        'keepalives_idle': 30,
        'keepalives_interval': 10,
        'keepalives_count': 5,
    }
    return psycopg2.connect(DB_URL, connect_timeout=15, **keepalive_opts)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _restart_ssh_tunnel():
    """Kill any running ssh.exe and respawn the local port-forward tunnel.

    Windows-only as written (taskkill). The tunnel forwards localhost:15432
    to Postgres 5432 on the remote host, matching DB_URL. Best-effort: all
    failures are printed, never raised.
    """
    import subprocess
    # NOTE(review): this force-kills ALL ssh processes on the machine --
    # assumes no other ssh sessions worth keeping. Confirm before reuse.
    try:
        subprocess.run(['taskkill', '/F', '/IM', 'ssh.exe'], capture_output=True, timeout=5)
    except Exception:
        pass
    time.sleep(2)
    try:
        # -N: no remote command, forwarding only. Keepalive options make a
        # dead link fail fast so the reconnect logic can kick in.
        subprocess.Popen(
            ['ssh', '-o', 'ServerAliveInterval=5', '-o', 'ServerAliveCountMax=120',
             '-o', 'TCPKeepAlive=yes', '-o', 'StrictHostKeyChecking=no',
             '-L', '15432:127.0.0.1:5432', '-N', 'root@89.19.208.158'],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
        )
        print(f" [SSH] Tunnel restarted, waiting 5s...", flush=True)
        time.sleep(5)
    except Exception as e:
        print(f" [SSH] Failed: {e}", flush=True)
|
|||
|
|
|
|||
|
|
def db_reconnect(conn):
    """Close *conn* and obtain a fresh (connection, cursor) pair.

    Strategy: 5 plain reconnect attempts (3s apart); if all fail, restart the
    SSH tunnel and try 10 more times (5s apart), restarting the tunnel once
    more midway through. Raises a generic Exception when everything fails.
    """
    try:
        conn.close()
    except Exception:
        pass
    # Phase 1: assume the tunnel is fine, just retry the connection.
    for attempt in range(5):
        try:
            time.sleep(3)
            c = db_connect()
            print(f" [DB] Reconnected (attempt {attempt+1})", flush=True)
            return c, c.cursor()
        except Exception as e:
            print(f" [DB] Attempt {attempt+1} failed: {e}", flush=True)
    # Phase 2: the tunnel itself is likely dead -- bounce it and retry.
    print(f" [DB] Restarting SSH tunnel...", flush=True)
    _restart_ssh_tunnel()
    for attempt in range(10):
        try:
            time.sleep(5)
            c = db_connect()
            print(f" [DB] Reconnected after tunnel restart (attempt {attempt+1})", flush=True)
            return c, c.cursor()
        except Exception as e:
            print(f" [DB] Post-restart attempt {attempt+1} failed: {e}", flush=True)
            # Halfway through phase 2 (after the 5th failure), bounce the
            # tunnel one more time.
            if attempt == 4:
                _restart_ssh_tunnel()
    raise Exception("DB reconnect failed")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def db_safe_execute(conn, cur, query, params=None):
    """Execute *query*, transparently reconnecting on a dropped connection.

    Returns the (possibly new) (connection, cursor) pair so callers can
    rebind their handles. On connection loss (InterfaceError /
    OperationalError) it reconnects once and retries; on any other
    psycopg2 error it rolls back and re-raises.
    """
    try:
        cur.execute(query, params)
    except (psycopg2.InterfaceError, psycopg2.OperationalError) as exc:
        print(f" [DB] Lost on execute ({exc}), reconnecting...", flush=True)
        conn, cur = db_reconnect(conn)
        # Retry once on the fresh connection; a second failure propagates.
        cur.execute(query, params)
    except psycopg2.Error:
        # Query-level error (constraint, syntax, ...): clear the aborted
        # transaction so the connection stays usable, then re-raise.
        try:
            conn.rollback()
        except Exception:
            pass
        raise
    return conn, cur
|
|||
|
|
|
|||
|
|
|
|||
|
|
async def do_login(page, max_retries=3):
    """Drive the MarineTraffic multi-step login flow on *page*.

    Steps per attempt: open the login page, submit email, submit password,
    then (on the auth.kpler MFA pages) pick the Authenticator option and
    submit a fresh TOTP code. Each step is best-effort -- failures fall
    through to the next step, and success is judged purely from the final
    URL. Returns True on success, False after max_retries failures.
    Heartbeats are updated around the slow awaits to keep the watchdog calm.
    """
    for attempt in range(max_retries):
        _update_heartbeat()
        print(f"LOGIN (attempt {attempt+1}/{max_retries})...", flush=True)
        # Open the login page; the outer asyncio.wait_for caps the whole goto
        # even if Playwright's own timeout misbehaves.
        try:
            await asyncio.wait_for(
                page.goto('https://www.marinetraffic.com/en/users/login',
                          wait_until='domcontentloaded', timeout=30000), timeout=35.0)
        except Exception:
            pass
        await asyncio.sleep(3)
        _update_heartbeat()
        # --- Step 1: email ---
        try:
            await page.wait_for_selector('input[name="username"]', timeout=20000)
            await page.fill('input[name="username"]', EMAIL, timeout=15000)
            await page.evaluate('() => { const b = document.querySelector("button[type=submit]"); if(b) b.click(); }')
            try:
                await page.wait_for_url('**/login/password**', timeout=20000)
            except Exception:
                await asyncio.sleep(5)
        except Exception as e:
            print(f" email step error: {e}", flush=True)
            await asyncio.sleep(5)
        # --- Step 2: password ---
        try:
            await page.wait_for_selector('input[name="password"]', timeout=20000)
            await page.fill('input[name="password"]', PASSWORD, timeout=15000)
            await page.evaluate('() => { const b = document.querySelector("button[type=submit]"); if(b) b.click(); }')
        except Exception as e:
            print(f" password step error: {e}", flush=True)
        try:
            await page.wait_for_url('**/mfa**', timeout=25000)
        except Exception:
            await asyncio.sleep(5)

        # --- Step 3: MFA method selection (auth.kpler hosts the MFA pages) ---
        url = page.url
        if 'auth.kpler' in url:
            if 'mfa-login-options' in url:
                # Click whichever button mentions "Authenticator" (TOTP).
                try:
                    await page.evaluate("""() => {
                        for (const b of document.querySelectorAll('button')) {
                            if (b.textContent.includes('Authenticator')) { b.click(); return; }
                        }
                    }""")
                    await page.wait_for_url('**/mfa-otp-challenge**', timeout=20000)
                except Exception:
                    await asyncio.sleep(5)

        # --- Step 4: submit TOTP code ---
        if 'mfa-otp-challenge' in page.url or 'auth.kpler' in page.url:
            _update_heartbeat()
            # Require >= 8s left in the TOTP window so the code is still
            # valid when the form is submitted.
            otp = totp_wait_for_fresh(8)
            try:
                await page.wait_for_selector('input[name="code"]', timeout=15000)
                await page.fill('input[name="code"]', otp, timeout=15000)
                await page.evaluate('() => { const b = document.querySelector("button[type=submit]"); if(b) b.click(); }')
            except Exception:
                pass
            _update_heartbeat()
            try:
                await page.wait_for_url('**marinetraffic.com**', timeout=25000)
            except Exception:
                await asyncio.sleep(15)
            _update_heartbeat()

        # Success == landed back on marinetraffic.com, off the auth domain.
        ok = 'marinetraffic.com' in page.url and 'auth.kpler' not in page.url
        if ok:
            print(f" Login: OK | {page.url[:60]}", flush=True)
            return True
        print(f" Login failed | {page.url[:80]}", flush=True)
        if attempt < max_retries - 1:
            # Clear cookies so the next attempt starts from a clean session.
            try:
                await page.context.clear_cookies()
            except Exception:
                pass
            await asyncio.sleep(10)
    return False
|
|||
|
|
|
|||
|
|
|
|||
|
|
async def main():
    """Run the global tile scan: login, then sweep the grid capturing vessels.

    Flow: parse CLI args -> connect to the staging DB and preload known
    ship_ids -> launch Chromium -> login (or reuse saved cookies on resume)
    -> load the map once -> for each grid point, move the map (setView when
    available, page.goto otherwise), harvest captured tile rows, and upsert
    new cargo/tanker vessels into mt_bulk_staging. Progress is checkpointed
    to a JSON file so --start can resume a crashed run.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--probe', action='store_true', help='Test first 15 navigations')
    parser.add_argument('--start', type=int, default=0, help='Start from navigation index N')
    parser.add_argument('--wait', type=float, default=SETVIEW_WAIT, help='Wait after setView (default 3.0)')
    parser.add_argument('--delay', type=float, default=NAV_DELAY, help='Delay between navs (default 0.5)')
    args = parser.parse_args()

    grid = generate_global_grid()
    total_navs = len(grid)
    print(f"Generated {total_navs} navigation points (zoom={ZOOM})")

    if args.probe:
        grid = grid[:15]
        total_navs = 15
        print(f"PROBE MODE: testing first {total_navs} navigations")

    # DB connection: preload existing ship_ids so known vessels are skipped
    # in-memory without a round-trip per row.
    try:
        conn = db_connect()
        cur = conn.cursor()
        cur.execute('SELECT count(*) FROM mt_bulk_staging')
        existing_count = cur.fetchone()[0]
        cur.execute('SELECT ship_id FROM mt_bulk_staging WHERE ship_id IS NOT NULL')
        existing_ids = set(str(r[0]) for r in cur.fetchall())
        print(f"DB: {existing_count} existing vessels | {len(existing_ids)} ship_ids")
    except Exception as e:
        print(f"DB ERROR: {e}")
        print("Start SSH tunnel: ssh -L 15432:127.0.0.1:5432 -N root@89.19.208.158")
        return

    from playwright.async_api import async_playwright

    async with async_playwright() as p:
        # headless=False + AutomationControlled disabled: presumably to avoid
        # bot detection on the site -- confirm before changing.
        browser = await p.chromium.launch(
            headless=False,
            args=['--no-sandbox', '--disable-blink-features=AutomationControlled', '--disable-dev-shm-usage']
        )
        ctx = await browser.new_context(
            viewport={'width': 1200, 'height': 800},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                       '(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
        )
        page = await ctx.new_page()

        COOKIE_FILE = 'mt_session_cookies.json'

        # Try cookies for resume: only when --start > 0, i.e. a restarted run.
        cookie_loaded = False
        if args.start > 0 and os.path.exists(COOKIE_FILE):
            try:
                with open(COOKIE_FILE) as f:
                    saved_cookies = json.load(f)
                await ctx.add_cookies(saved_cookies)
                cookie_loaded = True
                print("Loaded saved cookies for resume")
            except Exception as e:
                print(f"Cookie load failed: {e}")

        if not cookie_loaded:
            if not await do_login(page):
                print("LOGIN FAILED!")
                await browser.close()
                conn.close()
                return
            try:
                await page.wait_for_url('**/ais/home**', timeout=15000)
            except Exception:
                await asyncio.sleep(5)
            # Persist the fresh session cookies for future --start resumes.
            try:
                cookies = await ctx.cookies()
                with open(COOKIE_FILE, 'w') as f:
                    json.dump(cookies, f)
                print(f"Saved {len(cookies)} cookies")
            except Exception:
                pass

        # Load map once (page.goto) — all subsequent moves via setView
        _update_heartbeat()
        print("\nLoading map (initial page.goto)...", flush=True)

        # Capture tile responses: every matching XHR body is parsed and its
        # vessel rows appended here; the scan loop clears it per navigation.
        captured_rows = []

        async def on_tile_response(response):
            # Only the vessel-data tile endpoint is of interest.
            if 'getData/get_data_json_4' not in response.url:
                return
            try:
                body = await response.body()
                parsed = json.loads(body)
                rows = parsed.get('data', {}).get('rows', [])
                captured_rows.extend(rows)
            except Exception:
                pass

        page.on('response', on_tile_response)

        # Initial map load near 29E/40N (Mediterranean/Black Sea area).
        try:
            await asyncio.wait_for(
                page.goto(f'https://www.marinetraffic.com/en/ais/home/centerx:29/centery:40/zoom:{ZOOM}',
                          wait_until='load', timeout=30000), timeout=35)
        except Exception:
            pass
        await asyncio.sleep(8)

        test_cargo = sum(1 for r in captured_rows if is_cargo_vessel(r))
        if len(captured_rows) == 0:
            # One retry at a different busy location (4E/52N) before giving up.
            print("WARNING: No tile data from initial load. Retrying...", flush=True)
            try:
                await asyncio.wait_for(
                    page.goto(f'https://www.marinetraffic.com/en/ais/home/centerx:4/centery:52/zoom:{ZOOM}',
                              wait_until='load', timeout=30000), timeout=35)
            except Exception:
                pass
            await asyncio.sleep(10)
            test_cargo = sum(1 for r in captured_rows if is_cargo_vessel(r))
            if len(captured_rows) == 0:
                print("FATAL: Cannot capture tile data. Exiting.", flush=True)
                await browser.close()
                conn.close()
                return

        print(f"Map loaded! Test: {len(captured_rows)} vessels, {test_cargo} cargo/tanker", flush=True)

        # Verify mtMap.setView is available (fast in-page pan vs full reload).
        has_setview = await page.evaluate("() => typeof window.mtMap !== 'undefined' && typeof window.mtMap.setView === 'function'")
        if not has_setview:
            print("ERROR: window.mtMap.setView not available! Falling back to page.goto.", flush=True)
            use_setview = False
        else:
            print("mtMap.setView() available -- using fast mode!", flush=True)
            use_setview = True

        # === MAIN SCAN LOOP ===
        total_new = 0
        total_cargo = 0
        total_navs_done = 0
        total_vessels_seen = 0
        empty_navs = 0
        t0 = time.time()
        checkpoint_file = 'mt_global_tiles_progress.json'

        # Load checkpoint (counters only; the position comes from --start).
        if args.start > 0 and os.path.exists(checkpoint_file):
            try:
                with open(checkpoint_file) as f:
                    cp = json.load(f)
                total_new = cp.get('total_new', 0)
                total_cargo = cp.get('total_cargo', 0)
                print(f"Resuming from nav {args.start} | new={total_new}")
            except Exception:
                pass

        print(f"\n{'='*65}")
        print(f"GLOBAL TILE SCAN: {total_navs} navigations, zoom={ZOOM}")
        print(f"Wait: {args.wait}s | Delay: {args.delay}s | Start: {args.start}")
        print(f"Mode: {'setView (fast)' if use_setview else 'page.goto (slow)'}")
        print(f"{'='*65}\n")

        consecutive_errors = 0
        last_status_time = time.time()

        for idx in range(args.start, total_navs):
            lat, lon, label = grid[idx]
            _update_heartbeat()

            # Discard rows from the previous navigation.
            captured_rows.clear()

            if use_setview:
                # Fast: just move the map via JS
                try:
                    await page.evaluate(f"() => {{ window.mtMap.setView([{lat}, {lon}], {ZOOM}); }}")
                except Exception as e:
                    err = str(e)[:60]
                    # A crashed/closed browser cannot be recovered here; exit
                    # so a supervisor restart can resume via --start.
                    if 'crash' in err.lower() or 'target closed' in err.lower():
                        print(f" Nav {idx}: BROWSER CRASH -- exiting for restart", flush=True)
                        break
                    # setView failed — try page.goto as fallback
                    print(f" Nav {idx}: setView failed ({err}), fallback to goto", flush=True)
                    try:
                        await asyncio.wait_for(
                            page.goto(f'https://www.marinetraffic.com/en/ais/home/centerx:{lon}/centery:{lat}/zoom:{ZOOM}',
                                      wait_until='load', timeout=35000), timeout=40)
                    except Exception:
                        consecutive_errors += 1
                        if consecutive_errors >= 10:
                            print("Too many errors. Exiting.", flush=True)
                            break
                        continue
                    await asyncio.sleep(5)
            else:
                # Slow fallback: full page reload
                try:
                    await asyncio.wait_for(
                        page.goto(f'https://www.marinetraffic.com/en/ais/home/centerx:{lon}/centery:{lat}/zoom:{ZOOM}',
                                  wait_until='load', timeout=35000), timeout=40)
                except Exception as e:
                    if 'crash' in str(e).lower():
                        print(f" Nav {idx}: CRASH -- exiting", flush=True)
                        break
                    await asyncio.sleep(3)
                    consecutive_errors += 1
                    if consecutive_errors >= 10:
                        break
                    continue

            # Give the tile XHRs time to arrive before reading captured_rows.
            await asyncio.sleep(args.wait)
            _update_heartbeat()

            # Process captured vessels
            nav_new = 0
            nav_cargo = 0
            total_vessels_seen += len(captured_rows)

            if len(captured_rows) == 0:
                empty_navs += 1
            else:
                consecutive_errors = 0

            for row in captured_rows:
                if not is_cargo_vessel(row):
                    continue
                nav_cargo += 1

                sid = str(row.get('SHIP_ID', ''))
                if not sid or sid in existing_ids:
                    continue

                existing_ids.add(sid)
                nav_new += 1
                total_new += 1

                cat = classify_vessel(row)
                gt = str(row.get('GT_SHIPTYPE', '') or '')

                # Upsert; position/speed refresh on conflict, category kept
                # unless the new row provides one (COALESCE).
                try:
                    conn, cur = db_safe_execute(conn, cur, """
                        INSERT INTO mt_bulk_staging
                        (ship_id, name, flag, dwt, shiptype, gt_shiptype, type_category,
                         lat, lon, speed, course, destination, scraped_at)
                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())
                        ON CONFLICT (ship_id) DO UPDATE SET
                            lat = EXCLUDED.lat, lon = EXCLUDED.lon,
                            speed = EXCLUDED.speed, course = EXCLUDED.course,
                            type_category = COALESCE(EXCLUDED.type_category, mt_bulk_staging.type_category),
                            scraped_at = NOW()
                    """, (
                        sid,
                        row.get('SHIPNAME', ''),
                        row.get('FLAG', ''),
                        int(row.get('DWT', 0) or 0),
                        str(row.get('SHIPTYPE', '')),
                        gt,
                        cat,
                        row.get('LAT'),
                        row.get('LON'),
                        row.get('SPEED'),
                        row.get('COURSE'),
                        row.get('DESTINATION', ''),
                    ))
                    conn.commit()
                except Exception as e:
                    print(f" DB error: {str(e)[:80]}", flush=True)

            total_cargo += nav_cargo
            total_navs_done += 1

            # Progress every 10 navs or when new vessels found
            now = time.time()
            if total_navs_done % 10 == 0 or nav_new > 0 or (now - last_status_time > 60):
                elapsed = now - t0
                rate = total_navs_done / max(elapsed, 1) * 60
                remaining_navs = total_navs - idx - 1
                eta_min = remaining_navs / max(rate / 60, 0.01) / 60
                print(f" [{idx+1}/{total_navs}] {label} ({lat:.1f},{lon:.1f}) | "
                      f"seen={len(captured_rows)} cargo={nav_cargo} NEW={nav_new} | "
                      f"total_new={total_new} DB={len(existing_ids)} | "
                      f"{elapsed/60:.1f}m | ETA: {eta_min:.0f}m", flush=True)
                last_status_time = now

            # Checkpoint every 50 navs
            if total_navs_done % 50 == 0:
                with open(checkpoint_file, 'w') as f:
                    json.dump({
                        'last_nav': idx,
                        'total_new': total_new,
                        'total_cargo': total_cargo,
                        'total_navs': total_navs_done,
                        'total_vessels_seen': total_vessels_seen,
                        'db_count': len(existing_ids),
                    }, f)

            # GC every 30 navs (only effective when Chromium exposes gc()).
            if total_navs_done % 30 == 0:
                try:
                    await page.evaluate('() => { if (typeof gc === "function") gc(); }')
                except Exception:
                    pass

            await asyncio.sleep(args.delay)

        # Final stats
        elapsed = time.time() - t0
        try:
            cur.execute('SELECT count(*) FROM mt_bulk_staging')
            final_count = cur.fetchone()[0]
        except Exception:
            final_count = len(existing_ids)

        page.remove_listener('response', on_tile_response)

        print(f"\n{'='*65}")
        print(f"GLOBAL TILE SCAN COMPLETE -- {elapsed/60:.1f} minutes")
        print(f" Navigations: {total_navs_done}")
        print(f" Empty navs: {empty_navs}")
        print(f" Vessels seen: {total_vessels_seen}")
        print(f" Cargo found: {total_cargo}")
        print(f" New vessels: {total_new}")
        print(f" DB total: {final_count}")
        print(f"{'='*65}")

        # Final checkpoint marks the run completed.
        with open(checkpoint_file, 'w') as f:
            json.dump({
                'last_nav': total_navs - 1,
                'total_new': total_new,
                'total_cargo': total_cargo,
                'total_navs': total_navs_done,
                'total_vessels_seen': total_vessels_seen,
                'db_count': final_count,
                'completed': True,
                'elapsed_min': elapsed / 60,
            }, f, indent=2)

        await browser.close()
        conn.close()
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == '__main__':
    # Single asyncio entry point; the watchdog daemon thread is already running.
    asyncio.run(main())
|