montana/Русский/Логистика/mt_global_tiles.py

706 lines
26 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
MT Global Tile Scanner — Fetch ALL cargo+tanker vessels worldwide via tile API.
Strategy:
1. Login + load map once via page.goto()
2. Move map via window.mtMap.setView() (no page reload, 1.7x faster)
3. Capture tile responses via page.on('response')
~1957 navigation points × ~3.5s = ~114 minutes for full global scan.
Usage:
python mt_global_tiles.py # Full global scan
python mt_global_tiles.py --probe # Test first 15 navigations
python mt_global_tiles.py --start 200 # Resume from navigation #200
"""
import asyncio, json, sys, os, time, struct, hmac, hashlib, base64, argparse
import threading
import psycopg2
# ---- WATCHDOG ----
# Hard safety net: if the main loop stops calling _update_heartbeat() for too
# long (hung browser, stuck await), kill the whole process so a supervisor
# can restart it.
_heartbeat_time = time.time()
_WATCHDOG_TIMEOUT = 300  # 5 min

def _update_heartbeat():
    """Record that the main loop made progress; resets the watchdog timer."""
    global _heartbeat_time
    _heartbeat_time = time.time()

def _watchdog_thread():
    """Poll every 10s; hard-exit (os._exit) once the heartbeat goes stale."""
    while True:
        time.sleep(10)
        stale_for = time.time() - _heartbeat_time
        if stale_for <= _WATCHDOG_TIMEOUT:
            continue
        print(f"\n[WATCHDOG] No heartbeat for {time.time() - _heartbeat_time:.0f}s -- exiting!", flush=True)
        os._exit(2)

_wd = threading.Thread(target=_watchdog_thread, daemon=True)
_wd.start()
# Run with the script's own directory as CWD so relative files (cookies,
# checkpoints) land next to the script regardless of how it was launched.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Force UTF-8, line-buffered console output where the streams support it
# (avoids encode errors on consoles with a non-UTF-8 default codepage).
for _stream in (sys.stdout, sys.stderr):
    if hasattr(_stream, 'reconfigure'):
        _stream.reconfigure(encoding='utf-8', errors='replace', line_buffering=True)
# --- Account & connection settings ---
# NOTE(security): live credentials, a TOTP seed, and a DB password are
# hard-coded below. They should be moved to environment variables or a
# secrets store before this file is shared or committed.
EMAIL = "operation@mrlogisticcorp.com"
PASSWORD = "NKh9i8Z!7fU9jfi"
TOTP_SECRET = "MNWTEPTFJZBUC32GJFEWY6LVKQ2GGYKH"
# Local end of an SSH tunnel (port 15432) forwarded to the remote Postgres.
DB_URL = 'postgresql://seafare:SF_m0ntana_2026@127.0.0.1:15432/seafare_db'

# --- Scan tuning ---
ZOOM = 10  # map zoom level used for every navigation point
SETVIEW_WAIT = 3.0 # seconds to wait after setView for tiles to load
NAV_DELAY = 0.5 # seconds between navigations

# GT_SHIPTYPE codes for cargo + tanker vessels
CARGO_GT_TYPES = {
    '6', '9', '11', '12', '21', '70', '122', # Cargo (green)
    '4', '17', '18', '19', '56', '71', '88', # Tanker (red)
}
# Lower-cased substrings matched against the SHIPTYPE text when no GT code
# matches (fallback classification path).
CARGO_TYPE_KEYWORDS = {
    'bulk', 'cargo', 'container', 'tanker', 'chemical', 'oil',
    'lpg', 'lng', 'reefer', 'ro-ro', 'roro', 'multipurpose',
    'general', 'ore', 'cement', 'vehicle', 'barge', 'heavy load',
    'fpso', 'fso', 'bitumen', 'asphalt', 'product',
}
def generate_global_grid():
    """Generate ~1950 navigation points covering all world shipping routes.

    Returns a list of (lat, lon, region_label) tuples, deduplicated to a
    half-degree grid while preserving first-seen order.
    """
    points = []

    def grid(lat_min, lat_max, lon_min, lon_max, step_lat, step_lon, label=""):
        # Walk the bounding box in step_lat/step_lon increments (inclusive).
        cur_lat = lat_min
        while cur_lat <= lat_max:
            cur_lon = lon_min
            while cur_lon <= lon_max:
                points.append((round(cur_lat, 1), round(cur_lon, 1), label))
                cur_lon += step_lon
            cur_lat += step_lat

    # === EUROPE ===
    grid(30, 46, -6, 36, 1.5, 2.0, "Mediterranean")
    grid(40, 47, 27, 42, 1.5, 2.0, "Black Sea")
    grid(53, 66, 10, 30, 1.5, 2.0, "Baltic")
    grid(48, 62, -5, 10, 2.0, 2.5, "North Sea")
    grid(36, 48, -10, -1, 2.5, 2.5, "Biscay/Iberia")
    grid(58, 71, 5, 20, 2.5, 3.0, "Norway")
    # === MIDDLE EAST ===
    grid(12, 30, 32, 44, 1.5, 2.0, "Red Sea")
    grid(23, 30, 47, 57, 1.5, 2.0, "Persian Gulf")
    grid(10, 25, 55, 70, 2.5, 3.0, "Arabian Sea")
    # === CASPIAN ===
    grid(36, 47, 46, 55, 2.0, 2.0, "Caspian")
    # === SOUTH ASIA / INDIAN OCEAN ===
    grid(8, 23, 68, 78, 2.5, 2.5, "India West")
    grid(5, 22, 78, 92, 2.5, 2.5, "Bay of Bengal")
    grid(0, 10, 72, 82, 3.0, 3.0, "Sri Lanka")
    grid(-15, 5, 38, 52, 3.0, 3.0, "East Africa")
    grid(-26, -10, 32, 48, 3.0, 4.0, "Mozambique")
    grid(-5, 15, 55, 95, 4.0, 5.0, "Indian Ocean lanes")
    # === SOUTHEAST ASIA ===
    grid(-2, 8, 95, 106, 1.5, 2.0, "Malacca")
    grid(0, 22, 105, 120, 2.5, 2.5, "South China Sea")
    grid(-8, 0, 105, 135, 2.5, 3.0, "Indonesia")
    grid(5, 18, 118, 128, 2.5, 2.5, "Philippines")
    # === EAST ASIA ===
    grid(20, 40, 110, 125, 1.5, 2.0, "China coast")
    grid(30, 42, 125, 142, 2.5, 2.5, "Korea/Japan")
    grid(30, 45, 135, 146, 2.5, 3.0, "Japan East")
    grid(22, 26, 117, 122, 1.5, 1.5, "Taiwan Strait")
    # === OCEANIA ===
    grid(-38, -10, 140, 155, 3.0, 3.0, "Australia East")
    grid(-35, -15, 112, 125, 3.0, 3.0, "Australia West")
    grid(-15, -8, 125, 145, 3.0, 4.0, "Australia North")
    grid(-47, -34, 166, 178, 3.0, 4.0, "New Zealand")
    # === NORTH ATLANTIC ===
    grid(25, 45, -82, -65, 2.5, 2.5, "US East Coast")
    grid(18, 30, -98, -80, 2.5, 3.0, "Gulf of Mexico")
    grid(10, 22, -85, -60, 3.0, 4.0, "Caribbean")
    grid(43, 52, -70, -50, 3.0, 4.0, "Canada East")
    grid(40, 55, -50, -10, 4.0, 5.0, "North Atlantic lanes")
    # === SOUTH ATLANTIC ===
    grid(-30, 0, -50, -30, 3.0, 4.0, "Brazil")
    grid(-5, 15, -20, 5, 3.0, 4.0, "West Africa")
    grid(-35, -25, 15, 35, 3.0, 4.0, "South Africa")
    grid(-35, -5, -30, 10, 5.0, 6.0, "South Atlantic lanes")
    # === NORTH PACIFIC ===
    grid(30, 50, -130, -115, 2.5, 3.0, "US West Coast")
    grid(5, 15, -85, -75, 2.5, 2.5, "Panama")
    grid(30, 50, -170, -130, 5.0, 6.0, "North Pacific lanes")
    grid(20, 35, 140, 180, 4.0, 5.0, "West Pacific")
    # === SOUTH PACIFIC ===
    grid(-40, -5, -80, -70, 4.0, 3.0, "South America West")
    # === ARCTIC / SUB-ARCTIC ===
    grid(65, 72, 20, 70, 4.0, 5.0, "NSR approach")

    # Deduplicate: collapse points that share the same half-degree cell,
    # keeping the first occurrence (dict preserves insertion order).
    deduped = {}
    for plat, plon, plabel in points:
        cell = (round(plat * 2) / 2, round(plon * 2) / 2)
        deduped.setdefault(cell, (plat, plon, plabel))
    return list(deduped.values())
def is_cargo_vessel(row):
    """Return True when a tile row represents a cargo or tanker vessel.

    Primary signal is the numeric GT_SHIPTYPE code; the free-text SHIPTYPE
    field is used as a keyword fallback.
    """
    if str(row.get('GT_SHIPTYPE', '') or '') in CARGO_GT_TYPES:
        return True
    shiptype_text = str(row.get('SHIPTYPE', '') or '').lower()
    return any(keyword in shiptype_text for keyword in CARGO_TYPE_KEYWORDS)
def classify_vessel(row):
    """Classify a tile row into 'bulk', 'container', 'roro', 'tanker' or 'general'.

    GT_SHIPTYPE codes take precedence; otherwise the SHIPTYPE text is matched
    against keywords in priority order. Unrecognized rows fall back to 'general'.
    """
    gt_code = str(row.get('GT_SHIPTYPE', '') or '')
    code_to_category = {
        '6': 'bulk', '11': 'container', '12': 'roro',
        '4': 'tanker', '17': 'tanker', '18': 'tanker', '19': 'tanker',
        '56': 'tanker', '71': 'tanker', '88': 'tanker',
        '9': 'general', '70': 'general', '122': 'general', '21': 'general',
    }
    if gt_code in code_to_category:
        return code_to_category[gt_code]

    text = str(row.get('SHIPTYPE', '') or '').lower()
    if 'bulk' in text:
        return 'bulk'
    if 'container' in text:
        return 'container'
    if any(word in text for word in ('tanker', 'oil', 'chemical', 'lpg', 'lng')):
        return 'tanker'
    if any(word in text for word in ('ro-ro', 'roro', 'vehicle')):
        return 'roro'
    return 'general'
def totp(secret, now=None):
    """Return the 6-digit RFC 6238 TOTP code for *secret*.

    Args:
        secret: Base32-encoded shared secret; spaces and lowercase accepted.
        now: Optional Unix timestamp to compute the code for. Defaults to the
             current time. Added (backward-compatibly) so codes can be
             verified against fixed instants, e.g. the RFC 6238 test vectors.

    Returns:
        Zero-padded 6-character digit string (30-second time step, HMAC-SHA1).
    """
    normalized = secret.upper().replace(' ', '')
    # b32decode requires the input length to be a multiple of 8; re-pad.
    key = base64.b32decode(normalized + '=' * ((-len(normalized)) % 8))
    counter = int(time.time() if now is None else now) // 30
    digest = hmac.new(key, struct.pack('>Q', counter), hashlib.sha1).digest()
    # RFC 4226 dynamic truncation: low nibble of the last byte picks the offset.
    offset = digest[-1] & 0x0f
    code = struct.unpack('>I', digest[offset:offset + 4])[0] & 0x7fffffff
    return str(code % 1000000).zfill(6)
def totp_wait_for_fresh(min_remaining=8):
    """Return a TOTP code for TOTP_SECRET with at least *min_remaining* seconds
    of validity left, sleeping into the next 30s window when necessary
    (prevents the code from expiring mid-submit)."""
    while True:
        seconds_left = 30 - (int(time.time()) % 30)
        if seconds_left >= min_remaining:
            break
        # Too close to the window edge; sleep into the next window.
        time.sleep(seconds_left + 1)
    code = totp(TOTP_SECRET)
    seconds_left = 30 - (int(time.time()) % 30)
    print(f" [TOTP] Code={code} | {seconds_left}s remaining", flush=True)
    return code
def db_connect():
    """Open a fresh PostgreSQL connection with TCP keepalives enabled, so a
    silently dropped SSH tunnel is detected quickly instead of hanging."""
    keepalive_options = dict(
        keepalives=1,
        keepalives_idle=30,
        keepalives_interval=10,
        keepalives_count=5,
    )
    return psycopg2.connect(DB_URL, connect_timeout=15, **keepalive_options)
def _restart_ssh_tunnel():
    """Kill any running ssh client and respawn the local port-forward tunnel
    to the database host. Windows-specific (taskkill); best-effort -- every
    failure is swallowed or logged, never raised."""
    import subprocess
    try:
        # Crude but effective: the tunnel is the only ssh.exe expected here.
        subprocess.run(['taskkill', '/F', '/IM', 'ssh.exe'], capture_output=True, timeout=5)
    except Exception:
        pass
    time.sleep(2)
    tunnel_cmd = [
        'ssh', '-o', 'ServerAliveInterval=5', '-o', 'ServerAliveCountMax=120',
        '-o', 'TCPKeepAlive=yes', '-o', 'StrictHostKeyChecking=no',
        '-L', '15432:127.0.0.1:5432', '-N', 'root@89.19.208.158',
    ]
    try:
        subprocess.Popen(tunnel_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        print(" [SSH] Tunnel restarted, waiting 5s...", flush=True)
        time.sleep(5)
    except Exception as exc:
        print(f" [SSH] Failed: {exc}", flush=True)
def db_reconnect(conn):
    """Tear down *conn* and establish a new connection, retrying persistently.

    Phase 1 tries a plain reconnect five times; phase 2 restarts the SSH
    tunnel and retries ten more times (with one extra tunnel restart midway).
    Returns a fresh (connection, cursor) pair, or raises after all retries.
    """
    try:
        conn.close()
    except Exception:
        pass
    # Phase 1: plain reconnect attempts.
    for attempt in range(5):
        try:
            time.sleep(3)
            fresh = db_connect()
            print(f" [DB] Reconnected (attempt {attempt+1})", flush=True)
            return fresh, fresh.cursor()
        except Exception as e:
            print(f" [DB] Attempt {attempt+1} failed: {e}", flush=True)
    # Phase 2: assume the tunnel died -- restart it and retry harder.
    print(" [DB] Restarting SSH tunnel...", flush=True)
    _restart_ssh_tunnel()
    for attempt in range(10):
        try:
            time.sleep(5)
            fresh = db_connect()
            print(f" [DB] Reconnected after tunnel restart (attempt {attempt+1})", flush=True)
            return fresh, fresh.cursor()
        except Exception as e:
            print(f" [DB] Post-restart attempt {attempt+1} failed: {e}", flush=True)
            if attempt == 4:
                # Halfway through phase 2: give the tunnel one more kick.
                _restart_ssh_tunnel()
    raise Exception("DB reconnect failed")
def db_safe_execute(conn, cur, query, params=None):
    """Execute *query*, transparently reconnecting once if the connection died.

    Returns the (possibly replaced) (conn, cur) pair so callers can rebind.
    Non-connection database errors are rolled back and re-raised.
    """
    try:
        cur.execute(query, params)
    except (psycopg2.InterfaceError, psycopg2.OperationalError) as e:
        # Connection-level failure: rebuild the connection, then retry once.
        print(f" [DB] Lost on execute ({e}), reconnecting...", flush=True)
        conn, cur = db_reconnect(conn)
        cur.execute(query, params)
    except psycopg2.Error:
        # Query-level failure: clear the aborted transaction, let caller handle.
        try:
            conn.rollback()
        except Exception:
            pass
        raise
    return conn, cur
async def do_login(page, max_retries=3):
    """Drive the MarineTraffic multi-step login flow (email -> password -> MFA).

    Each step is best-effort: selector/URL waits that time out fall through
    to a fixed sleep so a slightly different page flow doesn't abort the
    attempt. Returns True once the browser lands back on marinetraffic.com,
    False after max_retries failed attempts.

    NOTE(review): the auth pages appear to be hosted on auth.kpler (URL
    checks below) -- confirm if the provider changes.
    """
    for attempt in range(max_retries):
        _update_heartbeat()
        print(f"LOGIN (attempt {attempt+1}/{max_retries})...", flush=True)
        try:
            await asyncio.wait_for(
                page.goto('https://www.marinetraffic.com/en/users/login',
                          wait_until='domcontentloaded', timeout=30000), timeout=35.0)
        except Exception:
            pass
        await asyncio.sleep(3)
        _update_heartbeat()
        # --- Step 1: submit the email/username form ---
        try:
            await page.wait_for_selector('input[name="username"]', timeout=20000)
            await page.fill('input[name="username"]', EMAIL, timeout=15000)
            # Click via JS instead of page.click -- works even if the button
            # is overlaid or not considered "visible" by Playwright.
            await page.evaluate('() => { const b = document.querySelector("button[type=submit]"); if(b) b.click(); }')
            try:
                await page.wait_for_url('**/login/password**', timeout=20000)
            except Exception:
                await asyncio.sleep(5)
        except Exception as e:
            print(f" email step error: {e}", flush=True)
            await asyncio.sleep(5)
        # --- Step 2: submit the password form ---
        try:
            await page.wait_for_selector('input[name="password"]', timeout=20000)
            await page.fill('input[name="password"]', PASSWORD, timeout=15000)
            await page.evaluate('() => { const b = document.querySelector("button[type=submit]"); if(b) b.click(); }')
        except Exception as e:
            print(f" password step error: {e}", flush=True)
        try:
            await page.wait_for_url('**/mfa**', timeout=25000)
        except Exception:
            await asyncio.sleep(5)
        # --- Step 3: MFA on the auth.kpler domain ---
        url = page.url
        if 'auth.kpler' in url:
            if 'mfa-login-options' in url:
                # Pick the "Authenticator" MFA method from the options page.
                try:
                    await page.evaluate("""() => {
                        for (const b of document.querySelectorAll('button')) {
                            if (b.textContent.includes('Authenticator')) { b.click(); return; }
                        }
                    }""")
                    await page.wait_for_url('**/mfa-otp-challenge**', timeout=20000)
                except Exception:
                    await asyncio.sleep(5)
            if 'mfa-otp-challenge' in page.url or 'auth.kpler' in page.url:
                _update_heartbeat()
                # Wait for a TOTP code with enough validity left to submit.
                otp = totp_wait_for_fresh(8)
                try:
                    await page.wait_for_selector('input[name="code"]', timeout=15000)
                    await page.fill('input[name="code"]', otp, timeout=15000)
                    await page.evaluate('() => { const b = document.querySelector("button[type=submit]"); if(b) b.click(); }')
                except Exception:
                    pass
        _update_heartbeat()
        try:
            await page.wait_for_url('**marinetraffic.com**', timeout=25000)
        except Exception:
            await asyncio.sleep(15)
        _update_heartbeat()
        # Success = back on marinetraffic.com and off the auth domain.
        ok = 'marinetraffic.com' in page.url and 'auth.kpler' not in page.url
        if ok:
            print(f" Login: OK | {page.url[:60]}", flush=True)
            return True
        print(f" Login failed | {page.url[:80]}", flush=True)
        if attempt < max_retries - 1:
            # Start the next attempt with a clean session.
            try:
                await page.context.clear_cookies()
            except Exception:
                pass
            await asyncio.sleep(10)
    return False
async def main():
    """Entry point: parse CLI args, connect to the DB, log in via Playwright,
    then sweep the global navigation grid capturing vessel tile responses
    and upserting cargo/tanker rows into mt_bulk_staging."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--probe', action='store_true', help='Test first 15 navigations')
    parser.add_argument('--start', type=int, default=0, help='Start from navigation index N')
    parser.add_argument('--wait', type=float, default=SETVIEW_WAIT, help='Wait after setView (default 3.0)')
    parser.add_argument('--delay', type=float, default=NAV_DELAY, help='Delay between navs (default 0.5)')
    args = parser.parse_args()
    grid = generate_global_grid()
    total_navs = len(grid)
    print(f"Generated {total_navs} navigation points (zoom={ZOOM})")
    if args.probe:
        grid = grid[:15]
        total_navs = 15
        print(f"PROBE MODE: testing first {total_navs} navigations")
    # DB connection -- also loads the set of already-known ship_ids so the
    # scan only INSERTs genuinely new vessels.
    try:
        conn = db_connect()
        cur = conn.cursor()
        cur.execute('SELECT count(*) FROM mt_bulk_staging')
        existing_count = cur.fetchone()[0]
        cur.execute('SELECT ship_id FROM mt_bulk_staging WHERE ship_id IS NOT NULL')
        existing_ids = set(str(r[0]) for r in cur.fetchall())
        print(f"DB: {existing_count} existing vessels | {len(existing_ids)} ship_ids")
    except Exception as e:
        print(f"DB ERROR: {e}")
        print("Start SSH tunnel: ssh -L 15432:127.0.0.1:5432 -N root@89.19.208.158")
        return
    from playwright.async_api import async_playwright
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=False,
            args=['--no-sandbox', '--disable-blink-features=AutomationControlled', '--disable-dev-shm-usage']
        )
        ctx = await browser.new_context(
            viewport={'width': 1200, 'height': 800},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                       '(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
        )
        page = await ctx.new_page()
        COOKIE_FILE = 'mt_session_cookies.json'
        # On resume (--start > 0), try to reuse saved cookies to skip login.
        cookie_loaded = False
        if args.start > 0 and os.path.exists(COOKIE_FILE):
            try:
                with open(COOKIE_FILE) as f:
                    saved_cookies = json.load(f)
                await ctx.add_cookies(saved_cookies)
                cookie_loaded = True
                print("Loaded saved cookies for resume")
            except Exception as e:
                print(f"Cookie load failed: {e}")
        if not cookie_loaded:
            if not await do_login(page):
                print("LOGIN FAILED!")
                await browser.close()
                conn.close()
                return
            try:
                await page.wait_for_url('**/ais/home**', timeout=15000)
            except Exception:
                await asyncio.sleep(5)
            # Persist cookies so a later --start resume can skip login.
            try:
                cookies = await ctx.cookies()
                with open(COOKIE_FILE, 'w') as f:
                    json.dump(cookies, f)
                print(f"Saved {len(cookies)} cookies")
            except Exception:
                pass
        # Load map once (page.goto) — all subsequent moves via setView
        _update_heartbeat()
        print("\nLoading map (initial page.goto)...", flush=True)
        # Capture tile responses: every matching network response is parsed
        # and its rows accumulated into captured_rows.
        captured_rows = []
        async def on_tile_response(response):
            # Only the vessel-tile JSON endpoint is of interest.
            if 'getData/get_data_json_4' not in response.url:
                return
            try:
                body = await response.body()
                parsed = json.loads(body)
                rows = parsed.get('data', {}).get('rows', [])
                captured_rows.extend(rows)
            except Exception:
                pass
        page.on('response', on_tile_response)
        try:
            await asyncio.wait_for(
                page.goto(f'https://www.marinetraffic.com/en/ais/home/centerx:29/centery:40/zoom:{ZOOM}',
                          wait_until='load', timeout=30000), timeout=35)
        except Exception:
            pass
        await asyncio.sleep(8)
        test_cargo = sum(1 for r in captured_rows if is_cargo_vessel(r))
        if len(captured_rows) == 0:
            # Initial location produced nothing -- retry over a busier area.
            print("WARNING: No tile data from initial load. Retrying...", flush=True)
            try:
                await asyncio.wait_for(
                    page.goto(f'https://www.marinetraffic.com/en/ais/home/centerx:4/centery:52/zoom:{ZOOM}',
                              wait_until='load', timeout=30000), timeout=35)
            except Exception:
                pass
            await asyncio.sleep(10)
            test_cargo = sum(1 for r in captured_rows if is_cargo_vessel(r))
        if len(captured_rows) == 0:
            print("FATAL: Cannot capture tile data. Exiting.", flush=True)
            await browser.close()
            conn.close()
            return
        print(f"Map loaded! Test: {len(captured_rows)} vessels, {test_cargo} cargo/tanker", flush=True)
        # Verify mtMap.setView is available (fast in-page panning); otherwise
        # every navigation falls back to a full page.goto reload.
        has_setview = await page.evaluate("() => typeof window.mtMap !== 'undefined' && typeof window.mtMap.setView === 'function'")
        if not has_setview:
            print("ERROR: window.mtMap.setView not available! Falling back to page.goto.", flush=True)
            use_setview = False
        else:
            print("mtMap.setView() available -- using fast mode!", flush=True)
            use_setview = True
        # === MAIN SCAN LOOP ===
        total_new = 0
        total_cargo = 0
        total_navs_done = 0
        total_vessels_seen = 0
        empty_navs = 0
        t0 = time.time()
        checkpoint_file = 'mt_global_tiles_progress.json'
        # Load checkpoint counters when resuming, so totals carry over.
        if args.start > 0 and os.path.exists(checkpoint_file):
            try:
                with open(checkpoint_file) as f:
                    cp = json.load(f)
                total_new = cp.get('total_new', 0)
                total_cargo = cp.get('total_cargo', 0)
                print(f"Resuming from nav {args.start} | new={total_new}")
            except Exception:
                pass
        print(f"\n{'='*65}")
        print(f"GLOBAL TILE SCAN: {total_navs} navigations, zoom={ZOOM}")
        print(f"Wait: {args.wait}s | Delay: {args.delay}s | Start: {args.start}")
        print(f"Mode: {'setView (fast)' if use_setview else 'page.goto (slow)'}")
        print(f"{'='*65}\n")
        consecutive_errors = 0
        last_status_time = time.time()
        for idx in range(args.start, total_navs):
            lat, lon, label = grid[idx]
            _update_heartbeat()
            # Drop last navigation's rows; responses now belong to this point.
            captured_rows.clear()
            if use_setview:
                # Fast: just move the map via JS
                try:
                    await page.evaluate(f"() => {{ window.mtMap.setView([{lat}, {lon}], {ZOOM}); }}")
                except Exception as e:
                    err = str(e)[:60]
                    if 'crash' in err.lower() or 'target closed' in err.lower():
                        # Browser is gone; exit so an outer supervisor restarts us.
                        print(f" Nav {idx}: BROWSER CRASH -- exiting for restart", flush=True)
                        break
                    # setView failed — try page.goto as fallback
                    print(f" Nav {idx}: setView failed ({err}), fallback to goto", flush=True)
                    try:
                        await asyncio.wait_for(
                            page.goto(f'https://www.marinetraffic.com/en/ais/home/centerx:{lon}/centery:{lat}/zoom:{ZOOM}',
                                      wait_until='load', timeout=35000), timeout=40)
                    except Exception:
                        consecutive_errors += 1
                        if consecutive_errors >= 10:
                            print("Too many errors. Exiting.", flush=True)
                            break
                        continue
                    await asyncio.sleep(5)
            else:
                # Slow fallback: full page reload
                try:
                    await asyncio.wait_for(
                        page.goto(f'https://www.marinetraffic.com/en/ais/home/centerx:{lon}/centery:{lat}/zoom:{ZOOM}',
                                  wait_until='load', timeout=35000), timeout=40)
                except Exception as e:
                    if 'crash' in str(e).lower():
                        print(f" Nav {idx}: CRASH -- exiting", flush=True)
                        break
                    await asyncio.sleep(3)
                    consecutive_errors += 1
                    if consecutive_errors >= 10:
                        break
                    continue
            # Give the tile requests triggered by the move time to land.
            await asyncio.sleep(args.wait)
            _update_heartbeat()
            # Process captured vessels
            nav_new = 0
            nav_cargo = 0
            total_vessels_seen += len(captured_rows)
            if len(captured_rows) == 0:
                empty_navs += 1
            else:
                consecutive_errors = 0
            for row in captured_rows:
                if not is_cargo_vessel(row):
                    continue
                nav_cargo += 1
                sid = str(row.get('SHIP_ID', ''))
                if not sid or sid in existing_ids:
                    continue
                existing_ids.add(sid)
                nav_new += 1
                total_new += 1
                cat = classify_vessel(row)
                gt = str(row.get('GT_SHIPTYPE', '') or '')
                # Upsert: new ships are inserted; known ships get fresh
                # position/speed/category (keeps staging table current).
                try:
                    conn, cur = db_safe_execute(conn, cur, """
                        INSERT INTO mt_bulk_staging
                        (ship_id, name, flag, dwt, shiptype, gt_shiptype, type_category,
                         lat, lon, speed, course, destination, scraped_at)
                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())
                        ON CONFLICT (ship_id) DO UPDATE SET
                            lat = EXCLUDED.lat, lon = EXCLUDED.lon,
                            speed = EXCLUDED.speed, course = EXCLUDED.course,
                            type_category = COALESCE(EXCLUDED.type_category, mt_bulk_staging.type_category),
                            scraped_at = NOW()
                    """, (
                        sid,
                        row.get('SHIPNAME', ''),
                        row.get('FLAG', ''),
                        int(row.get('DWT', 0) or 0),
                        str(row.get('SHIPTYPE', '')),
                        gt,
                        cat,
                        row.get('LAT'),
                        row.get('LON'),
                        row.get('SPEED'),
                        row.get('COURSE'),
                        row.get('DESTINATION', ''),
                    ))
                    conn.commit()
                except Exception as e:
                    print(f" DB error: {str(e)[:80]}", flush=True)
            total_cargo += nav_cargo
            total_navs_done += 1
            # Progress every 10 navs or when new vessels found
            now = time.time()
            if total_navs_done % 10 == 0 or nav_new > 0 or (now - last_status_time > 60):
                elapsed = now - t0
                rate = total_navs_done / max(elapsed, 1) * 60
                remaining_navs = total_navs - idx - 1
                eta_min = remaining_navs / max(rate / 60, 0.01) / 60
                print(f" [{idx+1}/{total_navs}] {label} ({lat:.1f},{lon:.1f}) | "
                      f"seen={len(captured_rows)} cargo={nav_cargo} NEW={nav_new} | "
                      f"total_new={total_new} DB={len(existing_ids)} | "
                      f"{elapsed/60:.1f}m | ETA: {eta_min:.0f}m", flush=True)
                last_status_time = now
            # Checkpoint every 50 navs
            if total_navs_done % 50 == 0:
                with open(checkpoint_file, 'w') as f:
                    json.dump({
                        'last_nav': idx,
                        'total_new': total_new,
                        'total_cargo': total_cargo,
                        'total_navs': total_navs_done,
                        'total_vessels_seen': total_vessels_seen,
                        'db_count': len(existing_ids),
                    }, f)
            # GC every 30 navs (only effective if the page exposes gc())
            if total_navs_done % 30 == 0:
                try:
                    await page.evaluate('() => { if (typeof gc === "function") gc(); }')
                except Exception:
                    pass
            await asyncio.sleep(args.delay)
        # Final stats
        elapsed = time.time() - t0
        try:
            cur.execute('SELECT count(*) FROM mt_bulk_staging')
            final_count = cur.fetchone()[0]
        except Exception:
            final_count = len(existing_ids)
        page.remove_listener('response', on_tile_response)
        print(f"\n{'='*65}")
        print(f"GLOBAL TILE SCAN COMPLETE -- {elapsed/60:.1f} minutes")
        print(f" Navigations: {total_navs_done}")
        print(f" Empty navs: {empty_navs}")
        print(f" Vessels seen: {total_vessels_seen}")
        print(f" Cargo found: {total_cargo}")
        print(f" New vessels: {total_new}")
        print(f" DB total: {final_count}")
        print(f"{'='*65}")
        # Write the completed checkpoint so a restart knows the run finished.
        with open(checkpoint_file, 'w') as f:
            json.dump({
                'last_nav': total_navs - 1,
                'total_new': total_new,
                'total_cargo': total_cargo,
                'total_navs': total_navs_done,
                'total_vessels_seen': total_vessels_seen,
                'db_count': final_count,
                'completed': True,
                'elapsed_min': elapsed / 60,
            }, f, indent=2)
        await browser.close()
        conn.close()
# Script entry point: run the async scanner to completion.
if __name__ == '__main__':
    asyncio.run(main())