883 lines
35 KiB
Python
883 lines
35 KiB
Python
|
|
#!/usr/bin/env python3
"""
MT Regional Scraper — Full pipeline for specific sea regions.

For each region: tile scraping → save to DB → MMSI lookup → ownership fetch.
One browser session, one script, complete data for each region.

Regions: Caspian → Mediterranean → Baltic → everything else.

Usage:
    python mt_regional_scraper.py --region caspian        # Caspian Sea only
    python mt_regional_scraper.py --region med            # Mediterranean only
    python mt_regional_scraper.py --region baltic         # Baltic only
    python mt_regional_scraper.py --region all            # All regions sequentially
    python mt_regional_scraper.py --region caspian --phase tiles  # Only tile phase
    python mt_regional_scraper.py --region caspian --phase mmsi   # Only MMSI phase
    python mt_regional_scraper.py --region caspian --phase owner  # Only ownership phase
"""
import asyncio, json, sys, os, time, re, struct, hmac, hashlib, base64, argparse, math
import psycopg2
from datetime import datetime

# Run relative to the script's own directory so checkpoints/paths resolve.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
# Force UTF-8 console output (Windows cp1251/cp437 consoles choke otherwise).
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8', errors='replace')
if hasattr(sys.stderr, 'reconfigure'):
    sys.stderr.reconfigure(encoding='utf-8', errors='replace')

# SECURITY NOTE(review): credentials, TOTP seed and DB password are hardcoded
# in source — consider moving them to environment variables / a secrets store.
EMAIL = "operation@mrlogisticcorp.com"
PASSWORD = "NKh9i8Z!7fU9jfi"
TOTP_SECRET = "MNWTEPTFJZBUC32GJFEWY6LVKQ2GGYKH"
# DB is reached through a local SSH tunnel (port 15432 → remote 5432).
DB_URL = 'postgresql://seafare:SF_m0ntana_2026@127.0.0.1:15432/seafare_db'

# Column list requested from the MT "reports" endpoint in the ownership phase.
OWNERSHIP_COLS = 'shipname,imo,flag,beneficial_owner,operator,registered_owner'
# Checkpoint key prefix (appears unused in this file — TODO confirm elsewhere).
CKPT_PREFIX = 'mt_regional_'
|
||
|
|
|
||
|
|
|
||
|
|
def db_connect():
    """Open a psycopg2 connection to the staging DB.

    TCP keepalives are enabled so the connection survives brief stalls
    of the SSH tunnel it runs through.
    """
    keepalive_opts = {
        'connect_timeout': 15,
        'keepalives': 1,
        'keepalives_idle': 30,
        'keepalives_interval': 10,
        'keepalives_count': 5,
    }
    return psycopg2.connect(DB_URL, **keepalive_opts)
|
||
|
|
|
||
|
|
|
||
|
|
def _restart_ssh_tunnel():
    """Kill any running ssh client and relaunch the tunnel to the prod DB."""
    import subprocess
    # Windows-specific: force-kill the existing ssh.exe, best-effort.
    try:
        subprocess.run(['taskkill', '/F', '/IM', 'ssh.exe'],
                       capture_output=True, timeout=5)
    except Exception:
        pass
    time.sleep(2)
    # Relaunch the forwarding tunnel detached, output discarded.
    tunnel_cmd = [
        'ssh', '-o', 'ServerAliveInterval=5', '-o', 'ServerAliveCountMax=120',
        '-o', 'TCPKeepAlive=yes', '-o', 'StrictHostKeyChecking=no',
        '-L', '15432:127.0.0.1:5432', '-N', 'root@89.19.208.158',
    ]
    try:
        subprocess.Popen(
            tunnel_cmd,
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
        )
        print(f" [SSH] Tunnel restarted, waiting 5s...")
        time.sleep(5)
    except Exception as e:
        print(f" [SSH] Failed to restart: {e}")
|
||
|
|
|
||
|
|
|
||
|
|
def db_reconnect(conn):
    """Tear down *conn* and return a fresh ``(connection, cursor)`` pair.

    Strategy: first a handful of plain reconnects (the tunnel may still be
    alive), then restart the SSH tunnel and retry harder. Raises when every
    attempt fails.
    """
    try:
        conn.close()
    except Exception:
        pass
    # Phase 1: plain reconnects — tunnel might still be up.
    for n in range(5):
        try:
            time.sleep(3)
            fresh = db_connect()
            print(f" [DB] Reconnected (attempt {n+1})")
            return fresh, fresh.cursor()
        except Exception as e:
            print(f" [DB] Reconnect attempt {n+1} failed: {e}")
    # Phase 2: bounce the SSH tunnel and keep retrying.
    print(f" [DB] Restarting SSH tunnel...")
    _restart_ssh_tunnel()
    for n in range(10):
        try:
            time.sleep(5)
            fresh = db_connect()
            print(f" [DB] Reconnected after tunnel restart (attempt {n+1})")
            return fresh, fresh.cursor()
        except Exception as e:
            print(f" [DB] Post-restart attempt {n+1} failed: {e}")
            if n == 4:  # halfway through: bounce the tunnel once more
                _restart_ssh_tunnel()
    raise Exception("DB reconnect failed after tunnel restart")
|
||
|
|
|
||
|
|
|
||
|
|
def db_safe_commit(conn):
    """Commit on *conn*; reconnect first if the link died mid-commit.

    Returns a usable connection either way (the original on success, a
    replacement after an SSH-tunnel drop). Note the pending transaction
    is lost when a reconnect happens.
    """
    try:
        conn.commit()
    except (psycopg2.InterfaceError, psycopg2.OperationalError):
        print(f" [DB] Connection lost on commit, reconnecting...")
        conn, _ = db_reconnect(conn)
    return conn
|
||
|
|
|
||
|
|
|
||
|
|
def db_safe_execute(conn, cur, query, params=None):
    """Run *query* on *cur*, reconnecting once on a dropped connection.

    Returns the (possibly replaced) ``(conn, cur)`` pair so callers keep
    live handles. A failure after the reconnect retry propagates.
    """
    try:
        cur.execute(query, params)
    except (psycopg2.InterfaceError, psycopg2.OperationalError) as err:
        print(f" [DB] Connection lost on execute ({err}), reconnecting...")
        conn, cur = db_reconnect(conn)
        # Replay the statement once on the fresh cursor.
        cur.execute(query, params)
    return conn, cur
|
||
|
|
|
||
|
|
# ─── REGIONS: dense grid of map-center points per sea region ────────────────
# Each region has: name, zoom, [(lat, lon), ...] grid points.
# NOTE(review): header originally said zoom=8 (~5-6° per view, points every
# ~4°), but every region below actually uses zoom=10 (~1° per view) — the
# grids are correspondingly dense. Verify coverage if zoom is overridden.
REGIONS = {
    'caspian': {
        'name': 'Caspian Sea',
        'zoom': 10,  # max detail! At zoom=10 every vessel visible
        'grid': [
            # Dense Caspian coverage with zoom=10 (each view ~1 degree)
            # South Caspian (Iran coast: Anzali, Noshahr, Amirabad)
            (37.0, 49.0), (37.0, 50.0), (37.0, 51.0), (37.0, 52.0), (37.0, 53.0),
            (36.8, 49.5), (36.8, 50.5), (36.8, 51.5), (36.8, 52.5),
            (37.5, 49.0), (37.5, 50.0), (37.5, 51.0), (37.5, 52.0),
            # Baku area (major hub)
            (39.8, 49.5), (40.0, 49.8), (40.3, 50.0), (40.5, 50.3),
            (39.5, 49.0), (39.5, 50.0), (40.0, 50.5),
            # Turkmenistan coast (Turkmenbashi)
            (39.5, 52.5), (40.0, 53.0), (40.5, 52.5),
            (39.0, 52.0), (38.5, 53.0), (38.0, 52.5),
            # Central Caspian
            (40.0, 51.0), (41.0, 51.0), (42.0, 51.0),
            (40.5, 52.0), (41.5, 52.0),
            # Kazakhstan (Aktau port)
            (43.0, 51.0), (43.6, 51.2), (44.0, 51.5),
            (43.0, 52.0), (44.0, 52.0),
            # North Caspian
            (45.0, 51.0), (45.0, 52.0), (46.0, 51.5),
            (46.5, 51.0), (47.0, 51.5), (47.5, 52.0),
            # Astrakhan / Volga delta area
            (45.5, 48.5), (46.0, 49.0), (46.5, 49.5),
            (45.0, 49.0), (45.5, 49.5),
            # Dagestan coast (Makhachkala)
            (42.5, 47.5), (43.0, 47.5), (43.5, 48.0),
            (42.0, 48.5), (42.5, 48.0),
            # West coast (Azerbaijan south)
            (38.5, 49.0), (39.0, 49.5), (41.0, 49.0),
            (38.0, 49.5), (41.5, 50.0),
        ],
    },
    'med': {
        'name': 'Mediterranean Sea',
        'zoom': 10,  # max detail
        'grid': [
            # Gibraltar / Strait
            (36.0, -5.5), (36.0, -4.0), (36.5, -5.0), (36.5, -3.0),
            # Alboran Sea
            (36.0, -2.5), (36.0, -1.0), (36.5, -2.0), (37.0, -1.5),
            # Spanish coast (Cartagena, Valencia, Barcelona)
            (37.5, -1.0), (38.0, 0.0), (39.0, 0.5), (39.5, -0.5),
            (40.0, 0.5), (41.0, 1.0), (41.5, 2.5),
            # Balearics
            (39.5, 2.5), (39.0, 3.5),
            # French Riviera / Marseille
            (43.0, 5.0), (43.5, 5.5), (43.0, 7.0),
            # Gulf of Lion
            (42.5, 3.5), (42.5, 5.0),
            # Corsica/Sardinia
            (41.5, 9.0), (40.0, 9.5), (39.0, 9.0),
            # Genoa / NW Italy
            (44.0, 9.0), (43.5, 10.5),
            # Tyrrhenian Sea
            (41.0, 13.0), (40.5, 14.0), (39.5, 14.5), (38.5, 15.5),
            (40.0, 12.0), (41.5, 12.5),
            # Sicily / Malta
            (37.5, 15.0), (37.0, 14.0), (36.0, 14.5), (35.5, 14.5),
            # Tunisia/Libya coast
            (37.0, 10.0), (36.5, 11.0), (35.0, 12.0), (33.0, 13.0),
            (32.5, 15.0), (32.0, 20.0), (31.5, 25.0),
            # Adriatic (south to north)
            (40.0, 18.5), (41.0, 17.0), (42.0, 16.5), (43.0, 15.5),
            (44.0, 13.5), (44.5, 12.5), (45.0, 13.5),
            (41.5, 19.0), (39.5, 19.5),
            # Ionian Sea
            (38.0, 20.5), (37.5, 21.5), (36.5, 21.0),
            # Crete
            (35.5, 24.0), (35.0, 25.5), (35.5, 26.5),
            # Aegean
            (37.5, 24.0), (38.0, 25.0), (38.5, 23.5), (39.0, 26.0),
            (39.5, 25.5), (37.0, 25.5), (38.5, 26.5),
            # Dardanelles / Marmara
            (40.5, 27.0), (40.0, 26.0), (41.0, 29.0),
            # South Turkey coast
            (36.5, 28.5), (36.5, 30.0), (36.5, 32.0), (36.5, 34.0),
            (37.0, 35.5), (36.0, 33.0),
            # Cyprus
            (34.5, 33.0), (35.0, 34.0), (34.0, 32.5),
            # Lebanon/Israel
            (33.5, 35.0), (32.5, 34.5), (31.5, 34.0),
            # Egypt (Alexandria, Port Said, Suez Canal entry)
            (31.5, 30.0), (31.0, 32.0), (31.5, 32.5),
            (30.0, 32.5), (30.5, 33.0),
            # Libya south / offshore
            (33.5, 12.0), (33.5, 15.0), (34.0, 11.0),
            # Algeria coast
            (36.5, 1.0), (37.0, 3.0), (37.0, 6.0),
            (36.5, 8.0),
        ],
    },
    'baltic': {
        'name': 'Baltic Sea',
        'zoom': 10,  # max detail
        'grid': [
            # Danish Straits / Kattegat / Skagerrak
            (55.5, 8.0), (55.5, 9.0), (55.5, 10.0), (55.5, 11.0),
            (56.0, 8.5), (56.0, 10.0), (56.0, 11.5),
            (56.5, 8.5), (56.5, 10.0), (56.5, 12.0),
            (57.0, 9.0), (57.0, 10.5), (57.5, 8.5), (57.5, 10.0),
            (58.0, 10.0), (58.5, 10.5),
            # South Baltic (Kiel, Rostock, Gdansk, Kaliningrad)
            (54.0, 10.5), (54.0, 12.0), (54.0, 13.5),
            (54.5, 10.0), (54.5, 11.5), (54.5, 13.0), (54.5, 14.5),
            (54.5, 16.0), (54.5, 18.0), (54.5, 19.5),
            (55.0, 12.0), (55.0, 14.0), (55.0, 16.0), (55.0, 18.0), (55.0, 20.0),
            # Central Baltic (Sweden east coast, Latvia, Lithuania)
            (55.5, 13.0), (55.5, 15.0), (55.5, 17.0), (55.5, 19.0), (55.5, 21.0),
            (56.0, 14.0), (56.0, 16.0), (56.0, 18.0), (56.0, 20.0),
            (56.5, 14.0), (56.5, 16.0), (56.5, 18.0), (56.5, 20.0),
            (57.0, 17.0), (57.0, 19.0), (57.0, 21.0),
            (57.5, 18.0), (57.5, 20.0),
            (58.0, 17.0), (58.0, 19.0),
            # Gotland / Öland area
            (57.5, 16.0), (57.0, 18.5),
            # Gulf of Riga
            (57.5, 22.0), (57.5, 24.0), (58.0, 23.0),
            # Estonia / Gulf of Finland
            (59.0, 21.0), (59.0, 23.0), (59.0, 24.5),
            (59.5, 22.0), (59.5, 23.5), (59.5, 25.0), (59.5, 27.0),
            (60.0, 24.0), (60.0, 25.5), (60.0, 27.0), (60.0, 29.0),
            (60.5, 25.0), (60.5, 27.0), (60.5, 29.0),
            # Stockholm area
            (59.0, 18.0), (59.5, 18.5),
            # Åland Sea
            (60.0, 19.5), (60.0, 21.0),
            # Gulf of Bothnia
            (61.0, 18.0), (61.0, 20.0), (61.0, 22.0),
            (62.0, 18.0), (62.0, 20.0), (62.0, 22.0),
            (63.0, 18.5), (63.0, 20.5),
            (64.0, 20.0), (64.0, 22.0),
            (65.0, 22.0), (65.0, 24.0),
            (65.5, 23.5),
            # St. Petersburg approach
            (59.5, 28.5), (60.0, 28.5),
        ],
    },
    'blacksea': {
        'name': 'Black Sea',
        'zoom': 10,
        'grid': [
            # Bosphorus / Istanbul approach
            (41.0, 29.0), (41.5, 29.5), (41.0, 28.0),
            # West Black Sea (Bulgaria, Romania)
            (43.0, 28.0), (43.5, 28.5), (42.5, 28.0),
            (44.0, 29.0), (44.5, 29.5),
            # Constanta / Odessa area
            (44.0, 30.0), (44.5, 30.5), (45.0, 30.0),
            (45.5, 30.0), (45.5, 31.0), (46.0, 31.0), (46.5, 31.5),
            # Central Black Sea
            (43.0, 31.0), (43.0, 33.0), (43.0, 35.0),
            (44.0, 32.0), (44.0, 34.0), (44.0, 36.0),
            # Turkey north coast
            (42.0, 33.0), (42.0, 35.0), (42.0, 37.0),
            (41.5, 31.0), (41.5, 33.0), (41.5, 36.0),
            (41.0, 37.0), (41.0, 39.0), (41.0, 41.0),
            # Trabzon / Batumi area
            (41.5, 40.0), (41.5, 41.5), (42.0, 41.0),
            # Samsun area
            (41.5, 36.0), (41.0, 35.5),
            # East Black Sea / Georgia
            (42.0, 39.0), (42.0, 41.5),
            (42.5, 37.0), (43.0, 38.0), (43.0, 40.0),
            # Novorossiysk / Tuapse
            (44.5, 37.5), (44.0, 38.0), (44.5, 38.5),
            # Crimea / Sevastopol
            (44.5, 33.0), (44.5, 34.0), (45.0, 33.5),
            # Sea of Azov
            (46.0, 37.0), (46.5, 38.0), (47.0, 38.5),
            (45.5, 36.0), (46.0, 39.0),
        ],
    },
    'redsea': {
        'name': 'Red Sea + Suez',
        'zoom': 10,
        'grid': [
            # Suez Canal
            (30.5, 32.5), (30.0, 32.5), (31.0, 32.5),
            # Gulf of Suez
            (29.5, 32.5), (29.0, 33.0), (28.5, 33.5), (28.0, 33.5),
            (27.5, 34.0),
            # North Red Sea
            (27.0, 34.5), (26.5, 35.0), (26.0, 35.5), (25.5, 36.0),
            (25.0, 36.5),
            # Central Red Sea
            (24.0, 37.0), (23.0, 37.5), (22.0, 38.0),
            (21.0, 38.5), (20.0, 39.0), (19.0, 39.5),
            # South Red Sea
            (18.0, 40.0), (17.0, 40.5), (16.0, 41.5),
            (15.0, 42.0), (14.0, 42.5),
            # Bab el-Mandeb / Djibouti
            (13.0, 43.0), (12.5, 43.5), (12.0, 44.0),
            (11.5, 43.0),
            # Gulf of Aden
            (12.0, 45.0), (12.5, 46.0), (12.0, 47.0),
            (13.0, 48.0), (12.5, 49.0), (12.0, 50.0),
            # Jeddah area
            (21.5, 39.0), (22.0, 39.0),
        ],
    },
    'persian': {
        'name': 'Persian Gulf',
        'zoom': 10,
        'grid': [
            # Strait of Hormuz
            (26.0, 56.0), (26.5, 56.5), (26.0, 57.0),
            (25.5, 56.5), (25.0, 57.0),
            # UAE coast (Fujairah, Dubai, Abu Dhabi)
            (25.0, 55.0), (25.5, 55.5), (25.0, 54.0),
            (24.5, 54.5), (24.5, 53.5), (24.0, 53.0),
            # Qatar / Bahrain
            (26.0, 51.0), (26.0, 50.5), (25.5, 51.5),
            (25.5, 50.5),
            # Kuwait
            (29.0, 48.5), (29.5, 48.5), (29.0, 49.0),
            # Iraq (Basra)
            (29.5, 48.0), (30.0, 48.5),
            # Iran coast
            (27.0, 52.0), (27.5, 52.5), (27.0, 53.5),
            (28.0, 51.0), (28.5, 50.5),
            # Central Gulf
            (26.5, 52.0), (26.5, 54.0),
            (27.0, 50.0), (27.0, 51.0),
            # Saudi coast
            (26.5, 50.0), (26.0, 49.5),
            (28.0, 49.0),
            # Oman Sea / Muscat
            (23.5, 58.5), (24.0, 57.5),
            (23.0, 58.0),
        ],
    },
}
|
||
|
|
|
||
|
|
# Include ALL cargo types, not just bulk.
# SHIPTYPE codes that render as "green" (cargo) on the MT map.
ALL_CARGO_TYPES = {'4', '5', '6', '7', '8', '9'}  # all that appear as "green"
MIN_DWT = 1000  # minimum DWT for any cargo vessel

# GT_SHIPTYPE values for GREEN (cargo) vessels on MT map.
# Used by the SQL filters in phase_mmsi / phase_ownership.
# NOTE(review): is_cargo() accepts a different GT set ('6','9','11','12','17','88')
# than this tuple — vessels inserted via those extra codes may never be picked
# up by the later phases; confirm which list is authoritative.
GREEN_GT_TYPES = ('6', '9', '11', '12', '70', '122', '139')  # bulk, cargo, container, roro, multipurpose
|
||
|
|
|
||
|
|
|
||
|
|
def totp(secret):
|
||
|
|
s = secret.upper().replace(' ', '')
|
||
|
|
pad = (-len(s)) % 8
|
||
|
|
key = base64.b32decode(s + '=' * pad)
|
||
|
|
counter = int(time.time()) // 30
|
||
|
|
msg = struct.pack('>Q', counter)
|
||
|
|
h = hmac.new(key, msg, hashlib.sha1).digest()
|
||
|
|
offset = h[-1] & 0x0f
|
||
|
|
code = struct.unpack('>I', h[offset:offset+4])[0] & 0x7fffffff
|
||
|
|
return str(code % 1000000).zfill(6)
|
||
|
|
|
||
|
|
|
||
|
|
def ts():
    """Current wall-clock time as an HH:MM:SS string for log lines."""
    return f"{datetime.now():%H:%M:%S}"
|
||
|
|
|
||
|
|
|
||
|
|
async def do_login(page, max_retries=2):
    """Log into MarineTraffic via the Kpler auth flow, handling TOTP 2FA.

    Args:
        page: Playwright page used for the whole session.
        max_retries: full login attempts before giving up.

    Returns:
        True on success, False after all retries fail.
    """
    for attempt in range(max_retries):
        print(f"[{ts()}] LOGIN (attempt {attempt+1}/{max_retries})...")
        await page.goto('https://www.marinetraffic.com/en/users/login',
                        wait_until='domcontentloaded', timeout=30000)
        await asyncio.sleep(3)
        # Two-step form: username first, then password on the next screen.
        await page.fill('input[name="username"]', EMAIL)
        await page.click('button[type="submit"]')
        await asyncio.sleep(3)
        await page.fill('input[type="password"]', PASSWORD)
        await page.click('button[type="submit"]')
        await asyncio.sleep(4)

        # Handle 2FA - might be method selection or direct OTP entry
        if 'mfa' in page.url.lower() or 'auth.kpler' in page.url:
            # Try clicking Google Authenticator if method selection screen
            try:
                await page.click('button:has-text("Google Authenticator")', timeout=3000)
                await asyncio.sleep(2)
            except Exception:
                pass  # Already on OTP input screen

            # Wait for OTP field, then generate fresh TOTP right before typing
            # (codes rotate every 30s, so minimize the fill delay).
            await asyncio.sleep(1)
            otp = totp(TOTP_SECRET)  # Generate as close to fill as possible
            print(f" TOTP: {otp}")
            try:
                await page.fill('input[name="code"]', otp)
                await page.click('button[type="submit"]')
            except Exception:
                # Try alternative selectors
                try:
                    await page.fill('input[type="text"]', otp)
                    await page.click('button[type="submit"]')
                except Exception as e:
                    print(f" 2FA fill error: {e}")

            await asyncio.sleep(8)  # Wait longer for redirect after 2FA

        # Success heuristic: redirected back to marinetraffic.com and off
        # the auth.kpler domain.
        ok = 'marinetraffic.com' in page.url and 'auth.kpler' not in page.url
        if ok:
            print(f"[{ts()}] Login: OK | {page.url[:80]}")
            return True
        else:
            print(f"[{ts()}] Login attempt {attempt+1} failed | {page.url[:80]}")
            if attempt < max_retries - 1:
                print(f" Retrying in 5s...")
                await asyncio.sleep(5)

    print(f"[{ts()}] Login: FAILED after {max_retries} attempts")
    return False
|
||
|
|
|
||
|
|
|
||
|
|
def is_cargo(row):
    """True when a tile row looks like a cargo/bulk vessel worth collecting.

    A row qualifies if its DWT meets the floor AND either its SHIPTYPE is a
    "green" cargo code, or (fallback) its GT_SHIPTYPE is a bulk/cargo code.
    """
    tonnage = int(row.get('DWT', 0) or 0)
    if tonnage < MIN_DWT:
        return False
    # Primary: any cargo SHIPTYPE (green on the map).
    if str(row.get('SHIPTYPE', '') or '') in ALL_CARGO_TYPES:
        return True
    # Fallback: GT_SHIPTYPE bulk/cargo codes when SHIPTYPE is unhelpful.
    return str(row.get('GT_SHIPTYPE', '') or '') in ('6', '9', '11', '12', '17', '88')
|
||
|
|
|
||
|
|
|
||
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
||
|
|
# PHASE 1: Tile Scraping
|
||
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
||
|
|
|
||
|
|
async def phase_tiles(page, conn, region_key, region):
    """Phase 1: drive the map across the region grid and harvest tile data.

    Listens for MT's tile JSON responses while navigating to each grid
    point, filters cargo vessels via is_cargo(), and upserts them into
    mt_bulk_staging. Returns (new_vessel_count, conn) — conn may have been
    replaced by a reconnect.
    """
    zoom = region['zoom']
    grid = region['grid']
    print(f"\n[{ts()}] === TILES: {region['name']} ({len(grid)} points, zoom={zoom}) ===")

    captured_tiles = []      # rows captured by the response listener per grid point
    new_vessels = 0          # total new inserts across the whole region
    existing_ship_ids = set()

    # Preload every known ship_id so we only insert genuinely new vessels.
    cur = conn.cursor()
    cur.execute("SELECT ship_id FROM mt_bulk_staging")
    for row in cur.fetchall():
        existing_ship_ids.add(str(row[0]))

    async def on_tile(response):
        # Capture only the map's vessel-tile JSON endpoint.
        if 'getData/get_data_json_4' not in response.url:
            return
        try:
            body = await response.body()
            parsed = json.loads(body)
            rows = parsed.get('data', {}).get('rows', [])
            captured_tiles.extend(rows)
        except Exception:
            # Best-effort: a malformed/aborted response just yields no rows.
            pass

    page.on('response', on_tile)

    for i, (lat, lon) in enumerate(grid):
        captured_tiles.clear()  # fresh capture buffer for this grid point
        url = f'https://www.marinetraffic.com/en/ais/home/centerx:{lon}/centery:{lat}/zoom:{zoom}'
        print(f" [{i+1}/{len(grid)}] ({lat:.1f},{lon:.1f})", end=' ', flush=True)

        try:
            await page.goto(url, wait_until='load', timeout=35000)
        except Exception as e:
            # One retry with a weaker wait condition before skipping the point.
            print(f"ERROR: {e}")
            await asyncio.sleep(5)
            try:
                await page.goto(url, wait_until='domcontentloaded', timeout=35000)
            except Exception:
                print(f" SKIP")
                continue

        await asyncio.sleep(10)  # Wait for tiles to load

        area_new = 0
        for row in captured_tiles:
            if not is_cargo(row):
                continue
            sid = str(row.get('SHIP_ID', ''))
            if not sid or sid in existing_ship_ids:
                continue

            # Insert into mt_bulk_staging; on conflict refresh the position.
            try:
                conn, cur = db_safe_execute(conn, cur, """
                    INSERT INTO mt_bulk_staging
                    (ship_id, name, flag, dwt, shiptype, gt_shiptype,
                     lat, lon, speed, course, destination, scraped_at)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())
                    ON CONFLICT (ship_id) DO UPDATE SET
                        lat = EXCLUDED.lat, lon = EXCLUDED.lon,
                        speed = EXCLUDED.speed, course = EXCLUDED.course,
                        scraped_at = NOW()
                """, (
                    sid,
                    row.get('SHIPNAME', ''),
                    row.get('FLAG', ''),
                    int(row.get('DWT', 0) or 0),
                    str(row.get('SHIPTYPE', '')),
                    str(row.get('GT_SHIPTYPE', '')),
                    row.get('LAT'),
                    row.get('LON'),
                    row.get('SPEED'),
                    row.get('COURSE'),
                    row.get('DESTINATION', ''),
                ))
                existing_ship_ids.add(sid)
                area_new += 1
                new_vessels += 1
            except Exception as e:
                # Roll back the failed statement and keep going with the
                # remaining rows (best-effort insert loop).
                try: conn.rollback()
                except: pass

        # Commit once per grid point; reconnect-safe.
        conn = db_safe_commit(conn)
        cur = conn.cursor()
        print(f"tiles={len(captured_tiles):4d} new={area_new:3d} total_new={new_vessels}")
        await asyncio.sleep(2)

    page.remove_listener('response', on_tile)
    print(f"[{ts()}] TILES DONE: {new_vessels} new vessels added for {region['name']}")
    return new_vessels, conn
|
||
|
|
|
||
|
|
|
||
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
||
|
|
# PHASE 2: MMSI Lookup (via quicksearch_shipid)
|
||
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
||
|
|
|
||
|
|
async def phase_mmsi(page, conn, region_key, region):
    """Phase 2: fill in MMSI (and IMO) for region vessels that lack one.

    Queries staging rows inside the region's bounding box (grid extremes
    padded by 5°), then hits MT's reports endpoint per ship_id from inside
    the logged-in page context. Returns (found_count, conn).
    """
    print(f"\n[{ts()}] === MMSI: {region['name']} ===")

    cur = conn.cursor()
    # Get region bounds from grid points (±5° padding around the extremes).
    lats = [p[0] for p in region['grid']]
    lons = [p[1] for p in region['grid']]
    lat_min, lat_max = min(lats) - 5, max(lats) + 5
    lon_min, lon_max = min(lons) - 5, max(lons) + 5

    # GREEN_GT_TYPES values are fixed constants, so inlining them into the
    # SQL is injection-safe; bounds still go through bind parameters.
    gt_filter = ','.join(f"'{g}'" for g in GREEN_GT_TYPES)
    cur.execute(f"""
        SELECT ship_id, name, gt_shiptype, dwt, flag
        FROM mt_bulk_staging
        WHERE mmsi IS NULL
          AND lat BETWEEN %s AND %s
          AND lon BETWEEN %s AND %s
          AND gt_shiptype IN ({gt_filter})
        ORDER BY CASE WHEN gt_shiptype='6' THEN 0 ELSE 1 END, dwt DESC NULLS LAST
    """, (lat_min, lat_max, lon_min, lon_max))
    vessels = cur.fetchall()

    if not vessels:
        print(f" No vessels without MMSI in region")
        return 0, conn

    print(f" Vessels to process: {len(vessels)}")
    cols = 'flag,shipname,imo,ship_type,speed'
    found = 0

    for i, (ship_id, name, gt, dwt, flag) in enumerate(vessels):
        url = (f'https://www.marinetraffic.com/en/reports/?asset_type=vessels'
               f'&columns={cols}&quicksearch_shipid={ship_id}')
        # Fetch from inside the page so session cookies apply; returns up
        # to 3 candidate rows, or [] on any error.
        js = f"""
        async () => {{
            try {{
                const r = await fetch({json.dumps(url)}, {{
                    credentials: 'include', cache: 'no-store',
                    headers: {{
                        'X-Requested-With': 'XMLHttpRequest',
                        'Accept': 'application/json',
                        'Referer': 'https://www.marinetraffic.com/en/data/?asset_type=vessels',
                    }}
                }});
                const text = await r.text();
                const d = JSON.parse(text);
                return (d.data || []).slice(0, 3);
            }} catch(e) {{ return []; }}
        }}
        """
        try:
            rows = await page.evaluate(js)
        except Exception:
            rows = []

        mmsi = None
        imo = None
        if rows:
            row = rows[0]
            # Fuzzy name match: exact = 3, 10-char prefix containment = 2.
            # Accept on match >= 2, or when the search was unambiguous.
            api_name = (row.get('SHIPNAME') or '').upper()
            our_name = (name or '').upper()
            match = 0
            if api_name and our_name:
                if api_name == our_name:
                    match = 3
                elif our_name[:10] in api_name or api_name[:10] in our_name:
                    match = 2
            if match >= 2 or len(rows) == 1:
                # NOTE(review): if the API returns an explicit null here,
                # str(None) yields the string 'None' — verify upstream payloads.
                mmsi = str(row.get('MMSI', '')) or None
                imo = str(row.get('IMO', '')) or None

        if mmsi:
            found += 1
            conn, cur = db_safe_execute(conn, cur, """
                UPDATE mt_bulk_staging
                SET mmsi = %s, imo = COALESCE(%s, imo), scraped_at = NOW()
                WHERE ship_id = %s
            """, (mmsi, imo, ship_id))

        # Log the first 10 and then every 50th vessel.
        if i < 10 or (i + 1) % 50 == 0:
            status = f"MMSI={mmsi}" if mmsi else "no match"
            print(f" [{i+1}/{len(vessels)}] {name} -> {status}")

        # Commit every 200 vessels to bound loss on a tunnel drop.
        if (i + 1) % 200 == 0:
            conn = db_safe_commit(conn)
            cur = conn.cursor()
            print(f" --- COMMIT [{i+1}/{len(vessels)}] found={found} ---")

        await asyncio.sleep(0.8)  # gentle rate limit on the reports API

    conn = db_safe_commit(conn)
    cur = conn.cursor()
    print(f"[{ts()}] MMSI DONE: {found}/{len(vessels)} found for {region['name']}")
    return found, conn
|
||
|
|
|
||
|
|
|
||
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
||
|
|
# PHASE 3: Ownership Fetch
|
||
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
||
|
|
|
||
|
|
async def phase_ownership(page, conn, region_key, region):
    """Phase 3: fetch ownership fields for region vessels with MMSI but no owner.

    Same bounding-box selection and in-page fetch pattern as phase_mmsi,
    but requesting OWNERSHIP_COLS and updating owner/operator/registered
    owner. Returns (found_count, conn).
    """
    print(f"\n[{ts()}] === OWNERSHIP: {region['name']} ===")

    cur = conn.cursor()
    # Region bounding box: grid extremes padded by 5°.
    lats = [p[0] for p in region['grid']]
    lons = [p[1] for p in region['grid']]
    lat_min, lat_max = min(lats) - 5, max(lats) + 5
    lon_min, lon_max = min(lons) - 5, max(lons) + 5

    # Constant GT codes inlined (injection-safe); bounds via bind params.
    gt_filter = ','.join(f"'{g}'" for g in GREEN_GT_TYPES)
    cur.execute(f"""
        SELECT ship_id, name, gt_shiptype, dwt, flag, mmsi, imo
        FROM mt_bulk_staging
        WHERE mmsi IS NOT NULL AND owner IS NULL
          AND lat BETWEEN %s AND %s
          AND lon BETWEEN %s AND %s
          AND gt_shiptype IN ({gt_filter})
        ORDER BY CASE WHEN gt_shiptype='6' THEN 0 ELSE 1 END, dwt DESC NULLS LAST
    """, (lat_min, lat_max, lon_min, lon_max))
    vessels = cur.fetchall()

    if not vessels:
        print(f" No vessels needing ownership in region")
        return 0, conn

    print(f" Vessels to process: {len(vessels)}")
    found = 0

    for i, (ship_id, name, gt, dwt, flag, mmsi, imo) in enumerate(vessels):
        url = (f'https://www.marinetraffic.com/en/reports/?asset_type=vessels'
               f'&columns={OWNERSHIP_COLS}&quicksearch_shipid={ship_id}')
        # In-page fetch so session cookies apply; first row or null.
        js = f"""
        async () => {{
            try {{
                const r = await fetch({json.dumps(url)}, {{
                    credentials: 'include', cache: 'no-store',
                    headers: {{
                        'X-Requested-With': 'XMLHttpRequest',
                        'Accept': 'application/json',
                        'Referer': 'https://www.marinetraffic.com/en/data/?asset_type=vessels',
                    }}
                }});
                const text = await r.text();
                const d = JSON.parse(text);
                return (d.data || []).length > 0 ? (d.data[0]) : null;
            }} catch(e) {{ return null; }}
        }}
        """
        try:
            row = await page.evaluate(js)
        except Exception:
            row = None

        if row:
            # Prefer beneficial owner; fall back to registered owner.
            owner = row.get('BENEFICIAL_OWNER') or row.get('REGISTERED_OWNER') or None
            reg_owner = row.get('REGISTERED_OWNER') or None
            operator = row.get('OPERATOR') or None
            row_imo = row.get('IMO') or None

            if owner or operator:
                found += 1
                # COALESCE keeps any previously-stored value when the API
                # returned null for a field; IMO '0' is treated as missing.
                conn, cur = db_safe_execute(conn, cur, """
                    UPDATE mt_bulk_staging
                    SET owner = COALESCE(%s, owner),
                        operator = COALESCE(%s, operator),
                        registered_owner = COALESCE(%s, registered_owner),
                        imo = COALESCE(%s, imo),
                        scraped_at = NOW()
                    WHERE ship_id = %s
                """, (owner, operator, reg_owner,
                      str(row_imo) if row_imo and str(row_imo) != '0' else imo,
                      ship_id))

        # Log the first 10 and then every 50th vessel.
        if i < 10 or (i + 1) % 50 == 0:
            status = f"{owner[:35] if owner else '?'}" if row else "no data"
            print(f" [{i+1}/{len(vessels)}] {name} -> {status}")

        # Periodic commit to bound loss on a tunnel drop.
        if (i + 1) % 200 == 0:
            conn = db_safe_commit(conn)
            cur = conn.cursor()
            print(f" --- COMMIT [{i+1}/{len(vessels)}] found={found} ---")

        await asyncio.sleep(0.5)  # gentle rate limit on the reports API

    conn = db_safe_commit(conn)
    cur = conn.cursor()
    print(f"[{ts()}] OWNERSHIP DONE: {found}/{len(vessels)} for {region['name']}")
    return found, conn
|
||
|
|
|
||
|
|
|
||
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
||
|
|
# MAIN
|
||
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
||
|
|
|
||
|
|
async def main():
    """Entry point: parse CLI args, connect to DB, log in, run phases per region."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--region', required=True,
                        choices=list(REGIONS.keys()) + ['all'],
                        help='Region to scrape')
    parser.add_argument('--phase', choices=['tiles', 'mmsi', 'owner', 'all'],
                        default='all', help='Which phase(s) to run')
    parser.add_argument('--zoom', type=int, default=0,
                        help='Override zoom level for tiles phase')
    args = parser.parse_args()

    # Determine regions to process
    if args.region == 'all':
        regions_to_do = ['caspian', 'med', 'baltic', 'blacksea', 'redsea', 'persian']
    else:
        regions_to_do = [args.region]

    # DB connection (smoke-tested with a count).
    # NOTE(review): connects directly instead of via db_connect(), so no TCP
    # keepalives on this initial connection — consider using db_connect().
    try:
        conn = psycopg2.connect(DB_URL, connect_timeout=10)
        cur = conn.cursor()
        cur.execute('SELECT count(*) FROM mt_bulk_staging')
        total = cur.fetchone()[0]
        print(f"DB connected. mt_bulk_staging: {total} vessels")
    except Exception as e:
        print(f"DB ERROR: {e}")
        print("Is SSH tunnel running?")
        return

    # Ensure registered_owner / year_built columns exist (idempotent migration).
    try:
        cur.execute("ALTER TABLE mt_bulk_staging ADD COLUMN IF NOT EXISTS registered_owner TEXT")
        cur.execute("ALTER TABLE mt_bulk_staging ADD COLUMN IF NOT EXISTS year_built INTEGER")
        conn.commit()
    except Exception:
        conn.rollback()

    # Imported lazily so DB errors above fail fast without needing Playwright.
    from playwright.async_api import async_playwright

    async with async_playwright() as p:
        # Headful + automation flag disabled to reduce bot detection.
        browser = await p.chromium.launch(
            headless=False,
            args=['--no-sandbox', '--disable-blink-features=AutomationControlled']
        )
        context = await browser.new_context(
            viewport={'width': 1440, 'height': 900},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                       '(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
        )
        page = await context.new_page()

        if not await do_login(page):
            print("Login FAILED!")
            await browser.close()
            conn.close()
            return

        await asyncio.sleep(3)

        # Warm up data page for reports API (sets cookies/session state the
        # in-page fetches in phase_mmsi / phase_ownership rely on).
        if args.phase in ('mmsi', 'owner', 'all'):
            await page.goto('https://www.marinetraffic.com/en/data/?asset_type=vessels',
                            wait_until='load', timeout=40000)
            await asyncio.sleep(5)

        # Process each region
        for region_key in regions_to_do:
            region = REGIONS[region_key]
            if args.zoom:
                # Shallow copy so the zoom override never mutates REGIONS.
                region = {**region, 'zoom': args.zoom}

            print(f"\n{'='*60}")
            print(f"REGION: {region['name']} ({len(region['grid'])} grid points)")
            print(f"{'='*60}")

            t0 = time.time()

            if args.phase in ('tiles', 'all'):
                new_tiles, conn = await phase_tiles(page, conn, region_key, region)

                # After tiles, warm up data page for MMSI/ownership API
                if args.phase == 'all' and new_tiles > 0:
                    await page.goto('https://www.marinetraffic.com/en/data/?asset_type=vessels',
                                    wait_until='load', timeout=40000)
                    await asyncio.sleep(5)

            if args.phase in ('mmsi', 'all'):
                _, conn = await phase_mmsi(page, conn, region_key, region)

            if args.phase in ('owner', 'all'):
                _, conn = await phase_ownership(page, conn, region_key, region)

            elapsed = time.time() - t0

            # Region summary over the same padded bounding box the phases use.
            cur = conn.cursor()
            lats = [p[0] for p in region['grid']]
            lons = [p[1] for p in region['grid']]
            lat_min, lat_max = min(lats) - 5, max(lats) + 5
            lon_min, lon_max = min(lons) - 5, max(lons) + 5
            cur.execute("""
                SELECT count(*), count(mmsi), count(owner)
                FROM mt_bulk_staging
                WHERE lat BETWEEN %s AND %s AND lon BETWEEN %s AND %s
            """, (lat_min, lat_max, lon_min, lon_max))
            t, m, o = cur.fetchone()
            print(f"\n>>> {region['name']} COMPLETE in {elapsed/60:.1f} min")
            print(f" Vessels: {t}, with MMSI: {m}, with owner: {o}")

        # Final global summary ('6' = bulk carriers per GT_SHIPTYPE).
        cur = conn.cursor()
        cur.execute("SELECT count(*), count(mmsi), count(owner) FROM mt_bulk_staging")
        t, m, o = cur.fetchone()
        cur.execute("SELECT count(*), count(mmsi), count(owner) FROM mt_bulk_staging WHERE gt_shiptype='6'")
        tb, mb, ob = cur.fetchone()
        print(f"\n{'='*60}")
        print(f"GLOBAL: total={t} mmsi={m} owner={o}")
        print(f"BULK: total={tb} mmsi={mb} owner={ob}")
        print(f"{'='*60}")

        conn.close()
        await browser.close()
        print("Done!")
|
||
|
|
|
||
|
|
|
||
|
|
# Guard the entry point so importing this module never launches the scraper.
if __name__ == '__main__':
    asyncio.run(main())
|