334 lines
14 KiB
Python
334 lines
14 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
MarineTraffic / Kpler Cookie Extractor via Playwright
|
||
|
|
=====================================================
|
||
|
|
MT now uses Kpler Auth0 login (auth.kpler.com).
|
||
|
|
Logs in using real Chromium, extracts session cookies → mt_cookies.json.
|
||
|
|
The bulk_carrier_scraper.py uses these cookies to scrape with auth.
|
||
|
|
|
||
|
|
Flow: marinetraffic.com/login → redirect → auth.kpler.com Auth0 ULP
|
||
|
|
→ email → Continue → password → Continue → [2FA OTP] → done
|
||
|
|
|
||
|
|
Usage:
|
||
|
|
python mt_cookie_extractor.py --login EMAIL --password PASS [--totp-secret SECRET]
|
||
|
|
python mt_cookie_extractor.py --login EMAIL --password PASS --headed # see browser
|
||
|
|
"""
|
||
|
|
import argparse
|
||
|
|
import json
|
||
|
|
import os
|
||
|
|
import sys
|
||
|
|
import time
|
||
|
|
import logging
|
||
|
|
|
||
|
|
# Send INFO-and-above records to the root handler as "<time> <level> <message>".
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)
# Logger shared by every function in this script.
logger = logging.getLogger('mt_cookies')

# Default path of the JSON file the extracted cookies are written to
# (used as the default for the --output option).
COOKIES_FILE = 'mt_cookies.json'
|
||
|
|
|
||
|
|
|
||
|
|
def _generate_totp(secret: str) -> str:
|
||
|
|
"""Generate TOTP 6-digit code (RFC 6238, no external deps)."""
|
||
|
|
import hmac, hashlib, struct, base64
|
||
|
|
secret_clean = secret.upper().replace(' ', '')
|
||
|
|
pad = (-len(secret_clean)) % 8
|
||
|
|
key = base64.b32decode(secret_clean + '=' * pad)
|
||
|
|
counter = int(time.time()) // 30
|
||
|
|
msg = struct.pack('>Q', counter)
|
||
|
|
h = hmac.new(key, msg, hashlib.sha1).digest()
|
||
|
|
offset = h[-1] & 0x0f
|
||
|
|
code = struct.unpack('>I', h[offset:offset + 4])[0] & 0x7fffffff
|
||
|
|
return str(code % 1000000).zfill(6)
|
||
|
|
|
||
|
|
|
||
|
|
def _first_matching_selector(page, selectors):
    """Return the first selector in *selectors* that matches an element, else None.

    A lookup error for one selector is logged at DEBUG level and treated as
    "no match" so the remaining candidates are still tried.
    """
    for sel in selectors:
        try:
            if page.locator(sel).count() > 0:
                return sel
        except Exception as exc:
            logger.debug(f"Selector {sel!r} lookup failed: {exc}")
    return None


def _click_first_matching(page, selectors):
    """Click the first selector that matches and return it, or None if nothing was clicked.

    A selector whose lookup or click raises is skipped (logged at DEBUG
    level) and the next candidate is tried.
    """
    for sel in selectors:
        try:
            if page.locator(sel).count() > 0:
                page.click(sel)
                return sel
        except Exception as exc:
            logger.debug(f"Click on {sel!r} failed: {exc}")
    return None


def _is_post_login_url(url):
    """Return True when *url* is on marinetraffic.com and outside the login/auth flow."""
    return (
        'marinetraffic.com' in url and
        'login' not in url and
        'auth.kpler.com' not in url
    )


def extract_cookies(email: str, password: str, totp_secret: str = None,
                    headed: bool = False) -> list:
    """Login to MT Pro via Kpler Auth0 and extract session cookies.

    Drives a Chromium instance through the identifier-first login
    (email -> password -> optional TOTP challenge) and then reads the
    cookies from the browser context.

    Args:
        email: Account email typed into the login form.
        password: Account password.
        totp_secret: Base32 TOTP secret, needed only when a 2FA challenge
            is shown. If a challenge is detected and this is empty, the
            function gives up and returns an empty list.
        headed: Show the browser window instead of running headless.

    Returns:
        The context cookies whose domain contains 'marinetraffic', 'kpler'
        or 'auth0'. Cookies are returned even when the login could not be
        confirmed (a warning is logged). An empty list is returned when the
        email field is not found, when 2FA is required without a secret, or
        when any step inside the login flow raises.

    Note:
        Errors while launching the browser or creating the context/page
        happen before the guarded section and propagate to the caller.
        Debug screenshots (mt_*_debug.png) are written to the working
        directory at several points.
    """
    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        logger.info(f"Launching Chromium (headless={not headed})...")
        browser = p.chromium.launch(
            headless=not headed,
            args=['--no-sandbox', '--disable-blink-features=AutomationControlled'],
        )
        context = browser.new_context(
            viewport={'width': 1280, 'height': 800},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                       '(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
        )
        page = context.new_page()

        try:
            # Step 1: Go to MT login → auto-redirects to auth.kpler.com
            logger.info("Navigating to MT login (will redirect to Kpler Auth0)...")
            page.goto('https://www.marinetraffic.com/en/users/login',
                      wait_until='domcontentloaded', timeout=30000)
            time.sleep(3)
            logger.info(f"Current URL: {page.url}")

            # If still on MT (not redirected), click login button
            if 'marinetraffic.com' in page.url and 'auth.kpler.com' not in page.url:
                logger.info("Didn't redirect automatically — looking for login button...")
                if _click_first_matching(page, [
                    'a[href*="login"]', 'button:has-text("Log")', 'a:has-text("Log in")',
                ]):
                    time.sleep(3)

            logger.info(f"Auth page URL: {page.url}")

            # Step 2: Auth0 identifier-first — enter email
            logger.info(f"Entering email: {email}")
            email_field = _first_matching_selector(page, [
                'input[name="username"]', 'input[type="email"]',
                'input[name="email"]', 'input[placeholder*="email" i]',
                'input[placeholder*="Email" ]',
            ])

            if not email_field:
                logger.error(f"Email field not found on: {page.url}")
                page.screenshot(path='mt_login_debug.png')
                logger.info("Screenshot saved: mt_login_debug.png")
                return []

            page.fill(email_field, email)
            time.sleep(0.5)

            # Click Continue / Next / Submit
            clicked = _click_first_matching(page, [
                'button[type="submit"]', 'button[value="default"]',
                'button:has-text("Continue")', 'button:has-text("Next")',
                'input[type="submit"]',
            ])
            if clicked:
                logger.info(f"Clicked: {clicked}")

            time.sleep(3)
            logger.info(f"After email step URL: {page.url}")

            # Step 3: Enter password (Auth0 shows password field on next screen)
            logger.info("Entering password...")
            pw_field = _first_matching_selector(
                page, ['input[type="password"]', 'input[name="password"]'])

            if pw_field:
                page.fill(pw_field, password)
                time.sleep(0.5)
                # Submit password
                clicked = _click_first_matching(page, [
                    'button[type="submit"]', 'button[value="default"]',
                    'button:has-text("Continue")', 'button:has-text("Log in")',
                    'button:has-text("Sign in")',
                ])
                if clicked:
                    logger.info(f"Submitted password via: {clicked}")
                time.sleep(4)
            else:
                logger.warning("Password field not found — trying combined email+pw form")
                # Fallback: maybe both fields are on same page. page.fill()
                # waits for the selector, so this also covers a password
                # field that renders late.
                try:
                    page.fill('input[type="password"]', password)
                    page.press('input[type="password"]', 'Enter')
                except Exception as exc:
                    logger.debug(f"Password fallback failed: {exc}")
                time.sleep(4)

            logger.info(f"After password step URL: {page.url}")

            # Step 4: Check for 2FA / MFA.
            # The keyword scan is a loose substring match ('otp', 'mfa', ...),
            # so only apply it while still inside the login/auth flow. Once
            # the browser is back on marinetraffic.com an accidental hit on
            # the landing page would abort a successful login or type the
            # code into an unrelated input.
            needs_2fa = False
            if not _is_post_login_url(page.url):
                page_lower = page.content().lower()
                needs_2fa = any(x in page_lower for x in [
                    'two-factor', 'two_factor', 'authenticator', 'verification code',
                    'one-time', 'otp', '2fa', 'security code', 'mfa', 'multi-factor',
                    'enter the code', '6-digit', 'authenticator app',
                ])
                # Also check URL
                if any(x in page.url for x in ['mfa', '2fa', 'otp', 'factor']):
                    needs_2fa = True

            if needs_2fa:
                logger.info("2FA/MFA challenge detected!")
                if not totp_secret:
                    logger.error("TOTP secret required for 2FA but not provided!")
                    return []

                page.screenshot(path='mt_2fa_debug.png')

                # Step 4a: mfa-login-options — select "Google Authenticator or similar"
                if 'mfa-login-options' in page.url:
                    logger.info("MFA method selection screen — clicking Google Authenticator...")
                    clicked = _click_first_matching(page, [
                        'a:has-text("Google Authenticator")',
                        'button:has-text("Google Authenticator")',
                        'li:has-text("Google Authenticator")',
                        '[data-action-button-secondary*="totp"]',
                        'a[href*="totp"]',
                        'a[href*="otp"]',
                    ])
                    if clicked:
                        logger.info(f"Clicked MFA option: {clicked}")
                        time.sleep(3)
                    else:
                        # Fallback: click the first list item (should be Authenticator)
                        try:
                            page.locator('ul li a, ul li button').first.click()
                            logger.info("Clicked first MFA option (fallback)")
                            time.sleep(3)
                        except Exception as exc:
                            logger.debug(f"MFA fallback click failed: {exc}")

                    logger.info(f"After MFA selection URL: {page.url}")

                # Step 4b: Enter TOTP code. The code itself is a live
                # credential, so it is deliberately kept out of the log.
                otp_code = _generate_totp(totp_secret)
                logger.info("Generated TOTP code")
                page.screenshot(path='mt_otp_debug.png')

                otp_field = _first_matching_selector(page, [
                    'input[name="code"]', 'input[name="otp"]',
                    'input[autocomplete="one-time-code"]', 'input[inputmode="numeric"]',
                    'input[type="text"]', 'input[type="number"]',
                    'input[placeholder*="code" i]', 'input[placeholder*="digit" i]',
                ])

                if otp_field:
                    logger.info(f"Filling OTP field: {otp_field}")
                    page.fill(otp_field, otp_code)
                    time.sleep(0.5)
                    clicked = _click_first_matching(page, [
                        'button[type="submit"]', 'button:has-text("Verify")',
                        'button:has-text("Continue")', 'button:has-text("Confirm")',
                        'button:has-text("Submit")',
                    ])
                    if clicked:
                        logger.info(f"Submitted OTP via: {clicked}")
                else:
                    logger.warning("OTP input field not found — typing code directly")
                    page.keyboard.type(otp_code)
                    page.keyboard.press('Enter')

                time.sleep(5)
                logger.info(f"After 2FA URL: {page.url}")

            # Step 5: Verify login — should be back on marinetraffic.com
            final_url = page.url
            is_logged_in = _is_post_login_url(final_url)
            if not is_logged_in:
                # Check mtGlobal.isLoggedIn JS variable
                try:
                    if page.evaluate('window.mtGlobal && window.mtGlobal.isLoggedIn'):
                        is_logged_in = True
                        logger.info("JS confirms: isLoggedIn = true")
                except Exception as exc:
                    logger.debug(f"isLoggedIn probe failed: {exc}")

            logger.info(f"Final URL: {final_url}")
            logger.info(f"Login success: {is_logged_in}")

            if not is_logged_in:
                page.screenshot(path='mt_login_debug.png')
                logger.warning("May not be logged in — extracting cookies anyway")

            # Step 6: Extract ALL cookies from both domains
            all_cookies = context.cookies()
            mt_cookies = [c for c in all_cookies
                          if any(d in c.get('domain', '')
                                 for d in ['marinetraffic', 'kpler', 'auth0'])]

            logger.info(f"Total cookies: {len(all_cookies)}, MT/Kpler cookies: {len(mt_cookies)}")
            # Log names only: cookie values are session credentials.
            for c in mt_cookies[:15]:
                logger.info(f"  {c['domain']}: {c['name']}")

            return mt_cookies

        except Exception as e:
            # logger.exception records the message together with the traceback.
            logger.exception(f"Playwright error: {e}")
            try:
                page.screenshot(path='mt_error_debug.png')
            except Exception as exc:
                logger.debug(f"Error screenshot failed: {exc}")
            return []
        finally:
            browser.close()
|
||
|
|
|
||
|
|
|
||
|
|
def main():
    """Command-line entry point: read credentials, run the login, save the cookies.

    --login, --password and --totp-secret default to the MT_LOGIN,
    MT_PASSWORD and MT_TOTP_SECRET environment variables. The extracted
    cookies are written as indented JSON to --output. The process exits
    with status 1 when login or password is missing, or when no cookies
    were extracted.
    """
    parser = argparse.ArgumentParser(description='MarineTraffic/Kpler Cookie Extractor')
    env = os.environ
    parser.add_argument('--login', default=env.get('MT_LOGIN', ''))
    parser.add_argument('--password', default=env.get('MT_PASSWORD', ''))
    parser.add_argument('--totp-secret', default=env.get('MT_TOTP_SECRET', ''))
    parser.add_argument('--headed', action='store_true', help='Show browser window')
    parser.add_argument('--output', default=COOKIES_FILE)
    opts = parser.parse_args()

    # Both credentials are mandatory, whichever source they came from.
    if not (opts.login and opts.password):
        print("ERROR: --login and --password required")
        sys.exit(1)

    # An empty TOTP secret is passed on as None ("no secret configured").
    secret = opts.totp_secret or None
    cookies = extract_cookies(
        email=opts.login,
        password=opts.password,
        totp_secret=secret,
        headed=opts.headed,
    )

    # Failure path first: nothing to write, report and exit non-zero.
    if not cookies:
        logger.error("No cookies extracted.")
        print("\nFAILED — try: python mt_cookie_extractor.py --headed (to see browser)")
        sys.exit(1)

    with open(opts.output, 'w') as out_file:
        json.dump(cookies, out_file, indent=2)
    logger.info(f"Saved {len(cookies)} cookies to {opts.output}")
    print(f"\nSUCCESS: {len(cookies)} cookies saved to {opts.output}")
|
||
|
|
|
||
|
|
|
||
|
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
|