#!/usr/bin/env python3
"""
MarineTraffic / Kpler Cookie Extractor via Playwright
=====================================================
MT now uses Kpler Auth0 login (auth.kpler.com).
Logs in using real Chromium, extracts session cookies → mt_cookies.json.
The bulk_carrier_scraper.py uses these cookies to scrape with auth.

Flow:
  marinetraffic.com/login → redirect → auth.kpler.com Auth0 ULP
  → email → Continue → password → Continue → [2FA OTP] → done

Usage:
  python mt_cookie_extractor.py --login EMAIL --password PASS [--totp-secret SECRET]
  python mt_cookie_extractor.py --login EMAIL --password PASS --headed  # see browser
"""

import argparse
import base64
import hashlib
import hmac
import json
import logging
import os
import struct
import sys
import time
from typing import Optional

logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
logger = logging.getLogger('mt_cookies')

COOKIES_FILE = 'mt_cookies.json'

# Substrings of a cookie's domain that mark it as belonging to the login flow.
_COOKIE_DOMAIN_MARKERS = ('marinetraffic', 'kpler', 'auth0')

# Substrings of the current URL that indicate an MFA screen.
_MFA_URL_MARKERS = ('mfa', '2fa', 'otp', 'factor')

# Lower-case substrings of the page HTML that indicate an MFA screen.
_MFA_TEXT_MARKERS = (
    'two-factor', 'two_factor', 'authenticator', 'verification code',
    'one-time', 'otp', '2fa', 'security code', 'mfa', 'multi-factor',
    'enter the code', '6-digit', 'authenticator app',
)


def _generate_totp(secret: str, timestamp: Optional[float] = None,
                   digits: int = 6, period: int = 30) -> str:
    """Generate an HMAC-SHA1 TOTP code (RFC 6238, no external deps).

    Args:
        secret: Base32-encoded shared secret. Case, whitespace, hyphens and
            trailing ``=`` padding are ignored.
        timestamp: Unix time to compute the code for; defaults to now.
        digits: Number of digits in the returned code.
        period: Length of one time step in seconds.

    Returns:
        The zero-padded decimal code as a string.

    Raises:
        ValueError: If the secret is empty after normalisation.
        binascii.Error: If the secret is not valid base32.
    """
    # Authenticator secrets are often displayed as "abcd efgh-ijkl ..."; keep
    # only the base32 alphabet characters before decoding.
    normalized = ''.join(
        ch for ch in secret.upper() if not ch.isspace() and ch != '-'
    ).rstrip('=')
    if not normalized:
        raise ValueError('TOTP secret is empty')
    # b32decode requires the input length to be a multiple of 8.
    key = base64.b32decode(normalized + '=' * (-len(normalized) % 8))

    now = time.time() if timestamp is None else timestamp
    counter = int(now) // period
    digest = hmac.new(key, struct.pack('>Q', counter), hashlib.sha1).digest()
    # Dynamic truncation: the low nibble of the last byte selects a 4-byte
    # window, whose top bit is masked off.
    offset = digest[-1] & 0x0F
    truncated = struct.unpack('>I', digest[offset:offset + 4])[0] & 0x7FFFFFFF
    return str(truncated % 10 ** digits).zfill(digits)


def _looks_logged_in(url: str) -> bool:
    """Return True when the URL is a MarineTraffic page that is not a login/auth page."""
    return (
        'marinetraffic.com' in url
        and 'login' not in url
        and 'auth.kpler.com' not in url
    )


def _needs_2fa(url: str, html: str) -> bool:
    """Decide whether the current page is an MFA challenge.

    URL markers win outright. The HTML keyword scan is skipped once the
    browser has already landed on MarineTraffic: the keywords are short
    ('otp', 'mfa', ...) and would otherwise match unrelated markup on the
    logged-in site, turning a successful login into a bogus 2FA attempt.
    """
    if any(marker in url for marker in _MFA_URL_MARKERS):
        return True
    if _looks_logged_in(url):
        return False
    lowered = html.lower()
    return any(marker in lowered for marker in _MFA_TEXT_MARKERS)


def _filter_auth_cookies(cookies: list) -> list:
    """Keep only cookies whose domain mentions MarineTraffic, Kpler or Auth0."""
    return [
        cookie for cookie in cookies
        if any(marker in cookie.get('domain', '') for marker in _COOKIE_DOMAIN_MARKERS)
    ]


def _first_present(page, selectors) -> Optional[str]:
    """Return the first selector that matches at least one element, else None."""
    for sel in selectors:
        try:
            if page.locator(sel).count() > 0:
                return sel
        except Exception as exc:  # best-effort probe: try the next selector
            logger.debug("Selector probe failed for %r: %s", sel, exc)
    return None


def _click_first(page, selectors) -> Optional[str]:
    """Click the first selector that is present and clickable; return it, else None."""
    for sel in selectors:
        try:
            if page.locator(sel).count() > 0:
                page.click(sel)
                return sel
        except Exception as exc:  # best-effort click: try the next selector
            logger.debug("Click failed for %r: %s", sel, exc)
    return None


def _complete_mfa_challenge(page, totp_secret: str) -> None:
    """Select the authenticator-app method if offered, then submit a TOTP code."""
    page.screenshot(path='mt_2fa_debug.png')

    # Step 4a: mfa-login-options — select "Google Authenticator or similar"
    if 'mfa-login-options' in page.url:
        logger.info("MFA method selection screen — clicking Google Authenticator...")
        clicked = _click_first(page, [
            'a:has-text("Google Authenticator")',
            'button:has-text("Google Authenticator")',
            'li:has-text("Google Authenticator")',
            '[data-action-button-secondary*="totp"]',
            'a[href*="totp"]',
            'a[href*="otp"]',
        ])
        if clicked:
            logger.info("Clicked MFA option: %s", clicked)
            time.sleep(3)
        else:
            # Fallback: click the first list item (should be Authenticator)
            try:
                page.locator('ul li a, ul li button').first.click()
                logger.info("Clicked first MFA option (fallback)")
                time.sleep(3)
            except Exception as exc:
                logger.debug("Fallback MFA option click failed: %s", exc)
        logger.info("After MFA selection URL: %s", page.url)

    # Step 4b: Enter TOTP code
    otp_code = _generate_totp(totp_secret)
    # The code is a live credential, so it is deliberately kept out of the log.
    logger.info("Generated TOTP code (value not logged)")
    page.screenshot(path='mt_otp_debug.png')

    otp_field = _first_present(page, [
        'input[name="code"]',
        'input[name="otp"]',
        'input[autocomplete="one-time-code"]',
        'input[inputmode="numeric"]',
        'input[type="text"]',
        'input[type="number"]',
        'input[placeholder*="code" i]',
        'input[placeholder*="digit" i]',
    ])
    if otp_field:
        logger.info("Filling OTP field: %s", otp_field)
        page.fill(otp_field, otp_code)
        time.sleep(0.5)
        clicked = _click_first(page, [
            'button[type="submit"]',
            'button:has-text("Verify")',
            'button:has-text("Continue")',
            'button:has-text("Confirm")',
            'button:has-text("Submit")',
        ])
        if clicked:
            logger.info("Submitted OTP via: %s", clicked)
    else:
        logger.warning("OTP input field not found — typing code directly")
        page.keyboard.type(otp_code)
        page.keyboard.press('Enter')

    time.sleep(5)
    logger.info("After 2FA URL: %s", page.url)


def extract_cookies(email: str, password: str, totp_secret: Optional[str] = None,
                    headed: bool = False) -> list:
    """Login to MT Pro via Kpler Auth0 and extract session cookies.

    Args:
        email: Account email typed into the identifier screen.
        password: Account password.
        totp_secret: Base32 TOTP secret; needed only if an MFA challenge appears.
        headed: Show the browser window instead of running headless.

    Returns:
        The cookie dicts from the browser context whose domain mentions
        MarineTraffic, Kpler or Auth0. An empty list is returned when the
        email field cannot be found, when MFA is required but no secret was
        given, or when any step raises.
    """
    # Imported lazily so the module can be imported without Playwright installed.
    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        logger.info(f"Launching Chromium (headless={not headed})...")
        browser = p.chromium.launch(
            headless=not headed,
            args=['--no-sandbox', '--disable-blink-features=AutomationControlled'],
        )
        context = browser.new_context(
            viewport={'width': 1280, 'height': 800},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                       '(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
        )
        page = context.new_page()

        try:
            # Step 1: Go to MT login → auto-redirects to auth.kpler.com
            logger.info("Navigating to MT login (will redirect to Kpler Auth0)...")
            page.goto('https://www.marinetraffic.com/en/users/login',
                      wait_until='domcontentloaded', timeout=30000)
            time.sleep(3)
            logger.info("Current URL: %s", page.url)

            # If still on MT (not redirected), click login button
            if 'marinetraffic.com' in page.url and 'auth.kpler.com' not in page.url:
                logger.info("Didn't redirect automatically — looking for login button...")
                if _click_first(page, ['a[href*="login"]',
                                       'button:has-text("Log")',
                                       'a:has-text("Log in")']):
                    time.sleep(3)

            logger.info("Auth page URL: %s", page.url)

            # Step 2: Auth0 identifier-first — enter email
            logger.info("Entering email: %s", email)
            email_field = _first_present(page, [
                'input[name="username"]',
                'input[type="email"]',
                'input[name="email"]',
                'input[placeholder*="email" i]',
                'input[placeholder*="Email" ]',
            ])
            if not email_field:
                logger.error("Email field not found on: %s", page.url)
                page.screenshot(path='mt_login_debug.png')
                logger.info("Screenshot saved: mt_login_debug.png")
                return []

            page.fill(email_field, email)
            time.sleep(0.5)

            # Click Continue / Next / Submit
            clicked = _click_first(page, [
                'button[type="submit"]',
                'button[value="default"]',
                'button:has-text("Continue")',
                'button:has-text("Next")',
                'input[type="submit"]',
            ])
            if clicked:
                logger.info("Clicked: %s", clicked)

            time.sleep(3)
            logger.info("After email step URL: %s", page.url)

            # Step 3: Enter password (Auth0 shows password field on next screen)
            logger.info("Entering password...")
            pw_field = _first_present(page, ['input[type="password"]',
                                             'input[name="password"]'])
            if pw_field:
                page.fill(pw_field, password)
                time.sleep(0.5)
                # Submit password
                clicked = _click_first(page, [
                    'button[type="submit"]',
                    'button[value="default"]',
                    'button:has-text("Continue")',
                    'button:has-text("Log in")',
                    'button:has-text("Sign in")',
                ])
                if clicked:
                    logger.info("Submitted password via: %s", clicked)
            else:
                logger.warning("Password field not found — trying combined email+pw form")
                # Fallback: the field may still be rendering; fill() waits for it.
                try:
                    page.fill('input[type="password"]', password)
                    page.press('input[type="password"]', 'Enter')
                except Exception as exc:
                    logger.debug("Password fallback failed: %s", exc)
            time.sleep(4)

            logger.info("After password step URL: %s", page.url)

            # Step 4: Check for 2FA / MFA
            if _needs_2fa(page.url, page.content()):
                logger.info("2FA/MFA challenge detected!")
                if not totp_secret:
                    logger.error("TOTP secret required for 2FA but not provided!")
                    return []
                _complete_mfa_challenge(page, totp_secret)

            # Step 5: Verify login — should be back on marinetraffic.com
            final_url = page.url
            is_logged_in = _looks_logged_in(final_url)

            if not is_logged_in:
                # Check mtGlobal.isLoggedIn JS variable
                try:
                    if page.evaluate('window.mtGlobal && window.mtGlobal.isLoggedIn'):
                        is_logged_in = True
                        logger.info("JS confirms: isLoggedIn = true")
                except Exception as exc:
                    logger.debug("isLoggedIn probe failed: %s", exc)

            logger.info("Final URL: %s", final_url)
            logger.info("Login success: %s", is_logged_in)

            if not is_logged_in:
                page.screenshot(path='mt_login_debug.png')
                logger.warning("May not be logged in — extracting cookies anyway")

            # Step 6: Extract ALL cookies from both domains
            all_cookies = context.cookies()
            mt_cookies = _filter_auth_cookies(all_cookies)

            logger.info("Total cookies: %d, MT/Kpler cookies: %d",
                        len(all_cookies), len(mt_cookies))
            for cookie in mt_cookies[:15]:
                # Cookie values are session credentials: log their size only.
                logger.info("  %s: %s (%d chars)", cookie['domain'], cookie['name'],
                            len(str(cookie['value'])))

            return mt_cookies

        except Exception as e:
            # Top-level boundary: report with traceback, keep a screenshot, fail soft.
            logger.exception("Playwright error: %s", e)
            try:
                page.screenshot(path='mt_error_debug.png')
            except Exception as exc:
                logger.debug("Could not capture error screenshot: %s", exc)
            return []
        finally:
            browser.close()


def main():
    """CLI entry point: log in, then write the extracted cookies to a JSON file.

    Credentials come from the flags or from the MT_LOGIN / MT_PASSWORD /
    MT_TOTP_SECRET environment variables. Exits with status 1 when credentials
    are missing or no cookies were extracted.
    """
    ap = argparse.ArgumentParser(description='MarineTraffic/Kpler Cookie Extractor')
    ap.add_argument('--login', default=os.environ.get('MT_LOGIN', ''))
    ap.add_argument('--password', default=os.environ.get('MT_PASSWORD', ''))
    ap.add_argument('--totp-secret', default=os.environ.get('MT_TOTP_SECRET', ''))
    ap.add_argument('--headed', action='store_true', help='Show browser window')
    ap.add_argument('--output', default=COOKIES_FILE)
    args = ap.parse_args()

    if not args.login or not args.password:
        print("ERROR: --login and --password required")
        sys.exit(1)

    cookies = extract_cookies(
        email=args.login,
        password=args.password,
        totp_secret=args.totp_secret or None,
        headed=args.headed,
    )

    if not cookies:
        logger.error("No cookies extracted.")
        print("\nFAILED — try: python mt_cookie_extractor.py --headed  (to see browser)")
        sys.exit(1)

    # The file holds live session credentials: create it owner-read/write only.
    fd = os.open(args.output, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, 'w', encoding='utf-8') as f:
        json.dump(cookies, f, indent=2)
    logger.info("Saved %d cookies to %s", len(cookies), args.output)
    print(f"\nSUCCESS: {len(cookies)} cookies saved to {args.output}")


if __name__ == '__main__':
    main()