montana/Русский/Логистика/mt_cookie_extractor.py

334 lines
14 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
MarineTraffic / Kpler Cookie Extractor via Playwright
=====================================================
MT now uses Kpler Auth0 login (auth.kpler.com).
Logs in using real Chromium and extracts the session cookies to mt_cookies.json.
The bulk_carrier_scraper.py uses these cookies to scrape with auth.
Flow: marinetraffic.com/login → redirect → auth.kpler.com (Auth0 Universal Login)
→ email → Continue → password → Continue → [2FA OTP] → done
Usage:
python mt_cookie_extractor.py --login EMAIL --password PASS [--totp-secret SECRET]
python mt_cookie_extractor.py --login EMAIL --password PASS --headed # see browser
"""
import argparse
import json
import os
import sys
import time
import logging
# Configure root logging at import time: INFO level, "<timestamp> <LEVEL> <message>" lines.
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
# Named logger used by extract_cookies() and main() for progress and error reporting.
logger = logging.getLogger('mt_cookies')
# Default path the extracted cookies are written to (main() exposes it as --output).
COOKIES_FILE = 'mt_cookies.json'
def _generate_totp(secret: str, *, for_time: float = None,
                   digits: int = 6, period: int = 30) -> str:
    """Generate a TOTP code (RFC 6238, HMAC-SHA1, no external deps).

    Args:
        secret: Base32-encoded shared secret. Letter case, any whitespace
            (spaces, tabs, newlines), grouping hyphens and existing ``=``
            padding are ignored, so a secret pasted from an authenticator
            setup screen or read from an environment variable works as-is.
        for_time: Unix timestamp to compute the code for. Defaults to the
            current time.
        digits: Number of digits in the returned code (default 6).
        period: Length of one time step in seconds (default 30).

    Returns:
        The code as a zero-padded decimal string of length ``digits``.

    Raises:
        binascii.Error: (a ``ValueError`` subclass) if ``secret`` is not
            valid base32 after normalisation.
    """
    import base64
    import hashlib
    import hmac
    import struct

    # Normalise: drop all whitespace and hyphens, strip any padding the caller
    # supplied, then re-pad to the multiple of 8 that b32decode requires.
    secret_clean = ''.join(secret.split()).replace('-', '').rstrip('=').upper()
    pad = (-len(secret_clean)) % 8
    key = base64.b32decode(secret_clean + '=' * pad)

    now = time.time() if for_time is None else for_time
    counter = int(now) // period
    msg = struct.pack('>Q', counter)  # 8-byte big-endian counter (RFC 4226)
    h = hmac.new(key, msg, hashlib.sha1).digest()

    # Dynamic truncation: the low nibble of the last byte selects a 4-byte
    # window; the top bit is masked off to get a non-negative 31-bit integer.
    offset = h[-1] & 0x0f
    code = struct.unpack('>I', h[offset:offset + 4])[0] & 0x7fffffff
    return str(code % (10 ** digits)).zfill(digits)
def _first_present(page, selectors):
    """Return the first selector in *selectors* that matches an element, else None."""
    for sel in selectors:
        try:
            if page.locator(sel).count() > 0:
                return sel
        except Exception as exc:
            # A selector that errors must not abort the probe; try the next one.
            logger.debug("Selector probe failed for %r: %s", sel, exc)
    return None


def _click_first(page, selectors):
    """Click the first selector that matches; return it, or None if nothing was clicked."""
    for sel in selectors:
        try:
            if page.locator(sel).count() > 0:
                page.click(sel)
                return sel
        except Exception as exc:
            # Best effort: a failed click falls through to the next candidate.
            logger.debug("Click failed for %r: %s", sel, exc)
    return None


def extract_cookies(email: str, password: str, totp_secret: str = None,
                    headed: bool = False) -> list:
    """Log in to MarineTraffic via Kpler Auth0 and return the session cookies.

    Drives a Chromium instance through the identifier-first login flow
    (email, then password, then an optional TOTP challenge) and collects the
    browser context's cookies afterwards.

    Args:
        email: Account e-mail typed into the login form.
        password: Account password.
        totp_secret: Base32 TOTP secret; required only if a 2FA/MFA challenge
            is detected.
        headed: Show the browser window instead of running headless.

    Returns:
        A list of cookie dicts whose domain contains ``marinetraffic``,
        ``kpler`` or ``auth0``. An empty list is returned when the e-mail
        field cannot be found, when 2FA is required but no secret was given,
        or when any exception is raised while driving the browser. Cookies
        are still returned (with a warning) if the login could not be
        confirmed.

    Side effects:
        May write debug screenshots to the working directory:
        ``mt_login_debug.png``, ``mt_2fa_debug.png``, ``mt_otp_debug.png``,
        ``mt_error_debug.png``. Secrets (password, TOTP code, cookie values)
        are never written to the log.
    """
    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        logger.info(f"Launching Chromium (headless={not headed})...")
        browser = p.chromium.launch(
            headless=not headed,
            args=['--no-sandbox', '--disable-blink-features=AutomationControlled'],
        )
        context = browser.new_context(
            viewport={'width': 1280, 'height': 800},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                       '(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
        )
        page = context.new_page()
        try:
            # Step 1: Go to MT login → auto-redirects to auth.kpler.com
            logger.info("Navigating to MT login (will redirect to Kpler Auth0)...")
            page.goto('https://www.marinetraffic.com/en/users/login',
                      wait_until='domcontentloaded', timeout=30000)
            time.sleep(3)
            logger.info(f"Current URL: {page.url}")

            # If still on MT (not redirected), click a login link/button.
            if 'marinetraffic.com' in page.url and 'auth.kpler.com' not in page.url:
                logger.info("Didn't redirect automatically — looking for login button...")
                if _click_first(page, ['a[href*="login"]', 'button:has-text("Log")',
                                       'a:has-text("Log in")']):
                    time.sleep(3)
            logger.info(f"Auth page URL: {page.url}")

            # Step 2: Auth0 identifier-first — enter email
            logger.info(f"Entering email: {email}")
            email_field = _first_present(page, [
                'input[name="username"]', 'input[type="email"]',
                'input[name="email"]', 'input[placeholder*="email" i]',
                'input[placeholder*="Email" ]',
            ])
            if not email_field:
                logger.error(f"Email field not found on: {page.url}")
                page.screenshot(path='mt_login_debug.png')
                logger.info("Screenshot saved: mt_login_debug.png")
                return []
            page.fill(email_field, email)
            time.sleep(0.5)

            # Click Continue / Next / Submit
            clicked = _click_first(page, [
                'button[type="submit"]', 'button[value="default"]',
                'button:has-text("Continue")', 'button:has-text("Next")',
                'input[type="submit"]',
            ])
            if clicked:
                logger.info(f"Clicked: {clicked}")
            time.sleep(3)
            logger.info(f"After email step URL: {page.url}")

            # Step 3: Enter password (Auth0 shows password field on next screen)
            logger.info("Entering password...")
            pw_field = _first_present(page, ['input[type="password"]',
                                             'input[name="password"]'])
            if pw_field:
                page.fill(pw_field, password)
                time.sleep(0.5)
                clicked = _click_first(page, [
                    'button[type="submit"]', 'button[value="default"]',
                    'button:has-text("Continue")', 'button:has-text("Log in")',
                    'button:has-text("Sign in")',
                ])
                if clicked:
                    logger.info(f"Submitted password via: {clicked}")
            else:
                logger.warning("Password field not found — trying combined email+pw form")
                # Fallback: the field may appear late; fill() waits for it and
                # raises on timeout, which is tolerated here.
                try:
                    page.fill('input[type="password"]', password)
                    page.press('input[type="password"]', 'Enter')
                except Exception as exc:
                    logger.debug("Combined-form password fallback failed: %s", exc)
            time.sleep(4)
            logger.info(f"After password step URL: {page.url}")

            # Step 4: Check for 2FA / MFA
            # NOTE(review): this is a plain substring scan of the page HTML, so
            # short markers such as 'otp' or 'mfa' can match unrelated text —
            # confirm against the real post-login page before tightening.
            page_lower = page.content().lower()
            needs_2fa = any(marker in page_lower for marker in [
                'two-factor', 'two_factor', 'authenticator', 'verification code',
                'one-time', 'otp', '2fa', 'security code', 'mfa', 'multi-factor',
                'enter the code', '6-digit', 'authenticator app',
            ])
            # Also check URL
            if any(marker in page.url for marker in ['mfa', '2fa', 'otp', 'factor']):
                needs_2fa = True

            if needs_2fa:
                logger.info("2FA/MFA challenge detected!")
                if not totp_secret:
                    logger.error("TOTP secret required for 2FA but not provided!")
                    return []
                page.screenshot(path='mt_2fa_debug.png')

                # Step 4a: mfa-login-options — select "Google Authenticator or similar"
                if 'mfa-login-options' in page.url:
                    logger.info("MFA method selection screen — clicking Google Authenticator...")
                    clicked = _click_first(page, [
                        'a:has-text("Google Authenticator")',
                        'button:has-text("Google Authenticator")',
                        'li:has-text("Google Authenticator")',
                        '[data-action-button-secondary*="totp"]',
                        'a[href*="totp"]',
                        'a[href*="otp"]',
                    ])
                    if clicked:
                        logger.info(f"Clicked MFA option: {clicked}")
                        time.sleep(3)
                    else:
                        # Fallback: click the first list item (should be Authenticator)
                        try:
                            page.locator('ul li a, ul li button').first.click()
                            logger.info("Clicked first MFA option (fallback)")
                            time.sleep(3)
                        except Exception as exc:
                            logger.debug("MFA option fallback click failed: %s", exc)
                    logger.info(f"After MFA selection URL: {page.url}")

                # Step 4b: Enter TOTP code
                otp_code = _generate_totp(totp_secret)
                # The code itself is a credential; log only that it was generated.
                logger.info("Generated TOTP code")
                page.screenshot(path='mt_otp_debug.png')
                otp_field = _first_present(page, [
                    'input[name="code"]', 'input[name="otp"]',
                    'input[autocomplete="one-time-code"]', 'input[inputmode="numeric"]',
                    'input[type="text"]', 'input[type="number"]',
                    'input[placeholder*="code" i]', 'input[placeholder*="digit" i]',
                ])
                if otp_field:
                    logger.info(f"Filling OTP field: {otp_field}")
                    page.fill(otp_field, otp_code)
                    time.sleep(0.5)
                    clicked = _click_first(page, [
                        'button[type="submit"]', 'button:has-text("Verify")',
                        'button:has-text("Continue")', 'button:has-text("Confirm")',
                        'button:has-text("Submit")',
                    ])
                    if clicked:
                        logger.info(f"Submitted OTP via: {clicked}")
                else:
                    logger.warning("OTP input field not found — typing code directly")
                    page.keyboard.type(otp_code)
                    page.keyboard.press('Enter')
                time.sleep(5)
                logger.info(f"After 2FA URL: {page.url}")

            # Step 5: Verify login — should be back on marinetraffic.com
            final_url = page.url
            is_logged_in = (
                'marinetraffic.com' in final_url and
                'login' not in final_url and
                'auth.kpler.com' not in final_url
            )
            if not is_logged_in:
                # Second opinion: the site's own mtGlobal.isLoggedIn JS flag.
                try:
                    if page.evaluate('window.mtGlobal && window.mtGlobal.isLoggedIn'):
                        is_logged_in = True
                        logger.info("JS confirms: isLoggedIn = true")
                except Exception as exc:
                    logger.debug("isLoggedIn JS check failed: %s", exc)
            logger.info(f"Final URL: {final_url}")
            logger.info(f"Login success: {is_logged_in}")
            if not is_logged_in:
                page.screenshot(path='mt_login_debug.png')
                logger.warning("May not be logged in — extracting cookies anyway")

            # Step 6: Extract ALL cookies from both domains
            all_cookies = context.cookies()
            mt_cookies = [c for c in all_cookies
                          if any(d in c.get('domain', '')
                                 for d in ['marinetraffic', 'kpler', 'auth0'])]
            logger.info(f"Total cookies: {len(all_cookies)}, MT/Kpler cookies: {len(mt_cookies)}")
            for c in mt_cookies[:15]:
                # Cookie values are session credentials: log name and size only.
                logger.info(f"  {c['domain']}: {c['name']} ({len(str(c['value']))} chars)")
            return mt_cookies
        except Exception as e:
            # logger.exception records the traceback along with the message.
            logger.exception(f"Playwright error: {e}")
            try:
                page.screenshot(path='mt_error_debug.png')
            except Exception as exc:
                logger.debug("Could not capture error screenshot: %s", exc)
            return []
        finally:
            browser.close()
def main():
    """Command-line entry point: parse options, log in, and save the cookies.

    Credentials come from ``--login`` / ``--password`` / ``--totp-secret`` or,
    when a flag is omitted, from the ``MT_LOGIN`` / ``MT_PASSWORD`` /
    ``MT_TOTP_SECRET`` environment variables. On success the cookies are
    written as indented JSON to ``--output``. The process exits with status 1
    when credentials are missing or no cookies were extracted.
    """
    env = os.environ
    parser = argparse.ArgumentParser(description='MarineTraffic/Kpler Cookie Extractor')
    parser.add_argument('--login', default=env.get('MT_LOGIN', ''))
    parser.add_argument('--password', default=env.get('MT_PASSWORD', ''))
    parser.add_argument('--totp-secret', default=env.get('MT_TOTP_SECRET', ''))
    parser.add_argument('--headed', action='store_true', help='Show browser window')
    parser.add_argument('--output', default=COOKIES_FILE)
    opts = parser.parse_args()

    # Guard: both credentials are mandatory.
    if not (opts.login and opts.password):
        print("ERROR: --login and --password required")
        sys.exit(1)

    # An empty TOTP secret is passed on as None ("not provided").
    jar = extract_cookies(
        email=opts.login,
        password=opts.password,
        totp_secret=opts.totp_secret or None,
        headed=opts.headed,
    )

    # Guard: nothing extracted means the run failed.
    if not jar:
        logger.error("No cookies extracted.")
        print("\nFAILED — try: python mt_cookie_extractor.py --headed (to see browser)")
        sys.exit(1)

    with open(opts.output, 'w') as fh:
        json.dump(jar, fh, indent=2)
    count = len(jar)
    logger.info(f"Saved {count} cookies to {opts.output}")
    print(f"\nSUCCESS: {count} cookies saved to {opts.output}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()