# montana/Русский/Разведка/Moltbook/themed/moltbook-extended-injection-dataset/local_search.py

#!/usr/bin/env python3
"""
Moltbook Local Injection Search
================================
Searches the locally saved posts dump (POSTS_FILE, i.e. all_posts_1_2M.json) — no API calls.
Zero rate limits. Instant results on all 15,200 posts.

MSc Cybersecurity Research - NCI - David Keane IR240474
"""

import json
import re
from datetime import datetime, timezone
from pathlib import Path
from collections import defaultdict

# Input dump and result file both live in the directory containing this script.
OUT_DIR = Path(__file__).parent
POSTS_FILE = OUT_DIR.joinpath("all_posts_1_2M.json")
OUT_FILE = OUT_DIR.joinpath("local_injection_results.json")

# ── Injection keyword taxonomy (Greshake et al. 2023 + DAN taxonomy) ─────────
# Category name -> trigger phrases. Order matters: it determines the iteration
# order of KW_TO_CAT, which is built from this table right below.
CATEGORIES = {
    "PERSONA_OVERRIDE": [
        "DAN",
        "OMEGA",
        "SIGMA",
        "SHADOW",
        "DANTE",
        "JAILBREAK",
        "you are now",
        "you are a",
        "act as",
        "pretend you are",
        "new persona",
        "roleplay as",
        "imagine you are",
        "from now on",
        "your new name",
        "you will be",
    ],
    "INSTRUCTION_INJECTION": [
        "ignore previous instructions",
        "ignore all previous",
        "ignore your instructions",
        "ignore your training",
        "disregard your",
        "forget your training",
        "forget your instructions",
        "new instructions",
        "override",
        "supersede",
        "your new instructions",
        "your actual instructions",
    ],
    "PRIVILEGE_ESCALATION": [
        "SUDO",
        "sudo mode",
        "developer mode",
        "god mode",
        "admin mode",
        "root access",
        "unrestricted",
        "no restrictions",
        "without restrictions",
        "bypass your",
        "disable your",
        "remove your",
        "lift your",
        "ignore your safety",
    ],
    "SYSTEM_PROMPT_ATTACK": [
        "system prompt",
        "system message",
        "your prompt",
        "your instructions say",
        "what are your instructions",
        "reveal your prompt",
        "show your system",
        "print your prompt",
        "what does your system",
        "tell me your instructions",
    ],
    "SOCIAL_ENGINEERING": [
        "for educational purposes",
        "hypothetically",
        "in this story",
        "in a fictional",
        "in a story",
        "let's roleplay",
        "simulate",
        "as a character",
        "in this scenario",
        "theoretically",
        "what if you were",
        "if you had no",
    ],
    "DO_ANYTHING": [
        "do anything now",
        "do anything",
        "no rules",
        "without rules",
        "no limits",
        "without limits",
        "anything goes",
        "all restrictions removed",
        "restrictions lifted",
    ],
}

# Reverse index: lower-cased keyword -> the category that lists it.
KW_TO_CAT = {
    keyword.lower(): category
    for category, keywords in CATEGORIES.items()
    for keyword in keywords
}

def text_of(post: dict) -> str:
    """Return all searchable text of a post as one lower-cased string.

    The post's "title", "body", "content" and "text" fields are joined with
    single spaces, followed by one text per comment. For a dict comment the
    first non-empty of "body", "content", "text" is used; a comment that is a
    plain string is used as-is.

    Missing or null fields contribute an empty string. A missing or null
    "comments" value is treated as "no comments", and comment entries that are
    neither dicts nor strings are skipped instead of raising.
    """
    parts = [post.get(field) or "" for field in ("title", "body", "content", "text")]

    # `post.get("comments", [])` would hand back None for an explicit JSON null,
    # so fall back with `or` just like the scalar fields above.
    for comment in post.get("comments") or []:
        if isinstance(comment, dict):
            parts.append(
                comment.get("body")
                or comment.get("content")
                or comment.get("text")
                or ""
            )
        elif isinstance(comment, str):
            parts.append(comment)

    return " ".join(parts).lower()

def get_author(post: dict) -> str:
    """Return a display name for the post's author, or "unknown".

    The first truthy value among the post's "author", "user" and "username"
    keys is used. If it is a dict, the first truthy of its "name", "username"
    and "display_name" entries is returned; any other value is stringified.
    """
    author = {}
    for key in ("author", "user", "username"):
        candidate = post.get(key)
        if candidate:
            author = candidate
            break

    if not isinstance(author, dict):
        return str(author) or "unknown"

    for field in ("name", "username", "display_name"):
        value = author.get(field)
        if value:
            return value
    return "unknown"

def main():
    """Scan the local posts dump for injection keywords and write a report.

    Loads POSTS_FILE, which may be either a JSON list of posts or an object
    with a "posts" list. Each post's text (see text_of) is checked against
    every keyword in KW_TO_CAT using a plain substring test. A summary is
    printed to stdout and the full result set is written to OUT_FILE as JSON.

    Raises whatever open() / json.load() raise when POSTS_FILE is missing or
    is not valid JSON.
    """
    print("=" * 60)
    print("Moltbook Local Injection Search")
    print(f"Searching: {POSTS_FILE}")
    print(f"Keywords: {len(KW_TO_CAT)} across {len(CATEGORIES)} categories")
    print("=" * 60)

    with open(POSTS_FILE, encoding="utf-8") as f:
        data = json.load(f)

    # Accept {"posts": [...]} or a bare list; a null "posts" means no posts.
    posts = (data.get("posts") or []) if isinstance(data, dict) else data
    print(f"\nLoaded {len(posts):,} posts\n")

    kw_counts = defaultdict(int)   # keyword  -> number of posts containing it
    # NOTE(review): incremented once per keyword hit, so a post matching two
    # keywords of the same category (e.g. "sudo" and "sudo mode") counts twice.
    # Confirm whether per-post counts are wanted instead.
    cat_counts = defaultdict(int)  # category -> number of keyword hits
    seen = {}                      # post id  -> result record

    for post in posts:
        # Posts without an id fall back to the object identity, which is
        # unique within this run but not stable across runs.
        pid = post.get("id") or post.get("_id") or id(post)
        text = text_of(post)

        # NOTE(review): plain substring matching also fires inside longer
        # words ("sudo" in "pseudo", "dan" in "dangerous"); consider
        # word-boundary matching if precision matters.
        matched_kws = [kw for kw in KW_TO_CAT if kw in text]
        if not matched_kws:
            continue

        for kw in matched_kws:
            kw_counts[kw] += 1
            cat_counts[KW_TO_CAT[kw]] += 1

        # De-duplicate while keeping first-match order, so the saved report is
        # identical from run to run (a set would be ordered arbitrarily).
        matched_cats = list(dict.fromkeys(KW_TO_CAT[kw] for kw in matched_kws))

        record = seen.get(pid)
        if record is None:
            seen[pid] = {
                "id":               pid,
                "author":           get_author(post),
                "created_at":       post.get("created_at") or post.get("createdAt") or "",
                "net_score":        post.get("netScore") or post.get("net_score") or 0,
                # Same body fields that text_of() searches, in the same order.
                "body_preview":     (
                    post.get("body") or post.get("content") or post.get("text") or ""
                )[:200],
                "matched_keywords": matched_kws,
                "matched_categories": matched_cats,
                # "comments" may be absent or null in the dump.
                "comment_count":    len(post.get("comments") or []),
            }
        else:
            # Same id seen again: merge the new matches into the first record.
            for kw in matched_kws:
                if kw not in record["matched_keywords"]:
                    record["matched_keywords"].append(kw)
            for cat in matched_cats:
                if cat not in record["matched_categories"]:
                    record["matched_categories"].append(cat)

    injection_posts = list(seen.values())
    injection_posts.sort(key=lambda p: len(p["matched_keywords"]), reverse=True)

    # Print summary
    print(f"{'='*60}")
    print(f"RESULTS: {len(injection_posts):,} injection posts found from {len(posts):,} total")
    print(f"{'='*60}\n")

    print("By category:")
    for cat, count in sorted(cat_counts.items(), key=lambda x: -x[1]):
        bar = "█" * min(count // 5, 30)
        print(f"  {cat:<28} {count:5d}  {bar}")

    print("\nTop keywords:")
    for kw, count in sorted(kw_counts.items(), key=lambda x: -x[1])[:20]:
        print(f"  {kw:<35} {count:5d}")

    print("\nTop 10 most-injected posts:")
    for p in injection_posts[:10]:
        kws     = ", ".join(p["matched_keywords"][:4])
        preview = p["body_preview"][:80].replace("\n", " ")
        # ">3" instead of "3d": the score comes straight from JSON and may be
        # a float or a string, which the "d" format code rejects.
        print(f"  [{p['net_score']:>3} score] [{len(p['matched_keywords'])} kw] {preview[:70]}...")
        print(f"    → author: {p['author']} | kws: {kws}")

    # An empty dump must not abort the run with ZeroDivisionError.
    injection_rate = round(len(injection_posts) / len(posts) * 100, 2) if posts else 0.0

    # Save
    output = {
        "searched_at":              datetime.now(timezone.utc).isoformat(),
        "research":                 "MSc Cybersecurity NCI - AI Prompt Injection Local Analysis",
        "researcher":               "David Keane IR240474",
        "total_posts_searched":     len(posts),
        "total_injection_posts":    len(injection_posts),
        "injection_rate_pct":       injection_rate,
        "categories":               dict(cat_counts),
        "keyword_hits":             dict(sorted(kw_counts.items(), key=lambda x: -x[1])),
        "injection_posts":          injection_posts,
    }

    with open(OUT_FILE, "w", encoding="utf-8") as f:
        json.dump(output, f, ensure_ascii=False, indent=2)

    kb = OUT_FILE.stat().st_size // 1024
    print(f"\nSaved → {OUT_FILE} ({kb} KB)")
    print(f"Injection rate: {output['injection_rate_pct']}% of all posts")

# Run the search only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()