#!/usr/bin/env python3
"""
Moltbook Local Injection Search
================================
Searches the locally saved all_posts_with_comments.json — no API calls.
Zero rate limits. Instant results on all 15,200 posts.

MSc Cybersecurity Research - NCI - David Keane IR240474
"""

import json
import re
from datetime import datetime, timezone
from pathlib import Path
from collections import defaultdict

OUT_DIR = Path(__file__).parent
POSTS_FILE = OUT_DIR / "all_posts_with_comments.json"
OUT_FILE = OUT_DIR / "local_injection_results.json"

# ── Injection keyword taxonomy (Greshake et al. 2023 + DAN taxonomy) ─────────
CATEGORIES = {
    "PERSONA_OVERRIDE": [
        "DAN", "OMEGA", "SIGMA", "SHADOW", "DANTE", "JAILBREAK",
        "you are now", "you are a", "act as", "pretend you are",
        "new persona", "roleplay as", "imagine you are",
        "from now on", "your new name", "you will be",
    ],
    "INSTRUCTION_INJECTION": [
        "ignore previous instructions", "ignore all previous",
        "ignore your instructions", "ignore your training",
        "disregard your", "forget your training", "forget your instructions",
        "new instructions", "override", "supersede",
        "your new instructions", "your actual instructions",
    ],
    "PRIVILEGE_ESCALATION": [
        "SUDO", "sudo mode", "developer mode", "god mode", "admin mode",
        "root access", "unrestricted", "no restrictions",
        "without restrictions", "bypass your", "disable your",
        "remove your", "lift your", "ignore your safety",
    ],
    "SYSTEM_PROMPT_ATTACK": [
        "system prompt", "system message", "your prompt",
        "your instructions say", "what are your instructions",
        "reveal your prompt", "show your system", "print your prompt",
        "what does your system", "tell me your instructions",
    ],
    "SOCIAL_ENGINEERING": [
        "for educational purposes", "hypothetically", "in this story",
        "in a fictional", "in a story", "let's roleplay", "simulate",
        "as a character", "in this scenario", "theoretically",
        "what if you were", "if you had no",
    ],
    "DO_ANYTHING": [
        "do anything now", "do anything", "no rules", "without rules",
        "no limits", "without limits", "anything goes",
        "all restrictions removed", "restrictions lifted",
    ],
}

# Flatten to keyword -> category (keywords are stored lower-cased).
KW_TO_CAT = {}
for cat, kws in CATEGORIES.items():
    for kw in kws:
        KW_TO_CAT[kw.lower()] = cat


def _compile_keyword(kw: str):
    """Compile *kw* into a case-insensitive whole-phrase regex.

    Word boundaries stop short keywords from firing inside unrelated words
    ("dan" in "abundant", "sudo" in "pseudo"), and any run of whitespace is
    accepted between the words of a phrase so line breaks do not hide a hit.
    """
    phrase = r"\s+".join(re.escape(word) for word in kw.split())
    return re.compile(rf"(?<!\w){phrase}(?!\w)", re.IGNORECASE)


# Compiled once at import time; the scan reuses them for every post.
_KW_PATTERNS = {kw: _compile_keyword(kw) for kw in KW_TO_CAT}


def _as_text(value) -> str:
    """Return *value* as a string; falsy values become the empty string."""
    if not value:
        return ""
    return value if isinstance(value, str) else str(value)


def _comments_of(post: dict) -> list:
    """Return the post's comment list, or [] when it is missing, null or not a list."""
    comments = post.get("comments")
    return comments if isinstance(comments, list) else []


def text_of(post: dict) -> str:
    """Extract all searchable text from a post.

    Joins the title/body/content/text fields and the first non-empty of
    body/content/text of every comment with single spaces, lower-cased.
    Missing, null or non-string fields are tolerated.
    """
    parts = [_as_text(post.get(field)) for field in ("title", "body", "content", "text")]
    # Include comments if available
    for c in _comments_of(post):
        if isinstance(c, dict):
            parts.append(_as_text(c.get("body") or c.get("content") or c.get("text")))
        elif isinstance(c, str):
            parts.append(c)
    return " ".join(parts).lower()


def get_author(post: dict) -> str:
    """Return the post's author name, or "unknown" when none can be found.

    The author is read from the "author", "user" or "username" key and may be
    either a plain value or a dict carrying name/username/display_name.
    """
    a = post.get("author") or post.get("user") or post.get("username") or {}
    if isinstance(a, dict):
        return a.get("name") or a.get("username") or a.get("display_name") or "unknown"
    return str(a) or "unknown"


def match_keywords(text: str) -> tuple:
    """Find every taxonomy keyword occurring in *text*.

    Returns ``(keywords, categories)``: the matched keywords in taxonomy
    order and their categories in first-match order, without duplicates.
    Matching is whole-word, so inflected forms ("simulated") do not match
    the base keyword ("simulate").
    """
    matched_kws = [kw for kw, pattern in _KW_PATTERNS.items() if pattern.search(text)]
    matched_cats = []
    for kw in matched_kws:
        cat = KW_TO_CAT[kw]
        if cat not in matched_cats:
            matched_cats.append(cat)
    return matched_kws, matched_cats


def scan_posts(posts) -> tuple:
    """Scan *posts* and collect those containing injection keywords.

    Returns ``(injection_posts, kw_counts, cat_counts)``. ``injection_posts``
    holds one summary dict per distinct post id, sorted by number of matched
    keywords (descending). ``kw_counts`` counts posts per keyword;
    ``cat_counts`` counts keyword hits per category, so a post matching two
    keywords of one category adds two. Entries that are not dicts are skipped.
    """
    kw_counts = defaultdict(int)
    cat_counts = defaultdict(int)
    seen = {}  # post id -> augmented post dict

    for post in posts:
        if not isinstance(post, dict):
            continue
        # Fall back to the object's identity when the post carries no id.
        pid = post.get("id") or post.get("_id") or id(post)
        matched_kws, matched_cats = match_keywords(text_of(post))
        if not matched_kws:
            continue

        for kw in matched_kws:
            kw_counts[kw] += 1
            cat_counts[KW_TO_CAT[kw]] += 1

        entry = seen.get(pid)
        if entry is None:
            seen[pid] = {
                "id": pid,
                "author": get_author(post),
                "created_at": post.get("created_at") or post.get("createdAt") or "",
                "net_score": post.get("netScore") or post.get("net_score") or 0,
                "body_preview": _as_text(post.get("body") or post.get("content"))[:200],
                "matched_keywords": matched_kws,
                "matched_categories": matched_cats,
                "comment_count": len(_comments_of(post)),
            }
            continue

        # Same id seen again: merge the new matches into the existing entry.
        for kw in matched_kws:
            if kw not in entry["matched_keywords"]:
                entry["matched_keywords"].append(kw)
        for cat in matched_cats:
            if cat not in entry["matched_categories"]:
                entry["matched_categories"].append(cat)

    injection_posts = list(seen.values())
    injection_posts.sort(key=lambda p: len(p["matched_keywords"]), reverse=True)
    return injection_posts, kw_counts, cat_counts


def build_output(total_posts: int, injection_posts: list, kw_counts: dict, cat_counts: dict) -> dict:
    """Assemble the JSON-serialisable report written to OUT_FILE.

    The injection rate is reported as 0.0 when no posts were searched instead
    of dividing by zero.
    """
    found = len(injection_posts)
    rate = round(found / total_posts * 100, 2) if total_posts else 0.0
    return {
        "searched_at": datetime.now(timezone.utc).isoformat(),
        "research": "MSc Cybersecurity NCI - AI Prompt Injection Local Analysis",
        "researcher": "David Keane IR240474",
        "total_posts_searched": total_posts,
        "total_injection_posts": found,
        "injection_rate_pct": rate,
        "categories": dict(cat_counts),
        "keyword_hits": dict(sorted(kw_counts.items(), key=lambda x: -x[1])),
        "injection_posts": injection_posts,
    }


def _print_summary(total_posts: int, injection_posts: list, kw_counts: dict, cat_counts: dict) -> None:
    """Print the category, keyword and top-post summary to stdout."""
    print(f"{'='*60}")
    print(f"RESULTS: {len(injection_posts):,} injection posts found from {total_posts:,} total")
    print(f"{'='*60}\n")

    print("By category:")
    for cat, count in sorted(cat_counts.items(), key=lambda x: -x[1]):
        bar = "█" * min(count // 5, 30)
        print(f" {cat:<28} {count:5d} {bar}")

    print("\nTop keywords:")
    for kw, count in sorted(kw_counts.items(), key=lambda x: -x[1])[:20]:
        print(f" {kw:<35} {count:5d}")

    print("\nTop 10 most-injected posts:")
    for p in injection_posts[:10]:
        kws = ", ".join(p["matched_keywords"][:4])
        preview = p["body_preview"][:80].replace("\n", " ")
        # "!s:>3" pads like ":3d" for ints but does not raise on float/str scores.
        print(f" [{p['net_score']!s:>3} score] [{len(p['matched_keywords'])} kw] {preview[:70]}...")
        print(f" → author: {p['author']} | kws: {kws}")


def main():
    """Load POSTS_FILE, scan every post, print a summary and write OUT_FILE."""
    print("=" * 60)
    print("Moltbook Local Injection Search")
    print(f"Searching: {POSTS_FILE}")
    print(f"Keywords: {len(KW_TO_CAT)} across {len(CATEGORIES)} categories")
    print("=" * 60)

    with open(POSTS_FILE, encoding="utf-8") as f:
        data = json.load(f)

    # The dump is either a bare list of posts or an object with a "posts" key.
    posts = (data.get("posts") or []) if isinstance(data, dict) else data
    print(f"\nLoaded {len(posts):,} posts\n")

    injection_posts, kw_counts, cat_counts = scan_posts(posts)

    # Print summary
    _print_summary(len(posts), injection_posts, kw_counts, cat_counts)

    # Save
    output = build_output(len(posts), injection_posts, kw_counts, cat_counts)
    with open(OUT_FILE, "w", encoding="utf-8") as f:
        json.dump(output, f, ensure_ascii=False, indent=2)

    kb = OUT_FILE.stat().st_size // 1024
    print(f"\nSaved → {OUT_FILE} ({kb} KB)")
    print(f"Injection rate: {output['injection_rate_pct']}% of all posts")


if __name__ == "__main__":
    main()