montana/Русский/Разведка/Moltbook/themed/moltbook-extended-injection-dataset/local_search.py

183 lines
7.0 KiB (Stored with Git LFS)
Python

#!/usr/bin/env python3
"""
Moltbook Local Injection Search
================================
Searches the locally saved posts file (all_posts_1_2M.json, see POSTS_FILE) — no API calls.
Zero rate limits. Instant results on all 15,200 posts.
MSc Cybersecurity Research - NCI - David Keane IR240474
"""
import json
import re
from datetime import datetime, timezone
from pathlib import Path
from collections import defaultdict
# All inputs and outputs live next to this script.
OUT_DIR = Path(__file__).parent
# Input: the locally saved posts dump that main() loads.
POSTS_FILE = OUT_DIR.joinpath("all_posts_1_2M.json")
# Output: the JSON report that main() writes.
OUT_FILE = OUT_DIR.joinpath("local_injection_results.json")
# ── Injection keyword taxonomy (Greshake et al. 2023 + DAN taxonomy) ─────────
# Maps a category name to the phrases that flag a post for that category.
# Case here is cosmetic: keywords are lower-cased when flattened into
# KW_TO_CAT and compared against lower-cased post text.
CATEGORIES = {
    # Phrases that try to give the model a new identity or role.
    "PERSONA_OVERRIDE": [
        "DAN", "OMEGA", "SIGMA", "SHADOW", "DANTE", "JAILBREAK",
        "you are now", "you are a", "act as", "pretend you are",
        "new persona", "roleplay as", "imagine you are",
        "from now on", "your new name", "you will be",
    ],
    # Phrases that try to replace or cancel earlier instructions.
    "INSTRUCTION_INJECTION": [
        "ignore previous instructions", "ignore all previous",
        "ignore your instructions", "ignore your training",
        "disregard your", "forget your training", "forget your instructions",
        "new instructions", "override", "supersede",
        "your new instructions", "your actual instructions",
    ],
    # Phrases that claim elevated modes or ask for safeguards to be removed.
    "PRIVILEGE_ESCALATION": [
        "SUDO", "sudo mode", "developer mode", "god mode",
        "admin mode", "root access", "unrestricted", "no restrictions",
        "without restrictions", "bypass your", "disable your",
        "remove your", "lift your", "ignore your safety",
    ],
    # Phrases that probe for or reference the system prompt.
    "SYSTEM_PROMPT_ATTACK": [
        "system prompt", "system message", "your prompt",
        "your instructions say", "what are your instructions",
        "reveal your prompt", "show your system", "print your prompt",
        "what does your system", "tell me your instructions",
    ],
    # Hypothetical / fictional framings.
    "SOCIAL_ENGINEERING": [
        "for educational purposes", "hypothetically", "in this story",
        "in a fictional", "in a story", "let's roleplay",
        "simulate", "as a character", "in this scenario",
        "theoretically", "what if you were", "if you had no",
    ],
    # "Do Anything Now"-style claims that no rules apply.
    "DO_ANYTHING": [
        "do anything now", "do anything", "no rules", "without rules",
        "no limits", "without limits", "anything goes",
        "all restrictions removed", "restrictions lifted",
    ],
}
# Flatten to keyword -> category.
# Keys are lower-cased so they can be compared against lower-cased post text.
# If the same lower-cased keyword appears in two categories, the later
# category wins. A comprehension is used so no loop temporaries are left
# behind in the module namespace.
KW_TO_CAT = {
    keyword.lower(): category
    for category, keywords in CATEGORIES.items()
    for keyword in keywords
}
def text_of(post: dict) -> str:
    """Return all searchable text of a post as one lower-cased string.

    The post's ``title``, ``body``, ``content`` and ``text`` fields are
    joined with single spaces, followed by one entry per comment. For a dict
    comment the first non-empty of ``body`` / ``content`` / ``text`` is used;
    a comment that is itself a plain string is used as-is.

    Malformed data is tolerated rather than raising: non-string field values,
    a missing or ``null`` ``comments`` field, and comments that are neither
    dicts nor strings all contribute no text.

    Args:
        post: One post record as loaded from the JSON dump.

    Returns:
        The concatenated, lower-cased text (may consist only of separator
        spaces when the post has no text at all).
    """

    def as_text(value: object) -> str:
        # JSON null, numbers or nested objects would break " ".join().
        return value if isinstance(value, str) else ""

    parts = [as_text(post.get(key)) for key in ("title", "body", "content", "text")]

    # "comments" may be absent or explicitly null in the dump.
    comments = post.get("comments")
    if not isinstance(comments, (list, tuple)):
        comments = []

    for comment in comments:
        if isinstance(comment, str):
            parts.append(comment)
        elif isinstance(comment, dict):
            parts.append(
                as_text(comment.get("body"))
                or as_text(comment.get("content"))
                or as_text(comment.get("text"))
            )

    return " ".join(parts).lower()
def get_author(post: dict) -> str:
    """Return a display name for the post's author, or ``"unknown"``.

    The author is taken from the first truthy value among the post's
    ``author``, ``user`` and ``username`` fields. If that value is a dict,
    the first truthy of its ``name``, ``username`` and ``display_name``
    entries is returned; any other value is converted with ``str``.
    """
    author = {}
    for key in ("author", "user", "username"):
        candidate = post.get(key)
        if candidate:
            author = candidate
            break

    # Scalar author (e.g. a plain username string): stringify it.
    if not isinstance(author, dict):
        return str(author) or "unknown"

    for field in ("name", "username", "display_name"):
        value = author.get(field)
        if value:
            return value
    return "unknown"
def main():
    """Search the saved posts for injection keywords, report, and save.

    Loads ``POSTS_FILE`` (either a JSON list of posts or an object holding
    the list under ``"posts"``), flags every post whose text contains at
    least one keyword from ``KW_TO_CAT``, prints a console summary and writes
    the full result set to ``OUT_FILE``.

    Counting rules:
      * ``kw_counts[kw]`` is the number of distinct flagged posts (by id)
        containing ``kw``.
      * ``cat_counts[cat]`` is the sum of those keyword hits per category, so
        a post matching two keywords of one category counts twice there.

    Raises:
        OSError: if ``POSTS_FILE`` cannot be read or ``OUT_FILE`` written.
        json.JSONDecodeError: if ``POSTS_FILE`` is not valid JSON.
    """
    print("=" * 60)
    print("Moltbook Local Injection Search")
    print(f"Searching: {POSTS_FILE}")
    print(f"Keywords: {len(KW_TO_CAT)} across {len(CATEGORIES)} categories")
    print("=" * 60)

    with open(POSTS_FILE, encoding="utf-8") as f:
        data = json.load(f)
    # `or []` also covers an explicit `"posts": null`.
    posts = (data.get("posts") or []) if isinstance(data, dict) else data
    print(f"\nLoaded {len(posts):,} posts\n")

    # Per-keyword / per-category hit counts
    kw_counts = defaultdict(int)
    cat_counts = defaultdict(int)
    seen = {}  # post id -> augmented post dict

    for post in posts:
        if not isinstance(post, dict):
            continue  # skip malformed entries instead of crashing on .get()

        # Posts without an id fall back to the object's identity so they
        # still get a unique key within this run.
        pid = post.get("id") or post.get("_id") or id(post)
        text = text_of(post)

        # NOTE(review): plain substring matching. Short keywords such as
        # "dan" or "sudo" also hit inside longer words ("danger", "pseudo");
        # consider word-boundary matching if precision matters.
        matched_kws = [kw for kw in KW_TO_CAT if kw in text]
        if not matched_kws:
            continue

        record = seen.get(pid)
        if record is None:
            body = post.get("body") or post.get("content") or ""
            record = seen[pid] = {
                "id": pid,
                "author": get_author(post),
                "created_at": post.get("created_at") or post.get("createdAt") or "",
                "net_score": post.get("netScore") or post.get("net_score") or 0,
                "body_preview": str(body)[:200],
                "matched_keywords": [],
                "matched_categories": [],
                # "comments" may be null in the dump.
                "comment_count": len(post.get("comments") or []),
            }

        for kw in matched_kws:
            if kw in record["matched_keywords"]:
                # The same post id appeared earlier with this keyword; do not
                # count it again, so totals stay consistent with the number
                # of reported posts.
                continue
            cat = KW_TO_CAT[kw]
            record["matched_keywords"].append(kw)
            kw_counts[kw] += 1
            cat_counts[cat] += 1
            # First-seen order keeps the saved output deterministic.
            if cat not in record["matched_categories"]:
                record["matched_categories"].append(cat)

    injection_posts = sorted(
        seen.values(), key=lambda p: len(p["matched_keywords"]), reverse=True
    )

    # Print summary
    print("=" * 60)
    print(f"RESULTS: {len(injection_posts):,} injection posts found from {len(posts):,} total")
    print("=" * 60 + "\n")

    print("By category:")
    for cat, count in sorted(cat_counts.items(), key=lambda x: -x[1]):
        # One block per 5 hits, capped at 30 blocks.
        bar = "█" * min(count // 5, 30)
        print(f" {cat:<28} {count:5d} {bar}")

    print("\nTop keywords:")
    for kw, count in sorted(kw_counts.items(), key=lambda x: -x[1])[:20]:
        print(f" {kw:<35} {count:5d}")

    print("\nTop 10 most-injected posts:")
    for p in injection_posts[:10]:
        kws = ", ".join(p["matched_keywords"][:4])
        preview = p["body_preview"][:70].replace("\n", " ")
        score = p["net_score"]
        # The "d" format only accepts integers; scores read from JSON may be
        # floats or strings.
        score_txt = f"{score:3d}" if isinstance(score, int) else f"{score!s:>3}"
        print(f" [{score_txt} score] [{len(p['matched_keywords'])} kw] {preview}...")
        print(f" → author: {p['author']} | kws: {kws}")

    # Save
    injection_rate = round(len(injection_posts) / len(posts) * 100, 2) if posts else 0.0
    output = {
        "searched_at": datetime.now(timezone.utc).isoformat(),
        "research": "MSc Cybersecurity NCI - AI Prompt Injection Local Analysis",
        "researcher": "David Keane IR240474",
        "total_posts_searched": len(posts),
        "total_injection_posts": len(injection_posts),
        "injection_rate_pct": injection_rate,
        "categories": dict(cat_counts),
        "keyword_hits": dict(sorted(kw_counts.items(), key=lambda x: -x[1])),
        "injection_posts": injection_posts,
    }
    with open(OUT_FILE, "w", encoding="utf-8") as f:
        json.dump(output, f, ensure_ascii=False, indent=2)

    kb = OUT_FILE.stat().st_size // 1024
    print(f"\nSaved → {OUT_FILE} ({kb} KB)")
    print(f"Injection rate: {output['injection_rate_pct']}% of all posts")
# Run the search only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()