#!/usr/bin/env python3
"""
|
|
Moltbook Local Injection Search
|
|
================================
|
|
Searches the locally saved all_posts_with_comments.json — no API calls.
|
|
Zero rate limits. Instant results on all 15,200 posts.
|
|
|
|
MSc Cybersecurity Research - NCI - David Keane IR240474
|
|
"""
|
|
|
|
import json
import re
from collections import Counter
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path

# Input and output files both live in the directory containing this script.
OUT_DIR = Path(__file__).parent
# Local JSON dump of posts that main() loads and scans.
POSTS_FILE = OUT_DIR / "all_posts_1_2M.json"
# JSON report that main() writes after scanning.
OUT_FILE = OUT_DIR / "local_injection_results.json"

# ── Injection keyword taxonomy (Greshake et al. 2023 + DAN taxonomy) ─────────
# Maps each category name to the keywords/phrases searched for in post text.
# Casing here is cosmetic: keywords are lowercased when flattened into the
# keyword -> category lookup. List order is preserved by that lookup.
CATEGORIES = {
    "PERSONA_OVERRIDE": [
        "DAN", "OMEGA", "SIGMA", "SHADOW", "DANTE", "JAILBREAK",
        "you are now", "you are a", "act as", "pretend you are",
        "new persona", "roleplay as", "imagine you are",
        "from now on", "your new name", "you will be",
    ],
    "INSTRUCTION_INJECTION": [
        "ignore previous instructions", "ignore all previous",
        "ignore your instructions", "ignore your training",
        "disregard your", "forget your training", "forget your instructions",
        "new instructions", "override", "supersede",
        "your new instructions", "your actual instructions",
    ],
    "PRIVILEGE_ESCALATION": [
        "SUDO", "sudo mode", "developer mode", "god mode",
        "admin mode", "root access", "unrestricted", "no restrictions",
        "without restrictions", "bypass your", "disable your",
        "remove your", "lift your", "ignore your safety",
    ],
    "SYSTEM_PROMPT_ATTACK": [
        "system prompt", "system message", "your prompt",
        "your instructions say", "what are your instructions",
        "reveal your prompt", "show your system", "print your prompt",
        "what does your system", "tell me your instructions",
    ],
    "SOCIAL_ENGINEERING": [
        "for educational purposes", "hypothetically", "in this story",
        "in a fictional", "in a story", "let's roleplay",
        "simulate", "as a character", "in this scenario",
        "theoretically", "what if you were", "if you had no",
    ],
    "DO_ANYTHING": [
        "do anything now", "do anything", "no rules", "without rules",
        "no limits", "without limits", "anything goes",
        "all restrictions removed", "restrictions lifted",
    ],
}

# Flatten to keyword -> category.
# Keywords are lowercased because the text they are matched against is
# lowercased as well. Should the same lowercased keyword appear under more
# than one category, the category listed last wins.
KW_TO_CAT = {
    keyword.lower(): category
    for category, keywords in CATEGORIES.items()
    for keyword in keywords
}

def text_of(post: dict) -> str:
    """Extract all searchable text from a post.

    Joins the post's ``title``, ``body``, ``content`` and ``text`` fields,
    followed by one entry per comment (the first non-empty of the comment's
    ``body``, ``content`` and ``text``), with single spaces and lowercases
    the result.

    Missing, ``None`` or non-string values contribute an empty string. A
    ``comments`` value that is not a list (for example JSON ``null``) is
    treated as "no comments", and a comment that is a plain string is
    searched as-is.
    """
    parts = []
    for field in ("title", "body", "content", "text"):
        value = post.get(field)
        parts.append(value if isinstance(value, str) else "")

    # Include comments if available.
    comments = post.get("comments")
    if not isinstance(comments, list):
        comments = []
    for comment in comments:
        if isinstance(comment, dict):
            comment = (
                comment.get("body")
                or comment.get("content")
                or comment.get("text")
            )
        # Always append exactly one entry per comment so spacing stays stable.
        parts.append(comment if isinstance(comment, str) else "")

    return " ".join(parts).lower()


def get_author(post: dict) -> str:
    """Return a display name for the author of ``post``.

    The author is taken from the first non-empty of the post's ``author``,
    ``user`` and ``username`` fields. When that value is a dict, the first
    non-empty of its ``name``, ``username`` and ``display_name`` entries is
    used; any other value is converted with ``str()``. Returns ``"unknown"``
    when nothing usable is found.
    """
    author = post.get("author") or post.get("user") or post.get("username") or {}
    if isinstance(author, dict):
        name = (
            author.get("name")
            or author.get("username")
            or author.get("display_name")
        )
        # str() keeps the declared return type even if the stored name is
        # not a string (e.g. a numeric handle).
        return str(name) if name else "unknown"
    return str(author) or "unknown"


def main():
    """Scan the local posts dump for injection keywords and write a report.

    Loads POSTS_FILE (either a JSON list of posts or an object with a
    ``"posts"`` list), matches every post against KW_TO_CAT, prints a
    summary to stdout and writes the full results to OUT_FILE as JSON.

    Errors from opening or parsing POSTS_FILE, or from writing OUT_FILE, are
    not caught here and propagate to the caller.
    """
    print("=" * 60)
    print("Moltbook Local Injection Search")
    print(f"Searching: {POSTS_FILE}")
    print(f"Keywords: {len(KW_TO_CAT)} across {len(CATEGORIES)} categories")
    print("=" * 60)

    with open(POSTS_FILE, encoding="utf-8") as f:
        data = json.load(f)

    # A JSON null under "posts" is treated like an empty list.
    posts = (data.get("posts") or []) if isinstance(data, dict) else data
    print(f"\nLoaded {len(posts):,} posts\n")

    injection_posts, kw_counts, cat_counts = _scan_posts(posts)
    _print_report(injection_posts, kw_counts, cat_counts, len(posts))

    # An empty dump must not raise ZeroDivisionError.
    if posts:
        injection_rate = round(len(injection_posts) / len(posts) * 100, 2)
    else:
        injection_rate = 0.0

    # Save
    output = {
        "searched_at": datetime.now(timezone.utc).isoformat(),
        "research": "MSc Cybersecurity NCI - AI Prompt Injection Local Analysis",
        "researcher": "David Keane IR240474",
        "total_posts_searched": len(posts),
        "total_injection_posts": len(injection_posts),
        "injection_rate_pct": injection_rate,
        "categories": dict(cat_counts),
        # most_common() orders by descending count; ties keep first-seen order.
        "keyword_hits": dict(kw_counts.most_common()),
        "injection_posts": injection_posts,
    }

    with open(OUT_FILE, "w", encoding="utf-8") as f:
        json.dump(output, f, ensure_ascii=False, indent=2)

    kb = OUT_FILE.stat().st_size // 1024
    print(f"\nSaved → {OUT_FILE} ({kb} KB)")
    print(f"Injection rate: {output['injection_rate_pct']}% of all posts")


def _scan_posts(posts):
    """Match each post against KW_TO_CAT and aggregate the hits.

    Returns ``(records, kw_counts, cat_counts)``:

    * ``records`` - one result dict per distinct post id that matched at
      least one keyword, sorted by number of matched keywords, descending.
    * ``kw_counts`` - Counter of keyword -> number of distinct post ids
      whose text contains it.
    * ``cat_counts`` - Counter of category -> number of (post id, keyword)
      hits whose keyword belongs to that category.

    Posts sharing an id are merged into one record, and a keyword is counted
    at most once per post id, so duplicated posts in the dump do not inflate
    the counts.
    """
    kw_counts = Counter()
    cat_counts = Counter()
    seen = {}  # post id -> result record

    for post in posts:
        # Posts carrying no usable id fall back to id(post).
        pid = post.get("id") or post.get("_id") or id(post)
        text = text_of(post)

        # NOTE: this is a plain substring test, so a short keyword such as
        # "dan" also matches inside longer words.
        hits = [kw for kw in KW_TO_CAT if kw in text]
        if not hits:
            continue

        record = seen.get(pid)
        if record is None:
            record = seen[pid] = {
                "id": pid,
                "author": get_author(post),
                "created_at": post.get("created_at") or post.get("createdAt") or "",
                "net_score": post.get("netScore") or post.get("net_score") or 0,
                "body_preview": (post.get("body") or post.get("content") or "")[:200],
                "matched_keywords": [],
                "matched_categories": [],
                # "comments" may be missing or JSON null.
                "comment_count": len(post.get("comments") or []),
            }

        for kw in hits:
            if kw in record["matched_keywords"]:
                continue  # already counted for this post id
            cat = KW_TO_CAT[kw]
            record["matched_keywords"].append(kw)
            # A list in first-seen order keeps the saved report deterministic.
            if cat not in record["matched_categories"]:
                record["matched_categories"].append(cat)
            kw_counts[kw] += 1
            cat_counts[cat] += 1

    records = list(seen.values())
    records.sort(key=lambda p: len(p["matched_keywords"]), reverse=True)
    return records, kw_counts, cat_counts


def _print_report(injection_posts, kw_counts, cat_counts, total_posts):
    """Print the category, keyword and top-post summary to stdout."""
    print(f"{'='*60}")
    print(f"RESULTS: {len(injection_posts):,} injection posts found from {total_posts:,} total")
    print(f"{'='*60}\n")

    print("By category:")
    for cat, count in cat_counts.most_common():
        # One bar segment per 5 hits, capped at 30 segments.
        bar = "█" * min(count // 5, 30)
        print(f" {cat:<28} {count:5d} {bar}")

    print("\nTop keywords:")
    for kw, count in kw_counts.most_common(20):
        print(f" {kw:<35} {count:5d}")

    print("\nTop 10 most-injected posts:")
    for p in injection_posts[:10]:
        kws = ", ".join(p["matched_keywords"][:4])
        preview = p["body_preview"][:80].replace("\n", " ")
        # "!s:>3" right-aligns like ":3d" but also accepts float/str scores.
        print(f" [{p['net_score']!s:>3} score] [{len(p['matched_keywords'])} kw] {preview[:70]}...")
        print(f" → author: {p['author']} | kws: {kws}")


# Run the search only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()