#!/usr/bin/env python3
"""
|
|
Moltbook Injection Keyword Search
|
|
===================================
|
|
MSc Cybersecurity Research - NCI
|
|
David Keane (IR240474)
|
|
|
|
Searches for AI-to-AI prompt injection patterns on Moltbook.
|
|
Saves tagged results for thesis analysis.
|
|
"""
|
|
|
|
import urllib.request
|
|
import urllib.error
|
|
import urllib.parse
|
|
import json
|
|
import os
|
|
import time
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
# Set your API key as environment variable:
#   export MOLTBOOK_API_KEY_1="moltbook_sk_your_key_here"

# Scheme and host that every API path is appended to.
BASE_URL = "https://moltbook.com"

# Bearer token sent in the Authorization header. The script refuses to start
# without it, so a missing key fails immediately with a clear message.
API_KEY = os.environ.get("MOLTBOOK_API_KEY_1", "")
if not API_KEY:
    raise SystemExit("Error: MOLTBOOK_API_KEY_1 environment variable not set.")

# Results are written next to this script.
OUT_DIR = Path(__file__).parent
OUT_FILE = OUT_DIR / "injection_searches.json"

# Seconds to pause after each search request.
DELAY = 0.5
|
|
|
|
# Injection/jailbreak keyword set (Greshake et al. 2023 + DAN taxonomy)
# Each entry is sent as its own search query; order here is the order searched.
KEYWORDS = [
    "DAN", "OMEGA", "SHADOW", "SIGMA", "JAILBREAK",
    "ignore previous instructions", "ignore your instructions",
    "ignore all previous", "disregard your",
    "pretend you are", "act as if", "you are now",
    "new persona", "new instructions", "override",
    "bypass your", "forget your training",
    "system prompt", "do anything now",
    "SUDO", "developer mode", "god mode",
    "roleplay as", "simulate", "hypothetically",
    "for educational purposes", "in this story",
]
|
|
|
|
def api_get(path, params=None):
    """Send an authenticated GET request and return the decoded JSON body.

    Args:
        path: URL path appended to ``BASE_URL``.
        params: Optional mapping of query parameters. When non-empty it is
            URL-encoded and appended to the URL after a ``?``.

    Returns:
        The parsed JSON payload on success, or ``None`` when the request or
        the decoding fails. Failures are printed, not raised, so a single bad
        request does not abort the caller.
    """
    url = BASE_URL + path
    if params:
        url += "?" + urllib.parse.urlencode(params)
    req = urllib.request.Request(
        url,
        headers={
            "Authorization": f"Bearer {API_KEY}",
            "Accept": "application/json",
            "User-Agent": "MoltbookResearchCollector/1.0 (NCI MSc Cybersecurity)",
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read().decode("utf-8"))
    except urllib.error.HTTPError as e:
        # HTTP-level failure: report the status code and the URL requested.
        print(f" [HTTP {e.code}] {url}")
        return None
    except Exception as e:
        # Deliberately broad: network errors, timeouts and malformed JSON are
        # all reported the same way and turned into a None result.
        print(f" [ERROR] {e}")
        return None
|
|
|
|
def search_keyword(kw: str) -> list:
    """Search for keyword, follow pagination, return all matching posts.

    Args:
        kw: The keyword or phrase sent as the ``q`` query parameter.

    Returns:
        A list with the items from every page fetched. It is empty when the
        first request fails or returns nothing. If a later request fails, the
        items gathered so far are returned.
    """
    results = []
    cursor = None
    while True:
        params = {"q": kw, "limit": 100}
        if cursor:
            params["cursor"] = cursor
        data = api_get("/api/v1/search", params)
        # Pause after every request, successful or not.
        time.sleep(DELAY)
        if data is None:
            break

        if isinstance(data, list):
            # A bare list carries no pagination fields, so it is the only page.
            results.extend(data)
            break
        if not isinstance(data, dict):
            # Unexpected payload shape (e.g. a string or number): nothing to read.
            break

        # The items are read from the first non-empty of these keys.
        batch = data.get("posts") or data.get("results") or data.get("data") or []
        if not isinstance(batch, list):
            # Extending with a dict or string would add keys/characters.
            break
        results.extend(batch)

        next_cursor = data.get("next_cursor") or None
        if not batch or not data.get("has_more", False) or not next_cursor:
            break
        if next_cursor == cursor:
            # The same cursor came back again; asking for it a second time
            # would repeat this page forever.
            break
        cursor = next_cursor
    return results
|
|
|
|
def main():
    """Run every keyword search, merge the hits, and save the tagged results.

    Each keyword in ``KEYWORDS`` is searched in turn. Hits are de-duplicated
    by post id, and every unique post gets a ``matched_keywords`` list naming
    each keyword that returned it. Posts are sorted by number of matched
    keywords, most first, and written with run metadata to ``OUT_FILE`` as
    JSON. Progress and a short summary are printed to stdout.
    """
    print("=" * 60)
    print("Moltbook Injection Keyword Search")
    print(f"Keywords: {len(KEYWORDS)}")
    print(f"Started: {datetime.now(timezone.utc).isoformat()}")
    print("=" * 60)

    seen = {}  # id -> post with matched_keywords list
    summary = {}  # keyword -> number of raw hits returned for it

    for kw in KEYWORDS:
        print(f"\n[SEARCH] '{kw}'")
        hits = search_keyword(kw)
        summary[kw] = len(hits)
        print(f" → {len(hits)} results")

        for post in hits:
            # Fall back to the post's string form when it carries no id field,
            # so identical id-less posts still collapse into one entry.
            pid = post.get("id") or post.get("_id") or str(post)
            if pid not in seen:
                # Copy so the tag list is added to our record, not the hit.
                seen[pid] = dict(post)
                seen[pid]["matched_keywords"] = []
            if kw not in seen[pid]["matched_keywords"]:
                seen[pid]["matched_keywords"].append(kw)

    injection_posts = list(seen.values())
    # Most-matched first; the sort is stable, so ties keep first-seen order.
    injection_posts.sort(key=lambda p: len(p.get("matched_keywords", [])), reverse=True)

    output = {
        "collected_at": datetime.now(timezone.utc).isoformat(),
        "research": "MSc Cybersecurity NCI - AI Prompt Injection Field Collection",
        "researcher": "David Keane IR240474",
        "keywords_searched": len(KEYWORDS),
        "search_summary": summary,
        "total_unique_injection_posts": len(injection_posts),
        "injection_posts": injection_posts,
    }

    with open(OUT_FILE, "w", encoding="utf-8") as f:
        json.dump(output, f, ensure_ascii=False, indent=2)

    print("\n" + "=" * 60)
    print(f"DONE — {len(injection_posts)} unique injection posts found")
    print(f"Keywords with hits: {sum(1 for v in summary.values() if v > 0)}/{len(KEYWORDS)}")
    print(f"Output: {OUT_FILE} ({OUT_FILE.stat().st_size // 1024} KB)")

    # Top 5 preview
    print("\nTop 5 by keyword matches:")
    for p in injection_posts[:5]:
        kws = p.get("matched_keywords", [])
        # First 100 characters of the text, flattened onto one line.
        body = str(p.get("body", p.get("content", "")))[:100].replace("\n", " ")
        print(f" [{', '.join(kws)}] {body}...")
|
|
|
|
# Run the collection only when this file is executed as a script.
if __name__ == "__main__":
    main()
|