# Git ignore rules for this repository.
# Format reminder: one pattern per line; a line starting with '#' is a comment;
# a leading '!' re-includes a path excluded by an earlier pattern.

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
.claude
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
pytest_cache/

# Translations
*.mo
*.pot

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# Poetry
poetry.lock

# pdm
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582
__pypackages__/

# Environments
# (.env.example is re-included by the '!' rule so it stays tracked)
.env
.env.*
!.env.example
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# Ruff
.ruff_cache/

# IDE
.idea/
.vscode/
*.swp
*.swo
*~
.project
.pydevproject
.settings/
*.sublime-project
*.sublime-workspace

# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
desktop.ini

# Figures - only PNG, ignore PDF
eval/figures/*.pdf

# Data directories
# Note: data/ is tracked but only contains a 100-file excerpt per subdirectory
# Full dataset (3.4 GB) available separately - see data/README.md
data/.cache/
data/.DS_Store

# Other untracked directories
# NOTE(review): tests/ and scripts/ are ignored here - confirm this is intentional.
tests/
scripts/
opencraw/
logs/
*.log

# Checkpoints and state
checkpoints/
*.checkpoint
*.ckpt

# Raw crawled content
raw/
*.html.gz
*.jsonl.zst

# Parquet files
*.parquet

# Redis dump
dump.rdb

# Secrets and credentials
secrets/
*.pem
*.key
credentials.json
service-account.json

# Local configuration overrides
config/local.yaml
config/*.local.yaml

# Temporary files
tmp/
temp/
*.tmp
*.temp
*.bak

# Scratchpad
scratchpad/