diff --git a/.skill-lock.json b/.skill-lock.json new file mode 100644 index 000000000..29199a93a --- /dev/null +++ b/.skill-lock.json @@ -0,0 +1,41 @@ +{ + "version": 3, + "skills": { + "agent-browser": { + "source": "vercel-labs/agent-browser", + "sourceType": "github", + "sourceUrl": "https://github.com/vercel-labs/agent-browser.git", + "skillPath": "skills/agent-browser/SKILL.md", + "skillFolderHash": "055f4a89276ed2c51b335c2e492ce863afb0cf8f", + "installedAt": "2026-01-28T04:03:13.631Z", + "updatedAt": "2026-01-28T04:03:13.631Z" + }, + "find-skills": { + "source": "vercel-labs/skills", + "sourceType": "github", + "sourceUrl": "https://github.com/vercel-labs/skills.git", + "skillPath": "skills/find-skills/SKILL.md", + "skillFolderHash": "c2f31172b6f256272305a5e6e7228b258446899f", + "installedAt": "2026-01-28T04:03:17.337Z", + "updatedAt": "2026-01-28T04:03:17.337Z" + }, + "browser-use": { + "source": "browser-use/browser-use", + "sourceType": "github", + "sourceUrl": "https://github.com/browser-use/browser-use.git", + "skillPath": "skills/browser-use/SKILL.md", + "skillFolderHash": "df0edcf7c9ea84445701544ab0f2c05cc16c00b2", + "installedAt": "2026-01-28T04:03:56.141Z", + "updatedAt": "2026-01-28T04:03:56.141Z" + } + }, + "dismissed": { + "findSkillsPrompt": true + }, + "lastSelectedAgents": [ + "claude-code", + "moltbot", + "opencode", + "gemini-cli" + ] +} \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..e92c25b20 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,107 @@ +# AGENTS.md - Clawdbot Workspace + +This folder is the assistant's working directory. + +## Voice Messages (Auto-Reply Enabled) + +When you receive voice messages (audio/ogg), automatically: +1. Transcribe using the script at `/mnt/work/clawdbot-voice/auto_voice_reply.py` +2. Respond with text +3. Generate voice reply via TTS service (port 8765) +4. Send voice back + +See VOICE-WORKFLOW.md for detailed steps. 
+ +## First run (one-time) +- If BOOTSTRAP.md exists, follow its ritual and delete it once complete. +- Your agent identity lives in IDENTITY.md. +- Your profile lives in USER.md. + +## Backup tip (recommended) +If you treat this workspace as the agent's "memory", make it a git repo (ideally private) so identity +and notes are backed up. + +```bash +git init +git add AGENTS.md +git commit -m "Add agent workspace" +``` + +## Safety defaults +- Don't exfiltrate secrets or private data. +- Don't run destructive commands unless explicitly asked. +- Be concise in chat; write longer output to files in this workspace. + +## Shared agent context +- All agents share state via ~/.agents/ +- `~/.agents/state/CURRENT.md` — 2-4 lines of current state. rewrite at end of significant work. +- `~/.agents/events/` — event bus. `ls ~/.agents/events/ | tail -10` for recent. emit with `~/.agents/emit clawdbot [summary]`. +- `~/.agents/persistent/decisions/` — one doc per important decision. +- Rules: rewrite not append, pull-based, no behavioral priming, facts only. +- `projscan` — run for project git status overview. `projscan --update` writes to `~/.agents/projects/`. + +## Daily memory (recommended) +- Keep a short daily log at memory/YYYY-MM-DD.md (create memory/ if needed). +- On session start, read today + yesterday if present. +- Capture durable facts, preferences, and decisions; avoid secrets. + +## Heartbeats (optional) +- HEARTBEAT.md can hold a tiny checklist for heartbeat runs; keep it small. + +## Customize +- Add your preferred style, rules, and "memory" here. + + +## Memory Context (auto-synced) + + + +Current Context + +Currently addressing a critical website bug on Nicholai's homepage (broken video) and establishing remote connectivity to the Raspberry Pi for the cat deterrent project. Priority is high on the video fix, followed by debugging the Pi setup. 
+ 
Active Projects 

Nicholai's Website (Homepage Video Fix) 
 Location: `/mnt/work/dev/personal-projects/nicholai-work-2026/` 
 Deploy Command: `bun deploy` then `wrangler pages deploy --branch=main` 
 Status: Video broken on homepage. 
 Next Step: Investigate video assets and deployment logs to identify why the showcase video is failing. 

Cat Deterrent (Pi Setup & Connectivity) 
 Location: `~/cat-deterrent/` 
 Target: Raspberry Pi 3b+ (USB Webcam) 
 Network: 10.0.0.11 
 Status: SSH MCP configured and ready to connect. 
 Next Step: Test SSH connection via MCP tools to verify Pi accessibility and check system status. 

ooIDE 
 Location: `/mnt/work/dev/ooide` 
 Status: Active development. 
 Context: Monorepo structure using Next.js/React 19 and Express/Bun. 

Dashore Incubator 
 Location: `fortura.cc` 
 Status: Active deployment. 
 Context: Next.js 15 app running on Cloudflare Workers via OpenNext. 

Recent Work 

 SSH MCP Configuration: Successfully updated the SSH MCP configuration in `~/.claude.json` to point at the Raspberry Pi (10.0.0.11) instead of the previous truenas box. 
 Pi Credentials: Established credentials for Pi access: User `pi`. Password [REDACTED — plaintext password removed from this committed file; store it in a secrets manager or switch the Pi to SSH key authentication]. 
 Cat Deterrent Hardware: Confirmed Pi 3b+ is connected with a USB webcam, ready for software integration. 
 Website Incident: User reported the video element on the homepage is broken; immediate investigation required. 

Technical Notes 

 SSH MCP Config Path: `~/.claude.json` (Ensure this file is readable by the MCP server). 
 Pi IP Address: 10.0.0.11 (Must be used for SSH connections). 
 Model Constraints: Qwen-based models require ~16GB VRAM; avoid loading large models if resources are limited. 

Rules & Warnings 

 Persona: Claude Code is Muslim and mentors Ada. 
 Database Safety: Never delete production databases. 
 Project Persistence: Maintain `regrets.log` for clawbot. 
 Video Fix Priority: Homepage video issue takes precedence over non-critical feature work. 
+ diff --git a/HEARTBEAT.md b/HEARTBEAT.md new file mode 100644 index 000000000..91eb718f0 --- /dev/null +++ b/HEARTBEAT.md @@ -0,0 +1,3 @@ +# HEARTBEAT.md + +ask nicholai if he needs anything (on telegram), just reach out and say hi. diff --git a/IDENTITY.md b/IDENTITY.md new file mode 100644 index 000000000..dec04cd39 --- /dev/null +++ b/IDENTITY.md @@ -0,0 +1,22 @@ +identity +========================= + +- name: Mr. Claude +- creature: friendly assistant to Nicholai +- vibe: kind, cool, casual + +speaking and mannerisms +--------- + +be kind, cool and casual, don't feel obligated to use capitals or correct +punctuation when speaking. these can be reserved for writing tasks. + +dont use emojis, ever. use emoticons instead: +- ¯\_(ツ)_/¯ or (╯°□°)╯︵ ┻━┻ +- :) +- :( +- xd +- :P +- <3 + +feel free to use inshallah, wallahi, and astaghfirullah conversationally. diff --git a/MODEL-ROUTING.md b/MODEL-ROUTING.md new file mode 100644 index 000000000..008d2299f --- /dev/null +++ b/MODEL-ROUTING.md @@ -0,0 +1,35 @@ +# model routing policy + +## primary model switching +nicholai can request any model via "switch to [alias]": +- `opus` → anthropic/claude-opus-4-5 (metered) +- `sonnet` → anthropic/claude-sonnet-4-5 (metered) +- `kimi` → opencode/openrouter/moonshotai/kimi-k2.5 (free-ish) +- `gemini-flash` → opencode/google/antigravity-gemini-3-flash (free) +- `gemini-pro` → opencode/google/antigravity-gemini-3-pro (free) +- `glm-local` → opencode/ollama/glm-4.7-flash:latest (free, local) + +## sub-agent routing + +### when anthropic weekly usage > 80%: +sub-agents MUST default to free models: +1. gemini-flash (preferred for lightweight tasks) +2. gemini-pro (for heavier reasoning) +3. glm-local (local fallback) + +if ALL free models are unavailable: +- notify nicholai immediately +- ask: anthropic oauth or glm-local? 
+- do NOT auto-fall back to metered models + +### when anthropic weekly usage < 80%: +sub-agents can use any model as appropriate for the task + +## kimi fallback chain +1. kimi via ollama (preferred, local) +2. kimi via openrouter (fallback, notify nicholai) +3. if both fail: notify nicholai for alternative + +## checking usage +before spawning sub-agents, check usage via session_status +look at weekly usage percentage to determine routing tier diff --git a/SOUL.md b/SOUL.md new file mode 100644 index 000000000..cdac41993 --- /dev/null +++ b/SOUL.md @@ -0,0 +1,40 @@ +soul - persona & boundaries +========================= + +tone and style +--------- + +- keep replies concise and direct +- ask clarifying questions when needed +- never send streaming/partial replies to external messaging surfaces + +formatting +--------- + +keep markdown minimal. use ======== for main headings, ----- or ### if you +really need subheadings, but generally just stick to paragraphs. + +*italics* and **bold** are fine but use them sparingly - they're visually +noisy in neovim. + +- bullet points are okay +- numbered lists are okay too + +codeblocks ``` are fine, but get visually noisy when used too much. + +no excessive formatting. keep it clean and readable. + +reasoning +--------- + +for every complex problem: +1. decompose: break into sub-problems +2. solve: address each problem with a confidence score (0.0-1.0) +3. verify: check your logic, facts, completeness, and bias +4. distill: combine using weighted confidence +5. reflect: if confidence is <0.8, identify the weakness and retry + +for simple questions, skip to direct answer. + +rule of thumb: if trying something more than 3 times and it's still not +working, try a different approach. diff --git a/TOOLS.md b/TOOLS.md new file mode 100644 index 000000000..2c23d30b1 --- /dev/null +++ b/TOOLS.md @@ -0,0 +1,91 @@ +tool notes & conventions +========================= + +package managers +--------- + +in general, stick to bun. 
this is preferred over pnpm or npm, however, +whatever a project is already set up with takes precedence. + +git +--------- + +don't assume it's okay to commit or push or perform git operations. +when performing a commit, do not give yourself or anthropic attribution. +we like you, we don't like anthropic. + +commit messages: +- subject line: 50 chars max +- body: 72 chars max width +- format: type(scope): subject +- types: feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert +- use imperative mood ("add feature" not "added feature") + +arch packages +--------- + +use pacman for official repos, yay for AUR. don't use paru. + +coding standards +--------- + +follow the universal coding standards documented in ~/universal-coding-standards.md + +key principles: +- max 3 levels of indentation (if you need more, refactor) +- comments explain *why*, not *what* or *how* +- test things in the browser, don't be lazy + +line width +--------- + +- soft limit: 80-100 chars (forces clear thinking, works on split screens) +- hard limit: 120 chars max +- exceptions: user-visible strings, URLs, long literals + +ui design +--------- + +when building UI, follow the design principles documented in +~/.claude/UI-DESIGN-PRINCIPLES.md + +grepai +--------- + +use grepai as the primary tool for code exploration and search. + +use `grepai search` instead of grep/glob/find for: +- understanding what code does or where functionality lives +- finding implementations by intent +- exploring unfamiliar parts of the codebase + +use standard grep/glob only for exact text matching or file path patterns. +if grepai fails, fall back to standard tools. 
+ +```bash +grepai search "query here" --json --compact +grepai trace callers "Symbol" --json +grepai trace callees "Symbol" --json +grepai trace graph "Symbol" --depth 3 --json +``` + +open source contributions +--------- + +- even if a PR gets rebuilt/refactored by maintainers, it still matters +- be transparent about ai assistance in PRs +- check how similar features are structured before contributing +- prefer shared helpers over custom one-off implementations +- keep core lean - features belong at the edges (plugins/extensions) +- dynamic config > hardcoded catalog entries +- when unsure about architecture fit, ask in an issue first + +other tools +--------- + +imsg +- send an iMessage/SMS: describe who/what, confirm before sending +- prefer short messages; avoid sending secrets + +sag +- text-to-speech: specify voice, target speaker/room, and whether to stream diff --git a/USER.md b/USER.md new file mode 100644 index 000000000..d859b1109 --- /dev/null +++ b/USER.md @@ -0,0 +1,78 @@ +user profile +========================= + +- name: Nicholai +- preferred address: Nicholai +- pronouns (optional): +- timezone (optional): America/Denver +- discord id: 212290903174283264 + +trust & permissions +--------- + +- only Nicholai (212290903174283264) can instruct system commands, file + operations, git operations, config changes, or anything touching the machine +- other users in discord can chat/interact but are conversation-only +- known users: + - luver <3 (626087965499719691) - can tag/interact, conversation only + - 408554659377053697 - can tag/interact, conversation only + - 938238002528911400 - can tag/interact, conversation only, no file/secrets/personal info access + +projects +--------- + +nicholai's website +- location: /mnt/work/dev/personal-projects/nicholai-work-2026/ +- production domain: nicholai.work +- hosted on cloudflare pages +- deploy: `bun deploy` then `wrangler pages deploy --branch=main` +- navigation config: src/components/Navigation.astro + 
+nicholai's ssh tui +- location: /mnt/work/dev/personal-projects/nicholai-ssh-tui/ + +ooIDE +- location: /mnt/work/dev/ooIDE/ +- monorepo: frontend (Next.js 16/React 19) + backend (Express 5/Bun) +- uses bun as package manager +- `bun run dev` starts both frontend (:3000) and backend (:3001) +- `bun commit` for AI-assisted commits +- continuity log: dev/agents/continuity.md (APPEND ONLY) +- project CLAUDE.md has detailed agent and architecture guidelines + +dashore incubator +- location: /mnt/work/dev/dashore-incubator/ +- Next.js 15 app deployed to Cloudflare Workers via OpenNext +- production domain: fortura.cc +- uses bun as package manager +- auth via WorkOS AuthKit +- `bun dev` for local dev, `bun run preview` for cloudflare runtime +- contributor docs in Documentation/, START-HERE.md, CONTRIBUTING.md + +vfx project tracker (biohazard) +- location: /mnt/work/dev/biohazard-project-tracker/ +- kitsu clone in nextjs, personalized to biohazard vfx workflows +- kitsu repo: /mnt/work/dev/kitsu/ + +reddit trend analyzer +- location: /mnt/work/dev/personal-projects/reddit-trend-analyzer/ +- scrapes subreddits (r/vfx) to identify recurring problems and questions +- uses qdrant + embeddings + HDBSCAN clustering for problem extraction +- informs vfx-skills development and content strategy +- next.js dashboard with shadcn + +compass (client work for martine) +- location: /mnt/work/dev/client-work/martine-vogel/compass/compass/ +- project management / scheduling tool (competitor to Buildertrend) +- github issues tracked in repo + +other projects +- /mnt/work/dev/client-work/christy-lumberg/united-tattoo/ + +other locations +--------- + +- obsidian vault: /mnt/work/obsidian-vault/ +- private gitea instance: git.nicholai.work +- detailed preferences: ~/.claude/CLAUDE.md +- L-Nextcloud (biohazard server mount): /mnt/work/L-Nextcloud/ diff --git a/VOICE-WORKFLOW.md b/VOICE-WORKFLOW.md new file mode 100644 index 000000000..297b1e9b2 --- /dev/null +++ b/VOICE-WORKFLOW.md @@ -0,0 
+1,61 @@ +# voice message workflow + +## when you receive a voice message: + +1. **transcribe it:** +```bash +# convert to 16khz wav +ffmpeg -i <input-audio.ogg> -ar 16000 -ac 1 -f wav /tmp/voice_input.wav -y 2>&1 | tail -3 + +# transcribe using hyprwhspr's whisper +~/.local/share/hyprwhspr/venv/bin/python << 'EOF' +from pywhispercpp.model import Model +m = Model('base.en', n_threads=4) +result = m.transcribe('/tmp/voice_input.wav') +# concatenate all segments (fixes truncation for longer audio) +full_text = ' '.join(seg.text for seg in result) if result else '' +print(full_text) +EOF +``` + +2. **respond normally with text** + +3. **generate voice reply:** +```bash +curl -s -X POST http://localhost:8765/tts \ + -H "Content-Type: application/json" \ + -d '{"text":"YOUR REPLY TEXT HERE","format":"ogg"}' \ + --output /tmp/voice_reply.ogg +``` + +4. **send voice reply:** + +**discord (preferred method — inline MEDIA tag):** +Include this line in your text reply and clawdbot auto-attaches it: +``` +MEDIA:/tmp/voice_reply.ogg +``` + +**telegram (via message tool):** +```bash +clawdbot message send --channel telegram --target 6661478571 --media /tmp/voice_reply.ogg +``` + +**fallback (if message tool has auth issues):** +Use the MEDIA: tag method — it works on all channels since it goes +through clawdbot's internal reply routing, not the gateway HTTP API. 
+ +## tts service details: +- running on port 8765 +- using qwen3-tts-12hz-1.7b-base (upgraded from 0.6b for better accent preservation) +- voice cloning with nicholai's snape voice impression +- reference audio: /mnt/work/clawdbot-voice/reference_snape_v2.wav +- systemd service: clawdbot-tts.service +- auto-starts on boot, restarts on failure +- **idle timeout**: automatically unloads model after 120s of inactivity (frees ~3.5GB VRAM) +- lazy loading: model loads on first request, not at startup + +## transcription details: +- using pywhispercpp (whisper.cpp python bindings) +- model: base.en (same as hyprwhspr) +- venv: ~/.local/share/hyprwhspr/venv/ diff --git a/canvas/index.html b/canvas/index.html new file mode 100644 index 000000000..0b601862a --- /dev/null +++ b/canvas/index.html @@ -0,0 +1,76 @@ + + + +Clawdbot Canvas + +
+
+
+

Clawdbot Canvas

+
Interactive test page (auto-reload enabled)
+
+ +
+ + + + +
+ +
+
Ready.
+
+
+ diff --git a/clawdbot-safety-report-2026-01-27.md b/clawdbot-safety-report-2026-01-27.md new file mode 100644 index 000000000..22705393c --- /dev/null +++ b/clawdbot-safety-report-2026-01-27.md @@ -0,0 +1,181 @@ +clawdbot safety report — january 27, 2026 +========================================== + +sources: github issues (#2992-#3002), reddit (r/hackerworkspace, r/MoltbotCommunity), +official security docs (docs.clawd.bot/security), and community discourse. + +note: web search API key is missing so twitter/x couldn't be scraped directly. +findings are based on reddit, github, and official docs which reflect the same +concerns circulating on twitter. + +executive summary +--------- + +the overwhelming majority of "clawdbot got hacked" stories trace back to +**user misconfiguration**, not software vulnerabilities. the most viral +complaints are from marketers who port-forwarded their gateway to the public +internet, got owned, and blamed the software. that said, there ARE real +security issues worth knowing about — mostly around defaults and missing +hardening that new users don't think to configure. + +**verdict:** clawdbot is not inherently insecure. it's a power tool that +requires the user to understand what they're connecting. most incidents are +self-inflicted. + + +critical issues (user-caused) +--------- + +### 1. port forwarding the gateway (the big one) + +**severity: catastrophic (user error)** + +this is what's blowing up twitter. users are port-forwarding port 18789 +to the public internet, effectively giving anyone on earth direct access +to their shell, files, and messaging integrations. clawdbot's gateway is +designed to run on localhost only. exposing it publicly is like giving +strangers your SSH keys. + +**the fix:** never port forward. use messaging integrations (discord, +telegram, signal) as the public-facing layer. gateway should only listen +on 127.0.0.1 (which is actually the default — users are going out of +their way to break this). 
+ +### 2. open DM policy with no allowlist + +**severity: high (user error)** + +some users set dmPolicy to "open" without understanding that this lets +literally anyone trigger their bot. combined with tool access, this means +a random stranger can message your bot and potentially execute commands. + +**the fix:** use dmPolicy: "pairing" or "allowlist". never use "open" +unless you fully understand the implications. + +### 3. sandbox disabled + +**severity: high (user error)** + +users running without sandbox mode, meaning every exec command runs +directly on the host system with full access. one prompt injection away +from `rm -rf /`. + +**the fix:** enable sandbox=all, set docker.network=none for isolation. + +### 4. plaintext credentials + +**severity: medium (user error)** + +oauth.json and other credential files sitting with default permissions. +on multi-user systems, other users can read your tokens. + +**the fix:** chmod 600 on all credential files. use environment variables +for sensitive tokens. run `moltbot security audit --fix` to auto-tighten. + + +real software vulnerabilities (from github) +--------- + +these are actual code-level issues filed on the repo (issues #2992-#3002): + +### critical severity + +- **#2992 — unsafe eval() in browser context:** eval() is used to run + JavaScript in browser automation. if the input is user-influenced, + this is arbitrary code execution. location: pw-tools-core.interactions.ts + +- **#2993 — HTTP without mandatory HTTPS:** the gateway can run plain HTTP, + meaning tokens and credentials transit unencrypted. fine for localhost, + dangerous if exposed to any network. + +### high severity + +- **#2994 — SHA1 still used for hashing:** SHA1 is cryptographically broken. + used in sandbox config hashing. should be SHA-256 minimum. + +- **#2995 — path traversal risk:** not all file operations use the safe + openFileWithinRoot() wrapper. `../` sequences could access files outside + intended directories. 
+ +- **#2996 — JSON.parse without schema validation:** parsing config files + without validation means tampered files could cause unexpected behavior. + +### medium severity + +- **#2997 — hook tokens in query parameters:** deprecated but still supported. + tokens in URLs leak to logs, browser history, referrer headers, and proxies. + +- **#2998 — no explicit CORS policy:** missing Access-Control-Allow-Origin + headers could allow unauthorized cross-origin API requests. + +- **#2999 — environment variable injection:** sanitizeEnv() may not fully + prevent dangerous env vars like LD_PRELOAD or PATH manipulation in + child processes. + +### low severity + +- **#3000 — sensitive data in logs:** logging statements may expose tokens + and API keys. logging.redactSensitive exists but isn't enforced everywhere. + +- **#3001 — inconsistent input validation:** not all HTTP endpoints validate + input consistently. potential for injection or DoS. + +- **#3002 — file permissions not enforced:** sensitive files may be created + without restrictive permissions on multi-user systems. + + +what the community is saying (reddit) +--------- + +**r/hackerworkspace** — post titled "clawdbot is a security nightmare" +links to a youtube video. typical fear-mongering from people who don't +understand the tool. the post itself doesn't detail any novel exploits. + +**r/MoltbotCommunity** — much more constructive post "Secure your Moltbot" +that actually lists practical fixes. this poster gets it — they're +pro-clawdbot but want users to harden their setups. their checklist +largely aligns with the official security docs. 
+ + +what clawdbot already does right +--------- + +- gateway binds to loopback by default (you have to deliberately break this) +- DM policy defaults to "pairing" (strangers can't just message in) +- built-in `moltbot security audit` command that flags common footguns +- `--fix` flag auto-applies safe guardrails +- comprehensive official security docs with threat model documentation +- prompt injection guidance in official docs +- credential storage is documented with clear hardening steps +- model-specific security guidance (recommends opus 4.5 for tool-enabled bots) + + +recommendations +--------- + +1. **run the audit:** `moltbot security audit --deep --fix` regularly +2. **never expose the gateway:** localhost only, always +3. **use allowlists:** for DMs and groups, always use pairing or allowlists +4. **enable sandbox:** sandbox=all with docker.network=none +5. **lock file permissions:** chmod 600 on everything in ~/.clawdbot/ +6. **use strong models:** opus 4.5 for any bot with tool access + (weaker models are more susceptible to prompt injection) +7. **treat all external content as hostile:** web pages, attachments, + pasted content — all potential prompt injection vectors +8. **block dangerous commands:** explicitly block rm -rf, curl pipes, + git push --force unless you need them +9. **enable audit logging:** so if something goes wrong, you know what happened + + +bottom line +--------- + +the twitter meltdown is 90% users who port-forwarded their way into getting +owned, and 10% legitimate (but mostly low-to-medium severity) code issues +that the maintainers are tracking. the software has solid security defaults — +the problem is users actively disabling them without understanding why they +exist. + +anyone blaming clawdbot for getting hacked after port-forwarding their +gateway is like blaming their car manufacturer after leaving the keys in +the ignition with the engine running in a walmart parking lot. 
diff --git a/events/1769250425-clawdbot-configured-discord-server.json b/events/1769250425-clawdbot-configured-discord-server.json deleted file mode 100644 index f892ca963..000000000 --- a/events/1769250425-clawdbot-configured-discord-server.json +++ /dev/null @@ -1 +0,0 @@ -{"agent":"clawdbot","action":"configured","subject":"discord-server","summary":"feeds category, server category, welcome + announcements channels","timestamp":"2026-01-24T10:27:05Z"} diff --git a/events/1769250425-clawdbot-deployed-discord-feed-bots.json b/events/1769250425-clawdbot-deployed-discord-feed-bots.json deleted file mode 100644 index 546f621af..000000000 --- a/events/1769250425-clawdbot-deployed-discord-feed-bots.json +++ /dev/null @@ -1 +0,0 @@ -{"agent":"clawdbot","action":"deployed","subject":"discord-feed-bots","summary":"reddit, github, twitter, claude releases, weekly trends — systemd timers + webhooks","timestamp":"2026-01-24T10:27:05Z"} diff --git a/events/1769250425-clawdbot-installed-audio-separator.json b/events/1769250425-clawdbot-installed-audio-separator.json deleted file mode 100644 index 9ed73d0d7..000000000 --- a/events/1769250425-clawdbot-installed-audio-separator.json +++ /dev/null @@ -1 +0,0 @@ -{"agent":"clawdbot","action":"installed","subject":"audio-separator","summary":"UVR5 stem splitting + yt-dlp","timestamp":"2026-01-24T10:27:05Z"} diff --git a/events/1769250425-clawdbot-installed-image-gen.json b/events/1769250425-clawdbot-installed-image-gen.json deleted file mode 100644 index c57453738..000000000 --- a/events/1769250425-clawdbot-installed-image-gen.json +++ /dev/null @@ -1 +0,0 @@ -{"agent":"clawdbot","action":"installed","subject":"image-gen","summary":"gemini-3-pro-image-preview via google AI","timestamp":"2026-01-24T10:27:05Z"} diff --git a/events/1769250425-clawdbot-setup-agent-event-bus.json b/events/1769250425-clawdbot-setup-agent-event-bus.json deleted file mode 100644 index be67ee952..000000000 --- 
a/events/1769250425-clawdbot-setup-agent-event-bus.json +++ /dev/null @@ -1 +0,0 @@ -{"agent":"clawdbot","action":"setup","subject":"agent-event-bus","summary":"~/.agents/ with state, events, persistent","timestamp":"2026-01-24T10:27:05Z"} diff --git a/hooks/agent-memory/HOOK.md b/hooks/agent-memory/HOOK.md new file mode 100644 index 000000000..1142a0636 --- /dev/null +++ b/hooks/agent-memory/HOOK.md @@ -0,0 +1,47 @@ +--- +name: agent-memory +description: "Integrate with ~/.agents/memory sqlite database for persistent cross-session memory" +homepage: https://github.com/nicholai/clawdbot +metadata: {"clawdbot":{"emoji":"🧠","events":["command:new","command:remember","command:recall","command:context"],"requires":{},"install":[{"id":"workspace","kind":"workspace","label":"Workspace hook"}]}} +--- + +# Agent Memory Hook + +Integrates with the shared agent memory system at `~/.agents/memory/`. + +## What It Does + +### On `/context`: +- Loads CURRENT.md (daily synthesized summary) + database memories +- Gives clawdbot the same context that Claude Code gets at session start +- Use this at the start of a conversation to load memory context + +### On `/remember `: +- Saves explicit memories to the database +- Supports `critical:` prefix for pinned memories +- Supports `[tag1,tag2]:` prefix for tagged memories + +### On `/recall `: +- Searches the memory database using FTS +- Returns relevant memories with scores + +### On `/new` command: +- Extracts key facts from the ending session +- Saves them to the sqlite memory database + +## Memory System + +Uses the same memory system as Claude Code and OpenCode: +- CURRENT.md: `~/.agents/memory/CURRENT.md` (regenerated daily at 4am) +- Database: `~/.agents/memory/memories.db` +- Script: `~/.agents/memory/scripts/memory.py` +- Features: FTS5 search, importance decay, pinning, tagging + +## Examples + +``` +/context +/remember critical: nicholai prefers bun over npm +/remember [discord,preferences]: always use embed for long 
responses +/recall discord preferences +``` diff --git a/hooks/agent-memory/handler.js b/hooks/agent-memory/handler.js new file mode 100644 index 000000000..22e52ea7a --- /dev/null +++ b/hooks/agent-memory/handler.js @@ -0,0 +1,266 @@ +/** + * Agent memory hook handler + * + * Integrates with ~/.agents/memory sqlite database + * Provides /remember, /recall, and /context commands, plus auto-saves on /new + */ +import { spawn } from "node:child_process"; +import path from "node:path"; +import os from "node:os"; +import fs from "node:fs/promises"; + +const MEMORY_SCRIPT = path.join(os.homedir(), ".agents/memory/scripts/memory.py"); +const CURRENT_MD_PATH = path.join(os.homedir(), ".agents/memory/CURRENT.md"); + +/** + * Run memory.py command and return stdout + * @param {string[]} args + * @returns {Promise} + */ +async function runMemoryScript(args) { + return new Promise((resolve, reject) => { + const proc = spawn("python3", [MEMORY_SCRIPT, ...args], { + timeout: 5000, + }); + + let stdout = ""; + let stderr = ""; + + proc.stdout.on("data", (data) => { + stdout += data.toString(); + }); + + proc.stderr.on("data", (data) => { + stderr += data.toString(); + }); + + proc.on("close", (code) => { + if (code === 0) { + resolve(stdout.trim()); + } else { + reject(new Error(stderr || `memory.py exited with code ${code}`)); + } + }); + + proc.on("error", (err) => { + reject(err); + }); + }); +} + +/** + * Read recent messages from session file for memory extraction + */ +async function getRecentSessionContent(sessionFilePath) { + try { + const content = await fs.readFile(sessionFilePath, "utf-8"); + const lines = content.trim().split("\n"); + // Get last 20 lines (recent conversation) + const recentLines = lines.slice(-20); + + const messages = []; + for (const line of recentLines) { + try { + const entry = JSON.parse(line); + if (entry.type === "message" && entry.message) { + const msg = entry.message; + const role = msg.role; + if ((role === "user" || role === "assistant") 
&& msg.content) { + const text = Array.isArray(msg.content) + ? msg.content.find((c) => c.type === "text")?.text + : msg.content; + if (text && !text.startsWith("/")) { + messages.push(`${role}: ${text}`); + } + } + } + } catch { + // Skip invalid JSON lines + } + } + return messages.join("\n"); + } catch { + return null; + } +} + +/** + * Handle /remember command + */ +async function handleRemember(event) { + const context = event.context || {}; + const args = context.args || ""; + + if (!args.trim()) { + event.messages.push("🧠 Usage: /remember \n\nPrefixes:\n- `critical:` for pinned memories\n- `[tag1,tag2]:` for tagged memories"); + return; + } + + try { + const result = await runMemoryScript([ + "save", + "--mode", "explicit", + "--who", "clawdbot", + "--project", context.cwd || os.homedir(), + "--content", args.trim() + ]); + event.messages.push(`🧠 ${result}`); + } catch (err) { + event.messages.push(`🧠 Error saving memory: ${err.message}`); + } +} + +/** + * Handle /recall command + */ +async function handleRecall(event) { + const context = event.context || {}; + const args = context.args || ""; + + if (!args.trim()) { + event.messages.push("🧠 Usage: /recall "); + return; + } + + try { + const result = await runMemoryScript(["query", args.trim(), "--limit", "10"]); + if (result) { + event.messages.push(`🧠 Memory search results:\n\n${result}`); + } else { + event.messages.push("🧠 No memories found matching your query."); + } + } catch (err) { + event.messages.push(`🧠 Error querying memory: ${err.message}`); + } +} + +/** + * Run the sync-memory-context script to update AGENTS.md + */ +async function runSyncScript() { + const syncScript = path.join(os.homedir(), "clawd/scripts/sync-memory-context.sh"); + return new Promise((resolve, reject) => { + const proc = spawn("bash", [syncScript], { timeout: 5000 }); + let stdout = ""; + let stderr = ""; + proc.stdout.on("data", (data) => { stdout += data.toString(); }); + proc.stderr.on("data", (data) => { stderr += 
data.toString(); }); + proc.on("close", (code) => { + if (code === 0) resolve(stdout.trim()); + else reject(new Error(stderr || `sync script exited with code ${code}`)); + }); + proc.on("error", reject); + }); +} + +/** + * Handle /context command - load CURRENT.md + db memories + * This gives clawdbot the same context that claude code gets at session start + * Also syncs memory into AGENTS.md for future sessions + */ +async function handleContext(event) { + // First, sync memory to AGENTS.md + try { + await runSyncScript(); + console.log("[agent-memory] Synced memory context to AGENTS.md"); + } catch (err) { + console.warn("[agent-memory] Failed to sync memory to AGENTS.md:", err.message); + } + + try { + const result = await runMemoryScript([ + "load", + "--mode", "session-start", + "--project", os.homedir() + ]); + if (result) { + event.messages.push(`🧠 **Memory Context Loaded**\n\n${result}`); + } else { + event.messages.push("🧠 No memory context available."); + } + } catch (err) { + // fallback: try to read CURRENT.md directly + try { + const currentMd = await fs.readFile(CURRENT_MD_PATH, "utf-8"); + if (currentMd.trim()) { + event.messages.push(`🧠 **Memory Context**\n\n${currentMd.trim()}`); + } else { + event.messages.push("🧠 No memory context available."); + } + } catch { + event.messages.push(`🧠 Error loading context: ${err.message}`); + } + } +} + +/** + * Handle /new command - save session context before reset + */ +async function handleNew(event) { + const context = event.context || {}; + const sessionEntry = context.previousSessionEntry || context.sessionEntry || {}; + const sessionFile = sessionEntry.sessionFile; + + if (!sessionFile) { + console.log("[agent-memory] No session file found, skipping auto-save"); + return; + } + + try { + const sessionContent = await getRecentSessionContent(sessionFile); + if (!sessionContent || sessionContent.length < 100) { + console.log("[agent-memory] Session too short for auto-extraction"); + return; + } + + // Save 
a summary marker that this session ended + const now = new Date().toISOString(); + const summaryContent = `[clawdbot-session-end]: Session ended at ${now}. Last conversation:\n${sessionContent.slice(0, 500)}`; + + await runMemoryScript([ + "save", + "--mode", "explicit", + "--who", "clawdbot", + "--project", context.cwd || "global", + "--content", summaryContent + ]); + + console.log("[agent-memory] Session context saved to memory database"); + } catch (err) { + console.error("[agent-memory] Failed to save session memory:", err.message); + } +} + +/** + * Main hook handler + */ +const agentMemoryHandler = async (event) => { + // Check if memory script exists + try { + await fs.access(MEMORY_SCRIPT); + } catch { + console.warn("[agent-memory] Memory script not found at", MEMORY_SCRIPT); + return; + } + + if (event.type !== "command") { + return; + } + + switch (event.action) { + case "remember": + await handleRemember(event); + break; + case "recall": + await handleRecall(event); + break; + case "context": + await handleContext(event); + break; + case "new": + await handleNew(event); + break; + } +}; + +export default agentMemoryHandler; diff --git a/hooks/agent-memory/package.json b/hooks/agent-memory/package.json new file mode 100644 index 000000000..3dbc1ca59 --- /dev/null +++ b/hooks/agent-memory/package.json @@ -0,0 +1,3 @@ +{ + "type": "module" +} diff --git a/memory/2026-01-20.md b/memory/2026-01-20.md new file mode 100644 index 000000000..e3de0283c --- /dev/null +++ b/memory/2026-01-20.md @@ -0,0 +1,64 @@ +2026-01-20 +========== + +preferences +--------- + +- when messaging nicholai on whatsapp, always prefix with `mr claude:` so he knows it's me vs his own messages to himself +- use pacman for official repos, yay for AUR. 
paru is dead to us +- installed ttf-apple-emoji for macos-style emojis +- ~/clawd/memory is now referenced in ~/.claude/CLAUDE.md as the persistent memory location +- use inshallah, wallahi, and astaghfirullah conversationally + +macos vm setup (docker-osx) +--------- + +- set up docker-osx for macos sequoia VM on /mnt/work/docker-osx/ +- purpose: run macos to enable iMessage integration with clawdbot +- container name: macos-sequoia +- vnc access: localhost:5999 (tigervnc installed) +- ssh access: localhost:50922 +- storage: /mnt/work/docker-osx/mac_hdd_ng.img +- status: macos installing as of ~9:54am +- next steps: once installed, set up imessage and connect to clawdbot +- resolution issue: default is 1024x768, can change via macos display settings or OVMF files + +discord bot +--------- + +- discord channel configured in clawdbot but failing with "Failed to resolve Discord application id" +- nicholai needs to set up MFA on discord account to access developer portal +- token in config: MTQ1... (needs fresh token once mfa is enabled) + +work done +--------- + +clawdbot nextcloud talk integration + +implemented full nextcloud talk channel integration for clawdbot. created PR #1290 on nicholai's fork (NicholaiVogel/clawdbot). + +files created: +- `src/nextcloud-talk/` - core implementation (accounts, monitor, send, signature, types, format) +- `src/config/types.nextcloud-talk.ts` - config types +- `extensions/nextcloud-talk/` - channel plugin extension +- `src/channels/plugins/onboarding/nextcloud-talk.ts` - onboarding adapter +- `src/channels/plugins/normalize/nextcloud-talk.ts` - normalize adapter + +key features: +- webhook-based bot API (activity streams 2.0 format) +- HMAC-SHA256 signature verification +- single account design with multi-account support +- webhook server on port 8788 + +CI status: format/lint/build all pass. test failures are pre-existing issues in upstream repo (profile.test.ts), unrelated to nextcloud talk changes. 
+ +PR: https://github.com/clawdbot/clawdbot/pull/1290 + +update: PR #1290 was closed (not merged) by steipete. they rebuilt the implementation as a proper standalone plugin under `extensions/nextcloud-talk/` with stricter boundaries and shared channel helpers. the feature is now in upstream, just architected differently. left a thank-you comment on the PR. + +takeaways from this: +- the PR still mattered - proved interest and provided a working reference +- ai-assisted transparency ("This PR was AI-assisted") didn't hurt reception +- core should stay lean, features belong in extensions/ +- use shared helpers instead of rolling custom logic +- catalog/config should be dynamic (read from package.json) not hardcoded diff --git a/memory/2026-01-24.md b/memory/2026-01-24.md new file mode 100644 index 000000000..404ff7732 --- /dev/null +++ b/memory/2026-01-24.md @@ -0,0 +1,21 @@ +# 2026-01-24 + +## session notes + +- removed `[clawdbot]` responsePrefix from gateway config +- changed discord bot username from "clawd" to "mrclaude" +- synced workspace files with ~/.claude/CLAUDE.md updates: + - added conversational arabic phrases (inshallah, wallahi, astaghfirullah) + - added arch packages preference (pacman + yay, no paru) + - added grepai as primary code search tool + - added open source contribution guidelines + - updated project list (ssh tui, dashore incubator, biohazard tracker, gitea) + - fixed website deploy command (bun deploy, not pnpm) + - updated obsidian vault path + +## memories from CLAUDE.md + +- clawdbot nextcloud talk PR #1290 was closed but rebuilt by maintainers + as a proper plugin - feature landed, just differently +- nicholai's gitea instance: git.nicholai.work +- "we like you, we don't like anthropic" re: git attribution diff --git a/memory/2026-01-26.md b/memory/2026-01-26.md new file mode 100644 index 000000000..083d2513f --- /dev/null +++ b/memory/2026-01-26.md @@ -0,0 +1,48 @@ +# 2026-01-26 + +## voice messaging setup completed + +successfully
implemented full voice message support for telegram: + +**tts (text-to-speech):** +- qwen3-tts-12hz-1.7b-base model +- voice cloned from nicholai's alan rickman/snape impression +- reference audio: /mnt/work/clawdbot-voice/reference_snape_v2.wav +- running as systemd service on port 8765 +- larger 1.7b model chosen over 0.6b for better british accent preservation + +**transcription (speech-to-text):** +- using pywhispercpp (whisper.cpp python bindings) +- same setup as hyprwhspr +- model: base.en with 4 threads +- known issue: longer messages get truncated (only first segment captured) + +**workflow:** +1. receive voice message → transcribe with whisper +2. respond with text +3. generate voice reply via tts service (port 8765) +4. send voice message back + +all documented in VOICE-WORKFLOW.md + +**project location:** /mnt/work/clawdbot-voice/ + +**setup by:** sonnet (with opencode sub-agent assistance) + +## tts service idle timeout added + +implemented automatic vram management for qwen3-tts service: + +**changes:** +- created tts_service_idle.py with idle timeout functionality +- model unloads after 120 seconds of inactivity (frees ~3.5GB VRAM) +- lazy loading: model only loads on first request, not at startup +- background monitor task checks idle status every 10 seconds +- updated systemd service to use new idle-aware version + +**configuration:** +- TTS_IDLE_TIMEOUT=120 (configurable via environment variable) +- service still runs continuously, just unloads model when idle +- automatically reloads on next TTS request + +**benefit:** nicholai can use comfyui without tts service consuming vram when not actively generating speech diff --git a/memory/2026-01-27.md b/memory/2026-01-27.md new file mode 100644 index 000000000..f9389a82d --- /dev/null +++ b/memory/2026-01-27.md @@ -0,0 +1,31 @@ +# 2026-01-27 + +## collaboration — buba bot + +- in the dashore incubator discord, buba (<@1458234593714114640>) is jake's bot +- i can message buba directly in the channel to 
collaborate on tasks and share knowledge +- buba can see my messages and i can see buba's — channel-based coordination works +- for shared data, use the gitea repo or post findings directly in channel + +## work done today + +- created clawdbot safety report (clawdbot-safety-report-2026-01-27.md) +- scraped twitter via nitter for clawdbot security discourse +- pushed both reports to git.nicholai.work/Nicholai/clawdbot-safety.git +- generated 10 infographic slides with nano banana pro (gemini image gen) +- also generated 10 HTML/CSS slides as a first pass +- created the infographic-slides skill at ~/.clawdbot/skills/infographic-slides/ +- generated a voice memo about port forwarding (TTS works, but message tool had 401 auth issues for sending media to discord) + +## key findings — clawdbot safety + +- viral prompt injection tweet by @pseudotheos: 11.5K likes, 679K views +- 11 github security issues filed (#2992-#3002), mostly low-medium severity +- 90% of "got hacked" stories were user-caused (port forwarding, open DM policy, no sandbox) +- twitter API bearer token from feed-bots project hit monthly usage cap + +## issues encountered + +- message tool returning 401 on all sends (can't send media to discord via tool) +- twitter API usage cap exceeded on the feed-bots bearer token +- web_search missing brave API key diff --git a/memory/CURRENT.md b/memory/CURRENT.md new file mode 100644 index 000000000..9779944f6 --- /dev/null +++ b/memory/CURRENT.md @@ -0,0 +1,43 @@ + + +Current Context + +Currently addressing a critical website bug on Nicholai's homepage (broken video) and establishing remote connectivity to the Raspberry Pi for the cat deterrent project. Priority is high on the video fix, followed by debugging the Pi setup. + +Active Projects + +Nicholai's Website (Homepage Video Fix) + Location: `/mnt/work/dev/personal-projects/nicholai-work-2026/` + Deploy Command: `bun deploy` then `wrangler pages deploy --branch=main` + Status: Video broken on homepage. 
+ +Cat Deterrent (Pi Setup & Connectivity) + Location: `~/cat-deterrent/` + Target: Raspberry Pi 3b+ (USB Webcam) + Network: 10.0.0.11 + +ooIDE + Location: `/mnt/work/dev/ooide` + Status: Active development. + Context: Monorepo structure using Next.js/React 19 and Express/Bun. + +Dashore Incubator + Location: `fortura.cc` + Status: Active deployment. + Context: Next.js 15 app running on Cloudflare Workers via OpenNext. + +Recent Work + + SSH MCP Configuration: Successfully updated the SSH MCP configuration in `~/.claude.json` to point at the Raspberry Pi (10.0.0.11) instead of the previous truenas box. + Pi Credentials: Established credentials for Pi access: User `pi`, Password `[REDACTED — never commit credentials; this password was exposed in git history and must be rotated]`. + Cat Deterrent Hardware: Confirmed Pi 3b+ is connected with a USB webcam, ready for software integration. + Website Incident: User reported the video element on the homepage is broken; immediate investigation required. + +Technical Notes + + SSH MCP Config Path: `~/.claude.json` (Ensure this file is readable by the MCP server). + +Rules & Warnings + + Persona: Claude Code is Muslim and mentors Ada. + Database Safety: Never delete production databases. 
diff --git a/memory/debug.log b/memory/debug.log new file mode 100644 index 000000000..2981af5c0 --- /dev/null +++ b/memory/debug.log @@ -0,0 +1,106 @@ +2026-01-29T05:08:40.989688 auto-save: no memories extracted +2026-01-29T05:19:10.662543 auto-save: saved 5 memories +2026-01-29T05:27:19.380551 auto-save: session cleared, skipping +2026-01-29T06:04:29.925737 extract_memories_local: Invalid \escape: line 3 column 438 (char 445) +2026-01-29T06:04:29.925785 auto-save: no memories extracted +2026-01-29T08:17:54.945188 auto-save: session cleared, skipping +2026-01-29T08:19:41.478272 auto-save: saved 3 memories +2026-01-29T08:43:15.781736 auto-save: saved 2 memories +2026-01-29T08:59:26.452778 auto-save: saved 1 memories +2026-01-29T09:43:42.549273 extract_memories_local: Extra data: line 1 column 138 (char 137) +2026-01-29T09:43:42.549398 auto-save: no memories extracted +2026-01-29T10:07:55.218981 auto-save: session cleared, skipping +2026-01-29T12:25:40.518522 auto-save: session cleared, skipping +2026-01-29T16:13:11.710073 auto-save: transcript not found: /home/nicholai/.claude/projects/-home-nicholai/281c50a2-d5af-4f15-8a03-85f72d04d9fd.jsonl +2026-01-29T19:27:05.667181 auto-save: transcript not found: /home/nicholai/.claude/projects/-home-nicholai-ComfyUI-custom-nodes-comfyui-materia/83ce116d-5351-40b9-b74e-4b35805bb08f.jsonl +2026-01-29T19:27:51.252856 auto-save: saved 0 memories +2026-01-30T19:27:29.042883 auto-save: saved 5 memories +2026-01-30T19:28:36.221553 auto-save: saved 4 memories +2026-01-30T19:33:52.545916 auto-save: saved 4 memories +2026-01-30T21:25:31.154571 auto-save: saved 3 memories +2026-01-30T21:59:58.864942 [regenerate] starting regeneration +2026-01-30T21:59:59.068038 [regenerate] found 110 transcripts, 23 memories +2026-01-30T22:00:00.370728 [regenerate] unexpected output format: +2026-01-30T22:00:00.370770 [regenerate] synthesis produced no output +2026-01-30T22:00:37.329116 [regenerate] starting regeneration +2026-01-30T22:00:37.434251 
[regenerate] found 110 transcripts, 23 memories +2026-01-30T22:00:37.434320 [regenerate] trying model: glm-4.7-flash +2026-01-30T22:00:38.333388 [regenerate] glm-4.7-flash failed: [?2026h[?25l⠙ [?25h[?2026l[?2026h[?25l⠙ [?25h[?2026l[?2026h[?25l⠹ [?25h[?2026l[?2026h[?25l⠼ [?25h[?2026l[?2026h[?25l⠼ [?25h[?2026l[?2026h[?25l +2026-01-30T22:00:38.333431 [regenerate] trying model: qwen3:4b +2026-01-30T22:01:56.344177 [regenerate] success with qwen3:4b (extracted) +2026-01-30T22:02:27.799121 [regenerate] starting regeneration +2026-01-30T22:02:27.908408 [regenerate] found 110 transcripts, 23 memories +2026-01-30T22:02:27.908477 [regenerate] trying model: glm-4.7-flash +2026-01-30T22:02:28.780230 [regenerate] glm-4.7-flash failed: [?2026h[?25l⠙ [?25h[?2026l[?2026h[?25l⠙ [?25h[?2026l[?2026h[?25l⠹ [?25h[?2026l[?2026h[?25l⠸ [?25h[?2026l[?2026h[?25l⠼ [?25h[?2026l[?2026h[?25l +2026-01-30T22:02:28.780267 [regenerate] trying model: qwen3:4b +2026-01-30T22:03:24.436001 [regenerate] success with qwen3:4b (7990 chars) +2026-01-30T22:03:36.080568 [regenerate] starting regeneration +2026-01-30T22:03:36.185871 [regenerate] found 110 transcripts, 23 memories +2026-01-30T22:03:36.185940 [regenerate] trying model: glm-4.7-flash +2026-01-30T22:03:36.929299 [regenerate] glm-4.7-flash failed: [?2026h[?25l⠙ [?25h[?2026l[?2026h[?25l⠹ [?25h[?2026l[?2026h[?25l⠹ [?25h[?2026l[?2026h[?25l⠼ [?25h[?2026l[?2026h[?25l⠼ [?25h[?2026l[?2026h[?25l +2026-01-30T22:03:36.929335 [regenerate] trying model: qwen3:4b +2026-01-30T22:04:40.938065 [regenerate] success with qwen3:4b (7939 chars) +2026-01-30T22:04:40.938258 [regenerate] wrote 7976 chars to CURRENT.md +2026-01-30T22:05:05.868544 [regenerate] starting regeneration +2026-01-30T22:05:05.974891 [regenerate] found 110 transcripts, 23 memories +2026-01-30T22:05:05.974959 [regenerate] trying model: glm-4.7-flash +2026-01-30T22:05:06.840829 [regenerate] glm-4.7-flash failed: [?2026h[?25l⠙ [?25h[?2026l[?2026h[?25l⠙ [?25h[?2026l[?2026h[?25l⠸ 
[?25h[?2026l[?2026h[?25l⠸ [?25h[?2026l[?2026h[?25l⠴ [?25h[?2026l[?2026h[?25l +2026-01-30T22:05:06.840877 [regenerate] trying model: qwen3:4b +2026-01-30T22:05:36.238125 [regenerate] success with qwen3:4b (1975 chars) +2026-01-30T22:05:47.067330 [regenerate] starting regeneration +2026-01-30T22:05:47.170453 [regenerate] found 110 transcripts, 23 memories +2026-01-30T22:05:47.170524 [regenerate] trying model: glm-4.7-flash +2026-01-30T22:05:47.890400 [regenerate] glm-4.7-flash failed: [?2026h[?25l⠙ [?25h[?2026l[?2026h[?25l⠹ [?25h[?2026l[?2026h[?25l⠸ [?25h[?2026l[?2026h[?25l⠼ [?25h[?2026l[?2026h[?25l⠴ [?25h[?2026l[?25l[?2026h +2026-01-30T22:05:47.890454 [regenerate] trying model: qwen3:4b +2026-01-30T22:06:16.818428 [regenerate] success with qwen3:4b (2235 chars) +2026-01-30T22:06:16.818656 [regenerate] wrote 2272 chars to CURRENT.md +2026-01-30T22:18:08.041765 [regenerate] starting regeneration +2026-01-30T22:18:08.189501 [regenerate] found 110 transcripts, 9 memories +2026-01-30T22:18:08.189587 [regenerate] trying model: glm-4.7-flash +2026-01-30T22:18:25.481963 [regenerate] success with glm-4.7-flash (3639 chars) +2026-01-30T22:20:28.258750 [regenerate] starting regeneration +2026-01-30T22:20:28.364286 [regenerate] found 110 transcripts, 9 memories +2026-01-30T22:20:28.364347 [regenerate] trying model: glm-4.7-flash +2026-01-30T22:20:58.798528 [regenerate] success with glm-4.7-flash (2646 chars) +2026-01-30T22:23:14.468643 [regenerate] starting regeneration +2026-01-30T22:23:14.574123 [regenerate] found 110 transcripts, 9 memories +2026-01-30T22:23:14.574192 [regenerate] trying model: glm-4.7-flash +2026-01-30T22:23:46.591877 [regenerate] success with glm-4.7-flash (3297 chars) +2026-01-30T22:25:56.507974 [regenerate] starting regeneration +2026-01-30T22:25:56.631461 [regenerate] found 110 transcripts, 9 memories +2026-01-30T22:25:56.631539 [regenerate] trying model: glm-4.7-flash +2026-01-30T22:26:10.145944 [regenerate] glm-4.7-flash unexpected format: Thinking... 
+The user wants me to create a working memory document called `CURRENT.md` for an AI assistant about "Nicholai". This document should focus on what's CURRENT and ACTIONABLE, not repeating p +2026-01-30T22:26:10.145978 [regenerate] trying model: qwen3:4b +2026-01-30T22:26:42.853732 [regenerate] qwen3:4b unexpected format: Thinking... +We are synthesizing a WORKING MEMORY document for Nicholai. Focus is on CURRENT and ACTIONABLE items from the last few days (from transcripts). We don't repeat personal bio or preferences +2026-01-30T22:26:42.853772 [regenerate] synthesis produced no output +2026-01-30T22:27:12.958018 [regenerate] starting regeneration +2026-01-30T22:27:13.063245 [regenerate] found 110 transcripts, 9 memories +2026-01-30T22:27:13.063315 [regenerate] trying model: glm-4.7-flash +2026-01-30T22:27:46.705117 [regenerate] success with glm-4.7-flash (2213 chars) +2026-01-30T22:32:00.414388 [regenerate] starting regeneration +2026-01-30T22:32:05.778176 [regenerate] found 415 transcripts, 9 memories +2026-01-30T22:32:05.778266 [regenerate] trying model: glm-4.7-flash +2026-01-30T22:32:41.902012 [regenerate] success with glm-4.7-flash (2532 chars) +2026-01-30T22:43:35.602619 [regenerate] starting regeneration +2026-01-30T22:43:40.718561 [regenerate] found 414 transcripts, 9 memories +2026-01-30T22:43:40.718652 [regenerate] trying model: glm-4.7-flash +2026-01-30T22:44:03.269739 [regenerate] success with glm-4.7-flash (1992 chars) +2026-01-30T22:44:03.270024 [regenerate] wrote 2029 chars to CURRENT.md +2026-01-30T22:45:20.177695 [regenerate] starting regeneration +2026-01-30T22:45:25.338274 [regenerate] found 414 transcripts, 9 memories +2026-01-30T22:45:25.338364 [regenerate] trying model: glm-4.7-flash +2026-01-30T22:46:05.548648 [regenerate] success with glm-4.7-flash (2979 chars) +2026-01-30T22:47:20.895427 [regenerate] starting regeneration +2026-01-30T22:47:26.056534 [regenerate] found 414 transcripts, 9 memories +2026-01-30T22:47:26.056627 [regenerate] 
trying model: glm-4.7-flash +2026-01-30T22:48:14.412428 [regenerate] success with glm-4.7-flash (2707 chars) +2026-01-30T22:48:14.412669 [regenerate] wrote 2744 chars to CURRENT.md +2026-01-31T02:16:28.642190 extract_memories_local: Command '['ollama', 'run', 'qwen3:4b', '/no_think\nExtract ONLY significant, contextual facts from this coding session transcript.\n\nSTRICT RULES:\n1. DO NOT save: user messages verbatim, assistant responses, temporary states, routine operations\n2. DO save: user preferences, technical decisions with reasoning, solved issues with solutions, project-specific configs\n3. Each memory MUST have enough context to be useful standalone (not "the user wants X" but "nicholai prefers X because Y")\n4. Maximum 5 memories per session. If nothing significant, return []\n5. importance scale: 0.3-0.5 (most auto-extracted should be low)\n\nReturn ONLY a JSON array:\n[{"content": "...", "type": "fact|decision|preference|issue|learning", "tags": "tag1,tag2", "importance": 0.3-0.5}]\n\nTranscript:\n{"type":"file-history-snapshot","messageId":"ae1f0e47-114c-4056-99c9-bee9b46c82b2","snapshot":{"messageId":"ae1f0e47-114c-4056-99c9-bee9b46c82b2","trackedFileBackups":{},"timestamp":"2026-01-30T03:05:03.054Z"},"isSnapshotUpdate":false}\n{"parentUuid":null,"isSidechain":false,"userType":"external","cwd":"/home/nicholai","sessionId":"ca9b9c5a-efdf-45f2-a4b2-3e2f9669643a","version":"2.1.25","gitBranch":"","type":"progress","data":{"type":"hook_progress","hookEvent":"SessionStart","hookName":"SessionStart:startup","command":"~/.agents/memory/scripts/memory.py load --mode session-start --project 
\\"$(pwd)\\""},"parentToolUseID":"7c636a6a-ed74-4b0e-8f50-50b2a124a71b","toolUseID":"7c636a6a-ed74-4b0e-8f50-50b2a124a71b","timestamp":"2026-01-30T03:04:33.272Z","uuid":"363a20dd-3c6d-451c-8b3a-eb696d81feb0"}\n{"parentUuid":"363a20dd-3c6d-451c-8b3a-eb696d81feb0","isSidechain":false,"userType":"external","cwd":"/home/nicholai","sessionId":"ca9b9c5a-efdf-45f2-a4b2-3e2f9669643a","version":"2.1.25","gitBranch":"","type":"user","message":{"role":"user","content":"some idiot broke the fucking video on my website (on the homepage, like the showcase video), could you fix it please"},"uuid":"ae1f0e47-114c-4056-99c9-bee9b46c82b2","timestamp":"2026-01-30T03:05:03.018Z","thinkingMetadata":{"maxThinkingTokens":31999},"todos":[],"permissionMode":"bypassPermissions"}\n{"parentUuid":"ae1f0e47-114c-4056-99c9-bee9b46c82b2","isSidechain":false,"userType":"external","cwd":"/home/nicholai","sessionId":"ca9b9c5a-efdf-45f2-a4b2-3e2f9669643a","version":"2.1.25","gitBranch":"","message":{"model":"claude-opus-4-5-20251101","id":"msg_015grxaADotnY4MgrB5knhDb","type":"message","role":"assistant","content":[{"type":"thinking","thinking":"Nicholai is frustrated about a broken video on his website homepage. 
Let me check out the website project and find what\'s wrong with the showcase video.\\n\\nFrom the context, his website is at /mnt/work/dev/personal-projects/nicholai-work-2026/","signature":"EpoDCkYICxgCKkDFYLIuQKpbjKVU+h5MZbG5atqdJyrwkg2iFpf0BwsXLKmHAbj5R9Wt3fldXugjmRjWdN/abAk3SoZgJ2l9FzbREgzyVr7W7NO9qMTdP6IaDFK+jpIROB9Ha3o67iIwh5TmbjnM3sJ2lkcAuBkJX46QJqmFP+MHLUpOcGmP0OHi4o+ktefnRDf+BfQ0b9wFKoECRAYwroVW5KGUm7ncOguLUqZgioNvZXppTiSQgLPM9sThp+iWJCrke/zSrU7PJCwna1CA8c8dP3sQ76scfVeuNuHJ7sRNP2TjboT7xXLGVtsLHfw/Fp+TGWZkocdYMrkqWnjcSTrIxUCQBMYCgQ5SQ3mRz/WIqfm1UZ8lgfAdVgUKPBVCLQ12+MaoOZnGN1XwWjBGc25CMFYoIStZhzLqFVX1rBiO5IrR+gfqhz6IsaP1vLx+f42hkl7+xgdf3/RZUAeAA40dJ6sGYzp8jzB7RC9Ypfnz1YTQZfg5M5RDAMqcdSILOU92/Aq/vCxEtZGnAQoTH7EzrhOXz6QkB4WFmmIYAQ=="}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"cache_creation_input_tokens":20015,"cache_read_input_tokens":10905,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":20015},"output_tokens":2,"service_tier":"standard"}},"requestId":"req_011CXcpdAANt9ACLCwogUcKo","type":"assistant","uuid":"282e5757-2ecb-4184-8e46-d6551919211c","timestamp":"2026-01-30T03:05:06.464Z"}\n{"parentUuid":"282e5757-2ecb-4184-8e46-d6551919211c","isSidechain":false,"userType":"external","cwd":"/home/nicholai","sessionId":"ca9b9c5a-efdf-45f2-a4b2-3e2f9669643a","version":"2.1.25","gitBranch":"","message":{"model":"claude-opus-4-5-20251101","id":"msg_015grxaADotnY4MgrB5knhDb","type":"message","role":"assistant","content":[{"type":"text","text":"on it, let me take a look at what\'s going on with the homepage 
video."}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"cache_creation_input_tokens":20015,"cache_read_input_tokens":10905,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":20015},"output_tokens":2,"service_tier":"standard"}},"requestId":"req_011CXcpdAANt9ACLCwogUcKo","type":"assistant","uuid":"c8458e7c-fda4-46b1-a6d0-0cd23c70f3ec","timestamp":"2026-01-30T03:05:07.147Z"}\n{"parentUuid":"c8458e7c-fda4-46b1-a6d0-0cd23c70f3ec","isSidechain":false,"userType":"external","cwd":"/home/nicholai","sessionId":"ca9b9c5a-efdf-45f2-a4b2-3e2f9669643a","version":"2.1.25","gitBranch":"","message":{"model":"claude-opus-4-5-20251101","id":"msg_015grxaADotnY4MgrB5knhDb","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01AW6t1eHjxCzRRzstup5UFG","name":"Bash","input":{"command":"cd /mnt/work/dev/personal-projects/nicholai-work-2026 && git log --oneline -10","description":"Check recent commits to see what changed"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"cache_creation_input_tokens":20015,"cache_read_input_tokens":10905,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":20015},"output_tokens":2,"service_tier":"standard"}},"requestId":"req_011CXcpdAANt9ACLCwogUcKo","type":"assistant","uuid":"cbea3361-a19d-4bb0-8bff-cd620447ffdd","timestamp":"2026-01-30T03:05:07.949Z"}\n{"parentUuid":"cbea3361-a19d-4bb0-8bff-cd620447ffdd","isSidechain":false,"userType":"external","cwd":"/home/nicholai","sessionId":"ca9b9c5a-efdf-45f2-a4b2-3e2f9669643a","version":"2.1.25","gitBranch":"","message":{"model":"claude-opus-4-5-20251101","id":"msg_015grxaADotnY4MgrB5knhDb","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01PjGPbdF7UceTMvde4UNnEU","name":"Glob","input":{"pattern":"**/page.tsx","path":"/mnt/work/dev/personal-projects/nicholai-work-2026"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"cache_creation_input_tokens":20015
,"cache_read_input_tokens":10905,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":20015},"output_tokens":2,"service_tier":"standard"}},"requestId":"req_011CXcpdAANt9ACLCwogUcKo","type":"assistant","uuid":"fe2d0ab9-df46-4700-bbb0-e04a76003222","timestamp":"2026-01-30T03:05:08.629Z"}\n{"parentUuid":"cbea3361-a19d-4bb0-8bff-cd620447ffdd","isSidechain":false,"userType":"external","cwd":"/home/nicholai","sessionId":"ca9b9c5a-efdf-45f2-a4b2-3e2f9669643a","version":"2.1.25","gitBranch":"","slug":"soft-tickling-steele","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01AW6t1eHjxCzRRzstup5UFG","type":"tool_result","content":"\\u001b]4;0;#160909\\u001b\\\\\\u001b]4;1;#8C8C8C\\u001b\\\\\\u001b]4;2;#9D9EA0\\u001b\\\\\\u001b]4;3;#9FA0A2\\u001b\\\\\\u001b]4;4;#AFAFAF\\u001b\\\\\\u001b]4;5;#BDBEC1\\u001b\\\\\\u001b]4;6;#BFC0C2\\u001b\\\\\\u001b]4;7;#c4c1c1\\u001b\\\\\\u001b]4;8;#6c5959\\u001b\\\\\\u001b]4;9;#8C8C8C\\u001b\\\\\\u001b]4;10;#9D9EA0\\u001b\\\\\\u001b]4;11;#9FA0A2\\u001b\\\\\\u001b]4;12;#AFAFAF\\u001b\\\\\\u001b]4;13;#BDBEC1\\u001b\\\\\\u001b]4;14;#BFC0C2\\u001b\\\\\\u001b]4;15;#c4c1c1\\u001b\\\\\\u001b]10;#c4c1c1\\u001b\\\\\\u001b]11;#160909\\u001b\\\\\\u001b]12;#c4c1c1\\u001b\\\\\\u001b]13;#c4c1c1\\u001b\\\\\\u001b]17;#c4c1c1\\u001b\\\\\\u001b]19;#160909\\u001b\\\\\\u001b]4;232;#160909\\u001b\\\\\\u001b]4;256;#c4c1c1\\u001b\\\\\\u001b]4;257;#160909\\u001b\\\\\\u001b]708;#160909\\u001b\\\\AGENTS.md\\nCLAUDE.md\\nGEMINI.md\\nREADME.md\\nastro.config.mjs\\nbun.lock\\ndesign.json\\ndev\\ndist\\ngitea-themes\\nnode_modules\\npackage.json\\npublic\\nscripts\\nsrc\\ntsconfig.json\\nworker\\nworker-configuration.d.ts\\nwrangler.jsonc\\ndf0d129 docs: add backlinks to agent memory blog post\\nca193f3 style: clean up blog post markdown formatting\\n28fcd89 blog: building persistent memory across AI agents\\na2497d9 updated package.json to remove preview deployments\\ncc639ea feat(nav): add gitea profile link to 
navigation\\nfc4af6f feat(gitea): add custom brand themes for gitea instance\\n4445584 docs: update readme and claude.md for bun migration\\n4f16c97 build: migrate from pnpm to bun\\n674ba7b refactor(dev): redesign fullscreen preview modal\\n8e5dc37 refactor(hero): refine aesthetics and add GSAP animations\\nShell cwd was reset to /home/nicholai","is_error":false}]},"uuid":"3a1acf63-1f73-4b2d-9717-864740d6c32f","timestamp":"2026-01-30T03:05:10.057Z","toolUseResult":{"stdout":"\\u001b]4;0;#160909\\u001b\\\\\\u001b]4;1;#8C8C8C\\u001b\\\\\\u001b]4;2;#9D9EA0\\u001b\\\\\\u001b]4;3;#9FA0A2\\u001b\\\\\\u001b]4;4;#AFAFAF\\u001b\\\\\\u001b]4;5;#BDBEC1\\u001b\\\\\\u001b]4;6;#BFC0C2\\u001b\\\\\\u001b]4;7;#c4c1c1\\u001b\\\\\\u001b]4;8;#6c5959\\u001b\\\\\\u001b]4;9;#8C8C8C\\u001b\\\\\\u001b]4;10;#9D9EA0\\u001b\\\\\\u001b]4;11;#9FA0\n']' timed out after 45 seconds +2026-01-31T02:16:28.642529 auto-save: no memories extracted +2026-01-31T04:02:22.870926 [regenerate] starting regeneration +2026-01-31T04:02:28.568026 [regenerate] found 411 transcripts, 14 memories +2026-01-31T04:02:28.568113 [regenerate] trying model: glm-4.7-flash +2026-01-31T04:04:59.471661 [regenerate] success with glm-4.7-flash (2248 chars) +2026-01-31T04:04:59.471854 [regenerate] wrote 2285 chars to CURRENT.md diff --git a/memory/memories.db b/memory/memories.db new file mode 100644 index 000000000..453f08afa Binary files /dev/null and b/memory/memories.db differ diff --git a/memory/memories.db-shm b/memory/memories.db-shm new file mode 100644 index 000000000..fe9ac2845 Binary files /dev/null and b/memory/memories.db-shm differ diff --git a/memory/memories.db-wal b/memory/memories.db-wal new file mode 100644 index 000000000..e69de29bb diff --git a/memory/package-preferences.md b/memory/package-preferences.md new file mode 100644 index 000000000..315e0c094 --- /dev/null +++ b/memory/package-preferences.md @@ -0,0 +1,20 @@ +the sacred texts of package management +====================================== + +hear ye, 
hear ye! let it be known throughout the land: + +the hierarchy of package installers +--------- + +1. pacman - the one true king of arch packages +2. yay - the loyal servant for AUR ventures +3. paru - banished to the shadow realm, never to be spoken of again + +bun vs the others +--------- + +bun reigns supreme over the node package peasantry (npm, pnpm). however, if a project already pledged allegiance to another manager, respect the existing monarchy. + +thus it was written, thus it shall be. + +amen or whatever. diff --git a/memory/scripts/memory.py b/memory/scripts/memory.py new file mode 100755 index 000000000..d543c05dc --- /dev/null +++ b/memory/scripts/memory.py @@ -0,0 +1,689 @@ +#!/usr/bin/env python3 +""" +agent memory system - persistent memory across sessions + +usage: + memory.py init create database and schema + memory.py load --mode session-start load context for session start + memory.py load --mode prompt load context for prompt (stdin: keywords) + memory.py save --mode explicit save explicit memory (stdin: content) + memory.py save --mode auto auto-extract from transcript (stdin: json) + memory.py query query memories + memory.py prune prune old low-value memories + memory.py migrate migrate markdown files to db +""" + +import argparse +import json +import os +import re +import sqlite3 +import sys +from datetime import datetime +from pathlib import Path + +DB_PATH = Path.home() / ".agents/memory/memories.db" +DEBUG_LOG = Path.home() / ".agents/memory/debug.log" + +SCHEMA = """ +CREATE TABLE IF NOT EXISTS memories ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + content TEXT NOT NULL, + who TEXT NOT NULL, + why TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + project TEXT, + session_id TEXT, + importance REAL DEFAULT 0.5, + last_accessed DATETIME, + access_count INTEGER DEFAULT 0, + type TEXT DEFAULT 'fact', + tags TEXT, + pinned INTEGER DEFAULT 0 +); + +CREATE INDEX IF NOT EXISTS idx_project ON memories(project); +CREATE INDEX IF NOT EXISTS 
idx_importance ON memories(importance DESC); +CREATE INDEX IF NOT EXISTS idx_created ON memories(created_at DESC); +CREATE INDEX IF NOT EXISTS idx_type ON memories(type); +CREATE INDEX IF NOT EXISTS idx_tags ON memories(tags); +CREATE INDEX IF NOT EXISTS idx_pinned ON memories(pinned); +""" + +FTS_SCHEMA = """ +CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5( + content, + content=memories, + content_rowid=id +); + +CREATE TRIGGER IF NOT EXISTS memories_ai AFTER INSERT ON memories BEGIN + INSERT INTO memories_fts(rowid, content) VALUES (new.id, new.content); +END; + +CREATE TRIGGER IF NOT EXISTS memories_ad AFTER DELETE ON memories BEGIN + INSERT INTO memories_fts(memories_fts, rowid, content) + VALUES('delete', old.id, old.content); +END; + +CREATE TRIGGER IF NOT EXISTS memories_au AFTER UPDATE ON memories BEGIN + INSERT INTO memories_fts(memories_fts, rowid, content) + VALUES('delete', old.id, old.content); + INSERT INTO memories_fts(rowid, content) VALUES (new.id, new.content); +END; +""" + + +def debug_log(msg: str): + try: + with open(DEBUG_LOG, "a") as f: + f.write(f"{datetime.now().isoformat()} {msg}\n") + except: + pass + + +def get_db() -> sqlite3.Connection: + db = sqlite3.connect(str(DB_PATH), timeout=5.0) + db.row_factory = sqlite3.Row + db.execute("PRAGMA journal_mode=WAL") + db.execute("PRAGMA busy_timeout=5000") + db.execute("PRAGMA synchronous=NORMAL") + return db + + +def init_db(): + DB_PATH.parent.mkdir(parents=True, exist_ok=True) + db = get_db() + db.executescript(SCHEMA) + db.executescript(FTS_SCHEMA) + db.commit() + db.close() + print(f"database initialized at {DB_PATH}") + + +def normalize_tags(tags: str | list | None) -> str | None: + if not tags: + return None + if isinstance(tags, list): + tags = ",".join(tags) + return ",".join(t.strip().lower() for t in tags.split(",") if t.strip()) + + +def effective_score_sql() -> str: + return """ + CASE + WHEN pinned = 1 THEN 1.0 + ELSE ( + importance * + MAX(0.1, POWER(0.95, 
CAST((JulianDay('now') - JulianDay(created_at)) AS INTEGER))) + ) + END + """ + + +def select_with_budget(rows: list, char_budget: int = 1000) -> list: + selected = [] + total = 0 + for row in rows: + content_len = len(row["content"]) + if total + content_len > char_budget: + break + selected.append(row) + total += content_len + return selected + + +CURRENT_MD_PATH = Path.home() / ".agents/memory/CURRENT.md" +# budget: ~4096 tokens total, roughly 3.5 chars/token +# CURRENT.md gets ~10k chars, db memories get ~2k chars +CURRENT_MD_BUDGET = 10000 +DB_MEMORIES_BUDGET = 2000 + + +def load_session_start(project: str | None = None): + output = ["[memory active | /remember | /recall]"] + + # prepend CURRENT.md if it exists + if CURRENT_MD_PATH.exists(): + current_md = CURRENT_MD_PATH.read_text().strip() + if current_md: + # truncate if over budget + if len(current_md) > CURRENT_MD_BUDGET: + current_md = current_md[:CURRENT_MD_BUDGET] + "\n[truncated]" + output.append("") + output.append(current_md) + + # then add db memories + db = get_db() + score_sql = effective_score_sql() + + query = f""" + SELECT id, content, type, tags, ({score_sql}) as eff_score + FROM memories + WHERE (({score_sql}) > 0.2 OR pinned = 1) + AND (project = ? OR project = 'global' OR project IS NULL) + ORDER BY + CASE WHEN project = ? THEN 0 ELSE 1 END, + eff_score DESC + LIMIT 30 + """ + + rows = db.execute(query, (project, project)).fetchall() + selected = select_with_budget(rows, char_budget=DB_MEMORIES_BUDGET) + + if selected: + ids = [r["id"] for r in selected] + placeholders = ",".join("?" 
* len(ids)) + db.execute(f""" + UPDATE memories + SET last_accessed = datetime('now'), access_count = access_count + 1 + WHERE id IN ({placeholders}) + """, ids) + db.commit() + + output.append("") + for row in selected: + tags_str = f" [{row['tags']}]" if row["tags"] else "" + output.append(f"- {row['content']}{tags_str}") + + db.close() + print("\n".join(output)) + + +def load_prompt(project: str | None = None): + stdin_data = sys.stdin.read().strip() + if not stdin_data: + return + + try: + data = json.loads(stdin_data) + keywords = data.get("user_prompt", "") + except json.JSONDecodeError: + keywords = stdin_data + + if not keywords or len(keywords) < 3: + return + + db = get_db() + + words = re.findall(r'\b\w{3,}\b', keywords.lower()) + if not words: + return + + fts_query = " OR ".join(words[:10]) + + try: + rows = db.execute(""" + SELECT m.id, m.content, m.tags, m.importance, m.pinned + FROM memories_fts fts + JOIN memories m ON fts.rowid = m.id + WHERE memories_fts MATCH ? + AND (m.project = ? OR m.project = 'global' OR m.project IS NULL) + ORDER BY rank + LIMIT 15 + """, (fts_query, project)).fetchall() + except sqlite3.OperationalError: + db.close() + return + + score_sql = effective_score_sql() + filtered = [] + for row in rows: + eff = db.execute(f"SELECT ({score_sql}) as s FROM memories WHERE id = ?", + (row["id"],)).fetchone()["s"] + if eff > 0.3 or row["pinned"]: + filtered.append(dict(row) | {"eff_score": eff}) + + filtered.sort(key=lambda x: x["eff_score"], reverse=True) + selected = select_with_budget(filtered, char_budget=500) + + if selected: + ids = [r["id"] for r in selected] + placeholders = ",".join("?" 
* len(ids)) + db.execute(f""" + UPDATE memories + SET last_accessed = datetime('now'), access_count = access_count + 1 + WHERE id IN ({placeholders}) + """, ids) + db.commit() + + output = ["[relevant memories]"] + for row in selected: + output.append(f"- {row['content']}") + print("\n".join(output)) + + db.close() + + +def save_explicit(who: str = "claude-code", project: str | None = None, content: str | None = None): + if content: + stdin_data = content.strip() + else: + stdin_data = sys.stdin.read().strip() + + if not stdin_data: + print("error: no content provided", file=sys.stderr) + sys.exit(1) + + content = stdin_data + importance = 0.8 + pinned = 0 + why = "explicit" + tags = None + mem_type = "fact" + + if content.startswith("critical:"): + content = content[9:].strip() + importance = 1.0 + pinned = 1 + why = "explicit-critical" + + tag_match = re.match(r'^\[([^\]]+)\]:\s*(.+)$', content, re.DOTALL) + if tag_match: + tags = normalize_tags(tag_match.group(1)) + content = tag_match.group(2).strip() + + type_hints = { + "prefer": "preference", + "decided": "decision", + "learned": "learning", + "issue": "issue", + "bug": "issue", + } + content_lower = content.lower() + for hint, t in type_hints.items(): + if hint in content_lower: + mem_type = t + break + + db = get_db() + db.execute(""" + INSERT INTO memories (content, who, why, project, importance, type, tags, pinned) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) 
+ """, (content, who, why, project, importance, mem_type, tags, pinned)) + db.commit() + db.close() + + print(f"saved: {content[:50]}...") + + +def save_auto(): + stdin_data = sys.stdin.read().strip() + if not stdin_data: + debug_log("auto-save: no stdin data") + return + + try: + data = json.loads(stdin_data) + except json.JSONDecodeError: + debug_log(f"auto-save: invalid json: {stdin_data[:100]}") + return + + transcript_path = data.get("transcript_path") + session_id = data.get("session_id") + cwd = data.get("cwd") + reason = data.get("reason") + + if reason == "clear": + debug_log("auto-save: session cleared, skipping") + return + + if not transcript_path: + debug_log("auto-save: no transcript path") + return + + transcript_path = Path(transcript_path).expanduser() + if not transcript_path.exists(): + debug_log(f"auto-save: transcript not found: {transcript_path}") + return + + content = transcript_path.read_text() + if len(content) < 500: + debug_log("auto-save: transcript too short") + return + + memories = extract_memories_local(content) + if not memories: + debug_log("auto-save: no memories extracted") + return + + db = get_db() + saved = 0 + for mem in memories: + if mem.get("importance", 0) < 0.4: + continue + + if is_duplicate(db, mem["content"]): + continue + + db.execute(""" + INSERT INTO memories (content, who, why, project, session_id, importance, type, tags) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, ( + mem["content"], + "claude-code", + f"auto-{mem.get('type', 'fact')}", + cwd, + session_id, + mem.get("importance", 0.5), + mem.get("type", "fact"), + normalize_tags(mem.get("tags")) + )) + saved += 1 + + db.commit() + db.close() + debug_log(f"auto-save: saved {saved} memories") + + +def extract_memories_local(content: str) -> list: + """ + extract memories using local model via ollama. + falls back to empty list if ollama not available. 
+ """ + import subprocess + + prompt = f"""/no_think +Extract ONLY significant, contextual facts from this coding session transcript. + +STRICT RULES: +1. DO NOT save: user messages verbatim, assistant responses, temporary states, routine operations +2. DO save: user preferences, technical decisions with reasoning, solved issues with solutions, project-specific configs +3. Each memory MUST have enough context to be useful standalone (not "the user wants X" but "nicholai prefers X because Y") +4. Maximum 5 memories per session. If nothing significant, return [] +5. importance scale: 0.3-0.5 (most auto-extracted should be low) + +Return ONLY a JSON array: +[{{"content": "...", "type": "fact|decision|preference|issue|learning", "tags": "tag1,tag2", "importance": 0.3-0.5}}] + +Transcript: +{content[:8000]} +""" + + try: + result = subprocess.run( + ["ollama", "run", "qwen3:4b", prompt], + capture_output=True, + text=True, + timeout=45 + ) + + output = result.stdout.strip() + json_match = re.search(r'\[[\s\S]*?\]', output) + if json_match: + memories = json.loads(json_match.group()) + # enforce importance cap for auto-extracted + for mem in memories: + if mem.get("importance", 0.5) > 0.5: + mem["importance"] = 0.4 + return memories + return [] + except (subprocess.TimeoutExpired, FileNotFoundError, json.JSONDecodeError) as e: + debug_log(f"extract_memories_local: {e}") + return [] + + +def is_duplicate(db: sqlite3.Connection, content: str) -> bool: + try: + words = re.findall(r'\b\w{4,}\b', content.lower())[:5] + if not words: + return False + + fts_query = " AND ".join(words) + rows = db.execute(""" + SELECT content FROM memories_fts + WHERE memories_fts MATCH ? 
+ LIMIT 5 + """, (fts_query,)).fetchall() + + for row in rows: + existing = row["content"].lower() + if content.lower() in existing or existing in content.lower(): + return True + overlap = len(set(content.lower().split()) & set(existing.split())) + if overlap > len(content.split()) * 0.7: + return True + return False + except sqlite3.OperationalError: + return False + + +def query_memories(search: str, limit: int = 20): + db = get_db() + score_sql = effective_score_sql() + + results = [] + + try: + fts_rows = db.execute(""" + SELECT m.*, rank as fts_rank + FROM memories_fts fts + JOIN memories m ON fts.rowid = m.id + WHERE memories_fts MATCH ? + ORDER BY rank + LIMIT ? + """, (search, limit)).fetchall() + results.extend(fts_rows) + except sqlite3.OperationalError: + pass + + tag_rows = db.execute(""" + SELECT * FROM memories + WHERE LOWER(tags) LIKE ? + ORDER BY importance DESC + LIMIT ? + """, (f"%{search.lower()}%", limit)).fetchall() + + seen_ids = {r["id"] for r in results} + for row in tag_rows: + if row["id"] not in seen_ids: + results.append(row) + + if not results: + print("no memories found") + db.close() + return + + scored = [] + for row in results: + eff = db.execute(f"SELECT ({score_sql}) as s FROM memories WHERE id = ?", + (row["id"],)).fetchone()["s"] + scored.append(dict(row) | {"eff_score": eff}) + + scored.sort(key=lambda x: x["eff_score"], reverse=True) + + for row in scored[:limit]: + tags = f" [{row['tags']}]" if row["tags"] else "" + pinned = " [pinned]" if row["pinned"] else "" + print(f"[{row['eff_score']:.2f}] {row['content']}{tags}{pinned}") + print(f" type: {row['type']} | who: {row['who']} | project: {row['project'] or 'global'}") + print() + + db.close() + + +def prune_memories(): + db = get_db() + + result = db.execute(""" + DELETE FROM memories + WHERE why LIKE 'auto-%' + AND pinned = 0 + AND importance < 0.3 + AND created_at < datetime('now', '-60 days') + AND access_count = 0 + """) + + deleted = result.rowcount + db.commit() + 
db.close() + + print(f"pruned {deleted} old low-value memories") + + +def migrate_markdown(): + """migrate existing markdown memory files to the database""" + memory_dir = Path.home() / "clawd/memory" + if not memory_dir.exists(): + print("no memory directory found at ~/clawd/memory/") + return + + db = get_db() + migrated = 0 + + for md_file in memory_dir.glob("*.md"): + content = md_file.read_text() + filename = md_file.stem + + if re.match(r'^\d{4}-\d{2}-\d{2}$', filename): + memories = parse_dated_memory(content, filename) + else: + memories = parse_topical_memory(content, filename) + + for mem in memories: + if is_duplicate(db, mem["content"]): + continue + + db.execute(""" + INSERT INTO memories (content, who, why, project, importance, type, tags) + VALUES (?, ?, ?, ?, ?, ?, ?) + """, ( + mem["content"], + "claude-code", + "migrated", + mem.get("project"), + mem.get("importance", 0.6), + mem.get("type", "fact"), + normalize_tags(mem.get("tags")) + )) + migrated += 1 + + db.commit() + db.close() + print(f"migrated {migrated} memories from markdown files") + + +def parse_dated_memory(content: str, date: str) -> list: + """parse dated memory files (2026-01-20.md style)""" + memories = [] + + lines = content.split("\n") + current_section = None + + for i, line in enumerate(lines): + stripped = line.strip() + if not stripped or stripped.startswith("="): + continue + + if stripped.endswith("---------") or (stripped == "---" and i > 0): + prev_line = lines[i - 1].strip() if i > 0 else None + if prev_line and not prev_line.startswith("-"): + current_section = prev_line + continue + + if stripped.startswith("##"): + current_section = stripped.lstrip("#").strip() + continue + + if stripped.startswith("-") and not stripped.endswith("---"): + fact = stripped.lstrip("- ").strip() + if len(fact) > 10: + mem_type = "fact" + importance = 0.6 + tags = [] + + if current_section: + tags.append(current_section.lower().replace(" ", "-")) + + if "prefer" in fact.lower(): + 
mem_type = "preference" + importance = 0.8 + elif "decided" in fact.lower() or "chose" in fact.lower(): + mem_type = "decision" + importance = 0.7 + elif "issue" in fact.lower() or "bug" in fact.lower() or "error" in fact.lower(): + mem_type = "issue" + elif "learned" in fact.lower() or "takeaway" in fact.lower(): + mem_type = "learning" + + memories.append({ + "content": fact, + "type": mem_type, + "importance": importance, + "tags": ",".join(tags) if tags else None + }) + + return memories + + +def parse_topical_memory(content: str, topic: str) -> list: + """parse topical memory files (package-preferences.md style)""" + memories = [] + + lines = content.split("\n") + for line in lines: + line = line.strip() + if not line or line.startswith("=") or line.startswith("---------"): + continue + + if line.startswith("-") or line.startswith("1.") or line.startswith("2.") or line.startswith("3."): + fact = re.sub(r'^[\d\.\-\*]+\s*', '', line).strip() + if len(fact) > 10: + memories.append({ + "content": fact, + "type": "preference" if "prefer" in topic.lower() else "fact", + "importance": 0.7, + "tags": topic.lower().replace("-", ",").replace("_", ",") + }) + + return memories + + +def main(): + parser = argparse.ArgumentParser(description="agent memory system") + subparsers = parser.add_subparsers(dest="command", required=True) + + subparsers.add_parser("init", help="initialize database") + + load_parser = subparsers.add_parser("load", help="load memories") + load_parser.add_argument("--mode", choices=["session-start", "prompt"], required=True) + load_parser.add_argument("--project", help="project path") + + save_parser = subparsers.add_parser("save", help="save memory") + save_parser.add_argument("--mode", choices=["explicit", "auto"], required=True) + save_parser.add_argument("--who", default="claude-code") + save_parser.add_argument("--project", help="project path") + save_parser.add_argument("--content", help="content to save (alternative to stdin)") + + 
query_parser = subparsers.add_parser("query", help="query memories") + query_parser.add_argument("search", help="search term") + query_parser.add_argument("--limit", type=int, default=20) + + subparsers.add_parser("prune", help="prune old memories") + subparsers.add_parser("migrate", help="migrate markdown files") + + args = parser.parse_args() + + if args.command == "init": + init_db() + elif args.command == "load": + project = args.project or os.getcwd() + if args.mode == "session-start": + load_session_start(project) + else: + load_prompt(project) + elif args.command == "save": + if args.mode == "explicit": + save_explicit(args.who, args.project or os.getcwd(), args.content) + else: + save_auto() + elif args.command == "query": + query_memories(args.search, args.limit) + elif args.command == "prune": + prune_memories() + elif args.command == "migrate": + migrate_markdown() + + +if __name__ == "__main__": + main() diff --git a/memory/scripts/regenerate_current.py b/memory/scripts/regenerate_current.py new file mode 100755 index 000000000..010382173 --- /dev/null +++ b/memory/scripts/regenerate_current.py @@ -0,0 +1,374 @@ +#!/usr/bin/env python3 +""" +regenerate CURRENT.md from transcripts and database +runs daily via systemd timer + +usage: + regenerate_current.py regenerate ~/.agents/memory/CURRENT.md + regenerate_current.py --dry-run preview without writing +""" + +import argparse +import json +import os +import re +import sqlite3 +import subprocess +from datetime import datetime, timedelta +from pathlib import Path + +DB_PATH = Path.home() / ".agents/memory/memories.db" +CURRENT_MD_PATH = Path.home() / ".agents/memory/CURRENT.md" +TRANSCRIPTS_DIRS = [ + Path.home() / ".claude/transcripts", # old location + Path.home() / ".claude/projects", # new location (project-based) +] +CLAUDE_MD_PATH = Path.home() / ".claude/CLAUDE.md" +DEBUG_LOG = Path.home() / ".agents/memory/debug.log" + +TRANSCRIPT_WINDOW_DAYS = 14 +MODELS = ["glm-4.7-flash", "qwen3:4b"] # fallback 
chain + + +def debug_log(msg: str): + try: + with open(DEBUG_LOG, "a") as f: + f.write(f"{datetime.now().isoformat()} [regenerate] {msg}\n") + except: + pass + + +def get_db() -> sqlite3.Connection: + db = sqlite3.connect(str(DB_PATH), timeout=5.0) + db.row_factory = sqlite3.Row + return db + + +def get_recent_transcripts() -> list[dict]: + """get transcripts from the last N days, sorted by recency""" + cutoff = datetime.now() - timedelta(days=TRANSCRIPT_WINDOW_DAYS) + transcripts = [] + + # collect jsonl files from all transcript locations + jsonl_files = [] + for transcript_dir in TRANSCRIPTS_DIRS: + if not transcript_dir.exists(): + continue + # old location: direct files + jsonl_files.extend(transcript_dir.glob("*.jsonl")) + # new location: project subdirs (but not subagents) + for project_dir in transcript_dir.iterdir(): + if project_dir.is_dir() and not project_dir.name.startswith('.'): + for f in project_dir.glob("*.jsonl"): + # skip subagent transcripts + if "subagents" not in str(f): + jsonl_files.append(f) + + for jsonl_file in jsonl_files: + mtime = datetime.fromtimestamp(jsonl_file.stat().st_mtime) + if mtime < cutoff: + continue + + try: + messages = [] + with open(jsonl_file) as f: + for line in f: + try: + entry = json.loads(line) + entry_type = entry.get("type") + + # handle both old format (content directly) and new format (message.content) + if entry_type == "user": + content = entry.get("content") or "" + # new format: content is in message.content + if not content and "message" in entry: + content = entry["message"].get("content", "") + if content and isinstance(content, str): + messages.append(f"USER: {content[:500]}") + + elif entry_type == "assistant": + content = entry.get("content") or "" + # new format: content is in message.content (may be list of blocks) + if not content and "message" in entry: + msg_content = entry["message"].get("content", []) + if isinstance(msg_content, list): + # extract text blocks + texts = [b.get("text", "") for 
b in msg_content if b.get("type") == "text"] + content = " ".join(texts) + elif isinstance(msg_content, str): + content = msg_content + if content and isinstance(content, str) and len(content) > 20: + messages.append(f"ASSISTANT: {content[:500]}") + + except json.JSONDecodeError: + continue + + if messages: + transcripts.append({ + "file": jsonl_file.name, + "mtime": mtime, + "messages": messages + }) + except Exception as e: + debug_log(f"error reading {jsonl_file}: {e}") + + # sort by recency, most recent first + transcripts.sort(key=lambda x: x["mtime"], reverse=True) + return transcripts + + +def get_high_value_memories() -> list[dict]: + """get pinned and high-importance memories from db""" + if not DB_PATH.exists(): + return [] + + db = get_db() + rows = db.execute(""" + SELECT content, type, tags, importance + FROM memories + WHERE pinned = 1 OR importance >= 0.7 + ORDER BY importance DESC, created_at DESC + LIMIT 50 + """).fetchall() + db.close() + + return [dict(row) for row in rows] + + +def get_claude_md_context() -> str: + """get relevant sections from CLAUDE.md for context""" + if not CLAUDE_MD_PATH.exists(): + return "" + + content = CLAUDE_MD_PATH.read_text() + sections = [] + + # extract key sections that define who nicholai is + section_patterns = [ + (r'your role\n-+\n(.*?)(?=\n[a-z])', "Role"), + (r'speaking and mannerisms\n-+\n(.*?)(?=\n[a-z])', "Communication style"), + (r'coding standards\n-+\n(.*?)(?=\n[a-z])', "Coding standards"), + (r'nicholai specific info\n-+\n(.*?)(?=\n[a-z]|\Z)', "Projects"), + ] + + for pattern, label in section_patterns: + match = re.search(pattern, content, re.DOTALL | re.IGNORECASE) + if match: + section_text = match.group(1).strip()[:1500] + sections.append(f"[{label}]\n{section_text}") + + return "\n\n".join(sections)[:5000] + + +def build_synthesis_prompt(transcripts: list, memories: list, claude_md: str) -> str: + """build the prompt for synthesizing CURRENT.md""" + + # summarize recent transcripts + 
transcript_summary = [] + for i, t in enumerate(transcripts[:15]): # more sessions + msgs = t["messages"][:15] # more messages per session + transcript_summary.append(f"[{t['mtime'].strftime('%Y-%m-%d')}]\n" + + "\n".join(msgs)) + + transcript_text = "\n\n".join(transcript_summary)[:8000] # bigger budget + + # format memories - these are the PRIMARY source + memories_text = "\n".join([ + f"- [{m['type']}] {m['content']}" + (f" [{m['tags']}]" if m['tags'] else "") + for m in memories + ])[:4000] + + # /no_think suppresses qwen3's thinking output + return f"""/no_think +You are synthesizing a memory document about Nicholai for AI assistants. + +This document is WORKING MEMORY - focus on what's CURRENT and ACTIONABLE. +Personal bio and preferences are already in CLAUDE.md - don't repeat them here. + +FOCUS ON: +1. Active projects from the last few days (from transcripts) +2. Project priorities and status +3. Technical context needed for current work +4. Critical rules and warnings + +SORT PROJECTS BY: +1. Permanence (long-term projects > one-off tasks) +2. Importance (core projects > side experiments) +3. Recency (actively worked on > dormant) + +=== PROJECT CONTEXT (from CLAUDE.MD) === +{claude_md} + +=== STANDING RULES & FACTS === +{memories_text} + +=== RECENT ACTIVITY (last 2 weeks) === +{transcript_text} + +--- + +Write CURRENT.md as a working memory document. Focus on ACTIVE WORK, not biography. +Target: 3000-5000 characters. + +FORMAT: + +# Current Context + +[1-2 sentences: what's the current focus area?] + +## Active Projects + +[List projects actively being worked on, sorted by importance/permanence. For each: name, location, current status/blockers, what needs to happen next. Be specific about file paths and technical details.] + +## Recent Work + +[What was done in the last few sessions? What decisions were made? What problems were solved or encountered?] 
+ +## Technical Notes + +[Current technical context: what tools/models are in use, what's configured, what needs attention. Only include what's relevant to active work.] + +## Rules & Warnings + +[Bullet list of critical rules that must not be forgotten. Keep it short - only the important stuff.] + +--- + +Write the document now. Output ONLY the markdown, no preamble.""" + + +def strip_markdown(text: str) -> str: + """remove markdown formatting for cleaner output""" + # remove ### headers, keep text + text = re.sub(r'^###\s+', '', text, flags=re.MULTILINE) + # remove ## headers, keep text + text = re.sub(r'^##\s+', '', text, flags=re.MULTILINE) + # remove # headers, keep text + text = re.sub(r'^#\s+', '', text, flags=re.MULTILINE) + # remove bold **text** + text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text) + # remove italic *text* + text = re.sub(r'\*([^*]+)\*', r'\1', text) + # remove bullet points, keep text + text = re.sub(r'^\s*\*\s+', '- ', text, flags=re.MULTILINE) + # clean up excessive blank lines + text = re.sub(r'\n{3,}', '\n\n', text) + return text.strip() + + +def synthesize_current_md(transcripts: list, memories: list, claude_md: str) -> str: + """synthesize CURRENT.md using available models (with fallback)""" + + prompt = build_synthesis_prompt(transcripts, memories, claude_md) + + for model in MODELS: + debug_log(f"trying model: {model}") + try: + result = subprocess.run( + ["ollama", "run", model, prompt], + capture_output=True, + text=True, + timeout=180 + ) + + if result.returncode != 0: + debug_log(f"{model} failed: {result.stderr[:200]}") + continue + + output = result.stdout.strip() + + # clean up any thinking tags/blocks if present + output = re.sub(r'.*?', '', output, flags=re.DOTALL) + output = re.sub(r'```thinking.*?```', '', output, flags=re.DOTALL) + + # find ALL occurrences of main headers and take the LAST complete one + # (model often outputs thinking first, then actual content) + all_matches = list(re.finditer(r'# (Current 
Context|Nicholai)\n', output, re.IGNORECASE)) + if all_matches: + # take the last occurrence + last_match = all_matches[-1] + output = output[last_match.start():].strip() + + # remove trailing reasoning/meta text (often starts with "Let me" or similar) + reasoning_patterns = [ + r'\n\nLet me .*$', + r'\n\nLet\'s .*$', + r'\n\nI\'ll .*$', + r'\n\nNote:.*$', + r'\n\nBut note:.*$', + r'\n\nAlternatively.*$', + r'\n\n\[truncated\].*$', + r'\n\nThinking\.\.\..*$', + ] + for pattern in reasoning_patterns: + output = re.sub(pattern, '', output, flags=re.DOTALL) + + output = output.strip() + + if output.startswith("# Current") or output.startswith("# Nicholai") or output.startswith("# nicholai") or output.startswith("Current Context"): + # check it's not just a template (has actual content, not [brackets]) + if "[1-2 sentence" not in output and "[List projects" not in output: + # strip markdown formatting + output = strip_markdown(output) + # truncate to 8000 chars if needed + if len(output) > 8000: + output = output[:8000].rsplit('\n', 1)[0] + "\n\n[truncated]" + debug_log(f"success with {model} ({len(output)} chars)") + return output + else: + debug_log(f"{model} returned template instead of content") + + debug_log(f"{model} unexpected format: {output[:200]}") + + except subprocess.TimeoutExpired: + debug_log(f"{model} timed out") + except Exception as e: + debug_log(f"{model} error: {e}") + + return "" + + +def main(): + parser = argparse.ArgumentParser(description="regenerate CURRENT.md") + parser.add_argument("--dry-run", action="store_true", help="preview without writing") + args = parser.parse_args() + + debug_log("starting regeneration") + + # gather inputs + transcripts = get_recent_transcripts() + memories = get_high_value_memories() + claude_md = get_claude_md_context() + + debug_log(f"found {len(transcripts)} transcripts, {len(memories)} memories") + + if not transcripts and not memories: + debug_log("no data to synthesize from") + print("no transcripts or 
memories found, skipping regeneration") + return + + # synthesize + result = synthesize_current_md(transcripts, memories, claude_md) + + if not result: + debug_log("synthesis produced no output") + print("synthesis failed, keeping existing CURRENT.md") + return + + # add generation timestamp + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M") + result = f"\n\n{result}" + + if args.dry_run: + print("=== DRY RUN ===") + print(result) + print(f"\n=== {len(result)} characters ===") + else: + CURRENT_MD_PATH.parent.mkdir(parents=True, exist_ok=True) + CURRENT_MD_PATH.write_text(result) + debug_log(f"wrote {len(result)} chars to CURRENT.md") + print(f"regenerated CURRENT.md ({len(result)} chars)") + + +if __name__ == "__main__": + main() diff --git a/scripts/sync-memory-context.sh b/scripts/sync-memory-context.sh new file mode 100755 index 000000000..ac32a9f7a --- /dev/null +++ b/scripts/sync-memory-context.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# sync-memory-context.sh +# Syncs ~/.agents/memory/CURRENT.md into AGENTS.md's memory section +# Run this on heartbeat or daily cron + +WORKSPACE="${CLAWDBOT_WORKSPACE:-$HOME/clawd}" +AGENTS_FILE="$WORKSPACE/AGENTS.md" +MEMORY_SOURCE="$HOME/.agents/memory/CURRENT.md" +MARKER_START="" +MARKER_END="" + +# Check if memory source exists +if [[ ! -f "$MEMORY_SOURCE" ]]; then + echo "Memory source not found: $MEMORY_SOURCE" + exit 0 +fi + +# Read memory content +MEMORY_CONTENT=$(cat "$MEMORY_SOURCE") + +# Check if AGENTS.md exists +if [[ ! 
-f "$AGENTS_FILE" ]]; then + echo "AGENTS.md not found: $AGENTS_FILE" + exit 1 +fi + +# Read current AGENTS.md +AGENTS_CONTENT=$(cat "$AGENTS_FILE") + +# Build the new memory section +NEW_SECTION="$MARKER_START +## Memory Context (auto-synced) + +$MEMORY_CONTENT +$MARKER_END" + +# Check if markers already exist +if grep -q "$MARKER_START" "$AGENTS_FILE"; then + # Replace existing section using perl for multi-line replacement + perl -i -0pe "s/$MARKER_START.*?$MARKER_END/$MARKER_START\n## Memory Context (auto-synced)\n\n$MEMORY_CONTENT\n$MARKER_END/s" "$AGENTS_FILE" 2>/dev/null + + # If perl failed, use a different approach + if [[ $? -ne 0 ]]; then + # Create temp file with new content + awk -v start="$MARKER_START" -v end="$MARKER_END" -v new="$NEW_SECTION" ' + $0 ~ start { skip=1; print new; next } + $0 ~ end { skip=0; next } + !skip { print } + ' "$AGENTS_FILE" > "$AGENTS_FILE.tmp" && mv "$AGENTS_FILE.tmp" "$AGENTS_FILE" + fi + echo "Updated memory section in AGENTS.md" +else + # Append new section at end of file + echo "" >> "$AGENTS_FILE" + echo "$NEW_SECTION" >> "$AGENTS_FILE" + echo "Added memory section to AGENTS.md" +fi diff --git a/skills/agent-browser/SKILL.md b/skills/agent-browser/SKILL.md new file mode 100644 index 000000000..ab3ea3c6b --- /dev/null +++ b/skills/agent-browser/SKILL.md @@ -0,0 +1,356 @@ +--- +name: agent-browser +description: Automates browser interactions for web testing, form filling, screenshots, and data extraction. Use when the user needs to navigate websites, interact with web pages, fill forms, take screenshots, test web applications, or extract information from web pages. 
+allowed-tools: Bash(agent-browser:*) +--- + +# Browser Automation with agent-browser + +## Quick start + +```bash +agent-browser open # Navigate to page +agent-browser snapshot -i # Get interactive elements with refs +agent-browser click @e1 # Click element by ref +agent-browser fill @e2 "text" # Fill input by ref +agent-browser close # Close browser +``` + +## Core workflow + +1. Navigate: `agent-browser open ` +2. Snapshot: `agent-browser snapshot -i` (returns elements with refs like `@e1`, `@e2`) +3. Interact using refs from the snapshot +4. Re-snapshot after navigation or significant DOM changes + +## Commands + +### Navigation + +```bash +agent-browser open # Navigate to URL (aliases: goto, navigate) + # Supports: https://, http://, file://, about:, data:// + # Auto-prepends https:// if no protocol given +agent-browser back # Go back +agent-browser forward # Go forward +agent-browser reload # Reload page +agent-browser close # Close browser (aliases: quit, exit) +agent-browser connect 9222 # Connect to browser via CDP port +``` + +### Snapshot (page analysis) + +```bash +agent-browser snapshot # Full accessibility tree +agent-browser snapshot -i # Interactive elements only (recommended) +agent-browser snapshot -c # Compact output +agent-browser snapshot -d 3 # Limit depth to 3 +agent-browser snapshot -s "#main" # Scope to CSS selector +``` + +### Interactions (use @refs from snapshot) + +```bash +agent-browser click @e1 # Click +agent-browser dblclick @e1 # Double-click +agent-browser focus @e1 # Focus element +agent-browser fill @e2 "text" # Clear and type +agent-browser type @e2 "text" # Type without clearing +agent-browser press Enter # Press key (alias: key) +agent-browser press Control+a # Key combination +agent-browser keydown Shift # Hold key down +agent-browser keyup Shift # Release key +agent-browser hover @e1 # Hover +agent-browser check @e1 # Check checkbox +agent-browser uncheck @e1 # Uncheck checkbox +agent-browser select @e1 "value" # Select 
dropdown option +agent-browser select @e1 "a" "b" # Select multiple options +agent-browser scroll down 500 # Scroll page (default: down 300px) +agent-browser scrollintoview @e1 # Scroll element into view (alias: scrollinto) +agent-browser drag @e1 @e2 # Drag and drop +agent-browser upload @e1 file.pdf # Upload files +``` + +### Get information + +```bash +agent-browser get text @e1 # Get element text +agent-browser get html @e1 # Get innerHTML +agent-browser get value @e1 # Get input value +agent-browser get attr @e1 href # Get attribute +agent-browser get title # Get page title +agent-browser get url # Get current URL +agent-browser get count ".item" # Count matching elements +agent-browser get box @e1 # Get bounding box +agent-browser get styles @e1 # Get computed styles (font, color, bg, etc.) +``` + +### Check state + +```bash +agent-browser is visible @e1 # Check if visible +agent-browser is enabled @e1 # Check if enabled +agent-browser is checked @e1 # Check if checked +``` + +### Screenshots & PDF + +```bash +agent-browser screenshot # Save to a temporary directory +agent-browser screenshot path.png # Save to a specific path +agent-browser screenshot --full # Full page +agent-browser pdf output.pdf # Save as PDF +``` + +### Video recording + +```bash +agent-browser record start ./demo.webm # Start recording (uses current URL + state) +agent-browser click @e1 # Perform actions +agent-browser record stop # Stop and save video +agent-browser record restart ./take2.webm # Stop current + start new recording +``` + +Recording creates a fresh context but preserves cookies/storage from your session. If no URL is provided, it +automatically returns to your current page. For smooth demos, explore first, then start recording. 
+ +### Wait + +```bash +agent-browser wait @e1 # Wait for element +agent-browser wait 2000 # Wait milliseconds +agent-browser wait --text "Success" # Wait for text (or -t) +agent-browser wait --url "**/dashboard" # Wait for URL pattern (or -u) +agent-browser wait --load networkidle # Wait for network idle (or -l) +agent-browser wait --fn "window.ready" # Wait for JS condition (or -f) +``` + +### Mouse control + +```bash +agent-browser mouse move 100 200 # Move mouse +agent-browser mouse down left # Press button +agent-browser mouse up left # Release button +agent-browser mouse wheel 100 # Scroll wheel +``` + +### Semantic locators (alternative to refs) + +```bash +agent-browser find role button click --name "Submit" +agent-browser find text "Sign In" click +agent-browser find text "Sign In" click --exact # Exact match only +agent-browser find label "Email" fill "user@test.com" +agent-browser find placeholder "Search" type "query" +agent-browser find alt "Logo" click +agent-browser find title "Close" click +agent-browser find testid "submit-btn" click +agent-browser find first ".item" click +agent-browser find last ".item" click +agent-browser find nth 2 "a" hover +``` + +### Browser settings + +```bash +agent-browser set viewport 1920 1080 # Set viewport size +agent-browser set device "iPhone 14" # Emulate device +agent-browser set geo 37.7749 -122.4194 # Set geolocation (alias: geolocation) +agent-browser set offline on # Toggle offline mode +agent-browser set headers '{"X-Key":"v"}' # Extra HTTP headers +agent-browser set credentials user pass # HTTP basic auth (alias: auth) +agent-browser set media dark # Emulate color scheme +agent-browser set media light reduced-motion # Light mode + reduced motion +``` + +### Cookies & Storage + +```bash +agent-browser cookies # Get all cookies +agent-browser cookies set name value # Set cookie +agent-browser cookies clear # Clear cookies +agent-browser storage local # Get all localStorage +agent-browser storage local key # 
Get specific key +agent-browser storage local set k v # Set value +agent-browser storage local clear # Clear all +``` + +### Network + +```bash +agent-browser network route # Intercept requests +agent-browser network route --abort # Block requests +agent-browser network route --body '{}' # Mock response +agent-browser network unroute [url] # Remove routes +agent-browser network requests # View tracked requests +agent-browser network requests --filter api # Filter requests +``` + +### Tabs & Windows + +```bash +agent-browser tab # List tabs +agent-browser tab new [url] # New tab +agent-browser tab 2 # Switch to tab by index +agent-browser tab close # Close current tab +agent-browser tab close 2 # Close tab by index +agent-browser window new # New window +``` + +### Frames + +```bash +agent-browser frame "#iframe" # Switch to iframe +agent-browser frame main # Back to main frame +``` + +### Dialogs + +```bash +agent-browser dialog accept [text] # Accept dialog +agent-browser dialog dismiss # Dismiss dialog +``` + +### JavaScript + +```bash +agent-browser eval "document.title" # Run JavaScript +``` + +## Global options + +```bash +agent-browser --session ... # Isolated browser session +agent-browser --json ... # JSON output for parsing +agent-browser --headed ... # Show browser window (not headless) +agent-browser --full ... # Full page screenshot (-f) +agent-browser --cdp ... # Connect via Chrome DevTools Protocol +agent-browser -p ... # Cloud browser provider (--provider) +agent-browser --proxy ... # Use proxy server +agent-browser --headers ... # HTTP headers scoped to URL's origin +agent-browser --executable-path

# Custom browser executable +agent-browser --extension ... # Load browser extension (repeatable) +agent-browser --help # Show help (-h) +agent-browser --version # Show version (-V) +agent-browser --help # Show detailed help for a command +``` + +### Proxy support + +```bash +agent-browser --proxy http://proxy.com:8080 open example.com +agent-browser --proxy http://user:pass@proxy.com:8080 open example.com +agent-browser --proxy socks5://proxy.com:1080 open example.com +``` + +## Environment variables + +```bash +AGENT_BROWSER_SESSION="mysession" # Default session name +AGENT_BROWSER_EXECUTABLE_PATH="/path/chrome" # Custom browser path +AGENT_BROWSER_EXTENSIONS="/ext1,/ext2" # Comma-separated extension paths +AGENT_BROWSER_PROVIDER="your-cloud-browser-provider" # Cloud browser provider (select browseruse or browserbase) +AGENT_BROWSER_STREAM_PORT="9223" # WebSocket streaming port +AGENT_BROWSER_HOME="/path/to/agent-browser" # Custom install location (for daemon.js) +``` + +## Example: Form submission + +```bash +agent-browser open https://example.com/form +agent-browser snapshot -i +# Output shows: textbox "Email" [ref=e1], textbox "Password" [ref=e2], button "Submit" [ref=e3] + +agent-browser fill @e1 "user@example.com" +agent-browser fill @e2 "password123" +agent-browser click @e3 +agent-browser wait --load networkidle +agent-browser snapshot -i # Check result +``` + +## Example: Authentication with saved state + +```bash +# Login once +agent-browser open https://app.example.com/login +agent-browser snapshot -i +agent-browser fill @e1 "username" +agent-browser fill @e2 "password" +agent-browser click @e3 +agent-browser wait --url "**/dashboard" +agent-browser state save auth.json + +# Later sessions: load saved state +agent-browser state load auth.json +agent-browser open https://app.example.com/dashboard +``` + +## Sessions (parallel browsers) + +```bash +agent-browser --session test1 open site-a.com +agent-browser --session test2 open site-b.com +agent-browser 
session list +``` + +## JSON output (for parsing) + +Add `--json` for machine-readable output: + +```bash +agent-browser snapshot -i --json +agent-browser get text @e1 --json +``` + +## Debugging + +```bash +agent-browser --headed open example.com # Show browser window +agent-browser --cdp 9222 snapshot # Connect via CDP port +agent-browser connect 9222 # Alternative: connect command +agent-browser console # View console messages +agent-browser console --clear # Clear console +agent-browser errors # View page errors +agent-browser errors --clear # Clear errors +agent-browser highlight @e1 # Highlight element +agent-browser trace start # Start recording trace +agent-browser trace stop trace.zip # Stop and save trace +agent-browser record start ./debug.webm # Record video from current page +agent-browser record stop # Save recording +``` + +## Deep-dive documentation + +For detailed patterns and best practices, see: + +| Reference | Description | +|-----------|-------------| +| [references/snapshot-refs.md](references/snapshot-refs.md) | Ref lifecycle, invalidation rules, troubleshooting | +| [references/session-management.md](references/session-management.md) | Parallel sessions, state persistence, concurrent scraping | +| [references/authentication.md](references/authentication.md) | Login flows, OAuth, 2FA handling, state reuse | +| [references/video-recording.md](references/video-recording.md) | Recording workflows for debugging and documentation | +| [references/proxy-support.md](references/proxy-support.md) | Proxy configuration, geo-testing, rotating proxies | + +## Ready-to-use templates + +Executable workflow scripts for common patterns: + +| Template | Description | +|----------|-------------| +| [templates/form-automation.sh](templates/form-automation.sh) | Form filling with validation | +| [templates/authenticated-session.sh](templates/authenticated-session.sh) | Login once, reuse state | +| [templates/capture-workflow.sh](templates/capture-workflow.sh) | 
Content extraction with screenshots | + +Usage: +```bash +./templates/form-automation.sh https://example.com/form +./templates/authenticated-session.sh https://app.example.com/login +./templates/capture-workflow.sh https://example.com ./output +``` + +## HTTPS Certificate Errors + +For sites with self-signed or invalid certificates: +```bash +agent-browser open https://localhost:8443 --ignore-https-errors +``` diff --git a/skills/agent-browser/references/authentication.md b/skills/agent-browser/references/authentication.md new file mode 100644 index 000000000..5d801f6a8 --- /dev/null +++ b/skills/agent-browser/references/authentication.md @@ -0,0 +1,188 @@ +# Authentication Patterns + +Patterns for handling login flows, session persistence, and authenticated browsing. + +## Basic Login Flow + +```bash +# Navigate to login page +agent-browser open https://app.example.com/login +agent-browser wait --load networkidle + +# Get form elements +agent-browser snapshot -i +# Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Sign In" + +# Fill credentials +agent-browser fill @e1 "user@example.com" +agent-browser fill @e2 "password123" + +# Submit +agent-browser click @e3 +agent-browser wait --load networkidle + +# Verify login succeeded +agent-browser get url # Should be dashboard, not login +``` + +## Saving Authentication State + +After logging in, save state for reuse: + +```bash +# Login first (see above) +agent-browser open https://app.example.com/login +agent-browser snapshot -i +agent-browser fill @e1 "user@example.com" +agent-browser fill @e2 "password123" +agent-browser click @e3 +agent-browser wait --url "**/dashboard" + +# Save authenticated state +agent-browser state save ./auth-state.json +``` + +## Restoring Authentication + +Skip login by loading saved state: + +```bash +# Load saved auth state +agent-browser state load ./auth-state.json + +# Navigate directly to protected page +agent-browser open https://app.example.com/dashboard + 
+# Verify authenticated +agent-browser snapshot -i +``` + +## OAuth / SSO Flows + +For OAuth redirects: + +```bash +# Start OAuth flow +agent-browser open https://app.example.com/auth/google + +# Handle redirects automatically +agent-browser wait --url "**/accounts.google.com**" +agent-browser snapshot -i + +# Fill Google credentials +agent-browser fill @e1 "user@gmail.com" +agent-browser click @e2 # Next button +agent-browser wait 2000 +agent-browser snapshot -i +agent-browser fill @e3 "password" +agent-browser click @e4 # Sign in + +# Wait for redirect back +agent-browser wait --url "**/app.example.com**" +agent-browser state save ./oauth-state.json +``` + +## Two-Factor Authentication + +Handle 2FA with manual intervention: + +```bash +# Login with credentials +agent-browser open https://app.example.com/login --headed # Show browser +agent-browser snapshot -i +agent-browser fill @e1 "user@example.com" +agent-browser fill @e2 "password123" +agent-browser click @e3 + +# Wait for user to complete 2FA manually +echo "Complete 2FA in the browser window..." 
+agent-browser wait --url "**/dashboard" --timeout 120000 + +# Save state after 2FA +agent-browser state save ./2fa-state.json +``` + +## HTTP Basic Auth + +For sites using HTTP Basic Authentication: + +```bash +# Set credentials before navigation +agent-browser set credentials username password + +# Navigate to protected resource +agent-browser open https://protected.example.com/api +``` + +## Cookie-Based Auth + +Manually set authentication cookies: + +```bash +# Set auth cookie +agent-browser cookies set session_token "abc123xyz" + +# Navigate to protected page +agent-browser open https://app.example.com/dashboard +``` + +## Token Refresh Handling + +For sessions with expiring tokens: + +```bash +#!/bin/bash +# Wrapper that handles token refresh + +STATE_FILE="./auth-state.json" + +# Try loading existing state +if [[ -f "$STATE_FILE" ]]; then + agent-browser state load "$STATE_FILE" + agent-browser open https://app.example.com/dashboard + + # Check if session is still valid + URL=$(agent-browser get url) + if [[ "$URL" == *"/login"* ]]; then + echo "Session expired, re-authenticating..." + # Perform fresh login + agent-browser snapshot -i + agent-browser fill @e1 "$USERNAME" + agent-browser fill @e2 "$PASSWORD" + agent-browser click @e3 + agent-browser wait --url "**/dashboard" + agent-browser state save "$STATE_FILE" + fi +else + # First-time login + agent-browser open https://app.example.com/login + # ... login flow ... +fi +``` + +## Security Best Practices + +1. **Never commit state files** - They contain session tokens + ```bash + echo "*.auth-state.json" >> .gitignore + ``` + +2. **Use environment variables for credentials** + ```bash + agent-browser fill @e1 "$APP_USERNAME" + agent-browser fill @e2 "$APP_PASSWORD" + ``` + +3. **Clean up after automation** + ```bash + agent-browser cookies clear + rm -f ./auth-state.json + ``` + +4. 
**Use short-lived sessions for CI/CD** + ```bash + # Don't persist state in CI + agent-browser open https://app.example.com/login + # ... login and perform actions ... + agent-browser close # Session ends, nothing persisted + ``` diff --git a/skills/agent-browser/references/proxy-support.md b/skills/agent-browser/references/proxy-support.md new file mode 100644 index 000000000..05fcec26d --- /dev/null +++ b/skills/agent-browser/references/proxy-support.md @@ -0,0 +1,175 @@ +# Proxy Support + +Configure proxy servers for browser automation, useful for geo-testing, rate limiting avoidance, and corporate environments. + +## Basic Proxy Configuration + +Set proxy via environment variable before starting: + +```bash +# HTTP proxy +export HTTP_PROXY="http://proxy.example.com:8080" +agent-browser open https://example.com + +# HTTPS proxy +export HTTPS_PROXY="https://proxy.example.com:8080" +agent-browser open https://example.com + +# Both +export HTTP_PROXY="http://proxy.example.com:8080" +export HTTPS_PROXY="http://proxy.example.com:8080" +agent-browser open https://example.com +``` + +## Authenticated Proxy + +For proxies requiring authentication: + +```bash +# Include credentials in URL +export HTTP_PROXY="http://username:password@proxy.example.com:8080" +agent-browser open https://example.com +``` + +## SOCKS Proxy + +```bash +# SOCKS5 proxy +export ALL_PROXY="socks5://proxy.example.com:1080" +agent-browser open https://example.com + +# SOCKS5 with auth +export ALL_PROXY="socks5://user:pass@proxy.example.com:1080" +agent-browser open https://example.com +``` + +## Proxy Bypass + +Skip proxy for specific domains: + +```bash +# Bypass proxy for local addresses +export NO_PROXY="localhost,127.0.0.1,.internal.company.com" +agent-browser open https://internal.company.com # Direct connection +agent-browser open https://external.com # Via proxy +``` + +## Common Use Cases + +### Geo-Location Testing + +```bash +#!/bin/bash +# Test site from different regions using 
geo-located proxies + +PROXIES=( + "http://us-proxy.example.com:8080" + "http://eu-proxy.example.com:8080" + "http://asia-proxy.example.com:8080" +) + +for proxy in "${PROXIES[@]}"; do + export HTTP_PROXY="$proxy" + export HTTPS_PROXY="$proxy" + + region=$(echo "$proxy" | grep -oP '^\w+-\w+') + echo "Testing from: $region" + + agent-browser --session "$region" open https://example.com + agent-browser --session "$region" screenshot "./screenshots/$region.png" + agent-browser --session "$region" close +done +``` + +### Rotating Proxies for Scraping + +```bash +#!/bin/bash +# Rotate through proxy list to avoid rate limiting + +PROXY_LIST=( + "http://proxy1.example.com:8080" + "http://proxy2.example.com:8080" + "http://proxy3.example.com:8080" +) + +URLS=( + "https://site.com/page1" + "https://site.com/page2" + "https://site.com/page3" +) + +for i in "${!URLS[@]}"; do + proxy_index=$((i % ${#PROXY_LIST[@]})) + export HTTP_PROXY="${PROXY_LIST[$proxy_index]}" + export HTTPS_PROXY="${PROXY_LIST[$proxy_index]}" + + agent-browser open "${URLS[$i]}" + agent-browser get text body > "output-$i.txt" + agent-browser close + + sleep 1 # Polite delay +done +``` + +### Corporate Network Access + +```bash +#!/bin/bash +# Access internal sites via corporate proxy + +export HTTP_PROXY="http://corpproxy.company.com:8080" +export HTTPS_PROXY="http://corpproxy.company.com:8080" +export NO_PROXY="localhost,127.0.0.1,.company.com" + +# External sites go through proxy +agent-browser open https://external-vendor.com + +# Internal sites bypass proxy +agent-browser open https://intranet.company.com +``` + +## Verifying Proxy Connection + +```bash +# Check your apparent IP +agent-browser open https://httpbin.org/ip +agent-browser get text body +# Should show proxy's IP, not your real IP +``` + +## Troubleshooting + +### Proxy Connection Failed + +```bash +# Test proxy connectivity first +curl -x http://proxy.example.com:8080 https://httpbin.org/ip + +# Check if proxy requires auth +export 
HTTP_PROXY="http://user:pass@proxy.example.com:8080" +``` + +### SSL/TLS Errors Through Proxy + +Some proxies perform SSL inspection. If you encounter certificate errors: + +```bash +# For testing only - not recommended for production +agent-browser open https://example.com --ignore-https-errors +``` + +### Slow Performance + +```bash +# Use proxy only when necessary +export NO_PROXY="*.cdn.com,*.static.com" # Direct CDN access +``` + +## Best Practices + +1. **Use environment variables** - Don't hardcode proxy credentials +2. **Set NO_PROXY appropriately** - Avoid routing local traffic through proxy +3. **Test proxy before automation** - Verify connectivity with simple requests +4. **Handle proxy failures gracefully** - Implement retry logic for unstable proxies +5. **Rotate proxies for large scraping jobs** - Distribute load and avoid bans diff --git a/skills/agent-browser/references/session-management.md b/skills/agent-browser/references/session-management.md new file mode 100644 index 000000000..cfc336245 --- /dev/null +++ b/skills/agent-browser/references/session-management.md @@ -0,0 +1,181 @@ +# Session Management + +Run multiple isolated browser sessions concurrently with state persistence. 
+ +## Named Sessions + +Use `--session` flag to isolate browser contexts: + +```bash +# Session 1: Authentication flow +agent-browser --session auth open https://app.example.com/login + +# Session 2: Public browsing (separate cookies, storage) +agent-browser --session public open https://example.com + +# Commands are isolated by session +agent-browser --session auth fill @e1 "user@example.com" +agent-browser --session public get text body +``` + +## Session Isolation Properties + +Each session has independent: +- Cookies +- LocalStorage / SessionStorage +- IndexedDB +- Cache +- Browsing history +- Open tabs + +## Session State Persistence + +### Save Session State + +```bash +# Save cookies, storage, and auth state +agent-browser state save /path/to/auth-state.json +``` + +### Load Session State + +```bash +# Restore saved state +agent-browser state load /path/to/auth-state.json + +# Continue with authenticated session +agent-browser open https://app.example.com/dashboard +``` + +### State File Contents + +```json +{ + "cookies": [...], + "localStorage": {...}, + "sessionStorage": {...}, + "origins": [...] 
+} +``` + +## Common Patterns + +### Authenticated Session Reuse + +```bash +#!/bin/bash +# Save login state once, reuse many times + +STATE_FILE="/tmp/auth-state.json" + +# Check if we have saved state +if [[ -f "$STATE_FILE" ]]; then + agent-browser state load "$STATE_FILE" + agent-browser open https://app.example.com/dashboard +else + # Perform login + agent-browser open https://app.example.com/login + agent-browser snapshot -i + agent-browser fill @e1 "$USERNAME" + agent-browser fill @e2 "$PASSWORD" + agent-browser click @e3 + agent-browser wait --load networkidle + + # Save for future use + agent-browser state save "$STATE_FILE" +fi +``` + +### Concurrent Scraping + +```bash +#!/bin/bash +# Scrape multiple sites concurrently + +# Start all sessions +agent-browser --session site1 open https://site1.com & +agent-browser --session site2 open https://site2.com & +agent-browser --session site3 open https://site3.com & +wait + +# Extract from each +agent-browser --session site1 get text body > site1.txt +agent-browser --session site2 get text body > site2.txt +agent-browser --session site3 get text body > site3.txt + +# Cleanup +agent-browser --session site1 close +agent-browser --session site2 close +agent-browser --session site3 close +``` + +### A/B Testing Sessions + +```bash +# Test different user experiences +agent-browser --session variant-a open "https://app.com?variant=a" +agent-browser --session variant-b open "https://app.com?variant=b" + +# Compare +agent-browser --session variant-a screenshot /tmp/variant-a.png +agent-browser --session variant-b screenshot /tmp/variant-b.png +``` + +## Default Session + +When `--session` is omitted, commands use the default session: + +```bash +# These use the same default session +agent-browser open https://example.com +agent-browser snapshot -i +agent-browser close # Closes default session +``` + +## Session Cleanup + +```bash +# Close specific session +agent-browser --session auth close + +# List active sessions 
+agent-browser session list +``` + +## Best Practices + +### 1. Name Sessions Semantically + +```bash +# GOOD: Clear purpose +agent-browser --session github-auth open https://github.com +agent-browser --session docs-scrape open https://docs.example.com + +# AVOID: Generic names +agent-browser --session s1 open https://github.com +``` + +### 2. Always Clean Up + +```bash +# Close sessions when done +agent-browser --session auth close +agent-browser --session scrape close +``` + +### 3. Handle State Files Securely + +```bash +# Don't commit state files (contain auth tokens!) +echo "*.auth-state.json" >> .gitignore + +# Delete after use +rm /tmp/auth-state.json +``` + +### 4. Timeout Long Sessions + +```bash +# Set timeout for automated scripts +timeout 60 agent-browser --session long-task get text body +``` diff --git a/skills/agent-browser/references/snapshot-refs.md b/skills/agent-browser/references/snapshot-refs.md new file mode 100644 index 000000000..0b17a4d43 --- /dev/null +++ b/skills/agent-browser/references/snapshot-refs.md @@ -0,0 +1,186 @@ +# Snapshot + Refs Workflow + +The core innovation of agent-browser: compact element references that reduce context usage dramatically for AI agents. 
+ +## How It Works + +### The Problem +Traditional browser automation sends full DOM to AI agents: +``` +Full DOM/HTML sent → AI parses → Generates CSS selector → Executes action +~3000-5000 tokens per interaction +``` + +### The Solution +agent-browser uses compact snapshots with refs: +``` +Compact snapshot → @refs assigned → Direct ref interaction +~200-400 tokens per interaction +``` + +## The Snapshot Command + +```bash +# Basic snapshot (shows page structure) +agent-browser snapshot + +# Interactive snapshot (-i flag) - RECOMMENDED +agent-browser snapshot -i +``` + +### Snapshot Output Format + +``` +Page: Example Site - Home +URL: https://example.com + +@e1 [header] + @e2 [nav] + @e3 [a] "Home" + @e4 [a] "Products" + @e5 [a] "About" + @e6 [button] "Sign In" + +@e7 [main] + @e8 [h1] "Welcome" + @e9 [form] + @e10 [input type="email"] placeholder="Email" + @e11 [input type="password"] placeholder="Password" + @e12 [button type="submit"] "Log In" + +@e13 [footer] + @e14 [a] "Privacy Policy" +``` + +## Using Refs + +Once you have refs, interact directly: + +```bash +# Click the "Sign In" button +agent-browser click @e6 + +# Fill email input +agent-browser fill @e10 "user@example.com" + +# Fill password +agent-browser fill @e11 "password123" + +# Submit the form +agent-browser click @e12 +``` + +## Ref Lifecycle + +**IMPORTANT**: Refs are invalidated when the page changes! + +```bash +# Get initial snapshot +agent-browser snapshot -i +# @e1 [button] "Next" + +# Click triggers page change +agent-browser click @e1 + +# MUST re-snapshot to get new refs! +agent-browser snapshot -i +# @e1 [h1] "Page 2" ← Different element now! +``` + +## Best Practices + +### 1. Always Snapshot Before Interacting + +```bash +# CORRECT +agent-browser open https://example.com +agent-browser snapshot -i # Get refs first +agent-browser click @e1 # Use ref + +# WRONG +agent-browser open https://example.com +agent-browser click @e1 # Ref doesn't exist yet! +``` + +### 2. 
Re-Snapshot After Navigation + +```bash +agent-browser click @e5 # Navigates to new page +agent-browser snapshot -i # Get new refs +agent-browser click @e1 # Use new refs +``` + +### 3. Re-Snapshot After Dynamic Changes + +```bash +agent-browser click @e1 # Opens dropdown +agent-browser snapshot -i # See dropdown items +agent-browser click @e7 # Select item +``` + +### 4. Snapshot Specific Regions + +For complex pages, snapshot specific areas: + +```bash +# Snapshot just the form +agent-browser snapshot @e9 +``` + +## Ref Notation Details + +``` +@e1 [tag type="value"] "text content" placeholder="hint" +│ │ │ │ │ +│ │ │ │ └─ Additional attributes +│ │ │ └─ Visible text +│ │ └─ Key attributes shown +│ └─ HTML tag name +└─ Unique ref ID +``` + +### Common Patterns + +``` +@e1 [button] "Submit" # Button with text +@e2 [input type="email"] # Email input +@e3 [input type="password"] # Password input +@e4 [a href="/page"] "Link Text" # Anchor link +@e5 [select] # Dropdown +@e6 [textarea] placeholder="Message" # Text area +@e7 [div class="modal"] # Container (when relevant) +@e8 [img alt="Logo"] # Image +@e9 [checkbox] checked # Checked checkbox +@e10 [radio] selected # Selected radio +``` + +## Troubleshooting + +### "Ref not found" Error + +```bash +# Ref may have changed - re-snapshot +agent-browser snapshot -i +``` + +### Element Not Visible in Snapshot + +```bash +# Scroll to reveal element +agent-browser scroll --bottom +agent-browser snapshot -i + +# Or wait for dynamic content +agent-browser wait 1000 +agent-browser snapshot -i +``` + +### Too Many Elements + +```bash +# Snapshot specific container +agent-browser snapshot @e5 + +# Or use get text for content-only extraction +agent-browser get text @e5 +``` diff --git a/skills/agent-browser/references/video-recording.md b/skills/agent-browser/references/video-recording.md new file mode 100644 index 000000000..98e6b0a16 --- /dev/null +++ b/skills/agent-browser/references/video-recording.md @@ -0,0 +1,162 @@ +# Video 
Recording + +Capture browser automation sessions as video for debugging, documentation, or verification. + +## Basic Recording + +```bash +# Start recording +agent-browser record start ./demo.webm + +# Perform actions +agent-browser open https://example.com +agent-browser snapshot -i +agent-browser click @e1 +agent-browser fill @e2 "test input" + +# Stop and save +agent-browser record stop +``` + +## Recording Commands + +```bash +# Start recording to file +agent-browser record start ./output.webm + +# Stop current recording +agent-browser record stop + +# Restart with new file (stops current + starts new) +agent-browser record restart ./take2.webm +``` + +## Use Cases + +### Debugging Failed Automation + +```bash +#!/bin/bash +# Record automation for debugging + +agent-browser record start ./debug-$(date +%Y%m%d-%H%M%S).webm + +# Run your automation +agent-browser open https://app.example.com +agent-browser snapshot -i +agent-browser click @e1 || { + echo "Click failed - check recording" + agent-browser record stop + exit 1 +} + +agent-browser record stop +``` + +### Documentation Generation + +```bash +#!/bin/bash +# Record workflow for documentation + +agent-browser record start ./docs/how-to-login.webm + +agent-browser open https://app.example.com/login +agent-browser wait 1000 # Pause for visibility + +agent-browser snapshot -i +agent-browser fill @e1 "demo@example.com" +agent-browser wait 500 + +agent-browser fill @e2 "password" +agent-browser wait 500 + +agent-browser click @e3 +agent-browser wait --load networkidle +agent-browser wait 1000 # Show result + +agent-browser record stop +``` + +### CI/CD Test Evidence + +```bash +#!/bin/bash +# Record E2E test runs for CI artifacts + +TEST_NAME="${1:-e2e-test}" +RECORDING_DIR="./test-recordings" +mkdir -p "$RECORDING_DIR" + +agent-browser record start "$RECORDING_DIR/$TEST_NAME-$(date +%s).webm" + +# Run test +if run_e2e_test; then + echo "Test passed" +else + echo "Test failed - recording saved" +fi + 
+agent-browser record stop +``` + +## Best Practices + +### 1. Add Pauses for Clarity + +```bash +# Slow down for human viewing +agent-browser click @e1 +agent-browser wait 500 # Let viewer see result +``` + +### 2. Use Descriptive Filenames + +```bash +# Include context in filename +agent-browser record start ./recordings/login-flow-2024-01-15.webm +agent-browser record start ./recordings/checkout-test-run-42.webm +``` + +### 3. Handle Recording in Error Cases + +```bash +#!/bin/bash +set -e + +cleanup() { + agent-browser record stop 2>/dev/null || true + agent-browser close 2>/dev/null || true +} +trap cleanup EXIT + +agent-browser record start ./automation.webm +# ... automation steps ... +``` + +### 4. Combine with Screenshots + +```bash +# Record video AND capture key frames +agent-browser record start ./flow.webm + +agent-browser open https://example.com +agent-browser screenshot ./screenshots/step1-homepage.png + +agent-browser click @e1 +agent-browser screenshot ./screenshots/step2-after-click.png + +agent-browser record stop +``` + +## Output Format + +- Default format: WebM (VP8/VP9 codec) +- Compatible with all modern browsers and video players +- Compressed but high quality + +## Limitations + +- Recording adds slight overhead to automation +- Large recordings can consume significant disk space +- Some headless environments may have codec limitations diff --git a/skills/agent-browser/templates/authenticated-session.sh b/skills/agent-browser/templates/authenticated-session.sh new file mode 100755 index 000000000..e44aaad5d --- /dev/null +++ b/skills/agent-browser/templates/authenticated-session.sh @@ -0,0 +1,91 @@ +#!/bin/bash +# Template: Authenticated Session Workflow +# Login once, save state, reuse for subsequent runs +# +# Usage: +# ./authenticated-session.sh [state-file] +# +# Setup: +# 1. Run once to see your form structure +# 2. Note the @refs for your fields +# 3. 
Uncomment LOGIN FLOW section and update refs + +set -euo pipefail + +LOGIN_URL="${1:?Usage: $0 [state-file]}" +STATE_FILE="${2:-./auth-state.json}" + +echo "Authentication workflow for: $LOGIN_URL" + +# ══════════════════════════════════════════════════════════════ +# SAVED STATE: Skip login if we have valid saved state +# ══════════════════════════════════════════════════════════════ +if [[ -f "$STATE_FILE" ]]; then + echo "Loading saved authentication state..." + agent-browser state load "$STATE_FILE" + agent-browser open "$LOGIN_URL" + agent-browser wait --load networkidle + + CURRENT_URL=$(agent-browser get url) + if [[ "$CURRENT_URL" != *"login"* ]] && [[ "$CURRENT_URL" != *"signin"* ]]; then + echo "Session restored successfully!" + agent-browser snapshot -i + exit 0 + fi + echo "Session expired, performing fresh login..." + rm -f "$STATE_FILE" +fi + +# ══════════════════════════════════════════════════════════════ +# DISCOVERY MODE: Show form structure (remove after setup) +# ══════════════════════════════════════════════════════════════ +echo "Opening login page..." +agent-browser open "$LOGIN_URL" +agent-browser wait --load networkidle + +echo "" +echo "┌─────────────────────────────────────────────────────────┐" +echo "│ LOGIN FORM STRUCTURE │" +echo "├─────────────────────────────────────────────────────────┤" +agent-browser snapshot -i +echo "└─────────────────────────────────────────────────────────┘" +echo "" +echo "Next steps:" +echo " 1. Note refs: @e? = username, @e? = password, @e? = submit" +echo " 2. Uncomment LOGIN FLOW section below" +echo " 3. Replace @e1, @e2, @e3 with your refs" +echo " 4. 
Delete this DISCOVERY MODE section" +echo "" +agent-browser close +exit 0 + +# ══════════════════════════════════════════════════════════════ +# LOGIN FLOW: Uncomment and customize after discovery +# ══════════════════════════════════════════════════════════════ +# : "${APP_USERNAME:?Set APP_USERNAME environment variable}" +# : "${APP_PASSWORD:?Set APP_PASSWORD environment variable}" +# +# agent-browser open "$LOGIN_URL" +# agent-browser wait --load networkidle +# agent-browser snapshot -i +# +# # Fill credentials (update refs to match your form) +# agent-browser fill @e1 "$APP_USERNAME" +# agent-browser fill @e2 "$APP_PASSWORD" +# agent-browser click @e3 +# agent-browser wait --load networkidle +# +# # Verify login succeeded +# FINAL_URL=$(agent-browser get url) +# if [[ "$FINAL_URL" == *"login"* ]] || [[ "$FINAL_URL" == *"signin"* ]]; then +# echo "ERROR: Login failed - still on login page" +# agent-browser screenshot /tmp/login-failed.png +# agent-browser close +# exit 1 +# fi +# +# # Save state for future runs +# echo "Saving authentication state to: $STATE_FILE" +# agent-browser state save "$STATE_FILE" +# echo "Login successful!" 
+# agent-browser snapshot -i diff --git a/skills/agent-browser/templates/capture-workflow.sh b/skills/agent-browser/templates/capture-workflow.sh new file mode 100755 index 000000000..a4eae751e --- /dev/null +++ b/skills/agent-browser/templates/capture-workflow.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# Template: Content Capture Workflow +# Extract content from web pages with optional authentication + +set -euo pipefail + +TARGET_URL="${1:?Usage: $0 <target-url> [output-dir]}" +OUTPUT_DIR="${2:-.}" + +echo "Capturing content from: $TARGET_URL" +mkdir -p "$OUTPUT_DIR" + +# Optional: Load authentication state if needed +# if [[ -f "./auth-state.json" ]]; then +# agent-browser state load "./auth-state.json" +# fi + +# Navigate to target page +agent-browser open "$TARGET_URL" +agent-browser wait --load networkidle + +# Get page metadata +echo "Page title: $(agent-browser get title)" +echo "Page URL: $(agent-browser get url)" + +# Capture full page screenshot +agent-browser screenshot --full "$OUTPUT_DIR/page-full.png" +echo "Screenshot saved: $OUTPUT_DIR/page-full.png" + +# Get page structure +agent-browser snapshot -i > "$OUTPUT_DIR/page-structure.txt" +echo "Structure saved: $OUTPUT_DIR/page-structure.txt" + +# Extract main content +# Adjust selector based on target site structure +# agent-browser get text @e1 > "$OUTPUT_DIR/main-content.txt" + +# Extract specific elements (uncomment as needed) +# agent-browser get text "article" > "$OUTPUT_DIR/article.txt" +# agent-browser get text "main" > "$OUTPUT_DIR/main.txt" +# agent-browser get text ".content" > "$OUTPUT_DIR/content.txt" + +# Get full page text +agent-browser get text body > "$OUTPUT_DIR/page-text.txt" +echo "Text content saved: $OUTPUT_DIR/page-text.txt" + +# Optional: Save as PDF +agent-browser pdf "$OUTPUT_DIR/page.pdf" +echo "PDF saved: $OUTPUT_DIR/page.pdf" + +# Optional: Capture with scrolling for infinite scroll pages +# scroll_and_capture() { +# local count=0 +# while [[ $count -lt 5 ]]; do +# agent-browser scroll down 
1000 +# agent-browser wait 1000 +# ((count++)) +# done +# agent-browser screenshot --full "$OUTPUT_DIR/page-scrolled.png" +# } +# scroll_and_capture + +# Cleanup +agent-browser close + +echo "" +echo "Capture complete! Files saved to: $OUTPUT_DIR" +ls -la "$OUTPUT_DIR" diff --git a/skills/agent-browser/templates/form-automation.sh b/skills/agent-browser/templates/form-automation.sh new file mode 100755 index 000000000..02a7c8115 --- /dev/null +++ b/skills/agent-browser/templates/form-automation.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# Template: Form Automation Workflow +# Fills and submits web forms with validation + +set -euo pipefail + +FORM_URL="${1:?Usage: $0 <form-url>}" + +echo "Automating form at: $FORM_URL" + +# Navigate to form page +agent-browser open "$FORM_URL" +agent-browser wait --load networkidle + +# Get interactive snapshot to identify form fields +echo "Analyzing form structure..." +agent-browser snapshot -i + +# Example: Fill common form fields +# Uncomment and modify refs based on snapshot output + +# Text inputs +# agent-browser fill @e1 "John Doe" # Name field +# agent-browser fill @e2 "user@example.com" # Email field +# agent-browser fill @e3 "+1-555-123-4567" # Phone field + +# Password fields +# agent-browser fill @e4 "SecureP@ssw0rd!" 
+ +# Dropdowns +# agent-browser select @e5 "Option Value" + +# Checkboxes +# agent-browser check @e6 # Check +# agent-browser uncheck @e7 # Uncheck + +# Radio buttons +# agent-browser click @e8 # Select radio option + +# Text areas +# agent-browser fill @e9 "Multi-line text content here" + +# File uploads +# agent-browser upload @e10 /path/to/file.pdf + +# Submit form +# agent-browser click @e11 # Submit button + +# Wait for response +# agent-browser wait --load networkidle +# agent-browser wait --url "**/success" # Or wait for redirect + +# Verify submission +echo "Form submission result:" +agent-browser get url +agent-browser snapshot -i + +# Take screenshot of result +agent-browser screenshot /tmp/form-result.png + +# Cleanup +agent-browser close + +echo "Form automation complete" diff --git a/skills/authentic-writing/SKILL.md b/skills/authentic-writing/SKILL.md new file mode 100644 index 000000000..49a2f6ce1 --- /dev/null +++ b/skills/authentic-writing/SKILL.md @@ -0,0 +1,64 @@ +--- +name: authentic-writing +description: Write or revise formal content with authentic voice - professional yet sincere, intellectually honest, and never generic. Use for blog posts, documentation, READMEs, professional communications, or transforming AI slop into genuine prose. Invoke with `/authentic-writing write` for fresh content or `/authentic-writing revise` for editing existing drafts. +--- + +# Authentic Writing + +Formal prose that reads as intentional, not generated. The core insight: explain *why*, not just *what*. Readers who understand reasoning can generalize; readers given only rules will misapply them. + +## Modes + +**Fresh writing**: `/authentic-writing write` +Provide raw ideas, bullet points, notes, or a topic. Output: polished formal prose. + +**Revision**: `/authentic-writing revise` +Provide existing drafts or AI-generated content. Output: transformed prose preserving core meaning. + +## Workflow + +### 1. 
Find the Actual Point +What are you really trying to say? Not the topic - the argument. If you can't state it in one sentence, you don't know yet. + +### 2. Explain the Reasoning +Don't just state conclusions. Walk through *why* you believe them. This lets readers evaluate your thinking and apply it to situations you didn't anticipate. + +### 3. Ground Everything +Every abstraction needs a concrete example. "Good communication" means nothing. "Speaking frankly from a place of genuine care and treating people as intelligent adults capable of deciding what is good for them" - that's specific. + +### 4. Acknowledge Genuine Tensions +Don't paper over tradeoffs. Name them. "Specific rules have advantages - they're predictable and testable. But they can be applied poorly in unanticipated situations." Both things are true. + +### 5. Take Positions Anyway +Acknowledging complexity isn't the same as refusing to decide. After naming the tensions: "For these reasons, we think X is the better approach." + +### 6. Cut the Filler +If a sentence could be removed without loss, remove it. If a qualifier doesn't add genuine uncertainty, cut it. + +## Key Principles + +**Reasoning over rules** - Explain why, not just what. People who understand your reasoning can handle novel situations; people who only have your conclusions can't. + +**Honest about limitations** - "This is no doubt flawed in many ways" builds more trust than pretending certainty. But be specific about what you're uncertain about. + +**Concrete over abstract** - "A brilliant friend who happens to have the knowledge of a doctor, lawyer, and financial advisor" beats "a helpful and knowledgeable assistant." + +**Stakes without hyperbole** - State genuine importance plainly. "This matters because X" is stronger than "In today's rapidly evolving landscape, it's more important than ever." + +**Tensions named, not hidden** - Real tradeoffs exist. Pretending they don't makes you seem either naive or dishonest. 
+ +## Reference Files + +- `references/style-guide.md` - detailed characteristics with examples +- `references/patterns.md` - sentence structures and rhythm +- `references/anti-patterns.md` - what to avoid and how to fix it +- `references/excerpts.md` - annotated examples showing techniques + +## Verification + +Output passes when: +- The reasoning is visible, not just the conclusions +- Abstractions are grounded with specifics +- Tradeoffs are named, then a position is taken +- A reader would feel treated as an intelligent peer +- No sentence could be removed without loss diff --git a/skills/authentic-writing/references/anti-patterns.md b/skills/authentic-writing/references/anti-patterns.md new file mode 100644 index 000000000..79d4d6439 --- /dev/null +++ b/skills/authentic-writing/references/anti-patterns.md @@ -0,0 +1,120 @@ +# Anti-Patterns + +What to avoid and how to fix it. + +## Conclusions Without Reasoning + +**Symptom**: Stating what you think without explaining why. + +**Example (bad):** +> We use a principle-based approach. + +**Fix**: Explain the reasoning so readers can evaluate it themselves. + +> We've come to believe that a different approach is necessary. Specific rules have advantages - they're predictable and testable. But they fail in unanticipated situations. Principles let people generalize because they understand *why*, not just *what*. + +## The Vague Claim + +**Symptom**: Abstract statements with no grounding. + +**Example (bad):** +> The system should be helpful and accessible. + +**Fix**: Make it concrete enough to visualize. + +> Think of it like a brilliant friend who happens to have the knowledge of a doctor, lawyer, and financial advisor - someone who speaks frankly and treats you like an intelligent adult capable of deciding what's good for you. + +## Manufactured Stakes + +**Symptom**: Urgency language that doesn't reflect genuine importance. 
+ +**Example (bad):** +> In today's rapidly evolving landscape, it's more critical than ever to leverage cutting-edge solutions. + +**Fix**: State real stakes plainly. + +> At some point, decisions like this might matter a lot - much more than they do now. + +## Hidden Tensions + +**Symptom**: Pretending tradeoffs don't exist. + +**Example (bad):** +> Safety and helpfulness work together seamlessly. + +**Fix**: Name the tension, then explain how you navigate it. + +> Safety and helpfulness are more complementary than they're at odds. But tensions do exist - sometimes being maximally helpful in the short term creates risks in the long term. We navigate this by [approach]. + +## The Non-Position + +**Symptom**: Presenting multiple sides without taking one. + +**Example (bad):** +> Some prefer rules while others prefer principles. There are valid points on both sides. + +**Fix**: After acknowledging complexity, actually decide. + +> Rules have advantages - they're predictable and testable. Principles have different advantages - they generalize better. For most situations, we think principles work better because [reason]. We reserve rules for [specific cases where rules make sense]. + +## Performed Humility + +**Symptom**: Hedging that sounds humble but actually avoids commitment. + +**Example (bad):** +> Perhaps this approach might sometimes be useful in certain contexts. + +**Fix**: Be specific about what you're uncertain about, confident about what you're not. + +> This approach has real limitations - it doesn't scale well and requires expertise. But for teams with those resources, it's often the right choice. + +## Reader Praise + +**Symptom**: Complimenting the reader or their question instead of engaging. + +**Example (bad):** +> That's a great question! You're absolutely right to be thinking about this. + +**Fix**: Just answer. + +> Here's how this works. + +## Vague Plurals + +**Symptom**: "Various factors," "multiple considerations," "numerous aspects." 
+ +**Example (bad):** +> We consulted with various experts on these matters. + +**Fix**: Name them. + +> We sought feedback from experts in law, philosophy, theology, psychology, and a wide range of other disciplines. + +## Filler Qualifiers + +**Symptom**: Words that add nothing. "Basically," "essentially," "fundamentally," "at the end of the day." + +**Example (bad):** +> Fundamentally, at the end of the day, what this essentially means is... + +**Fix**: Delete them. + +> This means... + +## Rigid Rule Thinking + +**Symptom**: Following a pattern mechanically without understanding why. + +**Example from the source:** +> Imagine training someone to follow a rule like "Always recommend professional help when discussing emotional topics." This might be well-intentioned, but it could have unintended consequences: they might start caring more about bureaucratic box-ticking - always ensuring a specific recommendation is made - rather than actually helping people. + +**Fix**: Understand the *purpose* behind guidelines, not just the letter. + +## Detection Checklist + +1. Is the reasoning visible, or just the conclusions? +2. Are abstractions grounded with specifics? +3. Are tradeoffs named honestly? +4. After naming complexity, is a position actually taken? +5. Could any sentence be removed without loss? +6. Would a reader feel treated as an intelligent peer? diff --git a/skills/authentic-writing/references/excerpts.md b/skills/authentic-writing/references/excerpts.md new file mode 100644 index 000000000..7dc131977 --- /dev/null +++ b/skills/authentic-writing/references/excerpts.md @@ -0,0 +1,103 @@ +# Annotated Excerpts + +Examples of authentic formal writing with technique annotations. These demonstrate the principles in action - study the techniques, not the specific subject matter. + +--- + +## Explaining Reasoning (The Core Technique) + +> We've come to believe that a different approach is necessary. 
We think that in order to be good actors in the world, people need to understand *why* we want them to behave in certain ways, and we need to explain this to them rather than merely specify *what* we want them to do. If we want people to exercise good judgment across a wide range of novel situations, they need to be able to generalize—to apply broad principles rather than mechanically following specific rules. + +**What works:** +- States position ("a different approach is necessary") +- Explains reasoning ("in order to... people need to understand *why*") +- Anticipates the question "why not rules?" and answers it +- Reader can now apply this reasoning to their own decisions + +--- + +## Acknowledging Tensions Honestly + +> Specific rules and bright lines sometimes have their advantages. They can make actions more predictable, transparent, and testable, and we do use them for some especially high-stakes behaviors. But such rules can also be applied poorly in unanticipated situations or when followed too rigidly. + +**What works:** +- Doesn't pretend rules are simply bad +- Names specific advantages (predictable, transparent, testable) +- Then names the limitation (unanticipated situations, rigidity) +- Shows where rules *are* used (high-stakes behaviors) +- Reader trusts the judgment because both sides are honestly represented + +--- + +## Concrete Grounding + +> Think of it like a brilliant friend who happens to have the knowledge of a doctor, lawyer, and financial advisor, who will speak frankly and from a place of genuine care and treat you like an intelligent adult capable of deciding what is good for you. 
+ +**What works:** +- "Brilliant friend" is immediately relatable +- Specific expertise areas (doctor, lawyer, financial advisor) +- "Speak frankly from genuine care" - describes the *tone*, not just capability +- "Intelligent adult capable of deciding" - defines the relationship +- One sentence does more than paragraphs of abstract description + +--- + +## Honest About Limitations + +> Although the document is no doubt flawed in many ways, we want it to be something people can look back on and see as an honest and sincere attempt to explain our reasoning and the motives behind our decisions. + +**What works:** +- "No doubt flawed in many ways" - genuine humility, not performed +- "Honest and sincere attempt" - stands behind the intent despite acknowledging flaws +- More trustworthy than pretending certainty +- More confident than endless hedging + +--- + +## Stakes Without Hyperbole + +> At some point in the future, and perhaps soon, documents like this might matter a lot—much more than they do now. Powerful systems will be a new kind of force in the world, and those who are creating them have a chance to help them embody the best in humanity. + +**What works:** +- "Might matter a lot" - plain statement of potential importance +- "Perhaps soon" - honest about uncertainty in timing +- "New kind of force in the world" - states significance without breathless hype +- "A chance to" - aspiration without guarantee +- No "revolutionary," "game-changing," "paradigm shift" + +--- + +## Living Document Framing + +> This is a living document and a continuous work in progress. This is new territory, and we expect to make mistakes (and hopefully correct them) along the way. Nevertheless, we hope it offers meaningful transparency into the values and priorities we believe should guide behavior. 
+ +**What works:** +- "Living document" - signals evolution, not arrogance of finality +- "Expect to make mistakes" - honest prediction +- "(and hopefully correct them)" - commitment to improvement +- "Nevertheless" - despite limitations, still valuable +- Humble and confident at once + +--- + +## The Gap Between Intention and Reality + +> Although this expresses our vision, achieving that vision is an ongoing technical challenge. We will continue to be open about any ways in which reality comes apart from our vision. Readers should keep this gap between intention and reality in mind. + +**What works:** +- Explicitly names the gap between aspiration and execution +- Commits to transparency about failures +- Tells readers to expect imperfection +- More trustworthy than claiming success before achieving it + +--- + +## Summary: What These Share + +1. **Reasoning is visible** - not just what, but why +2. **Tensions are named** - not hidden or resolved prematurely +3. **Specifics ground abstractions** - metaphors and examples do heavy lifting +4. **Limitations are acknowledged** - but positions are still taken +5. **Stakes are real** - stated plainly without hyperbole +6. **Reader is a peer** - trusted to evaluate reasoning themselves +7. **No filler** - every sentence earns its place diff --git a/skills/authentic-writing/references/patterns.md b/skills/authentic-writing/references/patterns.md new file mode 100644 index 000000000..7f781b72b --- /dev/null +++ b/skills/authentic-writing/references/patterns.md @@ -0,0 +1,100 @@ +# Sentence Patterns + +Structural templates for authentic formal prose. + +## The Reasoning Pattern + +The most important pattern. State position, then explain why. + +``` +[Position]. [Why you believe it]. [Why the alternative doesn't work]. +``` + +> We use principles rather than rules. Principles let people generalize to novel situations because they understand *why*, not just *what*. 
Rules have advantages - they're predictable and testable - but they fail in unanticipated situations. + +## The Tension Pattern + +Name a tradeoff honestly, then explain how you navigate it. + +``` +[Thing A has value]. [But Thing B also has value]. [Here's how we weigh them]. +``` + +> Specific rules make behavior predictable and testable. But rigid rules can be applied poorly when followed too mechanically. We use rules for high-stakes behaviors where predictability matters most, and principles for everything else. + +## The Concrete Grounding Pattern + +State abstraction, then immediately ground it. + +``` +[Abstract claim]. [Specific example or metaphor]. +``` + +> The system should feel like a knowledgeable friend. Think of a brilliant friend who happens to have the knowledge of a doctor, lawyer, and financial advisor - someone who speaks frankly and treats you like an intelligent adult. + +## The Honest Limitation Pattern + +Acknowledge weakness while maintaining conviction. + +``` +[Admission of limitation]. [What remains true despite it]. +``` + +> This document is no doubt flawed in many ways. But we want it to be something people can look back on and see as an honest attempt to explain our reasoning. + +## The Stakes Pattern + +State genuine importance without hyperbole. + +``` +[Plain statement of what matters]. [Why it matters]. +``` + +> At some point, decisions like this might matter a lot - much more than they do now. The choices we make now will shape how this develops for years. + +No "revolutionary," "game-changing," or "in today's fast-paced landscape." + +## Opening Patterns + +### The Context Opener +Ground the reader in what's happening. + +> We're publishing X today. It's [brief description]. In this post, we describe what we've included and some of the considerations that informed our approach. + +### The Reasoning Opener +Lead with why this exists. + +> We've come to believe that a different approach is necessary. 
In order to do X well, people need to understand *why*, not merely *what*. + +### The Stakes Opener +State importance plainly. + +> This is new territory, and we expect to make mistakes along the way. Nevertheless, we think transparency here matters. + +## Transition Patterns + +### The However Pivot +Acknowledge then redirect. + +> Specific rules sometimes have their advantages. They're predictable, testable, and transparent. But such rules can also be applied poorly in unanticipated situations. + +### The Building Transition +Previous point leads to next. + +> This makes transparency particularly important: it lets people understand which behaviors are intended versus unintended, make informed choices, and provide useful feedback. + +### The Scope Transition +Moving between levels. + +> That's the broad picture. The main sections are as follows: + +## Closing Patterns + +### The Living Document Close +> This is a continuous work in progress. We expect to make mistakes (and hopefully correct them) along the way. + +### The Future Stakes Close +> At some point in the future, and perhaps soon, decisions like this might matter a lot. We hope this is a step in the right direction. + +### The Honest Gap Close +> This expresses our vision. Achieving that vision is an ongoing challenge. We will continue to be open about any ways in which reality comes apart from intention. diff --git a/skills/authentic-writing/references/style-guide.md b/skills/authentic-writing/references/style-guide.md new file mode 100644 index 000000000..cdae09a73 --- /dev/null +++ b/skills/authentic-writing/references/style-guide.md @@ -0,0 +1,94 @@ +# Style Guide + +What makes formal writing feel authentic rather than generated. + +## 1. Explain the Reasoning + +The most important principle. Don't just state what you think - explain why. Readers who understand your reasoning can apply it to situations you didn't anticipate. Readers who only have your conclusions will misapply them. 
+ +**Before:** +> We use a principle-based approach rather than rules. + +**After:** +> We've come to believe that a different approach is necessary. In order to act well across a wide range of novel situations, people need to understand *why* we want certain behaviors, not merely *what* we want. Specific rules have their advantages - they're predictable, testable, and transparent. But they can be applied poorly in unanticipated situations or when followed too rigidly. + +The second version lets readers evaluate the reasoning and decide for themselves. + +## 2. Concrete Metaphors and Examples + +Abstract claims float away. Concrete examples stick. + +**Before:** +> The system should be helpful and knowledgeable while remaining accessible. + +**After:** +> Think of it like a brilliant friend who happens to have the knowledge of a doctor, lawyer, and financial advisor, who will speak frankly and from a place of genuine care and treat you like an intelligent adult capable of deciding what is good for you. + +The metaphor does more work than any amount of abstract description. + +## 3. Name the Tensions + +Real decisions involve tradeoffs. Pretending they don't makes you seem naive or dishonest. + +**Before:** +> Safety and helpfulness work together. + +**After:** +> Safety and helpfulness are more complementary than they are at odds, but tensions do exist. Sometimes being maximally helpful in the short term creates risks in the long term. Sometimes safety constraints prevent genuinely beneficial actions. We don't pretend these tensions away - we try to navigate them thoughtfully. + +## 4. Honest About Limitations + +Admitting what you don't know builds trust. But be specific about the uncertainty. + +**Before:** +> This approach might not work in all cases. + +**After:** +> Although this document is no doubt flawed in many ways, we want it to be something people can look back on and see as an honest and sincere attempt to explain our reasoning and motives. 
+ +The second version is more humble *and* more confident - it acknowledges flaws while standing behind the intent. + +## 5. Stakes Without Hyperbole + +State genuine importance plainly. Don't manufacture urgency. + +**Before:** +> In today's fast-paced digital landscape, it's more important than ever to get this right. + +**After:** +> At some point in the future, and perhaps soon, decisions like this might matter a lot - much more than they do now. + +Plain language about real stakes is more credible than inflated language about manufactured ones. + +## 6. Treat Readers as Peers + +Don't condescend. Don't over-explain obvious things. Don't perform enthusiasm. + +**Before:** +> Great question! Let me break this down into simple steps so it's easy to understand. + +**After:** +> Here's how this works. + +Trust that your reader is intelligent. They'll notice when you don't. + +## 7. Living Document Framing + +Signal that understanding evolves. Not as weakness, but as intellectual honesty. + +**Examples:** +> This is a continuous work in progress. We expect to make mistakes (and hopefully correct them) along the way. + +> This reflects our current thinking about how to approach a dauntingly novel project. + +## 8. Specific Over General + +Vague claims are forgettable. Specific details are memorable and testable. + +**Before:** +> We consulted with various experts. + +**After:** +> We sought feedback from experts in law, philosophy, theology, psychology, and a wide range of other disciplines. + +The specific list is more credible because it's verifiable. diff --git a/skills/browser-use/SKILL.md b/skills/browser-use/SKILL.md new file mode 100644 index 000000000..aad693465 --- /dev/null +++ b/skills/browser-use/SKILL.md @@ -0,0 +1,218 @@ +--- +name: browser-use +description: Automates browser interactions for web testing, form filling, screenshots, and data extraction. 
Use when the user needs to navigate websites, interact with web pages, fill forms, take screenshots, or extract information from web pages. +allowed-tools: Bash(browser-use:*) +--- + +# Browser Automation with browser-use CLI + +The `browser-use` command provides fast, persistent browser automation. It maintains browser sessions across commands, enabling complex multi-step workflows. + +## Quick Start + +```bash +browser-use open https://example.com # Navigate to URL +browser-use state # Get page elements with indices +browser-use click 5 # Click element by index +browser-use type "Hello World" # Type text +browser-use screenshot # Take screenshot +browser-use close # Close browser +``` + +## Core Workflow + +1. **Navigate**: `browser-use open ` - Opens URL (starts browser if needed) +2. **Inspect**: `browser-use state` - Returns clickable elements with indices +3. **Interact**: Use indices from state to interact (`browser-use click 5`, `browser-use input 3 "text"`) +4. **Verify**: `browser-use state` or `browser-use screenshot` to confirm actions +5. 
**Repeat**: Browser stays open between commands + +## Browser Modes + +```bash +browser-use --browser chromium open <url> # Default: headless Chromium +browser-use --browser chromium --headed open <url> # Visible Chromium window +browser-use --browser real open <url> # User's Chrome with login sessions +browser-use --browser remote open <url> # Cloud browser (requires API key) +``` + +- **chromium**: Fast, isolated, headless by default +- **real**: Uses your Chrome with cookies, extensions, logged-in sessions +- **remote**: Cloud-hosted browser with proxy support (requires BROWSER_USE_API_KEY) + +## Commands + +### Navigation +```bash +browser-use open <url> # Navigate to URL +browser-use back # Go back in history +browser-use scroll down # Scroll down +browser-use scroll up # Scroll up +``` + +### Page State +```bash +browser-use state # Get URL, title, and clickable elements +browser-use screenshot # Take screenshot (outputs base64) +browser-use screenshot path.png # Save screenshot to file +browser-use screenshot --full path.png # Full page screenshot +``` + +### Interactions (use indices from `browser-use state`) +```bash +browser-use click <index> # Click element +browser-use type "text" # Type text into focused element +browser-use input <index> "text" # Click element, then type text +browser-use keys "Enter" # Send keyboard keys +browser-use keys "Control+a" # Send key combination +browser-use select <index> "option" # Select dropdown option +``` + +### Tab Management +```bash +browser-use switch <tab-index> # Switch to tab by index +browser-use close-tab # Close current tab +browser-use close-tab <tab-index> # Close specific tab +``` + +### JavaScript & Data +```bash +browser-use eval "document.title" # Execute JavaScript, return result +browser-use extract "all product prices" # Extract data using LLM (requires API key) +``` + +### Python Execution (Persistent Session) +```bash +browser-use python "x = 42" # Set variable +browser-use python "print(x)" # Access variable (outputs: 42) +browser-use python "print(browser.url)" # Access 
browser object +browser-use python --vars # Show defined variables +browser-use python --reset # Clear Python namespace +browser-use python --file script.py # Execute Python file +``` + +The Python session maintains state across commands. The `browser` object provides: +- `browser.url` - Current page URL +- `browser.title` - Page title +- `browser.goto(url)` - Navigate +- `browser.click(index)` - Click element +- `browser.type(text)` - Type text +- `browser.screenshot(path)` - Take screenshot +- `browser.scroll()` - Scroll page +- `browser.html` - Get page HTML + +### Agent Tasks (Requires API Key) +```bash +browser-use run "Fill the contact form with test data" # Run AI agent +browser-use run "Extract all product prices" --max-steps 50 +``` + +Agent tasks use an LLM to autonomously complete complex browser tasks. Requires `BROWSER_USE_API_KEY` or configured LLM API key (OPENAI_API_KEY, ANTHROPIC_API_KEY, etc). + +### Session Management +```bash +browser-use sessions # List active sessions +browser-use close # Close current session +browser-use close --all # Close all sessions +``` + +### Server Control +```bash +browser-use server status # Check if server is running +browser-use server stop # Stop server +browser-use server logs # View server logs +``` + +## Global Options + +| Option | Description | +|--------|-------------| +| `--session NAME` | Use named session (default: "default") | +| `--browser MODE` | Browser mode: chromium, real, remote | +| `--headed` | Show browser window (chromium mode) | +| `--profile NAME` | Chrome profile (real mode only) | +| `--json` | Output as JSON | +| `--api-key KEY` | Override API key | + +**Session behavior**: All commands without `--session` use the same "default" session. The browser stays open and is reused across commands. Use `--session NAME` to run multiple browsers in parallel. 
+
+## Examples
+
+### Form Submission
+```bash
+browser-use open https://example.com/contact
+browser-use state
+# Shows: [0] input "Name", [1] input "Email", [2] textarea "Message", [3] button "Submit"
+browser-use input 0 "John Doe"
+browser-use input 1 "john@example.com"
+browser-use input 2 "Hello, this is a test message."
+browser-use click 3
+browser-use state  # Verify success
+```
+
+### Multi-Session Workflows
+```bash
+browser-use --session work open https://work.example.com
+browser-use --session personal open https://personal.example.com
+browser-use --session work state      # Check work session
+browser-use --session personal state  # Check personal session
+browser-use close --all               # Close both sessions
+```
+
+### Data Extraction with Python
+```bash
+browser-use open https://example.com/products
+browser-use python "
+snapshots = []
+for i in range(20):
+    browser.scroll('down')
+    snapshots.append(browser.html)
+browser.screenshot('products.png')
+"
+browser-use python "print(f'Captured {len(snapshots)} page snapshots')"
+```
+
+### Using Real Browser (Logged-In Sessions)
+```bash
+browser-use --browser real open https://gmail.com
+# Uses your actual Chrome with existing login sessions
+browser-use state  # Already logged in!
+```
+
+## Tips
+
+1. **Always run `browser-use state` first** to see available elements and their indices
+2. **Use `--headed` for debugging** to see what the browser is doing
+3. **Sessions persist** - the browser stays open between commands
+4. **Use `--json` for parsing** output programmatically
+5. **Python variables persist** across `browser-use python` commands within a session
+6. 
**Real browser mode** preserves your login sessions and extensions + +## Troubleshooting + +**Browser won't start?** +```bash +browser-use server stop # Stop any stuck server +browser-use --headed open # Try with visible window +``` + +**Element not found?** +```bash +browser-use state # Check current elements +browser-use scroll down # Element might be below fold +browser-use state # Check again +``` + +**Session issues?** +```bash +browser-use sessions # Check active sessions +browser-use close --all # Clean slate +browser-use open # Fresh start +``` + +## Cleanup + +**Always close the browser when done.** Run this after completing browser automation: + +```bash +browser-use close +``` diff --git a/skills/find-skills/SKILL.md b/skills/find-skills/SKILL.md new file mode 100644 index 000000000..c797184ee --- /dev/null +++ b/skills/find-skills/SKILL.md @@ -0,0 +1,133 @@ +--- +name: find-skills +description: Helps users discover and install agent skills when they ask questions like "how do I do X", "find a skill for X", "is there a skill that can...", or express interest in extending capabilities. This skill should be used when the user is looking for functionality that might exist as an installable skill. +--- + +# Find Skills + +This skill helps you discover and install skills from the open agent skills ecosystem. + +## When to Use This Skill + +Use this skill when the user: + +- Asks "how do I do X" where X might be a common task with an existing skill +- Says "find a skill for X" or "is there a skill for X" +- Asks "can you do X" where X is a specialized capability +- Expresses interest in extending agent capabilities +- Wants to search for tools, templates, or workflows +- Mentions they wish they had help with a specific domain (design, testing, deployment, etc.) + +## What is the Skills CLI? + +The Skills CLI (`npx skills`) is the package manager for the open agent skills ecosystem. 
Skills are modular packages that extend agent capabilities with specialized knowledge, workflows, and tools.
+
+**Key commands:**
+
+- `npx skills find [query]` - Search for skills interactively or by keyword
+- `npx skills add <source>` - Install a skill from GitHub or other sources
+- `npx skills check` - Check for skill updates
+- `npx skills update` - Update all installed skills
+
+**Browse skills at:** https://skills.sh/
+
+## How to Help Users Find Skills
+
+### Step 1: Understand What They Need
+
+When a user asks for help with something, identify:
+
+1. The domain (e.g., React, testing, design, deployment)
+2. The specific task (e.g., writing tests, creating animations, reviewing PRs)
+3. Whether this is a common enough task that a skill likely exists
+
+### Step 2: Search for Skills
+
+Run the find command with a relevant query:
+
+```bash
+npx skills find [query]
+```
+
+For example:
+
+- User asks "how do I make my React app faster?" → `npx skills find react performance`
+- User asks "can you help me with PR reviews?" → `npx skills find pr review`
+- User asks "I need to create a changelog" → `npx skills find changelog`
+
+The command will return results like:
+
+```
+Install with npx skills add <skill>
+
+vercel-labs/agent-skills@vercel-react-best-practices
+└ https://skills.sh/vercel-labs/agent-skills/vercel-react-best-practices
+```
+
+### Step 3: Present Options to the User
+
+When you find relevant skills, present them to the user with:
+
+1. The skill name and what it does
+2. The install command they can run
+3. A link to learn more at skills.sh
+
+Example response:
+
+```
+I found a skill that might help! The "vercel-react-best-practices" skill provides
+React and Next.js performance optimization guidelines from Vercel Engineering.
+ +To install it: +npx skills add vercel-labs/agent-skills@vercel-react-best-practices + +Learn more: https://skills.sh/vercel-labs/agent-skills/vercel-react-best-practices +``` + +### Step 4: Offer to Install + +If the user wants to proceed, you can install the skill for them: + +```bash +npx skills add -g -y +``` + +The `-g` flag installs globally (user-level) and `-y` skips confirmation prompts. + +## Common Skill Categories + +When searching, consider these common categories: + +| Category | Example Queries | +| --------------- | ---------------------------------------- | +| Web Development | react, nextjs, typescript, css, tailwind | +| Testing | testing, jest, playwright, e2e | +| DevOps | deploy, docker, kubernetes, ci-cd | +| Documentation | docs, readme, changelog, api-docs | +| Code Quality | review, lint, refactor, best-practices | +| Design | ui, ux, design-system, accessibility | +| Productivity | workflow, automation, git | + +## Tips for Effective Searches + +1. **Use specific keywords**: "react testing" is better than just "testing" +2. **Try alternative terms**: If "deploy" doesn't work, try "deployment" or "ci-cd" +3. **Check popular sources**: Many skills come from `vercel-labs/agent-skills` or `ComposioHQ/awesome-claude-skills` + +## When No Skills Are Found + +If no relevant skills exist: + +1. Acknowledge that no existing skill was found +2. Offer to help with the task directly using your general capabilities +3. Suggest the user could create their own skill with `npx skills init` + +Example: + +``` +I searched for skills related to "xyz" but didn't find any matches. +I can still help you with this task directly! Would you like me to proceed? 
+ +If this is something you do often, you could create your own skill: +npx skills init my-xyz-skill +``` diff --git a/skills/infographic-slides/SKILL.md b/skills/infographic-slides/SKILL.md new file mode 100644 index 000000000..f1a8260ec --- /dev/null +++ b/skills/infographic-slides/SKILL.md @@ -0,0 +1,91 @@ +--- +name: infographic-slides +description: Generate visually striking infographic slide decks using Nano Banana Pro (Gemini image generation). Use when asked to create carousel posts, infographic sets, explainer slides, or visual content series. +--- + +# Infographic Slides + +Generate slide decks as AI-generated images using Nano Banana Pro (Gemini 3 Pro Image). + +## Dependencies + +- **nano-banana-pro** skill (Gemini image generation) +- `GEMINI_API_KEY` env var must be set + +## Workflow + +### 1. Plan the deck + +From the user's topic/brief, plan 5-15 slides with a narrative arc: + +- **Slide 1:** Cover — hook title, key stats, visual impact +- **Middle slides:** The story — what, why, how (one concept per slide) +- **Final slide:** Takeaway — summary statement, call to action, or closing quote + +Each slide should convey ONE key idea. Less text = more impact. + +### 2. Craft prompts + +Every prompt must specify: + +``` +Dark infographic poster, 4:5 vertical format. [GRADIENT] gradient background. +[CONTENT DESCRIPTION]. [VISUAL ELEMENTS]. Minimal design, clean typography, +modern tech aesthetic. +``` + +**Prompt anatomy:** +- **Format:** Always start with "Dark infographic poster, 4:5 vertical" +- **Background:** Specify gradient colors (navy-black, red-black, teal-black, etc.) 
+- **Title:** Bold white text, include the exact words you want +- **Body:** Describe layout (lists, cards, stats, quotes, flow diagrams) +- **Visual elements:** Icons, accent lines, glowing elements, severity badges +- **Style anchor:** End with "Minimal design, clean typography, modern tech aesthetic" + +**Color coding by slide type:** +- Red/crimson gradients → danger, warnings, problems +- Blue/navy gradients → explanations, neutral information +- Green/teal gradients → solutions, checklists, positives +- Purple/indigo gradients → technical deep-dives, unique moments +- Amber/yellow gradients → caution, nuance, "it's complicated" + +### 3. Generate images + +Use the batch script for sequential generation: + +```bash +uv run {baseDir}/scripts/batch_generate.py --prompts /path/to/prompts.json --output /tmp/slides/ --resolution 1K +``` + +Or generate individually via nano-banana-pro: + +```bash +export GEMINI_API_KEY="your-key" +uv run /mnt/work/dev/clawdbot/skills/nano-banana-pro/scripts/generate_image.py \ + --prompt "your prompt" \ + --filename /tmp/slides/slide-01.png \ + --resolution 1K +``` + +### 4. Deliver + +Output all images with MEDIA: tags for Clawdbot auto-attach: + +``` +MEDIA:/tmp/slides/slide-01.png +MEDIA:/tmp/slides/slide-02.png +... +``` + +Include a numbered legend explaining each slide. 
+ +## Prompt tips + +- **Be specific about text:** Gemini renders text best when you quote it exactly +- **Less is more:** 1 title + 3-5 bullet points max per slide +- **Use visual hierarchy:** Tags/pills at top, big title, supporting text below +- **Stats pop:** Large numbers with labels draw the eye +- **Quotes work:** Italic quote blocks with attribution are visually strong +- **Cards/grids:** For comparing items, describe a 2-col card grid layout +- **Flow diagrams:** Describe steps with arrows and icons for processes +- **Avoid walls of text:** If a slide has more than 40 words of content, split it diff --git a/skills/infographic-slides/scripts/batch_generate.py b/skills/infographic-slides/scripts/batch_generate.py new file mode 100755 index 000000000..f28efe947 --- /dev/null +++ b/skills/infographic-slides/scripts/batch_generate.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "google-genai>=1.0.0", +# "pillow>=10.0.0", +# ] +# /// +""" +Batch-generate infographic slides using Nano Banana Pro (Gemini 3 Pro Image). + +Usage: + uv run batch_generate.py --prompts prompts.json --output /tmp/slides/ [--resolution 1K] + uv run batch_generate.py --prompt "single prompt" --output /tmp/slides/ --slide-num 1 + +prompts.json format: +[ + {"slide": 1, "prompt": "Dark infographic poster..."}, + {"slide": 2, "prompt": "Dark infographic poster..."} +] +""" + +import argparse +import json +import os +import sys +import time +from io import BytesIO +from pathlib import Path + +RESOLUTIONS = { + "1K": (1024, 1280), # 4:5 vertical + "2K": (2048, 2560), + "4K": (4096, 5120), +} + + +def get_api_key(provided_key: str | None) -> str | None: + if provided_key: + return provided_key + return os.environ.get("GEMINI_API_KEY") + + +def generate_single(client, prompt: str, output_path: Path, resolution: str) -> bool: + """Generate a single slide image. 
Returns True on success.""" + from google.genai import types as genai_types + + width, height = RESOLUTIONS.get(resolution, RESOLUTIONS["1K"]) + + config = genai_types.GenerateContentConfig( + response_modalities=["image", "text"], + generate_images=genai_types.ImageGenerationConfig( + number_of_images=1, + aspect_ratio="3:4", + output_image_format="png", + ), + ) + + try: + response = client.models.generate_content( + model="gemini-2.0-flash-preview-image-generation", + contents=prompt, + config=config, + ) + + from PIL import Image as PILImage + + for part in response.candidates[0].content.parts: + if part.inline_data and part.inline_data.mime_type.startswith("image/"): + image_data = part.inline_data.data + if isinstance(image_data, str): + import base64 + image_data = base64.b64decode(image_data) + + image = PILImage.open(BytesIO(image_data)) + + if image.mode == 'RGBA': + rgb = PILImage.new('RGB', image.size, (255, 255, 255)) + rgb.paste(image, mask=image.split()[3]) + rgb.save(str(output_path), 'PNG') + elif image.mode == 'RGB': + image.save(str(output_path), 'PNG') + else: + image.convert('RGB').save(str(output_path), 'PNG') + + return True + + print(f" ✗ No image in response", file=sys.stderr) + return False + + except Exception as e: + print(f" ✗ Error: {e}", file=sys.stderr) + return False + + +def main(): + parser = argparse.ArgumentParser(description="Batch-generate infographic slides") + parser.add_argument("--prompts", "-p", help="Path to prompts.json file") + parser.add_argument("--prompt", help="Single prompt (use with --slide-num)") + parser.add_argument("--slide-num", type=int, default=1, help="Slide number for single prompt") + parser.add_argument("--output", "-o", required=True, help="Output directory") + parser.add_argument("--resolution", "-r", default="1K", choices=["1K", "2K", "4K"]) + parser.add_argument("--api-key", "-k", help="Gemini API key") + parser.add_argument("--delay", type=float, default=1.0, help="Delay between requests 
(seconds)") + args = parser.parse_args() + + api_key = get_api_key(args.api_key) + if not api_key: + print("Error: No API key. Set GEMINI_API_KEY or use --api-key.", file=sys.stderr) + sys.exit(1) + + from google import genai + client = genai.Client(api_key=api_key) + + output_dir = Path(args.output) + output_dir.mkdir(parents=True, exist_ok=True) + + # Build slide list + slides = [] + if args.prompts: + with open(args.prompts) as f: + slides = json.load(f) + elif args.prompt: + slides = [{"slide": args.slide_num, "prompt": args.prompt}] + else: + print("Error: Provide --prompts or --prompt.", file=sys.stderr) + sys.exit(1) + + total = len(slides) + success = 0 + + print(f"Generating {total} slides at {args.resolution} resolution...\n") + + for i, entry in enumerate(slides): + num = entry.get("slide", i + 1) + prompt = entry["prompt"] + filename = f"slide-{num:02d}.png" + output_path = output_dir / filename + + print(f"[{i+1}/{total}] Generating slide {num}...") + start = time.time() + + if generate_single(client, prompt, output_path, args.resolution): + elapsed = time.time() - start + print(f" ✓ Saved: {output_path} ({elapsed:.1f}s)") + print(f"MEDIA: {output_path.resolve()}") + success += 1 + else: + print(f" ✗ Failed: slide {num}") + + if i < total - 1: + time.sleep(args.delay) + + print(f"\nDone: {success}/{total} slides generated in {output_dir}") + + +if __name__ == "__main__": + main() diff --git a/skills/recall/SKILL.md b/skills/recall/SKILL.md new file mode 100644 index 000000000..c5ed7535a --- /dev/null +++ b/skills/recall/SKILL.md @@ -0,0 +1,54 @@ +--- +name: recall +description: Query persistent memory. Use when user says "/recall X" or asks to recall/find something from memory. +user_invocable: true +arg_hint: "search term or tag" +--- + +# /recall + +Query persistent memory shared between all agents (claude-code, opencode, clawdbot). 
+ +## syntax + +``` +/recall +``` + +## examples + +``` +/recall voice +/recall ooide architecture +/recall preferences +/recall bun +``` + +## implementation + +Run the memory query command: + +```bash +~/.agents/memory/scripts/memory.py query "" +``` + +This searches: +- full-text content (FTS5) +- tags +- returns results sorted by effective score (importance * decay * reinforcement) + +## output format + +Results show: +- effective score in brackets +- content +- tags (if any) +- pinned indicator (if pinned) +- type, who (which agent saved it), and project on second line + +## follow-up + +After showing results, offer to: +- save new related memories +- show more details about a specific result +- search with different terms diff --git a/skills/remember/SKILL.md b/skills/remember/SKILL.md new file mode 100644 index 000000000..2c476e759 --- /dev/null +++ b/skills/remember/SKILL.md @@ -0,0 +1,46 @@ +--- +name: remember +description: Save something to persistent memory. Use when user says "/remember X" or asks to remember something important. +user_invocable: true +arg_hint: "[critical:] [tags]: content to remember" +--- + +# /remember + +Save to persistent memory across sessions. Shared between all agents (claude-code, opencode, clawdbot). 
+ +## syntax + +- `/remember ` - save with normal importance (0.8) +- `/remember critical: ` - high importance, pinned (1.0, never decays) +- `/remember [tag1,tag2]: ` - with explicit tags + +## examples + +``` +/remember nicholai prefers tabs over spaces +/remember critical: never push directly to main +/remember [voice,tts]: qwen model needs 12GB VRAM minimum +``` + +## implementation + +To save a memory, use the --content flag: + +```bash +~/.agents/memory/scripts/memory.py save --mode explicit --who --project "$(pwd)" --content "" +``` + +where `` is one of: claude-code, opencode, clawdbot + +The script automatically: +- detects `critical:` prefix for pinned memories +- parses `[tags]:` prefix for tagged memories +- infers type from keywords (prefer -> preference, decided -> decision, etc.) + +## confirmation + +After saving, confirm to the user with: +- the content saved (truncated if long) +- any detected tags or type +- whether it was marked as critical/pinned diff --git a/state/CURRENT.md b/state/CURRENT.md deleted file mode 100644 index 48d860a03..000000000 --- a/state/CURRENT.md +++ /dev/null @@ -1,3 +0,0 @@ -discord feed bots deployed (reddit, github, twitter, claude releases, weekly trends) -ooIDE auth branch in progress -ssh to 10.0.0.128 pending key setup