From fd4cad363b3c0441660c5c31d08ce7bfe6ddd152 Mon Sep 17 00:00:00 2001 From: Nicholai Date: Wed, 21 Jan 2026 05:35:27 -0700 Subject: [PATCH] initial commit of CLI --- .env.example | 3 + .gitignore | 39 ++++ CLAUDE.md | 74 +++++++ README.md | 15 ++ brief.md | 346 ++++++++++++++++++++++++++++++++ bun.lock | 223 ++++++++++++++++++++ package.json | 21 ++ src/cli.ts | 238 ++++++++++++++++++++++ src/embeddings/ollama.ts | 175 ++++++++++++++++ src/index.ts | 59 ++++++ src/scraper/comments.ts | 120 +++++++++++ src/scraper/reddit.ts | 120 +++++++++++ src/scraper/types.ts | 87 ++++++++ src/storage/qdrant.ts | 183 +++++++++++++++++ src/storage/types.ts | 24 +++ src/tui/app.ts | 268 +++++++++++++++++++++++++ src/tui/components/export.ts | 92 +++++++++ src/tui/components/progress.ts | 77 +++++++ src/tui/components/search.ts | 94 +++++++++ src/tui/components/stats.ts | 51 +++++ src/tui/components/trending.ts | 49 +++++ src/tui/components/url-input.ts | 115 +++++++++++ src/utils/rate-limit.ts | 66 ++++++ src/utils/text.ts | 69 +++++++ tsconfig.json | 29 +++ 25 files changed, 2637 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 README.md create mode 100644 brief.md create mode 100644 bun.lock create mode 100644 package.json create mode 100644 src/cli.ts create mode 100644 src/embeddings/ollama.ts create mode 100644 src/index.ts create mode 100644 src/scraper/comments.ts create mode 100644 src/scraper/reddit.ts create mode 100644 src/scraper/types.ts create mode 100644 src/storage/qdrant.ts create mode 100644 src/storage/types.ts create mode 100644 src/tui/app.ts create mode 100644 src/tui/components/export.ts create mode 100644 src/tui/components/progress.ts create mode 100644 src/tui/components/search.ts create mode 100644 src/tui/components/stats.ts create mode 100644 src/tui/components/trending.ts create mode 100644 src/tui/components/url-input.ts create mode 100644 src/utils/rate-limit.ts create mode 
100644 src/utils/text.ts create mode 100644 tsconfig.json diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..e62a7ab --- /dev/null +++ b/.env.example @@ -0,0 +1,3 @@ +QDRANT_URL=https://vectors.biohazardvfx.com +QDRANT_API_KEY=your-api-key-here +OLLAMA_HOST=http://localhost:11434 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..76ba5de --- /dev/null +++ b/.gitignore @@ -0,0 +1,39 @@ +# dependencies (bun install) +node_modules + +# output +out +dist +*.tgz + +# code coverage +coverage +*.lcov + +# logs +logs +*.log +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# caches +.eslintcache +.cache +*.tsbuildinfo + +# IntelliJ based IDEs +.idea + +# Finder (MacOS) folder config +.DS_Store + +# exported data +reddit-trends.json +reddit-trends.csv +.env diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..6544f16 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,74 @@ +reddit trend analyzer +=== + +a CLI tool that scrapes reddit discussions, embeds them with ollama, stores in qdrant, and provides a TUI dashboard for discovering common problems/trends. 
+ +running +--- + +```bash +bun start # run the app +bun dev # run with watch mode +``` + +prerequisites +--- + +- ollama running locally with nomic-embed-text model (`ollama pull nomic-embed-text`) +- qdrant accessible at QDRANT_URL (or localhost:6333) + +env vars +--- + +``` +QDRANT_URL=https://vectors.biohazardvfx.com +QDRANT_API_KEY= +OLLAMA_HOST=http://localhost:11434 # optional, defaults to this +``` + +architecture +--- + +``` +src/ + index.ts # entry point, connection checks, TUI setup + scraper/ + reddit.ts # fetch subreddit posts with pagination + comments.ts # fetch comments for each post + types.ts # reddit json response types + embeddings/ + ollama.ts # batch embed text with nomic-embed-text (768 dims) + storage/ + qdrant.ts # create collection, upsert, search + types.ts # point payload schema + tui/ + app.ts # main dashboard, wires everything together + components/ + url-input.ts # subreddit url input + progress.ts # scraping/embedding progress bars + stats.ts # collection stats panel + trending.ts # trending topics view + search.ts # semantic search interface + export.ts # export to json/csv + utils/ + rate-limit.ts # delay helper for reddit api + text.ts # text preprocessing for embedding +``` + +keybindings +--- + +- `q` or `ctrl+c` - quit +- `enter` - start scrape (when url is entered) +- `tab` - switch between url and search inputs +- `e` - export results to json +- `c` - export results to csv +- `r` - refresh stats from qdrant + +coding notes +--- + +- uses @opentui/core standalone (no react/solid) +- reddit rate limiting: 3s delay between requests +- embeddings batched in groups of 10 +- qdrant collection: reddit_trends with indexes on subreddit, type, created, score diff --git a/README.md b/README.md new file mode 100644 index 0000000..2e15f0e --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +# reddit-trend-analyzer + +To install dependencies: + +```bash +bun install +``` + +To run: + +```bash +bun start +``` + +This project was created 
using `bun init` in bun v1.3.5. [Bun](https://bun.com) is a fast all-in-one JavaScript runtime. diff --git a/brief.md b/brief.md new file mode 100644 index 0000000..63ccf6e --- /dev/null +++ b/brief.md @@ -0,0 +1,346 @@ +reddit trend analyzer +=== + +a tool for discovering common problems and questions in reddit communities to inform content strategy and tool development. + +core goal +--- + +find what people struggle with most -> create content/tools that solve those problems -> organic SEO growth + +tech stack +--- + +- vector database: qdrant +- embeddings: nomic-embed-text (ollama) +- framework: next.js +- components: shadcn +- charts: recharts (simple, shadcn-compatible) +- theme: shadcn tokens from globals.css inline theme ONLY + +data pipeline +--- + +``` +reddit scrape -> text cleaning -> embedding -> qdrant storage + | + clustering (HDBSCAN) + | + problem extraction (LLM) + | + frequency + engagement scoring +``` + +core features +--- + +**1. data ingestion** + +existing CLI handles this well: +- scrape subreddit posts + comments +- embed with nomic-embed-text +- store in qdrant with metadata (score, created, subreddit, type) + +**2. problem clustering** + +the key feature. group similar discussions to surface recurring themes. + +- cluster embeddings using HDBSCAN (density-based, handles noise well) +- extract cluster centroids as topic anchors +- LLM pass to generate human-readable problem statements from each cluster +- rank clusters by: + - size (discussion count) + - total engagement (sum of upvotes) + - recency (still being talked about?) + +output example: +``` +| problem | discussions | upvotes | last seen | +|----------------------------------------------|-------------|---------|-----------| +| users struggle with X when doing Y | 47 | 2.3k | 2d ago | +| confusion about how to configure Z | 31 | 890 | 1w ago | +| no good free alternative to [competitor] | 28 | 1.1k | 3d ago | +``` + +**3. 
question extraction** + +pull out actual questions people ask. + +- pattern matching: "how do I", "why does", "is there a way to", "what's the best", etc. +- deduplicate semantically similar questions (vector similarity > 0.9) +- rank by engagement +- group under parent problem clusters + +output: FAQ-ready list for blog posts, docs, or schema markup + +**4. search + explore** + +- semantic search across all scraped content +- filter by: subreddit, date range, min upvotes, type (post/comment) +- click through to original reddit discussions + +**5. export** + +- problem clusters as markdown content briefs +- questions as FAQ schema (json-ld ready) +- csv for spreadsheet analysis +- raw json for custom processing + +dashboard views +--- + +**home / stats** + +simple overview: +- total posts/comments in db +- subreddits being tracked +- problem clusters identified +- recent scrape activity + +**problem explorer** (main view) + +sortable/filterable table of problem clusters: +- columns: problem summary, discussion count, total upvotes, avg sentiment, last active +- expand row -> sample discussions + extracted questions +- select multiple -> bulk export as content briefs +- search within problems + +**question bank** + +all extracted questions: +- grouped by parent problem cluster (collapsible) +- search/filter +- copy as json-ld FAQ schema +- mark as "addressed" when content exists + +**scrape manager** + +- list of tracked subreddits +- manual scrape trigger +- scrape history with stats +- add/remove subreddits + + +To give the user "Ultimate Control," the dashboard should include: + +1. **Similarity Sensitivity Slider:** A global control that adjusts how strict the vector database is. Lower similarity = more broad, creative connections. Higher similarity = more specific, literal results. +2. **The "Impact Score" Weighting:** Allow users to toggle what "Importance" means to them. Is it **Upvote Count**? **Sentiment Extremity**? Or **Topic Velocity**? 
Adjusting these weights should re-order the problem explorer table in real-time. +3. **Command Palette:** Instead of clicking through menus, a "Ctrl + K" command bar allows the user to type "Find gaps in comparison intent" to instantly update the visualizations. + +implementation phases +--- + +**phase 1: clustering + extraction (backend)** + +- [ ] add HDBSCAN clustering to pipeline +- [ ] LLM integration for problem summarization (claude or local) +- [ ] question extraction with pattern matching + dedup +- [ ] store clusters in qdrant (or sqlite sidecar) +- [ ] CLI commands: `cluster`, `problems`, `questions` + +**phase 2: web UI** + +- [ ] next.js app with shadcn +- [ ] problem explorer table (tanstack table) +- [ ] question bank view +- [ ] semantic search +- [ ] export functionality +- [ ] basic stats dashboard + +**phase 3: polish** + +- [ ] scheduled/recurring scrapes +- [ ] better semantic deduplication +- [ ] sentiment scoring (optional) +- [ ] "addressed" tracking (link to published content) + +env vars +--- + +``` +QDRANT_URL=https://vectors.biohazardvfx.com +QDRANT_API_KEY= +OLLAMA_HOST=http://localhost:11434 +ANTHROPIC_API_KEY= # for problem summarization +``` + +success criteria +--- + +tool is working if: +- we can identify 10+ distinct problems from a subreddit scrape +- problem summaries are actionable (could write a blog post about it) +- question extraction gives us real FAQs people are asking +- export format is immediately usable for content planning + +everything else is nice-to-have. 
+ +--- + +theme (globals.css) +--- + +```css +:root { + --background: oklch(0.9551 0 0); + --foreground: oklch(0.3211 0 0); + --card: oklch(0.9702 0 0); + --card-foreground: oklch(0.3211 0 0); + --popover: oklch(0.9702 0 0); + --popover-foreground: oklch(0.3211 0 0); + --primary: oklch(0.4891 0 0); + --primary-foreground: oklch(1.0000 0 0); + --secondary: oklch(0.9067 0 0); + --secondary-foreground: oklch(0.3211 0 0); + --muted: oklch(0.8853 0 0); + --muted-foreground: oklch(0.5103 0 0); + --accent: oklch(0.8078 0 0); + --accent-foreground: oklch(0.3211 0 0); + --destructive: oklch(0.5594 0.1900 25.8625); + --destructive-foreground: oklch(1.0000 0 0); + --border: oklch(0.8576 0 0); + --input: oklch(0.9067 0 0); + --ring: oklch(0.4891 0 0); + --chart-1: oklch(0.4891 0 0); + --chart-2: oklch(0.4863 0.0361 196.0278); + --chart-3: oklch(0.6534 0 0); + --chart-4: oklch(0.7316 0 0); + --chart-5: oklch(0.8078 0 0); + --sidebar: oklch(0.9370 0 0); + --sidebar-foreground: oklch(0.3211 0 0); + --sidebar-primary: oklch(0.4891 0 0); + --sidebar-primary-foreground: oklch(1.0000 0 0); + --sidebar-accent: oklch(0.8078 0 0); + --sidebar-accent-foreground: oklch(0.3211 0 0); + --sidebar-border: oklch(0.8576 0 0); + --sidebar-ring: oklch(0.4891 0 0); + --font-sans: Montserrat, sans-serif; + --font-serif: Georgia, serif; + --font-mono: Fira Code, monospace; + --radius: 0.35rem; + --shadow-x: 0px; + --shadow-y: 2px; + --shadow-blur: 0px; + --shadow-spread: 0px; + --shadow-opacity: 0.15; + --shadow-color: hsl(0 0% 20% / 0.1); + --shadow-2xs: 0px 2px 0px 0px hsl(0 0% 20% / 0.07); + --shadow-xs: 0px 2px 0px 0px hsl(0 0% 20% / 0.07); + --shadow-sm: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 1px 2px -1px hsl(0 0% 20% / 0.15); + --shadow: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 1px 2px -1px hsl(0 0% 20% / 0.15); + --shadow-md: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 2px 4px -1px hsl(0 0% 20% / 0.15); + --shadow-lg: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 4px 6px -1px hsl(0 0% 20% / 
0.15); + --shadow-xl: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 8px 10px -1px hsl(0 0% 20% / 0.15); + --shadow-2xl: 0px 2px 0px 0px hsl(0 0% 20% / 0.38); + --tracking-normal: 0em; + --spacing: 0.25rem; +} + +.dark { + --background: oklch(0.2178 0 0); + --foreground: oklch(0.8853 0 0); + --card: oklch(0.2435 0 0); + --card-foreground: oklch(0.8853 0 0); + --popover: oklch(0.2435 0 0); + --popover-foreground: oklch(0.8853 0 0); + --primary: oklch(0.7058 0 0); + --primary-foreground: oklch(0.2178 0 0); + --secondary: oklch(0.3092 0 0); + --secondary-foreground: oklch(0.8853 0 0); + --muted: oklch(0.2850 0 0); + --muted-foreground: oklch(0.5999 0 0); + --accent: oklch(0.3715 0 0); + --accent-foreground: oklch(0.8853 0 0); + --destructive: oklch(0.6591 0.1530 22.1703); + --destructive-foreground: oklch(1.0000 0 0); + --border: oklch(0.3290 0 0); + --input: oklch(0.3092 0 0); + --ring: oklch(0.7058 0 0); + --chart-1: oklch(0.7058 0 0); + --chart-2: oklch(0.6714 0.0339 206.3482); + --chart-3: oklch(0.5452 0 0); + --chart-4: oklch(0.4604 0 0); + --chart-5: oklch(0.3715 0 0); + --sidebar: oklch(0.2393 0 0); + --sidebar-foreground: oklch(0.8853 0 0); + --sidebar-primary: oklch(0.7058 0 0); + --sidebar-primary-foreground: oklch(0.2178 0 0); + --sidebar-accent: oklch(0.3715 0 0); + --sidebar-accent-foreground: oklch(0.8853 0 0); + --sidebar-border: oklch(0.3290 0 0); + --sidebar-ring: oklch(0.7058 0 0); + --font-sans: Inter, sans-serif; + --font-serif: Georgia, serif; + --font-mono: Fira Code, monospace; + --radius: 0.35rem; + --shadow-x: 0px; + --shadow-y: 2px; + --shadow-blur: 0px; + --shadow-spread: 0px; + --shadow-opacity: 0.15; + --shadow-color: hsl(0 0% 20% / 0.1); + --shadow-2xs: 0px 2px 0px 0px hsl(0 0% 20% / 0.07); + --shadow-xs: 0px 2px 0px 0px hsl(0 0% 20% / 0.07); + --shadow-sm: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 1px 2px -1px hsl(0 0% 20% / 0.15); + --shadow: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 1px 2px -1px hsl(0 0% 20% / 0.15); + --shadow-md: 0px 2px 
0px 0px hsl(0 0% 20% / 0.15), 0px 2px 4px -1px hsl(0 0% 20% / 0.15); + --shadow-lg: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 4px 6px -1px hsl(0 0% 20% / 0.15); + --shadow-xl: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 8px 10px -1px hsl(0 0% 20% / 0.15); + --shadow-2xl: 0px 2px 0px 0px hsl(0 0% 20% / 0.38); +} + +@theme inline { + --color-background: var(--background); + --color-foreground: var(--foreground); + --color-card: var(--card); + --color-card-foreground: var(--card-foreground); + --color-popover: var(--popover); + --color-popover-foreground: var(--popover-foreground); + --color-primary: var(--primary); + --color-primary-foreground: var(--primary-foreground); + --color-secondary: var(--secondary); + --color-secondary-foreground: var(--secondary-foreground); + --color-muted: var(--muted); + --color-muted-foreground: var(--muted-foreground); + --color-accent: var(--accent); + --color-accent-foreground: var(--accent-foreground); + --color-destructive: var(--destructive); + --color-destructive-foreground: var(--destructive-foreground); + --color-border: var(--border); + --color-input: var(--input); + --color-ring: var(--ring); + --color-chart-1: var(--chart-1); + --color-chart-2: var(--chart-2); + --color-chart-3: var(--chart-3); + --color-chart-4: var(--chart-4); + --color-chart-5: var(--chart-5); + --color-sidebar: var(--sidebar); + --color-sidebar-foreground: var(--sidebar-foreground); + --color-sidebar-primary: var(--sidebar-primary); + --color-sidebar-primary-foreground: var(--sidebar-primary-foreground); + --color-sidebar-accent: var(--sidebar-accent); + --color-sidebar-accent-foreground: var(--sidebar-accent-foreground); + --color-sidebar-border: var(--sidebar-border); + --color-sidebar-ring: var(--sidebar-ring); + + --font-sans: var(--font-sans); + --font-mono: var(--font-mono); + --font-serif: var(--font-serif); + + --radius-sm: calc(var(--radius) - 4px); + --radius-md: calc(var(--radius) - 2px); + --radius-lg: var(--radius); + --radius-xl: 
calc(var(--radius) + 4px); + + --shadow-2xs: var(--shadow-2xs); + --shadow-xs: var(--shadow-xs); + --shadow-sm: var(--shadow-sm); + --shadow: var(--shadow); + --shadow-md: var(--shadow-md); + --shadow-lg: var(--shadow-lg); + --shadow-xl: var(--shadow-xl); + --shadow-2xl: var(--shadow-2xl); +} +``` + + diff --git a/bun.lock b/bun.lock new file mode 100644 index 0000000..faedcaf --- /dev/null +++ b/bun.lock @@ -0,0 +1,223 @@ +{ + "lockfileVersion": 1, + "configVersion": 1, + "workspaces": { + "": { + "name": "reddit-trend-analyzer", + "dependencies": { + "@opentui/core": "^0.1.74", + "@qdrant/js-client-rest": "^1.16.2", + "ollama": "^0.6.3", + }, + "devDependencies": { + "@types/bun": "latest", + }, + "peerDependencies": { + "typescript": "^5", + }, + }, + }, + "packages": { + "@dimforge/rapier2d-simd-compat": ["@dimforge/rapier2d-simd-compat@0.17.3", "", {}, "sha512-bijvwWz6NHsNj5e5i1vtd3dU2pDhthSaTUZSh14DUGGKJfw8eMnlWZsxwHBxB/a3AXVNDjL9abuHw1k9FGR+jg=="], + + "@jimp/core": ["@jimp/core@1.6.0", "", { "dependencies": { "@jimp/file-ops": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "await-to-js": "^3.0.0", "exif-parser": "^0.1.12", "file-type": "^16.0.0", "mime": "3" } }, "sha512-EQQlKU3s9QfdJqiSrZWNTxBs3rKXgO2W+GxNXDtwchF3a4IqxDheFX1ti+Env9hdJXDiYLp2jTRjlxhPthsk8w=="], + + "@jimp/diff": ["@jimp/diff@1.6.0", "", { "dependencies": { "@jimp/plugin-resize": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "pixelmatch": "^5.3.0" } }, "sha512-+yUAQ5gvRC5D1WHYxjBHZI7JBRusGGSLf8AmPRPCenTzh4PA+wZ1xv2+cYqQwTfQHU5tXYOhA0xDytfHUf1Zyw=="], + + "@jimp/file-ops": ["@jimp/file-ops@1.6.0", "", {}, "sha512-Dx/bVDmgnRe1AlniRpCKrGRm5YvGmUwbDzt+MAkgmLGf+jvBT75hmMEZ003n9HQI/aPnm/YKnXjg/hOpzNCpHQ=="], + + "@jimp/js-bmp": ["@jimp/js-bmp@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "bmp-ts": "^1.0.9" } }, "sha512-FU6Q5PC/e3yzLyBDXupR3SnL3htU7S3KEs4e6rjDP6gNEOXRFsWs6YD3hXuXd50jd8ummy+q2WSwuGkr8wi+Gw=="], + + 
"@jimp/js-gif": ["@jimp/js-gif@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "gifwrap": "^0.10.1", "omggif": "^1.0.10" } }, "sha512-N9CZPHOrJTsAUoWkWZstLPpwT5AwJ0wge+47+ix3++SdSL/H2QzyMqxbcDYNFe4MoI5MIhATfb0/dl/wmX221g=="], + + "@jimp/js-jpeg": ["@jimp/js-jpeg@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "jpeg-js": "^0.4.4" } }, "sha512-6vgFDqeusblf5Pok6B2DUiMXplH8RhIKAryj1yn+007SIAQ0khM1Uptxmpku/0MfbClx2r7pnJv9gWpAEJdMVA=="], + + "@jimp/js-png": ["@jimp/js-png@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "pngjs": "^7.0.0" } }, "sha512-AbQHScy3hDDgMRNfG0tPjL88AV6qKAILGReIa3ATpW5QFjBKpisvUaOqhzJ7Reic1oawx3Riyv152gaPfqsBVg=="], + + "@jimp/js-tiff": ["@jimp/js-tiff@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "utif2": "^4.1.0" } }, "sha512-zhReR8/7KO+adijj3h0ZQUOiun3mXUv79zYEAKvE0O+rP7EhgtKvWJOZfRzdZSNv0Pu1rKtgM72qgtwe2tFvyw=="], + + "@jimp/plugin-blit": ["@jimp/plugin-blit@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "zod": "^3.23.8" } }, "sha512-M+uRWl1csi7qilnSK8uxK4RJMSuVeBiO1AY0+7APnfUbQNZm6hCe0CCFv1Iyw1D/Dhb8ph8fQgm5mwM0eSxgVA=="], + + "@jimp/plugin-blur": ["@jimp/plugin-blur@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/utils": "1.6.0" } }, "sha512-zrM7iic1OTwUCb0g/rN5y+UnmdEsT3IfuCXCJJNs8SZzP0MkZ1eTvuwK9ZidCuMo4+J3xkzCidRwYXB5CyGZTw=="], + + "@jimp/plugin-circle": ["@jimp/plugin-circle@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0", "zod": "^3.23.8" } }, "sha512-xt1Gp+LtdMKAXfDp3HNaG30SPZW6AQ7dtAtTnoRKorRi+5yCJjKqXRgkewS5bvj8DEh87Ko1ydJfzqS3P2tdWw=="], + + "@jimp/plugin-color": ["@jimp/plugin-color@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "tinycolor2": "^1.6.0", "zod": "^3.23.8" } }, "sha512-J5q8IVCpkBsxIXM+45XOXTrsyfblyMZg3a9eAo0P7VPH4+CrvyNQwaYatbAIamSIN1YzxmO3DkIZXzRjFSz1SA=="], + + "@jimp/plugin-contain": 
["@jimp/plugin-contain@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/plugin-blit": "1.6.0", "@jimp/plugin-resize": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "zod": "^3.23.8" } }, "sha512-oN/n+Vdq/Qg9bB4yOBOxtY9IPAtEfES8J1n9Ddx+XhGBYT1/QTU/JYkGaAkIGoPnyYvmLEDqMz2SGihqlpqfzQ=="], + + "@jimp/plugin-cover": ["@jimp/plugin-cover@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/plugin-crop": "1.6.0", "@jimp/plugin-resize": "1.6.0", "@jimp/types": "1.6.0", "zod": "^3.23.8" } }, "sha512-Iow0h6yqSC269YUJ8HC3Q/MpCi2V55sMlbkkTTx4zPvd8mWZlC0ykrNDeAy9IJegrQ7v5E99rJwmQu25lygKLA=="], + + "@jimp/plugin-crop": ["@jimp/plugin-crop@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "zod": "^3.23.8" } }, "sha512-KqZkEhvs+21USdySCUDI+GFa393eDIzbi1smBqkUPTE+pRwSWMAf01D5OC3ZWB+xZsNla93BDS9iCkLHA8wang=="], + + "@jimp/plugin-displace": ["@jimp/plugin-displace@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "zod": "^3.23.8" } }, "sha512-4Y10X9qwr5F+Bo5ME356XSACEF55485j5nGdiyJ9hYzjQP9nGgxNJaZ4SAOqpd+k5sFaIeD7SQ0Occ26uIng5Q=="], + + "@jimp/plugin-dither": ["@jimp/plugin-dither@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0" } }, "sha512-600d1RxY0pKwgyU0tgMahLNKsqEcxGdbgXadCiVCoGd6V6glyCvkNrnnwC0n5aJ56Htkj88PToSdF88tNVZEEQ=="], + + "@jimp/plugin-fisheye": ["@jimp/plugin-fisheye@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "zod": "^3.23.8" } }, "sha512-E5QHKWSCBFtpgZarlmN3Q6+rTQxjirFqo44ohoTjzYVrDI6B6beXNnPIThJgPr0Y9GwfzgyarKvQuQuqCnnfbA=="], + + "@jimp/plugin-flip": ["@jimp/plugin-flip@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0", "zod": "^3.23.8" } }, "sha512-/+rJVDuBIVOgwoyVkBjUFHtP+wmW0r+r5OQ2GpatQofToPVbJw1DdYWXlwviSx7hvixTWLKVgRWQ5Dw862emDg=="], + + "@jimp/plugin-hash": ["@jimp/plugin-hash@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/js-bmp": "1.6.0", "@jimp/js-jpeg": "1.6.0", "@jimp/js-png": 
"1.6.0", "@jimp/js-tiff": "1.6.0", "@jimp/plugin-color": "1.6.0", "@jimp/plugin-resize": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "any-base": "^1.1.0" } }, "sha512-wWzl0kTpDJgYVbZdajTf+4NBSKvmI3bRI8q6EH9CVeIHps9VWVsUvEyb7rpbcwVLWYuzDtP2R0lTT6WeBNQH9Q=="], + + "@jimp/plugin-mask": ["@jimp/plugin-mask@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0", "zod": "^3.23.8" } }, "sha512-Cwy7ExSJMZszvkad8NV8o/Z92X2kFUFM8mcDAhNVxU0Q6tA0op2UKRJY51eoK8r6eds/qak3FQkXakvNabdLnA=="], + + "@jimp/plugin-print": ["@jimp/plugin-print@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/js-jpeg": "1.6.0", "@jimp/js-png": "1.6.0", "@jimp/plugin-blit": "1.6.0", "@jimp/types": "1.6.0", "parse-bmfont-ascii": "^1.0.6", "parse-bmfont-binary": "^1.0.6", "parse-bmfont-xml": "^1.1.6", "simple-xml-to-json": "^1.2.2", "zod": "^3.23.8" } }, "sha512-zarTIJi8fjoGMSI/M3Xh5yY9T65p03XJmPsuNet19K/Q7mwRU6EV2pfj+28++2PV2NJ+htDF5uecAlnGyxFN2A=="], + + "@jimp/plugin-quantize": ["@jimp/plugin-quantize@1.6.0", "", { "dependencies": { "image-q": "^4.0.0", "zod": "^3.23.8" } }, "sha512-EmzZ/s9StYQwbpG6rUGBCisc3f64JIhSH+ncTJd+iFGtGo0YvSeMdAd+zqgiHpfZoOL54dNavZNjF4otK+mvlg=="], + + "@jimp/plugin-resize": ["@jimp/plugin-resize@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "zod": "^3.23.8" } }, "sha512-uSUD1mqXN9i1SGSz5ov3keRZ7S9L32/mAQG08wUwZiEi5FpbV0K8A8l1zkazAIZi9IJzLlTauRNU41Mi8IF9fA=="], + + "@jimp/plugin-rotate": ["@jimp/plugin-rotate@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/plugin-crop": "1.6.0", "@jimp/plugin-resize": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "zod": "^3.23.8" } }, "sha512-JagdjBLnUZGSG4xjCLkIpQOZZ3Mjbg8aGCCi4G69qR+OjNpOeGI7N2EQlfK/WE8BEHOW5vdjSyglNqcYbQBWRw=="], + + "@jimp/plugin-threshold": ["@jimp/plugin-threshold@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/plugin-color": "1.6.0", "@jimp/plugin-hash": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "zod": 
"^3.23.8" } }, "sha512-M59m5dzLoHOVWdM41O8z9SyySzcDn43xHseOH0HavjsfQsT56GGCC4QzU1banJidbUrePhzoEdS42uFE8Fei8w=="], + + "@jimp/types": ["@jimp/types@1.6.0", "", { "dependencies": { "zod": "^3.23.8" } }, "sha512-7UfRsiKo5GZTAATxm2qQ7jqmUXP0DxTArztllTcYdyw6Xi5oT4RaoXynVtCD4UyLK5gJgkZJcwonoijrhYFKfg=="], + + "@jimp/utils": ["@jimp/utils@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0", "tinycolor2": "^1.6.0" } }, "sha512-gqFTGEosKbOkYF/WFj26jMHOI5OH2jeP1MmC/zbK6BF6VJBf8rIC5898dPfSzZEbSA0wbbV5slbntWVc5PKLFA=="], + + "@opentui/core": ["@opentui/core@0.1.74", "", { "dependencies": { "bun-ffi-structs": "0.1.2", "diff": "8.0.2", "jimp": "1.6.0", "yoga-layout": "3.2.1" }, "optionalDependencies": { "@dimforge/rapier2d-simd-compat": "^0.17.3", "@opentui/core-darwin-arm64": "0.1.74", "@opentui/core-darwin-x64": "0.1.74", "@opentui/core-linux-arm64": "0.1.74", "@opentui/core-linux-x64": "0.1.74", "@opentui/core-win32-arm64": "0.1.74", "@opentui/core-win32-x64": "0.1.74", "bun-webgpu": "0.1.4", "planck": "^1.4.2", "three": "0.177.0" }, "peerDependencies": { "web-tree-sitter": "0.25.10" } }, "sha512-g4W16ymv12JdgZ+9B4t7mpIICvzWy2+eHERfmDf80ALduOQCUedKQdULcBFhVCYUXIkDRtIy6CID5thMAah3FA=="], + + "@opentui/core-darwin-arm64": ["@opentui/core-darwin-arm64@0.1.74", "", { "os": "darwin", "cpu": "arm64" }, "sha512-rfmlDLtm/u17CnuhJgCxPeYMvOST+A2MOdVOk46IurtHO849bdYqK6iudKNlFRs1FOrymgSKF9GlWBHAOKeRjg=="], + + "@opentui/core-darwin-x64": ["@opentui/core-darwin-x64@0.1.74", "", { "os": "darwin", "cpu": "x64" }, "sha512-WAD8orsDV0ZdW/5GwjOOB4FY96772xbkz+rcV7WRzEFUVaqoBaC04IuqYzS9d5s+cjkbT5Cpj47hrVYkkVQKng=="], + + "@opentui/core-linux-arm64": ["@opentui/core-linux-arm64@0.1.74", "", { "os": "linux", "cpu": "arm64" }, "sha512-lgmHzrzLy4e+rgBS+lhtsMLLgIMLbtLNMm6EzVPyYVDlLDGjM7+ulXMem7AtpaRrWrUUl4REiG9BoQUsCFDwYA=="], + + "@opentui/core-linux-x64": ["@opentui/core-linux-x64@0.1.74", "", { "os": "linux", "cpu": "x64" }, 
"sha512-8Mn2WbdBQ29xCThuPZezjDhd1N3+fXwKkGvCBOdTI0le6h2A/vCNbfUVjwfr/EGZSRXxCG+Yapol34BAULGpOA=="], + + "@opentui/core-win32-arm64": ["@opentui/core-win32-arm64@0.1.74", "", { "os": "win32", "cpu": "arm64" }, "sha512-dvYUXz03avnI6ZluyLp00HPmR0UT/IE/6QS97XBsgJlUTtpnbKkBtB5jD1NHwWkElaRj1Qv2QP36ngFoJqbl9g=="], + + "@opentui/core-win32-x64": ["@opentui/core-win32-x64@0.1.74", "", { "os": "win32", "cpu": "x64" }, "sha512-3wfWXaAKOIlDQz6ZZIESf2M+YGZ7uFHijjTEM8w/STRlLw8Y6+QyGYi1myHSM4d6RSO+/s2EMDxvjDf899W9vQ=="], + + "@qdrant/js-client-rest": ["@qdrant/js-client-rest@1.16.2", "", { "dependencies": { "@qdrant/openapi-typescript-fetch": "1.2.6", "undici": "^6.0.0" }, "peerDependencies": { "typescript": ">=4.7" } }, "sha512-Zm4wEZURrZ24a+Hmm4l1QQYjiz975Ep3vF0yzWR7ICGcxittNz47YK2iBOk8kb8qseCu8pg7WmO1HOIsO8alvw=="], + + "@qdrant/openapi-typescript-fetch": ["@qdrant/openapi-typescript-fetch@1.2.6", "", {}, "sha512-oQG/FejNpItrxRHoyctYvT3rwGZOnK4jr3JdppO/c78ktDvkWiPXPHNsrDf33K9sZdRb6PR7gi4noIapu5q4HA=="], + + "@tokenizer/token": ["@tokenizer/token@0.3.0", "", {}, "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A=="], + + "@types/bun": ["@types/bun@1.3.6", "", { "dependencies": { "bun-types": "1.3.6" } }, "sha512-uWCv6FO/8LcpREhenN1d1b6fcspAB+cefwD7uti8C8VffIv0Um08TKMn98FynpTiU38+y2dUO55T11NgDt8VAA=="], + + "@types/node": ["@types/node@25.0.9", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-/rpCXHlCWeqClNBwUhDcusJxXYDjZTyE8v5oTO7WbL8eij2nKhUeU89/6xgjU7N4/Vh3He0BtyhJdQbDyhiXAw=="], + + "@webgpu/types": ["@webgpu/types@0.1.69", "", {}, "sha512-RPmm6kgRbI8e98zSD3RVACvnuktIja5+yLgDAkTmxLr90BEwdTXRQWNLF3ETTTyH/8mKhznZuN5AveXYFEsMGQ=="], + + "abort-controller": ["abort-controller@3.0.0", "", { "dependencies": { "event-target-shim": "^5.0.0" } }, "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg=="], + + "any-base": ["any-base@1.1.0", "", {}, 
"sha512-uMgjozySS8adZZYePpaWs8cxB9/kdzmpX6SgJZ+wbz1K5eYk5QMYDVJaZKhxyIHUdnnJkfR7SVgStgH7LkGUyg=="], + + "await-to-js": ["await-to-js@3.0.0", "", {}, "sha512-zJAaP9zxTcvTHRlejau3ZOY4V7SRpiByf3/dxx2uyKxxor19tpmpV2QRsTKikckwhaPmr2dVpxxMr7jOCYVp5g=="], + + "base64-js": ["base64-js@1.5.1", "", {}, "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA=="], + + "bmp-ts": ["bmp-ts@1.0.9", "", {}, "sha512-cTEHk2jLrPyi+12M3dhpEbnnPOsaZuq7C45ylbbQIiWgDFZq4UVYPEY5mlqjvsj/6gJv9qX5sa+ebDzLXT28Vw=="], + + "buffer": ["buffer@6.0.3", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.2.1" } }, "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA=="], + + "bun-ffi-structs": ["bun-ffi-structs@0.1.2", "", { "peerDependencies": { "typescript": "^5" } }, "sha512-Lh1oQAYHDcnesJauieA4UNkWGXY9hYck7OA5IaRwE3Bp6K2F2pJSNYqq+hIy7P3uOvo3km3oxS8304g5gDMl/w=="], + + "bun-types": ["bun-types@1.3.6", "", { "dependencies": { "@types/node": "*" } }, "sha512-OlFwHcnNV99r//9v5IIOgQ9Uk37gZqrNMCcqEaExdkVq3Avwqok1bJFmvGMCkCE0FqzdY8VMOZpfpR3lwI+CsQ=="], + + "bun-webgpu": ["bun-webgpu@0.1.4", "", { "dependencies": { "@webgpu/types": "^0.1.60" }, "optionalDependencies": { "bun-webgpu-darwin-arm64": "^0.1.4", "bun-webgpu-darwin-x64": "^0.1.4", "bun-webgpu-linux-x64": "^0.1.4", "bun-webgpu-win32-x64": "^0.1.4" } }, "sha512-Kw+HoXl1PMWJTh9wvh63SSRofTA8vYBFCw0XEP1V1fFdQEDhI8Sgf73sdndE/oDpN/7CMx0Yv/q8FCvO39ROMQ=="], + + "bun-webgpu-darwin-arm64": ["bun-webgpu-darwin-arm64@0.1.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-eDgLN9teKTfmvrCqgwwmWNsNszxYs7IZdCqk0S1DCarvMhr4wcajoSBlA/nQA0/owwLduPTS8xxCnQp4/N/gDg=="], + + "bun-webgpu-darwin-x64": ["bun-webgpu-darwin-x64@0.1.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-X+PjwJUWenUmdQBP8EtdItMyieQ6Nlpn+BH518oaouDiSnWj5+b0Y7DNDZJq7Ezom4EaxmqL/uGYZK3aCQ7CXg=="], + + "bun-webgpu-linux-x64": ["bun-webgpu-linux-x64@0.1.4", "", { "os": "linux", "cpu": "x64" }, 
"sha512-zMLs2YIGB+/jxrYFXaFhVKX/GBt05UTF45lc9srcHc9JXGjEj+12CIo1CHLTAWatXMTqt0Jsu6ukWEoWVT/ayA=="], + + "bun-webgpu-win32-x64": ["bun-webgpu-win32-x64@0.1.4", "", { "os": "win32", "cpu": "x64" }, "sha512-Z5yAK28xrcm8Wb5k7TZ8FJKpOI/r+aVCRdlHYAqI2SDJFN3nD4mJs900X6kNVmG/xFzb5yOuKVYWGg+6ZXWbyA=="], + + "diff": ["diff@8.0.2", "", {}, "sha512-sSuxWU5j5SR9QQji/o2qMvqRNYRDOcBTgsJ/DeCf4iSN4gW+gNMXM7wFIP+fdXZxoNiAnHUTGjCr+TSWXdRDKg=="], + + "event-target-shim": ["event-target-shim@5.0.1", "", {}, "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ=="], + + "events": ["events@3.3.0", "", {}, "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q=="], + + "exif-parser": ["exif-parser@0.1.12", "", {}, "sha512-c2bQfLNbMzLPmzQuOr8fy0csy84WmwnER81W88DzTp9CYNPJ6yzOj2EZAh9pywYpqHnshVLHQJ8WzldAyfY+Iw=="], + + "file-type": ["file-type@16.5.4", "", { "dependencies": { "readable-web-to-node-stream": "^3.0.0", "strtok3": "^6.2.4", "token-types": "^4.1.1" } }, "sha512-/yFHK0aGjFEgDJjEKP0pWCplsPFPhwyfwevf/pVxiN0tmE4L9LmwWxWukdJSHdoCli4VgQLehjJtwQBnqmsKcw=="], + + "gifwrap": ["gifwrap@0.10.1", "", { "dependencies": { "image-q": "^4.0.0", "omggif": "^1.0.10" } }, "sha512-2760b1vpJHNmLzZ/ubTtNnEx5WApN/PYWJvXvgS+tL1egTTthayFYIQQNi136FLEDcN/IyEY2EcGpIITD6eYUw=="], + + "ieee754": ["ieee754@1.2.1", "", {}, "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA=="], + + "image-q": ["image-q@4.0.0", "", { "dependencies": { "@types/node": "16.9.1" } }, "sha512-PfJGVgIfKQJuq3s0tTDOKtztksibuUEbJQIYT3by6wctQo+Rdlh7ef4evJ5NCdxY4CfMbvFkocEwbl4BF8RlJw=="], + + "jimp": ["jimp@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/diff": "1.6.0", "@jimp/js-bmp": "1.6.0", "@jimp/js-gif": "1.6.0", "@jimp/js-jpeg": "1.6.0", "@jimp/js-png": "1.6.0", "@jimp/js-tiff": "1.6.0", "@jimp/plugin-blit": "1.6.0", "@jimp/plugin-blur": "1.6.0", "@jimp/plugin-circle": "1.6.0", 
"@jimp/plugin-color": "1.6.0", "@jimp/plugin-contain": "1.6.0", "@jimp/plugin-cover": "1.6.0", "@jimp/plugin-crop": "1.6.0", "@jimp/plugin-displace": "1.6.0", "@jimp/plugin-dither": "1.6.0", "@jimp/plugin-fisheye": "1.6.0", "@jimp/plugin-flip": "1.6.0", "@jimp/plugin-hash": "1.6.0", "@jimp/plugin-mask": "1.6.0", "@jimp/plugin-print": "1.6.0", "@jimp/plugin-quantize": "1.6.0", "@jimp/plugin-resize": "1.6.0", "@jimp/plugin-rotate": "1.6.0", "@jimp/plugin-threshold": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0" } }, "sha512-YcwCHw1kiqEeI5xRpDlPPBGL2EOpBKLwO4yIBJcXWHPj5PnA5urGq0jbyhM5KoNpypQ6VboSoxc9D8HyfvngSg=="], + + "jpeg-js": ["jpeg-js@0.4.4", "", {}, "sha512-WZzeDOEtTOBK4Mdsar0IqEU5sMr3vSV2RqkAIzUEV2BHnUfKGyswWFPFwK5EeDo93K3FohSHbLAjj0s1Wzd+dg=="], + + "mime": ["mime@3.0.0", "", { "bin": { "mime": "cli.js" } }, "sha512-jSCU7/VB1loIWBZe14aEYHU/+1UMEHoaO7qxCOVJOw9GgH72VAWppxNcjU+x9a2k3GSIBXNKxXQFqRvvZ7vr3A=="], + + "ollama": ["ollama@0.6.3", "", { "dependencies": { "whatwg-fetch": "^3.6.20" } }, "sha512-KEWEhIqE5wtfzEIZbDCLH51VFZ6Z3ZSa6sIOg/E/tBV8S51flyqBOXi+bRxlOYKDf8i327zG9eSTb8IJxvm3Zg=="], + + "omggif": ["omggif@1.0.10", "", {}, "sha512-LMJTtvgc/nugXj0Vcrrs68Mn2D1r0zf630VNtqtpI1FEO7e+O9FP4gqs9AcnBaSEeoHIPm28u6qgPR0oyEpGSw=="], + + "pako": ["pako@1.0.11", "", {}, "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw=="], + + "parse-bmfont-ascii": ["parse-bmfont-ascii@1.0.6", "", {}, "sha512-U4RrVsUFCleIOBsIGYOMKjn9PavsGOXxbvYGtMOEfnId0SVNsgehXh1DxUdVPLoxd5mvcEtvmKs2Mmf0Mpa1ZA=="], + + "parse-bmfont-binary": ["parse-bmfont-binary@1.0.6", "", {}, "sha512-GxmsRea0wdGdYthjuUeWTMWPqm2+FAd4GI8vCvhgJsFnoGhTrLhXDDupwTo7rXVAgaLIGoVHDZS9p/5XbSqeWA=="], + + "parse-bmfont-xml": ["parse-bmfont-xml@1.1.6", "", { "dependencies": { "xml-parse-from-string": "^1.0.0", "xml2js": "^0.5.0" } }, "sha512-0cEliVMZEhrFDwMh4SxIyVJpqYoOWDJ9P895tFuS+XuNzI5UBmBk5U5O4KuJdTnZpSBI4LFA2+ZiJaiwfSwlMA=="], + + "peek-readable": 
["peek-readable@4.1.0", "", {}, "sha512-ZI3LnwUv5nOGbQzD9c2iDG6toheuXSZP5esSHBjopsXH4dg19soufvpUGA3uohi5anFtGb2lhAVdHzH6R/Evvg=="], + + "pixelmatch": ["pixelmatch@5.3.0", "", { "dependencies": { "pngjs": "^6.0.0" }, "bin": { "pixelmatch": "bin/pixelmatch" } }, "sha512-o8mkY4E/+LNUf6LzX96ht6k6CEDi65k9G2rjMtBe9Oo+VPKSvl+0GKHuH/AlG+GA5LPG/i5hrekkxUc3s2HU+Q=="], + + "planck": ["planck@1.4.2", "", { "peerDependencies": { "stage-js": "^1.0.0-alpha.12" } }, "sha512-mNbhnV3g8X2rwGxzcesjmN8BDA6qfXgQxXVMkWau9MCRlQY0RLNEkyHlVp6yFy/X6qrzAXyNONCnZ1cGDLrNew=="], + + "pngjs": ["pngjs@7.0.0", "", {}, "sha512-LKWqWJRhstyYo9pGvgor/ivk2w94eSjE3RGVuzLGlr3NmD8bf7RcYGze1mNdEHRP6TRP6rMuDHk5t44hnTRyow=="], + + "process": ["process@0.11.10", "", {}, "sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A=="], + + "readable-stream": ["readable-stream@4.7.0", "", { "dependencies": { "abort-controller": "^3.0.0", "buffer": "^6.0.3", "events": "^3.3.0", "process": "^0.11.10", "string_decoder": "^1.3.0" } }, "sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg=="], + + "readable-web-to-node-stream": ["readable-web-to-node-stream@3.0.4", "", { "dependencies": { "readable-stream": "^4.7.0" } }, "sha512-9nX56alTf5bwXQ3ZDipHJhusu9NTQJ/CVPtb/XHAJCXihZeitfJvIRS4GqQ/mfIoOE3IelHMrpayVrosdHBuLw=="], + + "safe-buffer": ["safe-buffer@5.2.1", "", {}, "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="], + + "sax": ["sax@1.4.4", "", {}, "sha512-1n3r/tGXO6b6VXMdFT54SHzT9ytu9yr7TaELowdYpMqY/Ao7EnlQGmAQ1+RatX7Tkkdm6hONI2owqNx2aZj5Sw=="], + + "simple-xml-to-json": ["simple-xml-to-json@1.2.3", "", {}, "sha512-kWJDCr9EWtZ+/EYYM5MareWj2cRnZGF93YDNpH4jQiHB+hBIZnfPFSQiVMzZOdk+zXWqTZ/9fTeQNu2DqeiudA=="], + + "stage-js": ["stage-js@1.0.0-alpha.17", "", {}, "sha512-AzlMO+t51v6cFvKZ+Oe9DJnL1OXEH5s9bEy6di5aOrUpcP7PCzI/wIeXF0u3zg0L89gwnceoKxrLId0ZpYnNXw=="], + + "string_decoder": 
["string_decoder@1.3.0", "", { "dependencies": { "safe-buffer": "~5.2.0" } }, "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA=="], + + "strtok3": ["strtok3@6.3.0", "", { "dependencies": { "@tokenizer/token": "^0.3.0", "peek-readable": "^4.1.0" } }, "sha512-fZtbhtvI9I48xDSywd/somNqgUHl2L2cstmXCCif0itOf96jeW18MBSyrLuNicYQVkvpOxkZtkzujiTJ9LW5Jw=="], + + "three": ["three@0.177.0", "", {}, "sha512-EiXv5/qWAaGI+Vz2A+JfavwYCMdGjxVsrn3oBwllUoqYeaBO75J63ZfyaQKoiLrqNHoTlUc6PFgMXnS0kI45zg=="], + + "tinycolor2": ["tinycolor2@1.6.0", "", {}, "sha512-XPaBkWQJdsf3pLKJV9p4qN/S+fm2Oj8AIPo1BTUhg5oxkvm9+SVEGFdhyOz7tTdUTfvxMiAs4sp6/eZO2Ew+pw=="], + + "token-types": ["token-types@4.2.1", "", { "dependencies": { "@tokenizer/token": "^0.3.0", "ieee754": "^1.2.1" } }, "sha512-6udB24Q737UD/SDsKAHI9FCRP7Bqc9D/MQUV02ORQg5iskjtLJlZJNdN4kKtcdtwCeWIwIHDGaUsTsCCAa8sFQ=="], + + "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], + + "undici": ["undici@6.23.0", "", {}, "sha512-VfQPToRA5FZs/qJxLIinmU59u0r7LXqoJkCzinq3ckNJp3vKEh7jTWN589YQ5+aoAC/TGRLyJLCPKcLQbM8r9g=="], + + "undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], + + "utif2": ["utif2@4.1.0", "", { "dependencies": { "pako": "^1.0.11" } }, "sha512-+oknB9FHrJ7oW7A2WZYajOcv4FcDR4CfoGB0dPNfxbi4GO05RRnFmt5oa23+9w32EanrYcSJWspUiJkLMs+37w=="], + + "web-tree-sitter": ["web-tree-sitter@0.25.10", "", { "peerDependencies": { "@types/emscripten": "^1.40.0" }, "optionalPeers": ["@types/emscripten"] }, "sha512-Y09sF44/13XvgVKgO2cNDw5rGk6s26MgoZPXLESvMXeefBf7i6/73eFurre0IsTW6E14Y0ArIzhUMmjoc7xyzA=="], + + "whatwg-fetch": ["whatwg-fetch@3.6.20", "", {}, "sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg=="], + + 
"xml-parse-from-string": ["xml-parse-from-string@1.0.1", "", {}, "sha512-ErcKwJTF54uRzzNMXq2X5sMIy88zJvfN2DmdoQvy7PAFJ+tPRU6ydWuOKNMyfmOjdyBQTFREi60s0Y0SyI0G0g=="], + + "xml2js": ["xml2js@0.5.0", "", { "dependencies": { "sax": ">=0.6.0", "xmlbuilder": "~11.0.0" } }, "sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA=="], + + "xmlbuilder": ["xmlbuilder@11.0.1", "", {}, "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA=="], + + "yoga-layout": ["yoga-layout@3.2.1", "", {}, "sha512-0LPOt3AxKqMdFBZA3HBAt/t/8vIKq7VaQYbuA8WxCgung+p9TVyKRYdpvCb80HcdTN2NkbIKbhNwKUfm3tQywQ=="], + + "zod": ["zod@3.25.76", "", {}, "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ=="], + + "image-q/@types/node": ["@types/node@16.9.1", "", {}, "sha512-QpLcX9ZSsq3YYUUnD3nFDY8H7wctAhQj/TFKL8Ya8v5fMm3CFXxo8zStsLAl780ltoYoo1WvKUVGBQK+1ifr7g=="], + + "pixelmatch/pngjs": ["pngjs@6.0.0", "", {}, "sha512-TRzzuFRRmEoSW/p1KVAmiOgPco2Irlah+bGFCeNfJXxxYGwSw7YwAOAcd7X28K/m5bjBWKsC29KyoMfHbypayg=="], + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..4f46e3d --- /dev/null +++ b/package.json @@ -0,0 +1,21 @@ +{ + "name": "reddit-trend-analyzer", + "version": "1.0.0", + "module": "src/index.ts", + "type": "module", + "private": true, + "scripts": { + "start": "bun run src/cli.ts", + "tui": "bun run src/index.ts", + "dev": "bun --watch run src/cli.ts" + }, + "devDependencies": { + "@types/bun": "latest", + "typescript": "^5.0.0" + }, + "dependencies": { + "@opentui/core": "^0.1.74", + "@qdrant/js-client-rest": "^1.16.2", + "ollama": "^0.6.3" + } +} diff --git a/src/cli.ts b/src/cli.ts new file mode 100644 index 0000000..511917e --- /dev/null +++ b/src/cli.ts @@ -0,0 +1,238 @@ +import * as readline from 'readline' +import { RedditScraper } from './scraper/reddit' +import { CommentFetcher } from './scraper/comments' +import { EmbeddingPipeline } from 
'./embeddings/ollama' +import { QdrantStorage } from './storage/qdrant' +import type { RedditComment } from './scraper/types' +import type { SearchResult } from './storage/types' + +const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, +}) + +function prompt(question: string): Promise { + return new Promise(resolve => rl.question(question, resolve)) +} + +function progressBar(current: number, total: number, width: number = 30): string { + const percent = total > 0 ? current / total : 0 + const filled = Math.round(percent * width) + const empty = width - filled + return `[${'█'.repeat(filled)}${'░'.repeat(empty)}] ${Math.round(percent * 100)}%` +} + +async function main() { + console.log('\n=== reddit trend analyzer ===\n') + + const scraper = new RedditScraper(5000) + const commentFetcher = new CommentFetcher(5000) + const embeddings = new EmbeddingPipeline() + const storage = new QdrantStorage() + + // check connections + console.log('checking connections...') + + const ollamaOk = await embeddings.checkConnection() + if (!ollamaOk) { + console.error('error: cannot connect to ollama') + console.error('run: ollama pull nomic-embed-text') + process.exit(1) + } + console.log(' ollama: ok') + + const qdrantOk = await storage.checkConnection() + if (!qdrantOk) { + console.error('error: cannot connect to qdrant') + process.exit(1) + } + console.log(' qdrant: ok') + + await storage.ensureCollection() + const stats = await storage.getStats() + console.log(`\ncurrent stats: ${stats.posts} posts, ${stats.comments} comments, ${stats.subreddits.length} subreddits\n`) + + let lastResults: SearchResult[] = [] + + while (true) { + console.log('\ncommands:') + console.log(' scrape [pages] - scrape subreddit (e.g. 
scrape https://reddit.com/r/vfx/best 3)') + console.log(' search - semantic search') + console.log(' stats - show collection stats') + console.log(' export json|csv - export last search results') + console.log(' quit - exit\n') + + const input = await prompt('> ') + const [cmd, ...args] = input.trim().split(' ') + + if (cmd === 'quit' || cmd === 'q' || cmd === 'exit') { + console.log('bye!') + rl.close() + process.exit(0) + } + + if (cmd === 'stats') { + const s = await storage.getStats() + console.log(`\nposts: ${s.posts.toLocaleString()}`) + console.log(`comments: ${s.comments.toLocaleString()}`) + console.log(`subreddits: ${s.subreddits.join(', ') || 'none'}`) + continue + } + + if (cmd === 'scrape') { + const url = args[0] + const pages = parseInt(args[1] || '3', 10) + + if (!url) { + console.log('usage: scrape [pages]') + console.log('example: scrape https://reddit.com/r/vfx/best 5') + continue + } + + console.log(`\nscraping ${url} (${pages} pages)...\n`) + + try { + // fetch posts + scraper.setProgressCallback((p) => { + process.stdout.write(`\rfetching posts: ${progressBar(p.current, p.total)} ${p.current}/${p.total} `) + }) + + const posts = await scraper.fetchPosts({ + url, + pages, + postsPerPage: 100, + fetchComments: true, + delayMs: 3000, + }) + console.log(`\nfetched ${posts.length} posts`) + + // fetch comments + commentFetcher.setProgressCallback((p) => { + process.stdout.write(`\rfetching comments: ${progressBar(p.current, p.total)} ${p.current}/${p.total} `) + }) + + const commentsByPost = await commentFetcher.fetchAllComments(posts) + const totalComments = Array.from(commentsByPost.values()).reduce((acc, c) => acc + c.length, 0) + console.log(`\nfetched ${totalComments} comments`) + + // embed posts + embeddings.setProgressCallback((p) => { + process.stdout.write(`\rembedding posts: ${progressBar(p.current, p.total)} ${p.current}/${p.total} `) + }) + + const postPoints = await embeddings.embedPosts(posts, commentsByPost) + await 
storage.upsertPoints(postPoints) + console.log(`\nembedded ${postPoints.length} posts`) + + // embed comments + const allComments: RedditComment[] = [] + for (const comments of commentsByPost.values()) { + allComments.push(...comments) + } + + embeddings.setProgressCallback((p) => { + process.stdout.write(`\rembedding comments: ${progressBar(p.current, p.total)} ${p.current}/${p.total} `) + }) + + const commentPoints = await embeddings.embedComments(allComments) + await storage.upsertPoints(commentPoints) + console.log(`\nembedded ${commentPoints.length} comments`) + + console.log('\ndone!') + + } catch (err) { + console.error('\nerror:', err instanceof Error ? err.message : err) + } + continue + } + + if (cmd === 'search') { + const query = args.join(' ') + if (!query) { + console.log('usage: search ') + continue + } + + try { + console.log(`\nsearching for "${query}"...\n`) + const vector = await embeddings.embed(query) + const results = await storage.search(vector, 10) + lastResults = results + + if (results.length === 0) { + console.log('no results found') + continue + } + + for (const r of results) { + const preview = (r.payload.title || r.payload.body || '').slice(0, 80).replace(/\n/g, ' ') + const type = r.payload.type === 'post' ? '[post]' : '[comment]' + console.log(`${type} (${r.score.toFixed(3)}) ${preview}...`) + console.log(` -> ${r.payload.permalink}\n`) + } + } catch (err) { + console.error('error:', err instanceof Error ? 
err.message : err) + } + continue + } + + if (cmd === 'export') { + const format = args[0] + if (!format || !['json', 'csv'].includes(format)) { + console.log('usage: export json|csv') + continue + } + + if (lastResults.length === 0) { + console.log('no results to export (run a search first)') + continue + } + + const filename = `reddit-trends.${format}` + + if (format === 'json') { + const data = lastResults.map(r => ({ + id: r.payload.id, + type: r.payload.type, + subreddit: r.payload.subreddit, + title: r.payload.title, + body: r.payload.body, + author: r.payload.author, + score: r.payload.score, + created: new Date(r.payload.created * 1000).toISOString(), + permalink: r.payload.permalink, + similarity: r.score, + })) + await Bun.write(filename, JSON.stringify(data, null, 2)) + } else { + const headers = ['id', 'type', 'subreddit', 'title', 'body', 'author', 'score', 'created', 'permalink', 'similarity'] + const escape = (val: string | number | undefined): string => { + if (val === undefined) return '' + const str = String(val) + if (str.includes(',') || str.includes('"') || str.includes('\n')) { + return `"${str.replace(/"/g, '""')}"` + } + return str + } + const rows = lastResults.map(r => [ + r.payload.id, r.payload.type, r.payload.subreddit, r.payload.title || '', + r.payload.body, r.payload.author, r.payload.score, + new Date(r.payload.created * 1000).toISOString(), r.payload.permalink, r.score.toFixed(4), + ].map(escape).join(',')) + await Bun.write(filename, [headers.join(','), ...rows].join('\n')) + } + + console.log(`exported to ${filename}`) + continue + } + + if (cmd) { + console.log(`unknown command: ${cmd}`) + } + } +} + +main().catch(err => { + console.error('fatal error:', err) + process.exit(1) +}) diff --git a/src/embeddings/ollama.ts b/src/embeddings/ollama.ts new file mode 100644 index 0000000..7c1f973 --- /dev/null +++ b/src/embeddings/ollama.ts @@ -0,0 +1,175 @@ +import { Ollama } from 'ollama' +import { prepareForEmbedding, cleanText } from 
'../utils/text' +import type { RedditPost, RedditComment } from '../scraper/types' +import type { PointPayload } from '../storage/types' + +const MODEL = 'nomic-embed-text' +const VECTOR_DIM = 768 +const BATCH_SIZE = 10 + +export interface EmbeddedPoint { + id: string + vector: number[] + payload: PointPayload +} + +export interface EmbeddingProgress { + current: number + total: number + message: string +} + +export class EmbeddingPipeline { + private ollama: Ollama + private onProgress?: (progress: EmbeddingProgress) => void + + constructor(host?: string) { + this.ollama = new Ollama({ + host: host || process.env.OLLAMA_HOST || 'http://localhost:11434', + }) + } + + setProgressCallback(callback: (progress: EmbeddingProgress) => void): void { + this.onProgress = callback + } + + private emitProgress(progress: EmbeddingProgress): void { + this.onProgress?.(progress) + } + + async embed(text: string): Promise { + const response = await this.ollama.embed({ + model: MODEL, + input: text, + }) + return response.embeddings[0] ?? 
[] + } + + async embedBatch(texts: string[]): Promise { + const response = await this.ollama.embed({ + model: MODEL, + input: texts, + }) + return response.embeddings + } + + async embedPosts( + posts: RedditPost[], + commentsByPost: Map + ): Promise { + const points: EmbeddedPoint[] = [] + const batches: Array<{ text: string; payload: PointPayload }> = [] + + for (const post of posts) { + const comments = commentsByPost.get(post.id) || [] + const topComments = comments + .sort((a, b) => b.score - a.score) + .slice(0, 5) + .map(c => c.body) + + const text = prepareForEmbedding(post.title, post.selftext, topComments) + + batches.push({ + text, + payload: { + id: post.id, + type: 'post', + subreddit: post.subreddit, + title: post.title, + author: post.author, + body: post.selftext || '', + score: post.score, + created: post.created_utc, + permalink: `https://reddit.com${post.permalink}`, + }, + }) + } + + for (let i = 0; i < batches.length; i += BATCH_SIZE) { + const batch = batches.slice(i, i + BATCH_SIZE) + + this.emitProgress({ + current: Math.min(i + BATCH_SIZE, batches.length), + total: batches.length, + message: `Embedding posts ${i + 1}-${Math.min(i + BATCH_SIZE, batches.length)}`, + }) + + const texts = batch.map(b => b.text) + const embeddings = await this.embedBatch(texts) + + for (let j = 0; j < batch.length; j++) { + const item = batch[j] + const embedding = embeddings[j] + if (item && embedding) { + points.push({ + id: item.payload.id, + vector: embedding, + payload: item.payload, + }) + } + } + } + + return points + } + + async embedComments(comments: RedditComment[]): Promise { + const points: EmbeddedPoint[] = [] + + const validComments = comments.filter( + c => c.body && c.body.length > 20 && c.author !== '[deleted]' + ) + + for (let i = 0; i < validComments.length; i += BATCH_SIZE) { + const batch = validComments.slice(i, i + BATCH_SIZE) + + this.emitProgress({ + current: Math.min(i + BATCH_SIZE, validComments.length), + total: validComments.length, 
+ message: `Embedding comments ${i + 1}-${Math.min(i + BATCH_SIZE, validComments.length)}`, + }) + + const texts = batch.map(c => cleanText(c.body)) + const embeddings = await this.embedBatch(texts) + + for (let j = 0; j < batch.length; j++) { + const comment = batch[j] + const embedding = embeddings[j] + if (comment && embedding) { + points.push({ + id: comment.id, + vector: embedding, + payload: { + id: comment.id, + type: 'comment', + subreddit: comment.subreddit, + author: comment.author, + body: comment.body, + score: comment.score, + created: comment.created_utc, + permalink: `https://reddit.com${comment.permalink}`, + parent_id: comment.parent_id, + }, + }) + } + } + } + + return points + } + + async checkConnection(): Promise { + try { + const result = await this.ollama.list() + const hasModel = result.models.some(m => m.name.startsWith(MODEL)) + if (!hasModel) { + console.warn(`Model ${MODEL} not found. Available models:`, result.models.map(m => m.name)) + } + return true + } catch { + return false + } + } +} + +export { VECTOR_DIM } diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..55001b1 --- /dev/null +++ b/src/index.ts @@ -0,0 +1,59 @@ +import { createApp } from './tui/app' + +async function main() { + console.clear() + + const ollamaOk = await checkOllama() + if (!ollamaOk) { + console.error('error: cannot connect to ollama at', process.env.OLLAMA_HOST || 'http://localhost:11434') + console.error('make sure ollama is running and nomic-embed-text model is available') + console.error('run: ollama pull nomic-embed-text') + process.exit(1) + } + + const qdrantOk = await checkQdrant() + if (!qdrantOk) { + console.error('error: cannot connect to qdrant at', process.env.QDRANT_URL || 'http://localhost:6333') + console.error('make sure qdrant is running and QDRANT_API_KEY is set if required') + process.exit(1) + } + + await createApp() +} + +async function checkOllama(): Promise { + try { + const { Ollama } = await import('ollama') 
+ const client = new Ollama({ + host: process.env.OLLAMA_HOST || 'http://localhost:11434', + }) + const models = await client.list() + return models.models.some(m => m.name.includes('nomic-embed-text')) + } catch { + return false + } +} + +async function checkQdrant(): Promise { + try { + const { QdrantClient } = await import('@qdrant/js-client-rest') + const qdrantUrl = process.env.QDRANT_URL || 'http://localhost:6333' + const parsedUrl = new URL(qdrantUrl) + + const client = new QdrantClient({ + host: parsedUrl.hostname, + port: parsedUrl.port ? parseInt(parsedUrl.port) : (parsedUrl.protocol === 'https:' ? 443 : 6333), + https: parsedUrl.protocol === 'https:', + apiKey: process.env.QDRANT_API_KEY, + }) + await client.getCollections() + return true + } catch { + return false + } +} + +main().catch(err => { + console.error('fatal error:', err) + process.exit(1) +}) diff --git a/src/scraper/comments.ts b/src/scraper/comments.ts new file mode 100644 index 0000000..cbadd13 --- /dev/null +++ b/src/scraper/comments.ts @@ -0,0 +1,120 @@ +import { RateLimiter, fetchWithRetry } from '../utils/rate-limit' +import type { + RedditComment, + RedditPost, + RedditListing, + RedditCommentData, + ScrapeProgress, +} from './types' + +const USER_AGENT = 'reddit-trend-analyzer/1.0 (by /u/trend-analyzer-bot)' + +export class CommentFetcher { + private rateLimiter: RateLimiter + private onProgress?: (progress: ScrapeProgress) => void + + constructor(delayMs: number = 5000) { + this.rateLimiter = new RateLimiter(delayMs) + } + + setProgressCallback(callback: (progress: ScrapeProgress) => void): void { + this.onProgress = callback + } + + private emitProgress(progress: ScrapeProgress): void { + this.onProgress?.(progress) + } + + private extractComments( + listing: RedditListing, + subreddit: string, + depth: number = 0 + ): RedditComment[] { + const comments: RedditComment[] = [] + + for (const child of listing.data.children) { + if (child.kind !== 't1') continue + const data = 
child.data + + if (data.author === '[deleted]' || !data.body) continue + + comments.push({ + id: data.id, + name: data.name, + body: data.body, + author: data.author, + score: data.score, + created_utc: data.created_utc, + permalink: data.permalink, + parent_id: data.parent_id, + subreddit: subreddit, + depth: depth, + }) + + if (data.replies && typeof data.replies === 'object') { + comments.push( + ...this.extractComments(data.replies, subreddit, depth + 1) + ) + } + } + + return comments + } + + async fetchCommentsForPost(post: RedditPost): Promise { + const url = `https://www.reddit.com${post.permalink}.json?limit=100&depth=3` + + try { + const response = await fetchWithRetry< + [RedditListing, RedditListing] + >( + url, + { + headers: { + 'User-Agent': USER_AGENT, + 'Accept': 'application/json', + }, + }, + this.rateLimiter, + 3 + ) + + if (!response[1]?.data?.children) { + return [] + } + + return this.extractComments(response[1], post.subreddit) + } catch (error) { + // silently skip failed posts + return [] + } + } + + async fetchAllComments(posts: RedditPost[]): Promise> { + const commentsByPost = new Map() + + for (let i = 0; i < posts.length; i++) { + const post = posts[i] + if (!post) continue + + this.emitProgress({ + phase: 'comments', + current: i + 1, + total: posts.length, + message: `Fetching comments for post ${i + 1}/${posts.length}`, + }) + + const comments = await this.fetchCommentsForPost(post) + commentsByPost.set(post.id, comments) + } + + this.emitProgress({ + phase: 'comments', + current: posts.length, + total: posts.length, + message: 'Done fetching comments', + }) + + return commentsByPost + } +} diff --git a/src/scraper/reddit.ts b/src/scraper/reddit.ts new file mode 100644 index 0000000..9303cdf --- /dev/null +++ b/src/scraper/reddit.ts @@ -0,0 +1,120 @@ +import { RateLimiter, fetchWithRetry } from '../utils/rate-limit' +import type { + RedditPost, + RedditListing, + RedditPostData, + ScrapeOptions, + ScrapeProgress, +} from './types' 
+ +const USER_AGENT = 'reddit-trend-analyzer/1.0 (by /u/trend-analyzer-bot)' + +export function normalizeRedditUrl(url: string): string { + let normalized = url.trim() + + if (!normalized.startsWith('http')) { + normalized = `https://www.reddit.com${normalized.startsWith('/') ? '' : '/'}${normalized}` + } + + normalized = normalized + .replace('old.reddit.com', 'www.reddit.com') + .replace('new.reddit.com', 'www.reddit.com') + + if (!normalized.endsWith('.json') && !normalized.includes('.json?')) { + const hasQuery = normalized.includes('?') + normalized = hasQuery + ? normalized.replace('?', '.json?') + : normalized + '.json' + } + + return normalized +} + +export function parseSubredditFromUrl(url: string): string { + const match = url.match(/\/r\/([^/?]+)/) + return match?.[1] ?? 'unknown' +} + +export class RedditScraper { + private rateLimiter: RateLimiter + private onProgress?: (progress: ScrapeProgress) => void + + constructor(delayMs: number = 5000) { + this.rateLimiter = new RateLimiter(delayMs) + } + + setProgressCallback(callback: (progress: ScrapeProgress) => void): void { + this.onProgress = callback + } + + private emitProgress(progress: ScrapeProgress): void { + this.onProgress?.(progress) + } + + async fetchPosts(options: ScrapeOptions): Promise { + const baseUrl = normalizeRedditUrl(options.url) + const subreddit = parseSubredditFromUrl(options.url) + const posts: RedditPost[] = [] + let after: string | null = null + + for (let page = 0; page < options.pages; page++) { + const url = new URL(baseUrl) + url.searchParams.set('limit', String(options.postsPerPage)) + if (after) { + url.searchParams.set('after', after) + } + + this.emitProgress({ + phase: 'posts', + current: posts.length, + total: options.pages * options.postsPerPage, + message: `Fetching page ${page + 1}/${options.pages}...`, + }) + + const listing = await fetchWithRetry>( + url.toString(), + { + headers: { + 'User-Agent': USER_AGENT, + 'Accept': 'application/json', + }, + }, + 
this.rateLimiter, + 3 + ) + + for (const child of listing.data.children) { + if (child.kind !== 't3') continue + const data = child.data + + posts.push({ + id: data.id, + name: data.name, + title: data.title, + selftext: data.selftext || '', + author: data.author, + score: data.score, + upvote_ratio: data.upvote_ratio, + num_comments: data.num_comments, + created_utc: data.created_utc, + permalink: data.permalink, + subreddit: subreddit, + url: data.url, + is_self: data.is_self, + }) + } + + after = listing.data.after + if (!after) break + } + + this.emitProgress({ + phase: 'posts', + current: posts.length, + total: posts.length, + message: `Fetched ${posts.length} posts`, + }) + + return posts + } +} diff --git a/src/scraper/types.ts b/src/scraper/types.ts new file mode 100644 index 0000000..de0a4c9 --- /dev/null +++ b/src/scraper/types.ts @@ -0,0 +1,87 @@ +export interface RedditPost { + id: string + name: string + title: string + selftext: string + author: string + score: number + upvote_ratio: number + num_comments: number + created_utc: number + permalink: string + subreddit: string + url: string + is_self: boolean +} + +export interface RedditComment { + id: string + name: string + body: string + author: string + score: number + created_utc: number + permalink: string + parent_id: string + subreddit: string + depth: number +} + +export interface RedditListingData { + after: string | null + before: string | null + children: Array<{ kind: string; data: T }> + dist: number +} + +export interface RedditListing { + kind: string + data: RedditListingData +} + +export interface RedditPostData { + id: string + name: string + title: string + selftext: string + selftext_html: string | null + author: string + score: number + upvote_ratio: number + num_comments: number + created_utc: number + permalink: string + subreddit: string + url: string + is_self: boolean +} + +export interface RedditCommentData { + id: string + name: string + body: string + body_html: string + 
author: string + score: number + created_utc: number + permalink: string + parent_id: string + subreddit: string + depth: number + replies?: RedditListing | '' +} + +export interface ScrapeOptions { + url: string + pages: number + postsPerPage: number + fetchComments: boolean + delayMs: number +} + +export interface ScrapeProgress { + phase: 'posts' | 'comments' | 'done' + current: number + total: number + message: string +} diff --git a/src/storage/qdrant.ts b/src/storage/qdrant.ts new file mode 100644 index 0000000..9362625 --- /dev/null +++ b/src/storage/qdrant.ts @@ -0,0 +1,183 @@ +import { QdrantClient } from '@qdrant/js-client-rest' +import type { EmbeddedPoint } from '../embeddings/ollama' +import type { PointPayload, SearchResult, CollectionStats } from './types' +import { VECTOR_DIM } from '../embeddings/ollama' + +const COLLECTION_NAME = 'reddit_trends' +const BATCH_SIZE = 100 + +export class QdrantStorage { + private client: QdrantClient + private collectionName: string + + constructor(url?: string, apiKey?: string, collectionName?: string) { + const qdrantUrl = url || process.env.QDRANT_URL || 'http://localhost:6333' + const parsedUrl = new URL(qdrantUrl) + + this.client = new QdrantClient({ + host: parsedUrl.hostname, + port: parsedUrl.port ? parseInt(parsedUrl.port) : (parsedUrl.protocol === 'https:' ? 
443 : 6333), + https: parsedUrl.protocol === 'https:', + apiKey: apiKey || process.env.QDRANT_API_KEY, + }) + this.collectionName = collectionName || COLLECTION_NAME + } + + async ensureCollection(): Promise { + const collections = await this.client.getCollections() + const exists = collections.collections.some(c => c.name === this.collectionName) + + if (!exists) { + await this.client.createCollection(this.collectionName, { + vectors: { + size: VECTOR_DIM, + distance: 'Cosine', + }, + }) + + await this.client.createPayloadIndex(this.collectionName, { + field_name: 'subreddit', + field_schema: 'keyword', + }) + + await this.client.createPayloadIndex(this.collectionName, { + field_name: 'type', + field_schema: 'keyword', + }) + + await this.client.createPayloadIndex(this.collectionName, { + field_name: 'created', + field_schema: 'integer', + }) + + await this.client.createPayloadIndex(this.collectionName, { + field_name: 'score', + field_schema: 'integer', + }) + } + } + + async upsertPoints(points: EmbeddedPoint[]): Promise { + for (let i = 0; i < points.length; i += BATCH_SIZE) { + const batch = points.slice(i, i + BATCH_SIZE) + + await this.client.upsert(this.collectionName, { + wait: true, + points: batch.map((point, idx) => ({ + id: i + idx, + vector: point.vector, + payload: point.payload as unknown as Record, + })), + }) + } + } + + async search( + vector: number[], + limit: number = 10, + filters?: { + subreddit?: string + type?: 'post' | 'comment' + minScore?: number + afterDate?: number + } + ): Promise { + const must: any[] = [] + + if (filters?.subreddit) { + must.push({ + key: 'subreddit', + match: { value: filters.subreddit }, + }) + } + + if (filters?.type) { + must.push({ + key: 'type', + match: { value: filters.type }, + }) + } + + if (filters?.minScore !== undefined) { + must.push({ + key: 'score', + range: { gte: filters.minScore }, + }) + } + + if (filters?.afterDate !== undefined) { + must.push({ + key: 'created', + range: { gte: 
filters.afterDate }, + }) + } + + const results = await this.client.search(this.collectionName, { + vector, + limit, + with_payload: true, + filter: must.length > 0 ? { must } : undefined, + }) + + return results.map(r => ({ + id: String(r.id), + score: r.score, + payload: r.payload as unknown as PointPayload, + })) + } + + async getStats(): Promise { + try { + const info = await this.client.getCollection(this.collectionName) + const pointCount = info.points_count || 0 + + const postCount = await this.client.count(this.collectionName, { + filter: { must: [{ key: 'type', match: { value: 'post' } }] }, + }) + + const commentCount = await this.client.count(this.collectionName, { + filter: { must: [{ key: 'type', match: { value: 'comment' } }] }, + }) + + const scroll = await this.client.scroll(this.collectionName, { + limit: 1000, + with_payload: { include: ['subreddit'] }, + }) + + const subreddits = new Set() + for (const point of scroll.points) { + const payload = point.payload as { subreddit?: string } + if (payload?.subreddit) { + subreddits.add(payload.subreddit) + } + } + + return { + posts: postCount.count, + comments: commentCount.count, + subreddits: Array.from(subreddits), + } + } catch { + return { posts: 0, comments: 0, subreddits: [] } + } + } + + async deleteCollection(): Promise { + try { + await this.client.deleteCollection(this.collectionName) + } catch { + // collection might not exist + } + } + + async checkConnection(): Promise { + try { + await this.client.getCollections() + return true + } catch { + return false + } + } +} + +export { COLLECTION_NAME } diff --git a/src/storage/types.ts b/src/storage/types.ts new file mode 100644 index 0000000..a1990ff --- /dev/null +++ b/src/storage/types.ts @@ -0,0 +1,24 @@ +export interface PointPayload { + id: string + type: 'post' | 'comment' + subreddit: string + title?: string + author: string + body: string + score: number + created: number + permalink: string + parent_id?: string +} + +export interface 
/**
 * Builds the terminal dashboard and wires it to the scrape -> embed -> store
 * pipeline.
 *
 * Layout (top to bottom): header, scrape panel (url / pages / posts per page),
 * progress panel, a row with the stats and trending/status panels, the search
 * panel and the export hint bar.
 *
 * Keyboard handling:
 * - Ctrl+C always quits; `q` quits when no text input is focused.
 * - Tab toggles focus between the url input and the search input.
 * - Escape blurs the focused input.
 * - With no input focused: `e` exports the last search results as JSON,
 *   `c` exports them as CSV, `r` refreshes the stats panel.
 * - Enter starts a scrape when a url is set and no scrape is running. It is
 *   ignored while the search input is focused, so submitting a search query
 *   does not also kick off a scrape.
 *
 * Export failures are reported in the trending/status panel rather than
 * escaping the async key listener as an unhandled rejection.
 *
 * @returns The renderer and the mutable application state.
 */
export async function createApp() {
  const renderer = await createCliRenderer({ exitOnCtrlC: false })

  const state: AppState = {
    url: '',
    pages: 5,
    postsPerPage: 100,
    isRunning: false,
    lastResults: [],
  }

  const scraper = new RedditScraper(3000)
  const commentFetcher = new CommentFetcher(3000)
  const embeddings = new EmbeddingPipeline()
  const storage = new QdrantStorage()

  const root = new BoxRenderable(renderer, {
    id: 'root',
    flexDirection: 'column',
    padding: 1,
  })

  const header = new BoxRenderable(renderer, {
    id: 'header',
    flexDirection: 'row',
    justifyContent: 'space-between',
    paddingBottom: 1,
  })

  header.add(new TextRenderable(renderer, {
    id: 'title',
    content: ' reddit trend analyzer',
  }))

  header.add(new TextRenderable(renderer, {
    id: 'quit-hint-header',
    content: '[q]uit ',
  }))

  root.add(header)

  const urlInput = createUrlInput(renderer, {
    onUrlSubmit: (url) => { state.url = url },
    onPagesChange: (pages) => { state.pages = pages },
    onPostsPerPageChange: (count) => { state.postsPerPage = count },
    onStartScrape: () => runScrape(),
  })
  root.add(urlInput)

  const progressPanel: BoxRenderable = createProgressPanel(renderer)
  root.add(progressPanel)

  const middleRow = new BoxRenderable(renderer, {
    id: 'middle-row',
    flexDirection: 'row',
    gap: 2,
  })

  const statsPanel: BoxRenderable = createStatsPanel(renderer)
  middleRow.add(statsPanel)

  const trendingPanel: BoxRenderable = createTrendingPanel(renderer)
  middleRow.add(trendingPanel)

  root.add(middleRow)

  const searchPanel: BoxRenderable = createSearchPanel(renderer, {
    onSearch: async (query) => {
      // Searching while a scrape is writing to the collection is skipped.
      if (state.isRunning) return
      try {
        const vector = await embeddings.embed(query)
        const results = await storage.search(vector, 10)
        state.lastResults = results
        updateSearchResults(searchPanel, results)
      } catch (err) {
        console.error('Search error:', err)
      }
    },
  })
  root.add(searchPanel)

  const exportBar = createExportBar(renderer)
  root.add(exportBar)

  renderer.root.add(root)

  /** Shows a single status line in the trending/status panel. */
  function setStatus(title: string, count: number = 0): void {
    updateTrending(trendingPanel, [{ title, count, avgScore: 0 }])
  }

  /** Tears the renderer down and terminates the process. */
  function quit(): never {
    renderer.destroy()
    process.exit(0)
  }

  /** Runs the full pipeline: fetch posts and comments, embed, store, refresh stats. */
  async function runScrape() {
    if (state.isRunning || !state.url) return
    state.isRunning = true
    resetProgress(progressPanel)

    try {
      await storage.ensureCollection()

      scraper.setProgressCallback((p) => {
        updateProgress(progressPanel, {
          phase: p.phase,
          current: p.current,
          total: p.total,
          message: p.message,
        })
      })

      const posts = await scraper.fetchPosts({
        url: state.url,
        pages: state.pages,
        postsPerPage: state.postsPerPage,
        fetchComments: true,
        delayMs: 3000,
      })

      commentFetcher.setProgressCallback((p) => {
        updateProgress(progressPanel, {
          phase: 'comments',
          current: p.current,
          total: p.total,
          message: p.message,
        })
      })

      const commentsByPost = await commentFetcher.fetchAllComments(posts)

      embeddings.setProgressCallback((p) => {
        updateProgress(progressPanel, {
          phase: 'embedding',
          current: p.current,
          total: p.total,
          message: p.message,
        })
      })

      const postPoints = await embeddings.embedPosts(posts, commentsByPost)
      await storage.upsertPoints(postPoints)

      const allComments: RedditComment[] = []
      for (const comments of commentsByPost.values()) {
        allComments.push(...comments)
      }

      const commentPoints = await embeddings.embedComments(allComments)
      await storage.upsertPoints(commentPoints)

      const stats = await storage.getStats()
      updateStats(statsPanel, stats)

      setStatus('scrape complete', postPoints.length)
    } catch (err) {
      console.error('Scrape error:', err)
      setStatus(`error: ${err instanceof Error ? err.message : 'unknown'}`)
    } finally {
      state.isRunning = false
    }
  }

  /** Re-reads collection stats and updates the stats panel; failures are logged. */
  async function refreshStats() {
    try {
      const stats = await storage.getStats()
      updateStats(statsPanel, stats)
    } catch (err) {
      console.error('Stats refresh error:', err)
    }
  }

  /** Writes the last search results with `write` and reports the outcome. */
  async function runExport(
    write: (results: SearchResult[]) => Promise<void>,
    target: string
  ): Promise<void> {
    try {
      await write(state.lastResults)
      setStatus(`exported to ${target}`)
    } catch (err) {
      setStatus(`export failed: ${err instanceof Error ? err.message : 'unknown'}`)
    }
  }

  renderer.keyInput.on('keypress', async (key: KeyEvent) => {
    const urlInputEl = urlInput.getRenderable('url-input')
    const searchInputEl = searchPanel.getRenderable('search-input')
    const searchFocused = Boolean(searchInputEl?.focused)
    const inputFocused = Boolean(urlInputEl?.focused) || searchFocused

    // always allow quit
    if (key.ctrl && key.name === 'c') {
      quit()
    }

    // tab always switches focus
    if (key.name === 'tab') {
      if (urlInputEl?.focused) {
        searchInputEl?.focus()
      } else {
        urlInputEl?.focus()
      }
      return
    }

    // escape unfocuses inputs
    if (key.name === 'escape' && inputFocused) {
      urlInputEl?.blur?.()
      searchInputEl?.blur?.()
      return
    }

    // only handle hotkeys when no input is focused
    if (!inputFocused) {
      if (key.name === 'q') {
        quit()
      }

      const canExport = !state.isRunning && state.lastResults.length > 0

      if (key.name === 'e' && canExport) {
        await runExport(exportToJson, 'reddit-trends.json')
      }

      if (key.name === 'c' && canExport) {
        await runExport(exportToCsv, 'reddit-trends.csv')
      }

      if (key.name === 'r' && !state.isRunning) {
        await refreshStats()
      }
    }

    // enter starts a scrape when a url is set; while the search input is
    // focused, enter belongs to the search box and must not start one
    if (key.name === 'return' && !searchFocused && !state.isRunning && state.url) {
      await runScrape()
    }
  })

  focusUrlInput(urlInput)

  await refreshStats()

  return { renderer, state }
}
/**
 * Writes search results to `filename` as pretty-printed JSON (2-space indent).
 *
 * Each record carries the payload fields plus `similarity` (the search score).
 * `created` is converted from epoch seconds to an ISO-8601 string.
 *
 * @param results Search results to export.
 * @param filename Destination path; defaults to 'reddit-trends.json'.
 */
export async function exportToJson(
  results: SearchResult[],
  filename: string = 'reddit-trends.json'
): Promise<void> {
  // Key order here is the key order of every exported record.
  const toExportRecord = ({ payload, score }: SearchResult) => ({
    id: payload.id,
    type: payload.type,
    subreddit: payload.subreddit,
    title: payload.title,
    body: payload.body,
    author: payload.author,
    score: payload.score,
    created: new Date(payload.created * 1000).toISOString(),
    permalink: payload.permalink,
    similarity: score,
  })

  const serialized = JSON.stringify(results.map(r => toExportRecord(r)), null, 2)
  await Bun.write(filename, serialized)
}
/**
 * Renders a fixed-width text progress bar made of filled and empty cells.
 *
 * `percent` is clamped to the 0-100 range and a non-finite value counts as 0.
 * Without the clamp, a percent above 100 or below 0 produces a negative
 * repeat count, which makes `String.prototype.repeat` throw a RangeError.
 *
 * @param percent Completion percentage.
 * @param width Total number of cells in the bar; defaults to 20.
 * @returns A string of exactly `width` cells (empty when width is not positive).
 */
function createProgressBar(percent: number, width: number = 20): string {
  const cells = Number.isFinite(width) ? Math.max(0, Math.trunc(width)) : 0
  const clamped = Number.isFinite(percent) ? Math.min(100, Math.max(0, percent)) : 0
  const filled = Math.round((clamped / 100) * cells)
  return '█'.repeat(filled) + '░'.repeat(cells - filled)
}
/**
 * Restores the progress panel to its idle state: the fetch line shows
 * 'waiting...' and the embed line is cleared. Lines that cannot be found in
 * the container are skipped.
 *
 * @param container The progress panel whose text lines are reset.
 */
export function resetProgress(container: BoxRenderable): void {
  const idleContent: Array<[id: string, content: string]> = [
    ['fetch-progress', 'waiting...'],
    ['embed-progress', ''],
  ]

  for (const [id, content] of idleContent) {
    const line = container.getRenderable(id) as TextRenderable
    if (!line) continue
    line.content = content
  }
}
/**
 * Renders up to five search results into the panel's 'results-content' text,
 * one line per result: a quoted preview followed by the similarity score with
 * two decimals. Shows 'no results' for an empty result list and does nothing
 * when the text element cannot be found.
 *
 * The preview is the title (or the body when there is no title) with every
 * run of whitespace flattened to a single space, so embedded line breaks or
 * tabs cannot break the one-line-per-result layout. It is cut to 50
 * characters, and the "..." marker is appended only when text was actually
 * cut off.
 *
 * @param container The search panel.
 * @param results Search results, best match first.
 */
export function updateSearchResults(
  container: BoxRenderable,
  results: SearchResult[]
): void {
  const MAX_RESULTS = 5
  const PREVIEW_LENGTH = 50

  const resultsContent = container.getRenderable('results-content') as TextRenderable
  if (!resultsContent) return

  if (results.length === 0) {
    resultsContent.content = 'no results'
    return
  }

  const lines = results.slice(0, MAX_RESULTS).map(result => {
    const text = (result.payload.title || result.payload.body || '')
      .replace(/\s+/g, ' ')
      .trim()
    const preview =
      text.length > PREVIEW_LENGTH ? `${text.slice(0, PREVIEW_LENGTH)}...` : text
    return `> "${preview}" (${result.score.toFixed(2)})`
  })

  resultsContent.content = lines.join('\n')
}
/**
 * Writes the collection statistics into the stats panel: locale-formatted
 * post and comment counts and the number of distinct subreddits. Text
 * elements that cannot be found in the container are skipped.
 *
 * @param container The stats panel.
 * @param stats Statistics to display.
 */
export function updateStats(container: BoxRenderable, stats: CollectionStats): void {
  // Text is produced lazily so nothing is formatted for a missing element.
  const rows: Array<[id: string, render: () => string]> = [
    ['posts-count', () => `posts: ${stats.posts.toLocaleString()}`],
    ['comments-count', () => `comments: ${stats.comments.toLocaleString()}`],
    ['subreddits-count', () => `subreddits: ${stats.subreddits.length}`],
  ]

  const targets = rows.map(([id]) => container.getRenderable(id) as TextRenderable)

  rows.forEach(([, render], index) => {
    const target = targets[index]
    if (target) {
      target.content = render()
    }
  })
}
/** Callbacks the scrape panel built by createUrlInput reports user input through. */
export interface UrlInputConfig {
  /** Called with the url input's current value whenever that input emits CHANGE. */
  onUrlSubmit: (url: string) => void
  /** Called when the pages input changes to a value that parses (base 10) to a number greater than 0. */
  onPagesChange: (pages: number) => void
  /** Called when the posts/page input changes to a value that parses (base 10) to a number greater than 0. */
  onPostsPerPageChange: (count: number) => void
  /**
   * Intended to start a scrape.
   * NOTE(review): createUrlInput does not appear to invoke this callback
   * anywhere; confirm whether it is still needed.
   */
  onStartScrape: () => void
}
/**
 * Spaces requests at least `minDelay * backoffMultiplier` milliseconds apart.
 *
 * `backoff()` doubles the multiplier (capped at 10) and logs the new
 * effective delay; `reset()` returns it to 1.
 */
export class RateLimiter {
  private lastRequest = 0
  private minDelay: number
  private backoffMultiplier = 1

  /** @param minDelayMs Base spacing between requests in milliseconds (default 5000). */
  constructor(minDelayMs: number = 5000) {
    this.minDelay = minDelayMs
  }

  /** Sleeps until the effective delay since the previous request has passed, then records the new request time. */
  async wait(): Promise<void> {
    const sinceLast = Date.now() - this.lastRequest
    const remaining = this.minDelay * this.backoffMultiplier - sinceLast
    if (remaining > 0) {
      await delay(remaining)
    }
    this.lastRequest = Date.now()
  }

  /** Doubles the backoff multiplier, up to a maximum of 10, and logs the resulting delay. */
  backoff(): void {
    const doubled = this.backoffMultiplier * 2
    this.backoffMultiplier = doubled > 10 ? 10 : doubled
    const effectiveDelay = this.minDelay * this.backoffMultiplier
    console.log(` rate limited, backing off to ${effectiveDelay}ms`)
  }

  /** Clears any accumulated backoff. */
  reset(): void {
    this.backoffMultiplier = 1
  }

  /** Replaces the base delay; the current backoff multiplier is kept. */
  setDelay(ms: number): void {
    this.minDelay = ms
  }
}
/**
 * Replacement text for the named character references this module decodes,
 * keyed by the entity NAME only (without the leading ampersand and trailing
 * semicolon). `nbsp` deliberately maps to a plain space.
 */
const HTML_ENTITIES: Record<string, string> = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
  nbsp: ' ',
}

/** Matches one named, decimal (#123) or hexadecimal (#x1F) character reference. */
const HTML_ENTITY_PATTERN = /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]*);/g

/** Code point of the zero-width space, which is removed rather than decoded. */
const ZERO_WIDTH_SPACE = 0x200b

/**
 * Decodes HTML character references in `text`.
 *
 * Handles the named references listed in HTML_ENTITIES plus any decimal or
 * hexadecimal numeric reference. A numeric reference to the zero-width space
 * is dropped entirely. Unknown names and out-of-range numbers are left
 * untouched.
 *
 * Decoding happens in a single pass, so text produced by decoding one
 * reference is never decoded a second time (an escaped ampersand followed by
 * "lt;" yields the literal entity text, not a "<").
 *
 * @param text Text that may contain character references.
 * @returns The decoded text.
 */
export function decodeHtmlEntities(text: string): string {
  return text.replace(HTML_ENTITY_PATTERN, (match: string, ref: string) => {
    if (!ref.startsWith('#')) {
      // Own-property check keeps names like "constructor" from hitting the prototype.
      return Object.prototype.hasOwnProperty.call(HTML_ENTITIES, ref)
        ? (HTML_ENTITIES[ref] ?? match)
        : match
    }

    const isHex = ref[1] === 'x' || ref[1] === 'X'
    const codePoint = isHex ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10)

    if (codePoint === ZERO_WIDTH_SPACE) return ''
    if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff) {
      return match
    }
    return String.fromCodePoint(codePoint)
  })
}
/**
 * Builds the text that is embedded for a post: a "Title:" section, a
 * "Content:" section and a "Discussion:" section made of up to five comments,
 * joined by blank lines and passed through truncateText.
 *
 * Sections whose cleaned text is empty are omitted. Comments are cleaned
 * first and only those longer than 10 characters count towards the five, so
 * deleted, removed or trivially short comments no longer use up a slot that a
 * later substantive comment could fill.
 *
 * @param title Post title (may be empty).
 * @param body Post body (may be empty).
 * @param comments Comment bodies in the order they should be considered.
 * @returns The combined, truncated text; '' when nothing usable was given.
 */
export function prepareForEmbedding(
  title: string,
  body: string,
  comments: string[] = []
): string {
  const MAX_COMMENTS = 5
  const MIN_COMMENT_LENGTH = 10

  const parts: string[] = []

  if (title) {
    parts.push(`Title: ${cleanText(title)}`)
  }

  if (body) {
    const cleanBody = cleanText(body)
    if (cleanBody) {
      parts.push(`Content: ${cleanBody}`)
    }
  }

  const topComments: string[] = []
  for (const comment of comments) {
    // Stop as soon as enough substantive comments were found.
    if (topComments.length >= MAX_COMMENTS) break
    const cleaned = cleanText(comment)
    if (cleaned.length > MIN_COMMENT_LENGTH) {
      topComments.push(cleaned)
    }
  }

  if (topComments.length > 0) {
    parts.push(`Discussion: ${topComments.join(' | ')}`)
  }

  return truncateText(parts.join('\n\n'))
}