initial commit of CLI
This commit is contained in:
commit
fd4cad363b
3
.env.example
Normal file
3
.env.example
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
QDRANT_URL=https://vectors.biohazardvfx.com
|
||||||
|
QDRANT_API_KEY=your-api-key-here
|
||||||
|
OLLAMA_HOST=http://localhost:11434
|
||||||
39
.gitignore
vendored
Normal file
39
.gitignore
vendored
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# dependencies (bun install)
|
||||||
|
node_modules
|
||||||
|
|
||||||
|
# output
|
||||||
|
out
|
||||||
|
dist
|
||||||
|
*.tgz
|
||||||
|
|
||||||
|
# code coverage
|
||||||
|
coverage
|
||||||
|
*.lcov
|
||||||
|
|
||||||
|
# logs
|
||||||
|
logs
|
||||||
|
*.log
|
||||||
|
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||||
|
|
||||||
|
# dotenv environment variable files
|
||||||
|
.env
|
||||||
|
.env.development.local
|
||||||
|
.env.test.local
|
||||||
|
.env.production.local
|
||||||
|
.env.local
|
||||||
|
|
||||||
|
# caches
|
||||||
|
.eslintcache
|
||||||
|
.cache
|
||||||
|
*.tsbuildinfo
|
||||||
|
|
||||||
|
# IntelliJ based IDEs
|
||||||
|
.idea
|
||||||
|
|
||||||
|
# Finder (MacOS) folder config
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
|
# exported data
|
||||||
|
reddit-trends.json
|
||||||
|
reddit-trends.csv
|
||||||
|
.env
|
||||||
74
CLAUDE.md
Normal file
74
CLAUDE.md
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
reddit trend analyzer
|
||||||
|
===
|
||||||
|
|
||||||
|
a CLI tool that scrapes reddit discussions, embeds them with ollama, stores them in qdrant, and provides a TUI dashboard for discovering common problems/trends.
|
||||||
|
|
||||||
|
running
|
||||||
|
---
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bun start # run the app
|
||||||
|
bun dev # run with watch mode
|
||||||
|
```
|
||||||
|
|
||||||
|
prerequisites
|
||||||
|
---
|
||||||
|
|
||||||
|
- ollama running locally with nomic-embed-text model (`ollama pull nomic-embed-text`)
|
||||||
|
- qdrant accessible at QDRANT_URL (or localhost:6333)
|
||||||
|
|
||||||
|
env vars
|
||||||
|
---
|
||||||
|
|
||||||
|
```
|
||||||
|
QDRANT_URL=https://vectors.biohazardvfx.com
|
||||||
|
QDRANT_API_KEY=<your-key>
|
||||||
|
OLLAMA_HOST=http://localhost:11434 # optional, defaults to this
|
||||||
|
```
|
||||||
|
|
||||||
|
architecture
|
||||||
|
---
|
||||||
|
|
||||||
|
```
|
||||||
|
src/
|
||||||
|
index.ts # entry point, connection checks, TUI setup
|
||||||
|
scraper/
|
||||||
|
reddit.ts # fetch subreddit posts with pagination
|
||||||
|
comments.ts # fetch comments for each post
|
||||||
|
types.ts # reddit json response types
|
||||||
|
embeddings/
|
||||||
|
ollama.ts # batch embed text with nomic-embed-text (768 dims)
|
||||||
|
storage/
|
||||||
|
qdrant.ts # create collection, upsert, search
|
||||||
|
types.ts # point payload schema
|
||||||
|
tui/
|
||||||
|
app.ts # main dashboard, wires everything together
|
||||||
|
components/
|
||||||
|
url-input.ts # subreddit url input
|
||||||
|
progress.ts # scraping/embedding progress bars
|
||||||
|
stats.ts # collection stats panel
|
||||||
|
trending.ts # trending topics view
|
||||||
|
search.ts # semantic search interface
|
||||||
|
export.ts # export to json/csv
|
||||||
|
utils/
|
||||||
|
rate-limit.ts # delay helper for reddit api
|
||||||
|
text.ts # text preprocessing for embedding
|
||||||
|
```
|
||||||
|
|
||||||
|
keybindings
|
||||||
|
---
|
||||||
|
|
||||||
|
- `q` or `ctrl+c` - quit
|
||||||
|
- `enter` - start scrape (when url is entered)
|
||||||
|
- `tab` - switch between url and search inputs
|
||||||
|
- `e` - export results to json
|
||||||
|
- `c` - export results to csv
|
||||||
|
- `r` - refresh stats from qdrant
|
||||||
|
|
||||||
|
coding notes
|
||||||
|
---
|
||||||
|
|
||||||
|
- uses @opentui/core standalone (no react/solid)
|
||||||
|
- reddit rate limiting: 3s delay between requests
|
||||||
|
- embeddings batched in groups of 10
|
||||||
|
- qdrant collection: reddit_trends with indexes on subreddit, type, created, score
|
||||||
15
README.md
Normal file
15
README.md
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
# reddit-trend-analyzer
|
||||||
|
|
||||||
|
To install dependencies:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bun install
|
||||||
|
```
|
||||||
|
|
||||||
|
To run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bun start
|
||||||
|
```
|
||||||
|
|
||||||
|
This project was created using `bun init` in bun v1.3.5. [Bun](https://bun.com) is a fast all-in-one JavaScript runtime.
|
||||||
346
brief.md
Normal file
346
brief.md
Normal file
@ -0,0 +1,346 @@
|
|||||||
|
reddit trend analyzer
|
||||||
|
===
|
||||||
|
|
||||||
|
a tool for discovering common problems and questions in reddit communities to inform content strategy and tool development.
|
||||||
|
|
||||||
|
core goal
|
||||||
|
---
|
||||||
|
|
||||||
|
find what people struggle with most -> create content/tools that solve those problems -> organic SEO growth
|
||||||
|
|
||||||
|
tech stack
|
||||||
|
---
|
||||||
|
|
||||||
|
- vector database: qdrant
|
||||||
|
- embeddings: nomic-embed-text (ollama)
|
||||||
|
- framework: next.js
|
||||||
|
- components: shadcn
|
||||||
|
- charts: recharts (simple, shadcn-compatible)
|
||||||
|
- theme: use ONLY the shadcn tokens exposed by the `@theme inline` block in globals.css (see "theme (globals.css)" below); no ad-hoc colors
|
||||||
|
|
||||||
|
data pipeline
|
||||||
|
---
|
||||||
|
|
||||||
|
```
|
||||||
|
reddit scrape -> text cleaning -> embedding -> qdrant storage
|
||||||
|
|
|
||||||
|
clustering (HDBSCAN)
|
||||||
|
|
|
||||||
|
problem extraction (LLM)
|
||||||
|
|
|
||||||
|
frequency + engagement scoring
|
||||||
|
```
|
||||||
|
|
||||||
|
core features
|
||||||
|
---
|
||||||
|
|
||||||
|
**1. data ingestion**
|
||||||
|
|
||||||
|
existing CLI handles this well:
|
||||||
|
- scrape subreddit posts + comments
|
||||||
|
- embed with nomic-embed-text
|
||||||
|
- store in qdrant with metadata (score, created, subreddit, type)
|
||||||
|
|
||||||
|
**2. problem clustering**
|
||||||
|
|
||||||
|
the key feature. group similar discussions to surface recurring themes.
|
||||||
|
|
||||||
|
- cluster embeddings using HDBSCAN (density-based, handles noise well)
|
||||||
|
- extract cluster centroids as topic anchors
|
||||||
|
- LLM pass to generate human-readable problem statements from each cluster
|
||||||
|
- rank clusters by:
|
||||||
|
- size (discussion count)
|
||||||
|
- total engagement (sum of upvotes)
|
||||||
|
- recency (still being talked about?)
|
||||||
|
|
||||||
|
output example:
|
||||||
|
```
|
||||||
|
| problem | discussions | upvotes | last seen |
|
||||||
|
|----------------------------------------------|-------------|---------|-----------|
|
||||||
|
| users struggle with X when doing Y | 47 | 2.3k | 2d ago |
|
||||||
|
| confusion about how to configure Z | 31 | 890 | 1w ago |
|
||||||
|
| no good free alternative to [competitor] | 28 | 1.1k | 3d ago |
|
||||||
|
```
|
||||||
|
|
||||||
|
**3. question extraction**
|
||||||
|
|
||||||
|
pull out actual questions people ask.
|
||||||
|
|
||||||
|
- pattern matching: "how do I", "why does", "is there a way to", "what's the best", etc.
|
||||||
|
- deduplicate semantically similar questions (vector similarity > 0.9)
|
||||||
|
- rank by engagement
|
||||||
|
- group under parent problem clusters
|
||||||
|
|
||||||
|
output: FAQ-ready list for blog posts, docs, or schema markup
|
||||||
|
|
||||||
|
**4. search + explore**
|
||||||
|
|
||||||
|
- semantic search across all scraped content
|
||||||
|
- filter by: subreddit, date range, min upvotes, type (post/comment)
|
||||||
|
- click through to original reddit discussions
|
||||||
|
|
||||||
|
**5. export**
|
||||||
|
|
||||||
|
- problem clusters as markdown content briefs
|
||||||
|
- questions as FAQ schema (json-ld ready)
|
||||||
|
- csv for spreadsheet analysis
|
||||||
|
- raw json for custom processing
|
||||||
|
|
||||||
|
dashboard views
|
||||||
|
---
|
||||||
|
|
||||||
|
**home / stats**
|
||||||
|
|
||||||
|
simple overview:
|
||||||
|
- total posts/comments in db
|
||||||
|
- subreddits being tracked
|
||||||
|
- problem clusters identified
|
||||||
|
- recent scrape activity
|
||||||
|
|
||||||
|
**problem explorer** (main view)
|
||||||
|
|
||||||
|
sortable/filterable table of problem clusters:
|
||||||
|
- columns: problem summary, discussion count, total upvotes, avg sentiment, last active
|
||||||
|
- expand row -> sample discussions + extracted questions
|
||||||
|
- select multiple -> bulk export as content briefs
|
||||||
|
- search within problems
|
||||||
|
|
||||||
|
**question bank**
|
||||||
|
|
||||||
|
all extracted questions:
|
||||||
|
- grouped by parent problem cluster (collapsible)
|
||||||
|
- search/filter
|
||||||
|
- copy as json-ld FAQ schema
|
||||||
|
- mark as "addressed" when content exists
|
||||||
|
|
||||||
|
**scrape manager**
|
||||||
|
|
||||||
|
- list of tracked subreddits
|
||||||
|
- manual scrape trigger
|
||||||
|
- scrape history with stats
|
||||||
|
- add/remove subreddits
|
||||||
|
|
||||||
|
|
||||||
|
To give the user "Ultimate Control," the dashboard should include:
|
||||||
|
|
||||||
|
1. **Similarity Sensitivity Slider:** A global control that adjusts how strict the vector database is. Lower similarity = more broad, creative connections. Higher similarity = more specific, literal results.
|
||||||
|
2. **The "Impact Score" Weighting:** Allow users to choose what "Importance" means to them: **Upvote Count**, **Sentiment Extremity**, or **Topic Velocity**. Adjusting these weights should re-order the problem explorer table in real-time.
|
||||||
|
3. **Command Palette:** Instead of clicking through menus, a "Ctrl + K" command bar allows the user to type "Find gaps in comparison intent" to instantly update the visualizations.
|
||||||
|
|
||||||
|
implementation phases
|
||||||
|
---
|
||||||
|
|
||||||
|
**phase 1: clustering + extraction (backend)**
|
||||||
|
|
||||||
|
- [ ] add HDBSCAN clustering to pipeline
|
||||||
|
- [ ] LLM integration for problem summarization (claude or local)
|
||||||
|
- [ ] question extraction with pattern matching + dedup
|
||||||
|
- [ ] store clusters in qdrant (or sqlite sidecar)
|
||||||
|
- [ ] CLI commands: `cluster`, `problems`, `questions`
|
||||||
|
|
||||||
|
**phase 2: web UI**
|
||||||
|
|
||||||
|
- [ ] next.js app with shadcn
|
||||||
|
- [ ] problem explorer table (tanstack table)
|
||||||
|
- [ ] question bank view
|
||||||
|
- [ ] semantic search
|
||||||
|
- [ ] export functionality
|
||||||
|
- [ ] basic stats dashboard
|
||||||
|
|
||||||
|
**phase 3: polish**
|
||||||
|
|
||||||
|
- [ ] scheduled/recurring scrapes
|
||||||
|
- [ ] better semantic deduplication
|
||||||
|
- [ ] sentiment scoring (optional)
|
||||||
|
- [ ] "addressed" tracking (link to published content)
|
||||||
|
|
||||||
|
env vars
|
||||||
|
---
|
||||||
|
|
||||||
|
```
|
||||||
|
QDRANT_URL=https://vectors.biohazardvfx.com
|
||||||
|
QDRANT_API_KEY=<key>
|
||||||
|
OLLAMA_HOST=http://localhost:11434
|
||||||
|
ANTHROPIC_API_KEY=<key> # for problem summarization
|
||||||
|
```
|
||||||
|
|
||||||
|
success criteria
|
||||||
|
---
|
||||||
|
|
||||||
|
tool is working if:
|
||||||
|
- we can identify 10+ distinct problems from a subreddit scrape
|
||||||
|
- problem summaries are actionable (could write a blog post about it)
|
||||||
|
- question extraction gives us real FAQs people are asking
|
||||||
|
- export format is immediately usable for content planning
|
||||||
|
|
||||||
|
everything else is nice-to-have.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
theme (globals.css)
|
||||||
|
---
|
||||||
|
|
||||||
|
```css
|
||||||
|
:root {
|
||||||
|
--background: oklch(0.9551 0 0);
|
||||||
|
--foreground: oklch(0.3211 0 0);
|
||||||
|
--card: oklch(0.9702 0 0);
|
||||||
|
--card-foreground: oklch(0.3211 0 0);
|
||||||
|
--popover: oklch(0.9702 0 0);
|
||||||
|
--popover-foreground: oklch(0.3211 0 0);
|
||||||
|
--primary: oklch(0.4891 0 0);
|
||||||
|
--primary-foreground: oklch(1.0000 0 0);
|
||||||
|
--secondary: oklch(0.9067 0 0);
|
||||||
|
--secondary-foreground: oklch(0.3211 0 0);
|
||||||
|
--muted: oklch(0.8853 0 0);
|
||||||
|
--muted-foreground: oklch(0.5103 0 0);
|
||||||
|
--accent: oklch(0.8078 0 0);
|
||||||
|
--accent-foreground: oklch(0.3211 0 0);
|
||||||
|
--destructive: oklch(0.5594 0.1900 25.8625);
|
||||||
|
--destructive-foreground: oklch(1.0000 0 0);
|
||||||
|
--border: oklch(0.8576 0 0);
|
||||||
|
--input: oklch(0.9067 0 0);
|
||||||
|
--ring: oklch(0.4891 0 0);
|
||||||
|
--chart-1: oklch(0.4891 0 0);
|
||||||
|
--chart-2: oklch(0.4863 0.0361 196.0278);
|
||||||
|
--chart-3: oklch(0.6534 0 0);
|
||||||
|
--chart-4: oklch(0.7316 0 0);
|
||||||
|
--chart-5: oklch(0.8078 0 0);
|
||||||
|
--sidebar: oklch(0.9370 0 0);
|
||||||
|
--sidebar-foreground: oklch(0.3211 0 0);
|
||||||
|
--sidebar-primary: oklch(0.4891 0 0);
|
||||||
|
--sidebar-primary-foreground: oklch(1.0000 0 0);
|
||||||
|
--sidebar-accent: oklch(0.8078 0 0);
|
||||||
|
--sidebar-accent-foreground: oklch(0.3211 0 0);
|
||||||
|
--sidebar-border: oklch(0.8576 0 0);
|
||||||
|
--sidebar-ring: oklch(0.4891 0 0);
|
||||||
|
--font-sans: Montserrat, sans-serif;
|
||||||
|
--font-serif: Georgia, serif;
|
||||||
|
--font-mono: Fira Code, monospace;
|
||||||
|
--radius: 0.35rem;
|
||||||
|
--shadow-x: 0px;
|
||||||
|
--shadow-y: 2px;
|
||||||
|
--shadow-blur: 0px;
|
||||||
|
--shadow-spread: 0px;
|
||||||
|
--shadow-opacity: 0.15;
|
||||||
|
--shadow-color: hsl(0 0% 20% / 0.1);
|
||||||
|
--shadow-2xs: 0px 2px 0px 0px hsl(0 0% 20% / 0.07);
|
||||||
|
--shadow-xs: 0px 2px 0px 0px hsl(0 0% 20% / 0.07);
|
||||||
|
--shadow-sm: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 1px 2px -1px hsl(0 0% 20% / 0.15);
|
||||||
|
--shadow: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 1px 2px -1px hsl(0 0% 20% / 0.15);
|
||||||
|
--shadow-md: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 2px 4px -1px hsl(0 0% 20% / 0.15);
|
||||||
|
--shadow-lg: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 4px 6px -1px hsl(0 0% 20% / 0.15);
|
||||||
|
--shadow-xl: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 8px 10px -1px hsl(0 0% 20% / 0.15);
|
||||||
|
--shadow-2xl: 0px 2px 0px 0px hsl(0 0% 20% / 0.38);
|
||||||
|
--tracking-normal: 0em;
|
||||||
|
--spacing: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dark {
|
||||||
|
--background: oklch(0.2178 0 0);
|
||||||
|
--foreground: oklch(0.8853 0 0);
|
||||||
|
--card: oklch(0.2435 0 0);
|
||||||
|
--card-foreground: oklch(0.8853 0 0);
|
||||||
|
--popover: oklch(0.2435 0 0);
|
||||||
|
--popover-foreground: oklch(0.8853 0 0);
|
||||||
|
--primary: oklch(0.7058 0 0);
|
||||||
|
--primary-foreground: oklch(0.2178 0 0);
|
||||||
|
--secondary: oklch(0.3092 0 0);
|
||||||
|
--secondary-foreground: oklch(0.8853 0 0);
|
||||||
|
--muted: oklch(0.2850 0 0);
|
||||||
|
--muted-foreground: oklch(0.5999 0 0);
|
||||||
|
--accent: oklch(0.3715 0 0);
|
||||||
|
--accent-foreground: oklch(0.8853 0 0);
|
||||||
|
--destructive: oklch(0.6591 0.1530 22.1703);
|
||||||
|
--destructive-foreground: oklch(1.0000 0 0);
|
||||||
|
--border: oklch(0.3290 0 0);
|
||||||
|
--input: oklch(0.3092 0 0);
|
||||||
|
--ring: oklch(0.7058 0 0);
|
||||||
|
--chart-1: oklch(0.7058 0 0);
|
||||||
|
--chart-2: oklch(0.6714 0.0339 206.3482);
|
||||||
|
--chart-3: oklch(0.5452 0 0);
|
||||||
|
--chart-4: oklch(0.4604 0 0);
|
||||||
|
--chart-5: oklch(0.3715 0 0);
|
||||||
|
--sidebar: oklch(0.2393 0 0);
|
||||||
|
--sidebar-foreground: oklch(0.8853 0 0);
|
||||||
|
--sidebar-primary: oklch(0.7058 0 0);
|
||||||
|
--sidebar-primary-foreground: oklch(0.2178 0 0);
|
||||||
|
--sidebar-accent: oklch(0.3715 0 0);
|
||||||
|
--sidebar-accent-foreground: oklch(0.8853 0 0);
|
||||||
|
--sidebar-border: oklch(0.3290 0 0);
|
||||||
|
--sidebar-ring: oklch(0.7058 0 0);
|
||||||
|
--font-sans: Inter, sans-serif;
|
||||||
|
--font-serif: Georgia, serif;
|
||||||
|
--font-mono: Fira Code, monospace;
|
||||||
|
--radius: 0.35rem;
|
||||||
|
--shadow-x: 0px;
|
||||||
|
--shadow-y: 2px;
|
||||||
|
--shadow-blur: 0px;
|
||||||
|
--shadow-spread: 0px;
|
||||||
|
--shadow-opacity: 0.15;
|
||||||
|
--shadow-color: hsl(0 0% 20% / 0.1);
|
||||||
|
--shadow-2xs: 0px 2px 0px 0px hsl(0 0% 20% / 0.07);
|
||||||
|
--shadow-xs: 0px 2px 0px 0px hsl(0 0% 20% / 0.07);
|
||||||
|
--shadow-sm: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 1px 2px -1px hsl(0 0% 20% / 0.15);
|
||||||
|
--shadow: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 1px 2px -1px hsl(0 0% 20% / 0.15);
|
||||||
|
--shadow-md: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 2px 4px -1px hsl(0 0% 20% / 0.15);
|
||||||
|
--shadow-lg: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 4px 6px -1px hsl(0 0% 20% / 0.15);
|
||||||
|
--shadow-xl: 0px 2px 0px 0px hsl(0 0% 20% / 0.15), 0px 8px 10px -1px hsl(0 0% 20% / 0.15);
|
||||||
|
--shadow-2xl: 0px 2px 0px 0px hsl(0 0% 20% / 0.38);
|
||||||
|
}
|
||||||
|
|
||||||
|
@theme inline {
|
||||||
|
--color-background: var(--background);
|
||||||
|
--color-foreground: var(--foreground);
|
||||||
|
--color-card: var(--card);
|
||||||
|
--color-card-foreground: var(--card-foreground);
|
||||||
|
--color-popover: var(--popover);
|
||||||
|
--color-popover-foreground: var(--popover-foreground);
|
||||||
|
--color-primary: var(--primary);
|
||||||
|
--color-primary-foreground: var(--primary-foreground);
|
||||||
|
--color-secondary: var(--secondary);
|
||||||
|
--color-secondary-foreground: var(--secondary-foreground);
|
||||||
|
--color-muted: var(--muted);
|
||||||
|
--color-muted-foreground: var(--muted-foreground);
|
||||||
|
--color-accent: var(--accent);
|
||||||
|
--color-accent-foreground: var(--accent-foreground);
|
||||||
|
--color-destructive: var(--destructive);
|
||||||
|
--color-destructive-foreground: var(--destructive-foreground);
|
||||||
|
--color-border: var(--border);
|
||||||
|
--color-input: var(--input);
|
||||||
|
--color-ring: var(--ring);
|
||||||
|
--color-chart-1: var(--chart-1);
|
||||||
|
--color-chart-2: var(--chart-2);
|
||||||
|
--color-chart-3: var(--chart-3);
|
||||||
|
--color-chart-4: var(--chart-4);
|
||||||
|
--color-chart-5: var(--chart-5);
|
||||||
|
--color-sidebar: var(--sidebar);
|
||||||
|
--color-sidebar-foreground: var(--sidebar-foreground);
|
||||||
|
--color-sidebar-primary: var(--sidebar-primary);
|
||||||
|
--color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
|
||||||
|
--color-sidebar-accent: var(--sidebar-accent);
|
||||||
|
--color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
|
||||||
|
--color-sidebar-border: var(--sidebar-border);
|
||||||
|
--color-sidebar-ring: var(--sidebar-ring);
|
||||||
|
|
||||||
|
--font-sans: var(--font-sans);
|
||||||
|
--font-mono: var(--font-mono);
|
||||||
|
--font-serif: var(--font-serif);
|
||||||
|
|
||||||
|
--radius-sm: calc(var(--radius) - 4px);
|
||||||
|
--radius-md: calc(var(--radius) - 2px);
|
||||||
|
--radius-lg: var(--radius);
|
||||||
|
--radius-xl: calc(var(--radius) + 4px);
|
||||||
|
|
||||||
|
--shadow-2xs: var(--shadow-2xs);
|
||||||
|
--shadow-xs: var(--shadow-xs);
|
||||||
|
--shadow-sm: var(--shadow-sm);
|
||||||
|
--shadow: var(--shadow);
|
||||||
|
--shadow-md: var(--shadow-md);
|
||||||
|
--shadow-lg: var(--shadow-lg);
|
||||||
|
--shadow-xl: var(--shadow-xl);
|
||||||
|
--shadow-2xl: var(--shadow-2xl);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
223
bun.lock
Normal file
223
bun.lock
Normal file
@ -0,0 +1,223 @@
|
|||||||
|
{
|
||||||
|
"lockfileVersion": 1,
|
||||||
|
"configVersion": 1,
|
||||||
|
"workspaces": {
|
||||||
|
"": {
|
||||||
|
"name": "reddit-trend-analyzer",
|
||||||
|
"dependencies": {
|
||||||
|
"@opentui/core": "^0.1.74",
|
||||||
|
"@qdrant/js-client-rest": "^1.16.2",
|
||||||
|
"ollama": "^0.6.3",
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/bun": "latest",
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"typescript": "^5",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"packages": {
|
||||||
|
"@dimforge/rapier2d-simd-compat": ["@dimforge/rapier2d-simd-compat@0.17.3", "", {}, "sha512-bijvwWz6NHsNj5e5i1vtd3dU2pDhthSaTUZSh14DUGGKJfw8eMnlWZsxwHBxB/a3AXVNDjL9abuHw1k9FGR+jg=="],
|
||||||
|
|
||||||
|
"@jimp/core": ["@jimp/core@1.6.0", "", { "dependencies": { "@jimp/file-ops": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "await-to-js": "^3.0.0", "exif-parser": "^0.1.12", "file-type": "^16.0.0", "mime": "3" } }, "sha512-EQQlKU3s9QfdJqiSrZWNTxBs3rKXgO2W+GxNXDtwchF3a4IqxDheFX1ti+Env9hdJXDiYLp2jTRjlxhPthsk8w=="],
|
||||||
|
|
||||||
|
"@jimp/diff": ["@jimp/diff@1.6.0", "", { "dependencies": { "@jimp/plugin-resize": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "pixelmatch": "^5.3.0" } }, "sha512-+yUAQ5gvRC5D1WHYxjBHZI7JBRusGGSLf8AmPRPCenTzh4PA+wZ1xv2+cYqQwTfQHU5tXYOhA0xDytfHUf1Zyw=="],
|
||||||
|
|
||||||
|
"@jimp/file-ops": ["@jimp/file-ops@1.6.0", "", {}, "sha512-Dx/bVDmgnRe1AlniRpCKrGRm5YvGmUwbDzt+MAkgmLGf+jvBT75hmMEZ003n9HQI/aPnm/YKnXjg/hOpzNCpHQ=="],
|
||||||
|
|
||||||
|
"@jimp/js-bmp": ["@jimp/js-bmp@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "bmp-ts": "^1.0.9" } }, "sha512-FU6Q5PC/e3yzLyBDXupR3SnL3htU7S3KEs4e6rjDP6gNEOXRFsWs6YD3hXuXd50jd8ummy+q2WSwuGkr8wi+Gw=="],
|
||||||
|
|
||||||
|
"@jimp/js-gif": ["@jimp/js-gif@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "gifwrap": "^0.10.1", "omggif": "^1.0.10" } }, "sha512-N9CZPHOrJTsAUoWkWZstLPpwT5AwJ0wge+47+ix3++SdSL/H2QzyMqxbcDYNFe4MoI5MIhATfb0/dl/wmX221g=="],
|
||||||
|
|
||||||
|
"@jimp/js-jpeg": ["@jimp/js-jpeg@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "jpeg-js": "^0.4.4" } }, "sha512-6vgFDqeusblf5Pok6B2DUiMXplH8RhIKAryj1yn+007SIAQ0khM1Uptxmpku/0MfbClx2r7pnJv9gWpAEJdMVA=="],
|
||||||
|
|
||||||
|
"@jimp/js-png": ["@jimp/js-png@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "pngjs": "^7.0.0" } }, "sha512-AbQHScy3hDDgMRNfG0tPjL88AV6qKAILGReIa3ATpW5QFjBKpisvUaOqhzJ7Reic1oawx3Riyv152gaPfqsBVg=="],
|
||||||
|
|
||||||
|
"@jimp/js-tiff": ["@jimp/js-tiff@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "utif2": "^4.1.0" } }, "sha512-zhReR8/7KO+adijj3h0ZQUOiun3mXUv79zYEAKvE0O+rP7EhgtKvWJOZfRzdZSNv0Pu1rKtgM72qgtwe2tFvyw=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-blit": ["@jimp/plugin-blit@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "zod": "^3.23.8" } }, "sha512-M+uRWl1csi7qilnSK8uxK4RJMSuVeBiO1AY0+7APnfUbQNZm6hCe0CCFv1Iyw1D/Dhb8ph8fQgm5mwM0eSxgVA=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-blur": ["@jimp/plugin-blur@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/utils": "1.6.0" } }, "sha512-zrM7iic1OTwUCb0g/rN5y+UnmdEsT3IfuCXCJJNs8SZzP0MkZ1eTvuwK9ZidCuMo4+J3xkzCidRwYXB5CyGZTw=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-circle": ["@jimp/plugin-circle@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0", "zod": "^3.23.8" } }, "sha512-xt1Gp+LtdMKAXfDp3HNaG30SPZW6AQ7dtAtTnoRKorRi+5yCJjKqXRgkewS5bvj8DEh87Ko1ydJfzqS3P2tdWw=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-color": ["@jimp/plugin-color@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "tinycolor2": "^1.6.0", "zod": "^3.23.8" } }, "sha512-J5q8IVCpkBsxIXM+45XOXTrsyfblyMZg3a9eAo0P7VPH4+CrvyNQwaYatbAIamSIN1YzxmO3DkIZXzRjFSz1SA=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-contain": ["@jimp/plugin-contain@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/plugin-blit": "1.6.0", "@jimp/plugin-resize": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "zod": "^3.23.8" } }, "sha512-oN/n+Vdq/Qg9bB4yOBOxtY9IPAtEfES8J1n9Ddx+XhGBYT1/QTU/JYkGaAkIGoPnyYvmLEDqMz2SGihqlpqfzQ=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-cover": ["@jimp/plugin-cover@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/plugin-crop": "1.6.0", "@jimp/plugin-resize": "1.6.0", "@jimp/types": "1.6.0", "zod": "^3.23.8" } }, "sha512-Iow0h6yqSC269YUJ8HC3Q/MpCi2V55sMlbkkTTx4zPvd8mWZlC0ykrNDeAy9IJegrQ7v5E99rJwmQu25lygKLA=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-crop": ["@jimp/plugin-crop@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "zod": "^3.23.8" } }, "sha512-KqZkEhvs+21USdySCUDI+GFa393eDIzbi1smBqkUPTE+pRwSWMAf01D5OC3ZWB+xZsNla93BDS9iCkLHA8wang=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-displace": ["@jimp/plugin-displace@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "zod": "^3.23.8" } }, "sha512-4Y10X9qwr5F+Bo5ME356XSACEF55485j5nGdiyJ9hYzjQP9nGgxNJaZ4SAOqpd+k5sFaIeD7SQ0Occ26uIng5Q=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-dither": ["@jimp/plugin-dither@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0" } }, "sha512-600d1RxY0pKwgyU0tgMahLNKsqEcxGdbgXadCiVCoGd6V6glyCvkNrnnwC0n5aJ56Htkj88PToSdF88tNVZEEQ=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-fisheye": ["@jimp/plugin-fisheye@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "zod": "^3.23.8" } }, "sha512-E5QHKWSCBFtpgZarlmN3Q6+rTQxjirFqo44ohoTjzYVrDI6B6beXNnPIThJgPr0Y9GwfzgyarKvQuQuqCnnfbA=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-flip": ["@jimp/plugin-flip@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0", "zod": "^3.23.8" } }, "sha512-/+rJVDuBIVOgwoyVkBjUFHtP+wmW0r+r5OQ2GpatQofToPVbJw1DdYWXlwviSx7hvixTWLKVgRWQ5Dw862emDg=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-hash": ["@jimp/plugin-hash@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/js-bmp": "1.6.0", "@jimp/js-jpeg": "1.6.0", "@jimp/js-png": "1.6.0", "@jimp/js-tiff": "1.6.0", "@jimp/plugin-color": "1.6.0", "@jimp/plugin-resize": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "any-base": "^1.1.0" } }, "sha512-wWzl0kTpDJgYVbZdajTf+4NBSKvmI3bRI8q6EH9CVeIHps9VWVsUvEyb7rpbcwVLWYuzDtP2R0lTT6WeBNQH9Q=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-mask": ["@jimp/plugin-mask@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0", "zod": "^3.23.8" } }, "sha512-Cwy7ExSJMZszvkad8NV8o/Z92X2kFUFM8mcDAhNVxU0Q6tA0op2UKRJY51eoK8r6eds/qak3FQkXakvNabdLnA=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-print": ["@jimp/plugin-print@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/js-jpeg": "1.6.0", "@jimp/js-png": "1.6.0", "@jimp/plugin-blit": "1.6.0", "@jimp/types": "1.6.0", "parse-bmfont-ascii": "^1.0.6", "parse-bmfont-binary": "^1.0.6", "parse-bmfont-xml": "^1.1.6", "simple-xml-to-json": "^1.2.2", "zod": "^3.23.8" } }, "sha512-zarTIJi8fjoGMSI/M3Xh5yY9T65p03XJmPsuNet19K/Q7mwRU6EV2pfj+28++2PV2NJ+htDF5uecAlnGyxFN2A=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-quantize": ["@jimp/plugin-quantize@1.6.0", "", { "dependencies": { "image-q": "^4.0.0", "zod": "^3.23.8" } }, "sha512-EmzZ/s9StYQwbpG6rUGBCisc3f64JIhSH+ncTJd+iFGtGo0YvSeMdAd+zqgiHpfZoOL54dNavZNjF4otK+mvlg=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-resize": ["@jimp/plugin-resize@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/types": "1.6.0", "zod": "^3.23.8" } }, "sha512-uSUD1mqXN9i1SGSz5ov3keRZ7S9L32/mAQG08wUwZiEi5FpbV0K8A8l1zkazAIZi9IJzLlTauRNU41Mi8IF9fA=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-rotate": ["@jimp/plugin-rotate@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/plugin-crop": "1.6.0", "@jimp/plugin-resize": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "zod": "^3.23.8" } }, "sha512-JagdjBLnUZGSG4xjCLkIpQOZZ3Mjbg8aGCCi4G69qR+OjNpOeGI7N2EQlfK/WE8BEHOW5vdjSyglNqcYbQBWRw=="],
|
||||||
|
|
||||||
|
"@jimp/plugin-threshold": ["@jimp/plugin-threshold@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/plugin-color": "1.6.0", "@jimp/plugin-hash": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0", "zod": "^3.23.8" } }, "sha512-M59m5dzLoHOVWdM41O8z9SyySzcDn43xHseOH0HavjsfQsT56GGCC4QzU1banJidbUrePhzoEdS42uFE8Fei8w=="],
|
||||||
|
|
||||||
|
"@jimp/types": ["@jimp/types@1.6.0", "", { "dependencies": { "zod": "^3.23.8" } }, "sha512-7UfRsiKo5GZTAATxm2qQ7jqmUXP0DxTArztllTcYdyw6Xi5oT4RaoXynVtCD4UyLK5gJgkZJcwonoijrhYFKfg=="],
|
||||||
|
|
||||||
|
"@jimp/utils": ["@jimp/utils@1.6.0", "", { "dependencies": { "@jimp/types": "1.6.0", "tinycolor2": "^1.6.0" } }, "sha512-gqFTGEosKbOkYF/WFj26jMHOI5OH2jeP1MmC/zbK6BF6VJBf8rIC5898dPfSzZEbSA0wbbV5slbntWVc5PKLFA=="],
|
||||||
|
|
||||||
|
"@opentui/core": ["@opentui/core@0.1.74", "", { "dependencies": { "bun-ffi-structs": "0.1.2", "diff": "8.0.2", "jimp": "1.6.0", "yoga-layout": "3.2.1" }, "optionalDependencies": { "@dimforge/rapier2d-simd-compat": "^0.17.3", "@opentui/core-darwin-arm64": "0.1.74", "@opentui/core-darwin-x64": "0.1.74", "@opentui/core-linux-arm64": "0.1.74", "@opentui/core-linux-x64": "0.1.74", "@opentui/core-win32-arm64": "0.1.74", "@opentui/core-win32-x64": "0.1.74", "bun-webgpu": "0.1.4", "planck": "^1.4.2", "three": "0.177.0" }, "peerDependencies": { "web-tree-sitter": "0.25.10" } }, "sha512-g4W16ymv12JdgZ+9B4t7mpIICvzWy2+eHERfmDf80ALduOQCUedKQdULcBFhVCYUXIkDRtIy6CID5thMAah3FA=="],
|
||||||
|
|
||||||
|
"@opentui/core-darwin-arm64": ["@opentui/core-darwin-arm64@0.1.74", "", { "os": "darwin", "cpu": "arm64" }, "sha512-rfmlDLtm/u17CnuhJgCxPeYMvOST+A2MOdVOk46IurtHO849bdYqK6iudKNlFRs1FOrymgSKF9GlWBHAOKeRjg=="],
|
||||||
|
|
||||||
|
"@opentui/core-darwin-x64": ["@opentui/core-darwin-x64@0.1.74", "", { "os": "darwin", "cpu": "x64" }, "sha512-WAD8orsDV0ZdW/5GwjOOB4FY96772xbkz+rcV7WRzEFUVaqoBaC04IuqYzS9d5s+cjkbT5Cpj47hrVYkkVQKng=="],
|
||||||
|
|
||||||
|
"@opentui/core-linux-arm64": ["@opentui/core-linux-arm64@0.1.74", "", { "os": "linux", "cpu": "arm64" }, "sha512-lgmHzrzLy4e+rgBS+lhtsMLLgIMLbtLNMm6EzVPyYVDlLDGjM7+ulXMem7AtpaRrWrUUl4REiG9BoQUsCFDwYA=="],
|
||||||
|
|
||||||
|
"@opentui/core-linux-x64": ["@opentui/core-linux-x64@0.1.74", "", { "os": "linux", "cpu": "x64" }, "sha512-8Mn2WbdBQ29xCThuPZezjDhd1N3+fXwKkGvCBOdTI0le6h2A/vCNbfUVjwfr/EGZSRXxCG+Yapol34BAULGpOA=="],
|
||||||
|
|
||||||
|
"@opentui/core-win32-arm64": ["@opentui/core-win32-arm64@0.1.74", "", { "os": "win32", "cpu": "arm64" }, "sha512-dvYUXz03avnI6ZluyLp00HPmR0UT/IE/6QS97XBsgJlUTtpnbKkBtB5jD1NHwWkElaRj1Qv2QP36ngFoJqbl9g=="],
|
||||||
|
|
||||||
|
"@opentui/core-win32-x64": ["@opentui/core-win32-x64@0.1.74", "", { "os": "win32", "cpu": "x64" }, "sha512-3wfWXaAKOIlDQz6ZZIESf2M+YGZ7uFHijjTEM8w/STRlLw8Y6+QyGYi1myHSM4d6RSO+/s2EMDxvjDf899W9vQ=="],
|
||||||
|
|
||||||
|
"@qdrant/js-client-rest": ["@qdrant/js-client-rest@1.16.2", "", { "dependencies": { "@qdrant/openapi-typescript-fetch": "1.2.6", "undici": "^6.0.0" }, "peerDependencies": { "typescript": ">=4.7" } }, "sha512-Zm4wEZURrZ24a+Hmm4l1QQYjiz975Ep3vF0yzWR7ICGcxittNz47YK2iBOk8kb8qseCu8pg7WmO1HOIsO8alvw=="],
|
||||||
|
|
||||||
|
"@qdrant/openapi-typescript-fetch": ["@qdrant/openapi-typescript-fetch@1.2.6", "", {}, "sha512-oQG/FejNpItrxRHoyctYvT3rwGZOnK4jr3JdppO/c78ktDvkWiPXPHNsrDf33K9sZdRb6PR7gi4noIapu5q4HA=="],
|
||||||
|
|
||||||
|
"@tokenizer/token": ["@tokenizer/token@0.3.0", "", {}, "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A=="],
|
||||||
|
|
||||||
|
"@types/bun": ["@types/bun@1.3.6", "", { "dependencies": { "bun-types": "1.3.6" } }, "sha512-uWCv6FO/8LcpREhenN1d1b6fcspAB+cefwD7uti8C8VffIv0Um08TKMn98FynpTiU38+y2dUO55T11NgDt8VAA=="],
|
||||||
|
|
||||||
|
"@types/node": ["@types/node@25.0.9", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-/rpCXHlCWeqClNBwUhDcusJxXYDjZTyE8v5oTO7WbL8eij2nKhUeU89/6xgjU7N4/Vh3He0BtyhJdQbDyhiXAw=="],
|
||||||
|
|
||||||
|
"@webgpu/types": ["@webgpu/types@0.1.69", "", {}, "sha512-RPmm6kgRbI8e98zSD3RVACvnuktIja5+yLgDAkTmxLr90BEwdTXRQWNLF3ETTTyH/8mKhznZuN5AveXYFEsMGQ=="],
|
||||||
|
|
||||||
|
"abort-controller": ["abort-controller@3.0.0", "", { "dependencies": { "event-target-shim": "^5.0.0" } }, "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg=="],
|
||||||
|
|
||||||
|
"any-base": ["any-base@1.1.0", "", {}, "sha512-uMgjozySS8adZZYePpaWs8cxB9/kdzmpX6SgJZ+wbz1K5eYk5QMYDVJaZKhxyIHUdnnJkfR7SVgStgH7LkGUyg=="],
|
||||||
|
|
||||||
|
"await-to-js": ["await-to-js@3.0.0", "", {}, "sha512-zJAaP9zxTcvTHRlejau3ZOY4V7SRpiByf3/dxx2uyKxxor19tpmpV2QRsTKikckwhaPmr2dVpxxMr7jOCYVp5g=="],
|
||||||
|
|
||||||
|
"base64-js": ["base64-js@1.5.1", "", {}, "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA=="],
|
||||||
|
|
||||||
|
"bmp-ts": ["bmp-ts@1.0.9", "", {}, "sha512-cTEHk2jLrPyi+12M3dhpEbnnPOsaZuq7C45ylbbQIiWgDFZq4UVYPEY5mlqjvsj/6gJv9qX5sa+ebDzLXT28Vw=="],
|
||||||
|
|
||||||
|
"buffer": ["buffer@6.0.3", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.2.1" } }, "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA=="],
|
||||||
|
|
||||||
|
"bun-ffi-structs": ["bun-ffi-structs@0.1.2", "", { "peerDependencies": { "typescript": "^5" } }, "sha512-Lh1oQAYHDcnesJauieA4UNkWGXY9hYck7OA5IaRwE3Bp6K2F2pJSNYqq+hIy7P3uOvo3km3oxS8304g5gDMl/w=="],
|
||||||
|
|
||||||
|
"bun-types": ["bun-types@1.3.6", "", { "dependencies": { "@types/node": "*" } }, "sha512-OlFwHcnNV99r//9v5IIOgQ9Uk37gZqrNMCcqEaExdkVq3Avwqok1bJFmvGMCkCE0FqzdY8VMOZpfpR3lwI+CsQ=="],
|
||||||
|
|
||||||
|
"bun-webgpu": ["bun-webgpu@0.1.4", "", { "dependencies": { "@webgpu/types": "^0.1.60" }, "optionalDependencies": { "bun-webgpu-darwin-arm64": "^0.1.4", "bun-webgpu-darwin-x64": "^0.1.4", "bun-webgpu-linux-x64": "^0.1.4", "bun-webgpu-win32-x64": "^0.1.4" } }, "sha512-Kw+HoXl1PMWJTh9wvh63SSRofTA8vYBFCw0XEP1V1fFdQEDhI8Sgf73sdndE/oDpN/7CMx0Yv/q8FCvO39ROMQ=="],
|
||||||
|
|
||||||
|
"bun-webgpu-darwin-arm64": ["bun-webgpu-darwin-arm64@0.1.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-eDgLN9teKTfmvrCqgwwmWNsNszxYs7IZdCqk0S1DCarvMhr4wcajoSBlA/nQA0/owwLduPTS8xxCnQp4/N/gDg=="],
|
||||||
|
|
||||||
|
"bun-webgpu-darwin-x64": ["bun-webgpu-darwin-x64@0.1.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-X+PjwJUWenUmdQBP8EtdItMyieQ6Nlpn+BH518oaouDiSnWj5+b0Y7DNDZJq7Ezom4EaxmqL/uGYZK3aCQ7CXg=="],
|
||||||
|
|
||||||
|
"bun-webgpu-linux-x64": ["bun-webgpu-linux-x64@0.1.4", "", { "os": "linux", "cpu": "x64" }, "sha512-zMLs2YIGB+/jxrYFXaFhVKX/GBt05UTF45lc9srcHc9JXGjEj+12CIo1CHLTAWatXMTqt0Jsu6ukWEoWVT/ayA=="],
|
||||||
|
|
||||||
|
"bun-webgpu-win32-x64": ["bun-webgpu-win32-x64@0.1.4", "", { "os": "win32", "cpu": "x64" }, "sha512-Z5yAK28xrcm8Wb5k7TZ8FJKpOI/r+aVCRdlHYAqI2SDJFN3nD4mJs900X6kNVmG/xFzb5yOuKVYWGg+6ZXWbyA=="],
|
||||||
|
|
||||||
|
"diff": ["diff@8.0.2", "", {}, "sha512-sSuxWU5j5SR9QQji/o2qMvqRNYRDOcBTgsJ/DeCf4iSN4gW+gNMXM7wFIP+fdXZxoNiAnHUTGjCr+TSWXdRDKg=="],
|
||||||
|
|
||||||
|
"event-target-shim": ["event-target-shim@5.0.1", "", {}, "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ=="],
|
||||||
|
|
||||||
|
"events": ["events@3.3.0", "", {}, "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q=="],
|
||||||
|
|
||||||
|
"exif-parser": ["exif-parser@0.1.12", "", {}, "sha512-c2bQfLNbMzLPmzQuOr8fy0csy84WmwnER81W88DzTp9CYNPJ6yzOj2EZAh9pywYpqHnshVLHQJ8WzldAyfY+Iw=="],
|
||||||
|
|
||||||
|
"file-type": ["file-type@16.5.4", "", { "dependencies": { "readable-web-to-node-stream": "^3.0.0", "strtok3": "^6.2.4", "token-types": "^4.1.1" } }, "sha512-/yFHK0aGjFEgDJjEKP0pWCplsPFPhwyfwevf/pVxiN0tmE4L9LmwWxWukdJSHdoCli4VgQLehjJtwQBnqmsKcw=="],
|
||||||
|
|
||||||
|
"gifwrap": ["gifwrap@0.10.1", "", { "dependencies": { "image-q": "^4.0.0", "omggif": "^1.0.10" } }, "sha512-2760b1vpJHNmLzZ/ubTtNnEx5WApN/PYWJvXvgS+tL1egTTthayFYIQQNi136FLEDcN/IyEY2EcGpIITD6eYUw=="],
|
||||||
|
|
||||||
|
"ieee754": ["ieee754@1.2.1", "", {}, "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA=="],
|
||||||
|
|
||||||
|
"image-q": ["image-q@4.0.0", "", { "dependencies": { "@types/node": "16.9.1" } }, "sha512-PfJGVgIfKQJuq3s0tTDOKtztksibuUEbJQIYT3by6wctQo+Rdlh7ef4evJ5NCdxY4CfMbvFkocEwbl4BF8RlJw=="],
|
||||||
|
|
||||||
|
"jimp": ["jimp@1.6.0", "", { "dependencies": { "@jimp/core": "1.6.0", "@jimp/diff": "1.6.0", "@jimp/js-bmp": "1.6.0", "@jimp/js-gif": "1.6.0", "@jimp/js-jpeg": "1.6.0", "@jimp/js-png": "1.6.0", "@jimp/js-tiff": "1.6.0", "@jimp/plugin-blit": "1.6.0", "@jimp/plugin-blur": "1.6.0", "@jimp/plugin-circle": "1.6.0", "@jimp/plugin-color": "1.6.0", "@jimp/plugin-contain": "1.6.0", "@jimp/plugin-cover": "1.6.0", "@jimp/plugin-crop": "1.6.0", "@jimp/plugin-displace": "1.6.0", "@jimp/plugin-dither": "1.6.0", "@jimp/plugin-fisheye": "1.6.0", "@jimp/plugin-flip": "1.6.0", "@jimp/plugin-hash": "1.6.0", "@jimp/plugin-mask": "1.6.0", "@jimp/plugin-print": "1.6.0", "@jimp/plugin-quantize": "1.6.0", "@jimp/plugin-resize": "1.6.0", "@jimp/plugin-rotate": "1.6.0", "@jimp/plugin-threshold": "1.6.0", "@jimp/types": "1.6.0", "@jimp/utils": "1.6.0" } }, "sha512-YcwCHw1kiqEeI5xRpDlPPBGL2EOpBKLwO4yIBJcXWHPj5PnA5urGq0jbyhM5KoNpypQ6VboSoxc9D8HyfvngSg=="],
|
||||||
|
|
||||||
|
"jpeg-js": ["jpeg-js@0.4.4", "", {}, "sha512-WZzeDOEtTOBK4Mdsar0IqEU5sMr3vSV2RqkAIzUEV2BHnUfKGyswWFPFwK5EeDo93K3FohSHbLAjj0s1Wzd+dg=="],
|
||||||
|
|
||||||
|
"mime": ["mime@3.0.0", "", { "bin": { "mime": "cli.js" } }, "sha512-jSCU7/VB1loIWBZe14aEYHU/+1UMEHoaO7qxCOVJOw9GgH72VAWppxNcjU+x9a2k3GSIBXNKxXQFqRvvZ7vr3A=="],
|
||||||
|
|
||||||
|
"ollama": ["ollama@0.6.3", "", { "dependencies": { "whatwg-fetch": "^3.6.20" } }, "sha512-KEWEhIqE5wtfzEIZbDCLH51VFZ6Z3ZSa6sIOg/E/tBV8S51flyqBOXi+bRxlOYKDf8i327zG9eSTb8IJxvm3Zg=="],
|
||||||
|
|
||||||
|
"omggif": ["omggif@1.0.10", "", {}, "sha512-LMJTtvgc/nugXj0Vcrrs68Mn2D1r0zf630VNtqtpI1FEO7e+O9FP4gqs9AcnBaSEeoHIPm28u6qgPR0oyEpGSw=="],
|
||||||
|
|
||||||
|
"pako": ["pako@1.0.11", "", {}, "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw=="],
|
||||||
|
|
||||||
|
"parse-bmfont-ascii": ["parse-bmfont-ascii@1.0.6", "", {}, "sha512-U4RrVsUFCleIOBsIGYOMKjn9PavsGOXxbvYGtMOEfnId0SVNsgehXh1DxUdVPLoxd5mvcEtvmKs2Mmf0Mpa1ZA=="],
|
||||||
|
|
||||||
|
"parse-bmfont-binary": ["parse-bmfont-binary@1.0.6", "", {}, "sha512-GxmsRea0wdGdYthjuUeWTMWPqm2+FAd4GI8vCvhgJsFnoGhTrLhXDDupwTo7rXVAgaLIGoVHDZS9p/5XbSqeWA=="],
|
||||||
|
|
||||||
|
"parse-bmfont-xml": ["parse-bmfont-xml@1.1.6", "", { "dependencies": { "xml-parse-from-string": "^1.0.0", "xml2js": "^0.5.0" } }, "sha512-0cEliVMZEhrFDwMh4SxIyVJpqYoOWDJ9P895tFuS+XuNzI5UBmBk5U5O4KuJdTnZpSBI4LFA2+ZiJaiwfSwlMA=="],
|
||||||
|
|
||||||
|
"peek-readable": ["peek-readable@4.1.0", "", {}, "sha512-ZI3LnwUv5nOGbQzD9c2iDG6toheuXSZP5esSHBjopsXH4dg19soufvpUGA3uohi5anFtGb2lhAVdHzH6R/Evvg=="],
|
||||||
|
|
||||||
|
"pixelmatch": ["pixelmatch@5.3.0", "", { "dependencies": { "pngjs": "^6.0.0" }, "bin": { "pixelmatch": "bin/pixelmatch" } }, "sha512-o8mkY4E/+LNUf6LzX96ht6k6CEDi65k9G2rjMtBe9Oo+VPKSvl+0GKHuH/AlG+GA5LPG/i5hrekkxUc3s2HU+Q=="],
|
||||||
|
|
||||||
|
"planck": ["planck@1.4.2", "", { "peerDependencies": { "stage-js": "^1.0.0-alpha.12" } }, "sha512-mNbhnV3g8X2rwGxzcesjmN8BDA6qfXgQxXVMkWau9MCRlQY0RLNEkyHlVp6yFy/X6qrzAXyNONCnZ1cGDLrNew=="],
|
||||||
|
|
||||||
|
"pngjs": ["pngjs@7.0.0", "", {}, "sha512-LKWqWJRhstyYo9pGvgor/ivk2w94eSjE3RGVuzLGlr3NmD8bf7RcYGze1mNdEHRP6TRP6rMuDHk5t44hnTRyow=="],
|
||||||
|
|
||||||
|
"process": ["process@0.11.10", "", {}, "sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A=="],
|
||||||
|
|
||||||
|
"readable-stream": ["readable-stream@4.7.0", "", { "dependencies": { "abort-controller": "^3.0.0", "buffer": "^6.0.3", "events": "^3.3.0", "process": "^0.11.10", "string_decoder": "^1.3.0" } }, "sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg=="],
|
||||||
|
|
||||||
|
"readable-web-to-node-stream": ["readable-web-to-node-stream@3.0.4", "", { "dependencies": { "readable-stream": "^4.7.0" } }, "sha512-9nX56alTf5bwXQ3ZDipHJhusu9NTQJ/CVPtb/XHAJCXihZeitfJvIRS4GqQ/mfIoOE3IelHMrpayVrosdHBuLw=="],
|
||||||
|
|
||||||
|
"safe-buffer": ["safe-buffer@5.2.1", "", {}, "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="],
|
||||||
|
|
||||||
|
"sax": ["sax@1.4.4", "", {}, "sha512-1n3r/tGXO6b6VXMdFT54SHzT9ytu9yr7TaELowdYpMqY/Ao7EnlQGmAQ1+RatX7Tkkdm6hONI2owqNx2aZj5Sw=="],
|
||||||
|
|
||||||
|
"simple-xml-to-json": ["simple-xml-to-json@1.2.3", "", {}, "sha512-kWJDCr9EWtZ+/EYYM5MareWj2cRnZGF93YDNpH4jQiHB+hBIZnfPFSQiVMzZOdk+zXWqTZ/9fTeQNu2DqeiudA=="],
|
||||||
|
|
||||||
|
"stage-js": ["stage-js@1.0.0-alpha.17", "", {}, "sha512-AzlMO+t51v6cFvKZ+Oe9DJnL1OXEH5s9bEy6di5aOrUpcP7PCzI/wIeXF0u3zg0L89gwnceoKxrLId0ZpYnNXw=="],
|
||||||
|
|
||||||
|
"string_decoder": ["string_decoder@1.3.0", "", { "dependencies": { "safe-buffer": "~5.2.0" } }, "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA=="],
|
||||||
|
|
||||||
|
"strtok3": ["strtok3@6.3.0", "", { "dependencies": { "@tokenizer/token": "^0.3.0", "peek-readable": "^4.1.0" } }, "sha512-fZtbhtvI9I48xDSywd/somNqgUHl2L2cstmXCCif0itOf96jeW18MBSyrLuNicYQVkvpOxkZtkzujiTJ9LW5Jw=="],
|
||||||
|
|
||||||
|
"three": ["three@0.177.0", "", {}, "sha512-EiXv5/qWAaGI+Vz2A+JfavwYCMdGjxVsrn3oBwllUoqYeaBO75J63ZfyaQKoiLrqNHoTlUc6PFgMXnS0kI45zg=="],
|
||||||
|
|
||||||
|
"tinycolor2": ["tinycolor2@1.6.0", "", {}, "sha512-XPaBkWQJdsf3pLKJV9p4qN/S+fm2Oj8AIPo1BTUhg5oxkvm9+SVEGFdhyOz7tTdUTfvxMiAs4sp6/eZO2Ew+pw=="],
|
||||||
|
|
||||||
|
"token-types": ["token-types@4.2.1", "", { "dependencies": { "@tokenizer/token": "^0.3.0", "ieee754": "^1.2.1" } }, "sha512-6udB24Q737UD/SDsKAHI9FCRP7Bqc9D/MQUV02ORQg5iskjtLJlZJNdN4kKtcdtwCeWIwIHDGaUsTsCCAa8sFQ=="],
|
||||||
|
|
||||||
|
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
|
||||||
|
|
||||||
|
"undici": ["undici@6.23.0", "", {}, "sha512-VfQPToRA5FZs/qJxLIinmU59u0r7LXqoJkCzinq3ckNJp3vKEh7jTWN589YQ5+aoAC/TGRLyJLCPKcLQbM8r9g=="],
|
||||||
|
|
||||||
|
"undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="],
|
||||||
|
|
||||||
|
"utif2": ["utif2@4.1.0", "", { "dependencies": { "pako": "^1.0.11" } }, "sha512-+oknB9FHrJ7oW7A2WZYajOcv4FcDR4CfoGB0dPNfxbi4GO05RRnFmt5oa23+9w32EanrYcSJWspUiJkLMs+37w=="],
|
||||||
|
|
||||||
|
"web-tree-sitter": ["web-tree-sitter@0.25.10", "", { "peerDependencies": { "@types/emscripten": "^1.40.0" }, "optionalPeers": ["@types/emscripten"] }, "sha512-Y09sF44/13XvgVKgO2cNDw5rGk6s26MgoZPXLESvMXeefBf7i6/73eFurre0IsTW6E14Y0ArIzhUMmjoc7xyzA=="],
|
||||||
|
|
||||||
|
"whatwg-fetch": ["whatwg-fetch@3.6.20", "", {}, "sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg=="],
|
||||||
|
|
||||||
|
"xml-parse-from-string": ["xml-parse-from-string@1.0.1", "", {}, "sha512-ErcKwJTF54uRzzNMXq2X5sMIy88zJvfN2DmdoQvy7PAFJ+tPRU6ydWuOKNMyfmOjdyBQTFREi60s0Y0SyI0G0g=="],
|
||||||
|
|
||||||
|
"xml2js": ["xml2js@0.5.0", "", { "dependencies": { "sax": ">=0.6.0", "xmlbuilder": "~11.0.0" } }, "sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA=="],
|
||||||
|
|
||||||
|
"xmlbuilder": ["xmlbuilder@11.0.1", "", {}, "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA=="],
|
||||||
|
|
||||||
|
"yoga-layout": ["yoga-layout@3.2.1", "", {}, "sha512-0LPOt3AxKqMdFBZA3HBAt/t/8vIKq7VaQYbuA8WxCgung+p9TVyKRYdpvCb80HcdTN2NkbIKbhNwKUfm3tQywQ=="],
|
||||||
|
|
||||||
|
"zod": ["zod@3.25.76", "", {}, "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ=="],
|
||||||
|
|
||||||
|
"image-q/@types/node": ["@types/node@16.9.1", "", {}, "sha512-QpLcX9ZSsq3YYUUnD3nFDY8H7wctAhQj/TFKL8Ya8v5fMm3CFXxo8zStsLAl780ltoYoo1WvKUVGBQK+1ifr7g=="],
|
||||||
|
|
||||||
|
"pixelmatch/pngjs": ["pngjs@6.0.0", "", {}, "sha512-TRzzuFRRmEoSW/p1KVAmiOgPco2Irlah+bGFCeNfJXxxYGwSw7YwAOAcd7X28K/m5bjBWKsC29KyoMfHbypayg=="],
|
||||||
|
}
|
||||||
|
}
|
||||||
21
package.json
Normal file
21
package.json
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
{
|
||||||
|
"name": "reddit-trend-analyzer",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"module": "src/index.ts",
|
||||||
|
"type": "module",
|
||||||
|
"private": true,
|
||||||
|
"scripts": {
|
||||||
|
"start": "bun run src/cli.ts",
|
||||||
|
"tui": "bun run src/index.ts",
|
||||||
|
"dev": "bun --watch run src/cli.ts"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/bun": "latest",
|
||||||
|
"typescript": "^5.0.0"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@opentui/core": "^0.1.74",
|
||||||
|
"@qdrant/js-client-rest": "^1.16.2",
|
||||||
|
"ollama": "^0.6.3"
|
||||||
|
}
|
||||||
|
}
|
||||||
238
src/cli.ts
Normal file
238
src/cli.ts
Normal file
@ -0,0 +1,238 @@
|
|||||||
|
import * as readline from 'readline'
|
||||||
|
import { RedditScraper } from './scraper/reddit'
|
||||||
|
import { CommentFetcher } from './scraper/comments'
|
||||||
|
import { EmbeddingPipeline } from './embeddings/ollama'
|
||||||
|
import { QdrantStorage } from './storage/qdrant'
|
||||||
|
import type { RedditComment } from './scraper/types'
|
||||||
|
import type { SearchResult } from './storage/types'
|
||||||
|
|
||||||
|
// Single shared line reader bound to the terminal: prompt() asks its questions
// through it, and the quit command closes it before exiting.
const rl = readline.createInterface({ input: process.stdin, output: process.stdout })
|
||||||
|
|
||||||
|
/**
 * Asks a question on the shared readline interface and waits for the answer.
 *
 * @param question Text handed to `rl.question` as the prompt.
 * @returns A promise that resolves with the line passed to the `rl.question` callback.
 */
function prompt(question: string): Promise<string> {
  return new Promise<string>((resolve) => {
    rl.question(question, (answer) => {
      resolve(answer)
    })
  })
}
|
||||||
|
|
||||||
|
/**
 * Renders a text progress bar such as `[███░░░] 50%`.
 *
 * The completed fraction is clamped to the range 0..1, so a `current` that
 * overshoots `total` (or is negative) yields a full (or empty) bar instead of
 * making `String.prototype.repeat` throw a RangeError on a negative count.
 *
 * @param current Number of items processed so far.
 * @param total Number of items expected; values <= 0 render an empty bar at 0%.
 * @param width Number of cells between the brackets (default 30); negative
 *   widths are treated as 0.
 * @returns The bar followed by the rounded percentage.
 */
function progressBar(current: number, total: number, width: number = 30): string {
  const raw = total > 0 ? current / total : 0
  // NaN (e.g. an undefined counter) is shown as "no progress" rather than "NaN%".
  const ratio = Number.isNaN(raw) ? 0 : Math.min(1, Math.max(0, raw))
  const cells = Math.max(0, width)
  const filled = Math.round(ratio * cells)
  const empty = cells - filled
  return `[${'█'.repeat(filled)}${'░'.repeat(empty)}] ${Math.round(ratio * 100)}%`
}
|
||||||
|
|
||||||
|
async function main() {
|
||||||
|
console.log('\n=== reddit trend analyzer ===\n')
|
||||||
|
|
||||||
|
const scraper = new RedditScraper(5000)
|
||||||
|
const commentFetcher = new CommentFetcher(5000)
|
||||||
|
const embeddings = new EmbeddingPipeline()
|
||||||
|
const storage = new QdrantStorage()
|
||||||
|
|
||||||
|
// check connections
|
||||||
|
console.log('checking connections...')
|
||||||
|
|
||||||
|
const ollamaOk = await embeddings.checkConnection()
|
||||||
|
if (!ollamaOk) {
|
||||||
|
console.error('error: cannot connect to ollama')
|
||||||
|
console.error('run: ollama pull nomic-embed-text')
|
||||||
|
process.exit(1)
|
||||||
|
}
|
||||||
|
console.log(' ollama: ok')
|
||||||
|
|
||||||
|
const qdrantOk = await storage.checkConnection()
|
||||||
|
if (!qdrantOk) {
|
||||||
|
console.error('error: cannot connect to qdrant')
|
||||||
|
process.exit(1)
|
||||||
|
}
|
||||||
|
console.log(' qdrant: ok')
|
||||||
|
|
||||||
|
await storage.ensureCollection()
|
||||||
|
const stats = await storage.getStats()
|
||||||
|
console.log(`\ncurrent stats: ${stats.posts} posts, ${stats.comments} comments, ${stats.subreddits.length} subreddits\n`)
|
||||||
|
|
||||||
|
let lastResults: SearchResult[] = []
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
console.log('\ncommands:')
|
||||||
|
console.log(' scrape <url> [pages] - scrape subreddit (e.g. scrape https://reddit.com/r/vfx/best 3)')
|
||||||
|
console.log(' search <query> - semantic search')
|
||||||
|
console.log(' stats - show collection stats')
|
||||||
|
console.log(' export json|csv - export last search results')
|
||||||
|
console.log(' quit - exit\n')
|
||||||
|
|
||||||
|
const input = await prompt('> ')
|
||||||
|
const [cmd, ...args] = input.trim().split(' ')
|
||||||
|
|
||||||
|
if (cmd === 'quit' || cmd === 'q' || cmd === 'exit') {
|
||||||
|
console.log('bye!')
|
||||||
|
rl.close()
|
||||||
|
process.exit(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cmd === 'stats') {
|
||||||
|
const s = await storage.getStats()
|
||||||
|
console.log(`\nposts: ${s.posts.toLocaleString()}`)
|
||||||
|
console.log(`comments: ${s.comments.toLocaleString()}`)
|
||||||
|
console.log(`subreddits: ${s.subreddits.join(', ') || 'none'}`)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cmd === 'scrape') {
|
||||||
|
const url = args[0]
|
||||||
|
const pages = parseInt(args[1] || '3', 10)
|
||||||
|
|
||||||
|
if (!url) {
|
||||||
|
console.log('usage: scrape <url> [pages]')
|
||||||
|
console.log('example: scrape https://reddit.com/r/vfx/best 5')
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`\nscraping ${url} (${pages} pages)...\n`)
|
||||||
|
|
||||||
|
try {
|
||||||
|
// fetch posts
|
||||||
|
scraper.setProgressCallback((p) => {
|
||||||
|
process.stdout.write(`\rfetching posts: ${progressBar(p.current, p.total)} ${p.current}/${p.total} `)
|
||||||
|
})
|
||||||
|
|
||||||
|
const posts = await scraper.fetchPosts({
|
||||||
|
url,
|
||||||
|
pages,
|
||||||
|
postsPerPage: 100,
|
||||||
|
fetchComments: true,
|
||||||
|
delayMs: 3000,
|
||||||
|
})
|
||||||
|
console.log(`\nfetched ${posts.length} posts`)
|
||||||
|
|
||||||
|
// fetch comments
|
||||||
|
commentFetcher.setProgressCallback((p) => {
|
||||||
|
process.stdout.write(`\rfetching comments: ${progressBar(p.current, p.total)} ${p.current}/${p.total} `)
|
||||||
|
})
|
||||||
|
|
||||||
|
const commentsByPost = await commentFetcher.fetchAllComments(posts)
|
||||||
|
const totalComments = Array.from(commentsByPost.values()).reduce((acc, c) => acc + c.length, 0)
|
||||||
|
console.log(`\nfetched ${totalComments} comments`)
|
||||||
|
|
||||||
|
// embed posts
|
||||||
|
embeddings.setProgressCallback((p) => {
|
||||||
|
process.stdout.write(`\rembedding posts: ${progressBar(p.current, p.total)} ${p.current}/${p.total} `)
|
||||||
|
})
|
||||||
|
|
||||||
|
const postPoints = await embeddings.embedPosts(posts, commentsByPost)
|
||||||
|
await storage.upsertPoints(postPoints)
|
||||||
|
console.log(`\nembedded ${postPoints.length} posts`)
|
||||||
|
|
||||||
|
// embed comments
|
||||||
|
const allComments: RedditComment[] = []
|
||||||
|
for (const comments of commentsByPost.values()) {
|
||||||
|
allComments.push(...comments)
|
||||||
|
}
|
||||||
|
|
||||||
|
embeddings.setProgressCallback((p) => {
|
||||||
|
process.stdout.write(`\rembedding comments: ${progressBar(p.current, p.total)} ${p.current}/${p.total} `)
|
||||||
|
})
|
||||||
|
|
||||||
|
const commentPoints = await embeddings.embedComments(allComments)
|
||||||
|
await storage.upsertPoints(commentPoints)
|
||||||
|
console.log(`\nembedded ${commentPoints.length} comments`)
|
||||||
|
|
||||||
|
console.log('\ndone!')
|
||||||
|
|
||||||
|
} catch (err) {
|
||||||
|
console.error('\nerror:', err instanceof Error ? err.message : err)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cmd === 'search') {
|
||||||
|
const query = args.join(' ')
|
||||||
|
if (!query) {
|
||||||
|
console.log('usage: search <query>')
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
console.log(`\nsearching for "${query}"...\n`)
|
||||||
|
const vector = await embeddings.embed(query)
|
||||||
|
const results = await storage.search(vector, 10)
|
||||||
|
lastResults = results
|
||||||
|
|
||||||
|
if (results.length === 0) {
|
||||||
|
console.log('no results found')
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const r of results) {
|
||||||
|
const preview = (r.payload.title || r.payload.body || '').slice(0, 80).replace(/\n/g, ' ')
|
||||||
|
const type = r.payload.type === 'post' ? '[post]' : '[comment]'
|
||||||
|
console.log(`${type} (${r.score.toFixed(3)}) ${preview}...`)
|
||||||
|
console.log(` -> ${r.payload.permalink}\n`)
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
console.error('error:', err instanceof Error ? err.message : err)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cmd === 'export') {
|
||||||
|
const format = args[0]
|
||||||
|
if (!format || !['json', 'csv'].includes(format)) {
|
||||||
|
console.log('usage: export json|csv')
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lastResults.length === 0) {
|
||||||
|
console.log('no results to export (run a search first)')
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
const filename = `reddit-trends.${format}`
|
||||||
|
|
||||||
|
if (format === 'json') {
|
||||||
|
const data = lastResults.map(r => ({
|
||||||
|
id: r.payload.id,
|
||||||
|
type: r.payload.type,
|
||||||
|
subreddit: r.payload.subreddit,
|
||||||
|
title: r.payload.title,
|
||||||
|
body: r.payload.body,
|
||||||
|
author: r.payload.author,
|
||||||
|
score: r.payload.score,
|
||||||
|
created: new Date(r.payload.created * 1000).toISOString(),
|
||||||
|
permalink: r.payload.permalink,
|
||||||
|
similarity: r.score,
|
||||||
|
}))
|
||||||
|
await Bun.write(filename, JSON.stringify(data, null, 2))
|
||||||
|
} else {
|
||||||
|
const headers = ['id', 'type', 'subreddit', 'title', 'body', 'author', 'score', 'created', 'permalink', 'similarity']
|
||||||
|
const escape = (val: string | number | undefined): string => {
|
||||||
|
if (val === undefined) return ''
|
||||||
|
const str = String(val)
|
||||||
|
if (str.includes(',') || str.includes('"') || str.includes('\n')) {
|
||||||
|
return `"${str.replace(/"/g, '""')}"`
|
||||||
|
}
|
||||||
|
return str
|
||||||
|
}
|
||||||
|
const rows = lastResults.map(r => [
|
||||||
|
r.payload.id, r.payload.type, r.payload.subreddit, r.payload.title || '',
|
||||||
|
r.payload.body, r.payload.author, r.payload.score,
|
||||||
|
new Date(r.payload.created * 1000).toISOString(), r.payload.permalink, r.score.toFixed(4),
|
||||||
|
].map(escape).join(','))
|
||||||
|
await Bun.write(filename, [headers.join(','), ...rows].join('\n'))
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`exported to ${filename}`)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cmd) {
|
||||||
|
console.log(`unknown command: ${cmd}`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Last-resort handler: report any rejection that escapes main() and exit non-zero.
void main().then(undefined, (reason: unknown) => {
  console.error('fatal error:', reason)
  process.exit(1)
})
|
||||||
175
src/embeddings/ollama.ts
Normal file
175
src/embeddings/ollama.ts
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
import { Ollama } from 'ollama'
|
||||||
|
import { prepareForEmbedding, cleanText } from '../utils/text'
|
||||||
|
import type { RedditPost, RedditComment } from '../scraper/types'
|
||||||
|
import type { PointPayload } from '../storage/types'
|
||||||
|
|
||||||
|
// Name of the Ollama model used for every embedding request in this module.
const MODEL = 'nomic-embed-text'
|
||||||
|
// Vector size exported for the storage layer, which uses it as the collection's
// vector dimension. NOTE(review): presumably the output size of MODEL — confirm.
const VECTOR_DIM = 768
|
||||||
|
// Number of texts sent to Ollama per embed request.
const BATCH_SIZE = 10
|
||||||
|
|
||||||
|
/** One embedded item (post or comment) ready to be written to vector storage. */
export interface EmbeddedPoint {
  /** Reddit id of the embedded post or comment. */
  id: string
  /** Embedding returned by the model for this item's text. */
  vector: number[]
  /** Metadata stored alongside the vector. */
  payload: PointPayload
}
|
||||||
|
|
||||||
|
/** Progress report passed to the callback registered on the embedding pipeline. */
export interface EmbeddingProgress {
  /** Index (1-based, capped at `total`) of the last item in the batch being submitted. */
  current: number
  /** Total number of items in the current embedding run. */
  total: number
  /** Human-readable description of the batch being embedded. */
  message: string
}
|
||||||
|
|
||||||
|
/**
 * Embeds Reddit posts and comments with an Ollama embedding model.
 *
 * Texts are submitted in groups of `BATCH_SIZE`. If a progress callback is
 * registered it is invoked right before each group is submitted.
 */
export class EmbeddingPipeline {
  private ollama: Ollama
  private onProgress?: (progress: EmbeddingProgress) => void

  /**
   * @param host Ollama base URL. Falls back to the `OLLAMA_HOST` environment
   *   variable and then to `http://localhost:11434`.
   */
  constructor(host?: string) {
    this.ollama = new Ollama({
      host: host || process.env.OLLAMA_HOST || 'http://localhost:11434',
    })
  }

  /** Registers the progress listener, replacing any previously registered one. */
  setProgressCallback(callback: (progress: EmbeddingProgress) => void): void {
    this.onProgress = callback
  }

  /** Forwards a progress report to the listener, if one is registered. */
  private emitProgress(progress: EmbeddingProgress): void {
    this.onProgress?.(progress)
  }

  /**
   * Embeds a single text.
   *
   * @returns The first embedding of the response, or an empty array when the
   *   response contains no embedding.
   */
  async embed(text: string): Promise<number[]> {
    const response = await this.ollama.embed({
      model: MODEL,
      input: text,
    })
    return response.embeddings[0] ?? []
  }

  /**
   * Embeds several texts in one request.
   *
   * @returns The embeddings of the response; callers pair them with the inputs
   *   by index.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    const response = await this.ollama.embed({
      model: MODEL,
      input: texts,
    })
    return response.embeddings
  }

  /**
   * Embeds posts together with the bodies of their five highest-scoring comments.
   *
   * @param posts Posts to embed.
   * @param commentsByPost Comments keyed by post id; posts without an entry are
   *   embedded without comment context. The map and its arrays are not modified.
   * @returns One point per post for which the model returned an embedding.
   */
  async embedPosts(
    posts: RedditPost[],
    commentsByPost: Map<string, RedditComment[]>
  ): Promise<EmbeddedPoint[]> {
    const points: EmbeddedPoint[] = []
    const batches: Array<{ text: string; payload: PointPayload }> = []

    for (const post of posts) {
      const comments = commentsByPost.get(post.id) || []
      // Sort a copy: Array.prototype.sort works in place and would otherwise
      // reorder the caller's array held in commentsByPost.
      const topComments = [...comments]
        .sort((a, b) => b.score - a.score)
        .slice(0, 5)
        .map(c => c.body)

      const text = prepareForEmbedding(post.title, post.selftext, topComments)

      batches.push({
        text,
        payload: {
          id: post.id,
          type: 'post',
          subreddit: post.subreddit,
          title: post.title,
          author: post.author,
          body: post.selftext || '',
          score: post.score,
          created: post.created_utc,
          permalink: `https://reddit.com${post.permalink}`,
        },
      })
    }

    for (let i = 0; i < batches.length; i += BATCH_SIZE) {
      const batch = batches.slice(i, i + BATCH_SIZE)

      this.emitProgress({
        current: Math.min(i + BATCH_SIZE, batches.length),
        total: batches.length,
        message: `Embedding posts ${i + 1}-${Math.min(i + BATCH_SIZE, batches.length)}`,
      })

      const texts = batch.map(b => b.text)
      const embeddings = await this.embedBatch(texts)

      for (let j = 0; j < batch.length; j++) {
        const item = batch[j]
        const embedding = embeddings[j]
        // Items without a matching embedding are skipped rather than stored empty.
        if (item && embedding) {
          points.push({
            id: item.payload.id,
            vector: embedding,
            payload: item.payload,
          })
        }
      }
    }

    return points
  }

  /**
   * Embeds individual comments.
   *
   * Comments with a body of 20 characters or fewer, or authored by `[deleted]`,
   * are ignored. The embedded text is `cleanText(body)`; the stored payload
   * keeps the original body.
   *
   * @returns One point per accepted comment for which the model returned an embedding.
   */
  async embedComments(comments: RedditComment[]): Promise<EmbeddedPoint[]> {
    const points: EmbeddedPoint[] = []

    const validComments = comments.filter(
      c => c.body && c.body.length > 20 && c.author !== '[deleted]'
    )

    for (let i = 0; i < validComments.length; i += BATCH_SIZE) {
      const batch = validComments.slice(i, i + BATCH_SIZE)

      this.emitProgress({
        current: Math.min(i + BATCH_SIZE, validComments.length),
        total: validComments.length,
        message: `Embedding comments ${i + 1}-${Math.min(i + BATCH_SIZE, validComments.length)}`,
      })

      const texts = batch.map(c => cleanText(c.body))
      const embeddings = await this.embedBatch(texts)

      for (let j = 0; j < batch.length; j++) {
        const comment = batch[j]
        const embedding = embeddings[j]
        if (comment && embedding) {
          points.push({
            id: comment.id,
            vector: embedding,
            payload: {
              id: comment.id,
              type: 'comment',
              subreddit: comment.subreddit,
              author: comment.author,
              body: comment.body,
              score: comment.score,
              created: comment.created_utc,
              permalink: `https://reddit.com${comment.permalink}`,
              parent_id: comment.parent_id,
            },
          })
        }
      }
    }

    return points
  }

  /**
   * Checks that the Ollama server answers a model-list request.
   *
   * A missing `MODEL` only produces a console warning; the result is still `true`.
   *
   * @returns `false` when the request throws, `true` otherwise.
   */
  async checkConnection(): Promise<boolean> {
    try {
      const result = await this.ollama.list()
      const hasModel = result.models.some(m => m.name.startsWith(MODEL))
      if (!hasModel) {
        console.warn(`Model ${MODEL} not found. Available models:`, result.models.map(m => m.name))
      }
      return true
    } catch {
      return false
    }
  }
}
|
||||||
|
|
||||||
|
export { VECTOR_DIM }
|
||||||
59
src/index.ts
Normal file
59
src/index.ts
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
import { createApp } from './tui/app'
|
||||||
|
|
||||||
|
/**
 * Program entry point: clears the terminal, verifies that Ollama and Qdrant
 * are usable, then starts the TUI. Exits with status 1 when a check fails.
 */
async function main() {
  console.clear()

  // checkOllama() is also false when the embedding model is not listed.
  if (!(await checkOllama())) {
    const ollamaHost = process.env.OLLAMA_HOST || 'http://localhost:11434'
    console.error('error: cannot connect to ollama at', ollamaHost)
    console.error('make sure ollama is running and nomic-embed-text model is available')
    console.error('run: ollama pull nomic-embed-text')
    process.exit(1)
  }

  if (!(await checkQdrant())) {
    const qdrantHost = process.env.QDRANT_URL || 'http://localhost:6333'
    console.error('error: cannot connect to qdrant at', qdrantHost)
    console.error('make sure qdrant is running and QDRANT_API_KEY is set if required')
    process.exit(1)
  }

  await createApp()
}
|
||||||
|
|
||||||
|
/**
 * Reports whether Ollama is reachable and lists a model whose name contains
 * `nomic-embed-text`.
 *
 * @returns `false` when the client cannot be loaded, the request fails, or no
 *   such model is listed.
 */
async function checkOllama(): Promise<boolean> {
  try {
    const { Ollama } = await import('ollama')
    const host = process.env.OLLAMA_HOST || 'http://localhost:11434'
    const { models } = await new Ollama({ host }).list()

    for (const model of models) {
      if (model.name.includes('nomic-embed-text')) return true
    }
    return false
  } catch {
    return false
  }
}
|
||||||
|
|
||||||
|
/**
 * Reports whether Qdrant answers a collection-list request.
 *
 * The endpoint comes from `QDRANT_URL` (default `http://localhost:6333`). When
 * the URL has no explicit port, 443 is used for https and 6333 otherwise.
 *
 * @returns `false` when the client cannot be loaded, the URL is invalid, or
 *   the request fails.
 */
async function checkQdrant(): Promise<boolean> {
  try {
    const { QdrantClient } = await import('@qdrant/js-client-rest')
    const { hostname, port, protocol } = new URL(
      process.env.QDRANT_URL || 'http://localhost:6333'
    )
    const useHttps = protocol === 'https:'

    let resolvedPort: number
    if (port) {
      resolvedPort = parseInt(port)
    } else {
      resolvedPort = useHttps ? 443 : 6333
    }

    const client = new QdrantClient({
      host: hostname,
      port: resolvedPort,
      https: useHttps,
      apiKey: process.env.QDRANT_API_KEY,
    })
    await client.getCollections()
    return true
  } catch {
    return false
  }
}
|
||||||
|
|
||||||
|
// Last-resort handler: report any rejection that escapes main() and exit non-zero.
void main().then(undefined, (reason: unknown) => {
  console.error('fatal error:', reason)
  process.exit(1)
})
|
||||||
120
src/scraper/comments.ts
Normal file
120
src/scraper/comments.ts
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
import { RateLimiter, fetchWithRetry } from '../utils/rate-limit'
|
||||||
|
import type {
|
||||||
|
RedditComment,
|
||||||
|
RedditPost,
|
||||||
|
RedditListing,
|
||||||
|
RedditCommentData,
|
||||||
|
ScrapeProgress,
|
||||||
|
} from './types'
|
||||||
|
|
||||||
|
// Value of the User-Agent header sent with every comment request to Reddit.
const USER_AGENT = 'reddit-trend-analyzer/1.0 (by /u/trend-analyzer-bot)'
|
||||||
|
|
||||||
|
/**
 * Downloads the comment trees of Reddit posts and flattens them into lists.
 *
 * Requests go through `fetchWithRetry` with a per-instance `RateLimiter`.
 */
export class CommentFetcher {
  private rateLimiter: RateLimiter
  private onProgress?: (progress: ScrapeProgress) => void

  /** @param delayMs Value handed to the `RateLimiter` constructor (default 5000). */
  constructor(delayMs: number = 5000) {
    this.rateLimiter = new RateLimiter(delayMs)
  }

  /** Registers the progress listener, replacing any previously registered one. */
  setProgressCallback(callback: (progress: ScrapeProgress) => void): void {
    this.onProgress = callback
  }

  /** Forwards a progress report to the listener, if one is registered. */
  private emitProgress(progress: ScrapeProgress): void {
    this.onProgress?.(progress)
  }

  /**
   * Flattens a comment listing, parents before their replies.
   *
   * Only `t1` children are considered. A comment authored by `[deleted]` or
   * with an empty body is not emitted, but its replies are still visited so
   * that a deleted parent does not discard the whole thread beneath it.
   *
   * @param listing Listing whose children are walked.
   * @param subreddit Subreddit name stamped on every emitted comment.
   * @param depth Nesting level assigned to this listing's comments (0 = top level).
   */
  private extractComments(
    listing: RedditListing<RedditCommentData>,
    subreddit: string,
    depth: number = 0
  ): RedditComment[] {
    const comments: RedditComment[] = []

    for (const child of listing.data.children) {
      if (child.kind !== 't1') continue
      const data = child.data

      if (data.author !== '[deleted]' && data.body) {
        comments.push({
          id: data.id,
          name: data.name,
          body: data.body,
          author: data.author,
          score: data.score,
          created_utc: data.created_utc,
          permalink: data.permalink,
          parent_id: data.parent_id,
          subreddit: subreddit,
          depth: depth,
        })
      }

      // `replies` is either a nested listing or an empty string.
      if (data.replies && typeof data.replies === 'object') {
        comments.push(
          ...this.extractComments(data.replies, subreddit, depth + 1)
        )
      }
    }

    return comments
  }

  /**
   * Fetches and flattens the comments of one post.
   *
   * The response is read as a two-element array whose second element is the
   * comment listing.
   *
   * @returns The flattened comments, or an empty array when the request fails
   *   or the response has no comment listing (best effort: failures are not
   *   reported to the caller).
   */
  async fetchCommentsForPost(post: RedditPost): Promise<RedditComment[]> {
    const url = `https://www.reddit.com${post.permalink}.json?limit=100&depth=3`

    try {
      const response = await fetchWithRetry<
        [RedditListing<RedditCommentData>, RedditListing<RedditCommentData>]
      >(
        url,
        {
          headers: {
            'User-Agent': USER_AGENT,
            'Accept': 'application/json',
          },
        },
        this.rateLimiter,
        3
      )

      if (!response[1]?.data?.children) {
        return []
      }

      return this.extractComments(response[1], post.subreddit)
    } catch {
      // silently skip failed posts
      return []
    }
  }

  /**
   * Fetches comments for every post, one post at a time.
   *
   * Emits a `comments` progress report before each post and once more at the end.
   *
   * @returns Map from post id to that post's flattened comments (possibly empty).
   */
  async fetchAllComments(posts: RedditPost[]): Promise<Map<string, RedditComment[]>> {
    const commentsByPost = new Map<string, RedditComment[]>()

    for (let i = 0; i < posts.length; i++) {
      const post = posts[i]
      if (!post) continue

      this.emitProgress({
        phase: 'comments',
        current: i + 1,
        total: posts.length,
        message: `Fetching comments for post ${i + 1}/${posts.length}`,
      })

      const comments = await this.fetchCommentsForPost(post)
      commentsByPost.set(post.id, comments)
    }

    this.emitProgress({
      phase: 'comments',
      current: posts.length,
      total: posts.length,
      message: 'Done fetching comments',
    })

    return commentsByPost
  }
}
|
||||||
120
src/scraper/reddit.ts
Normal file
120
src/scraper/reddit.ts
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
import { RateLimiter, fetchWithRetry } from '../utils/rate-limit'
|
||||||
|
import type {
|
||||||
|
RedditPost,
|
||||||
|
RedditListing,
|
||||||
|
RedditPostData,
|
||||||
|
ScrapeOptions,
|
||||||
|
ScrapeProgress,
|
||||||
|
} from './types'
|
||||||
|
|
||||||
|
// Value of the User-Agent header sent with every post-listing request to Reddit.
const USER_AGENT = 'reddit-trend-analyzer/1.0 (by /u/trend-analyzer-bot)'
|
||||||
|
|
||||||
|
/**
 * Turns user input into a `www.reddit.com` JSON endpoint URL.
 *
 * Accepted forms:
 * - a path, with or without leading slash (`/r/foo`, `r/foo`)
 * - a scheme-less reddit host (`reddit.com/r/foo`, `old.reddit.com/r/foo`)
 * - a full URL (`https://old.reddit.com/r/foo/top?t=week`)
 *
 * `old.` and `new.` hosts are rewritten to `www.`, and `.json` is inserted
 * before the query string (or appended) unless already present.
 *
 * @param url Raw user input; surrounding whitespace is ignored.
 * @returns The normalized URL string.
 */
export function normalizeRedditUrl(url: string): string {
  let normalized = url.trim()

  // A pasted host without scheme must not be mistaken for a path, otherwise it
  // would end up as https://www.reddit.com/reddit.com/...
  const schemelessHost = /^(?:(?:www|old|new)\.)?reddit\.com(?=\/)/i

  if (schemelessHost.test(normalized)) {
    normalized = normalized.replace(schemelessHost, 'https://www.reddit.com')
  } else if (!normalized.startsWith('http')) {
    normalized = `https://www.reddit.com${normalized.startsWith('/') ? '' : '/'}${normalized}`
  }

  normalized = normalized
    .replace('old.reddit.com', 'www.reddit.com')
    .replace('new.reddit.com', 'www.reddit.com')

  if (!normalized.endsWith('.json') && !normalized.includes('.json?')) {
    const hasQuery = normalized.includes('?')
    normalized = hasQuery
      ? normalized.replace('?', '.json?')
      : normalized + '.json'
  }

  return normalized
}
|
||||||
|
|
||||||
|
/**
 * Extracts the subreddit name from a Reddit URL or path.
 *
 * Recognizes an `r/<name>` segment at the start of the input or after a slash.
 * The name ends at the first `/`, `?`, `#` or `.`, so a trailing `.json` or
 * fragment is not included.
 *
 * @param url URL or path such as `https://www.reddit.com/r/foo/top` or `r/foo`.
 * @returns The subreddit name, or `'unknown'` when no `r/<name>` segment is found.
 */
export function parseSubredditFromUrl(url: string): string {
  const match = url.trim().match(/(?:^|\/)r\/([^/?#.]+)/)
  return match?.[1] ?? 'unknown'
}
|
||||||
|
|
||||||
|
/**
 * Pages through a Reddit listing and returns its posts.
 *
 * Requests go through `fetchWithRetry` with a per-instance `RateLimiter`.
 */
export class RedditScraper {
  private rateLimiter: RateLimiter
  private onProgress?: (progress: ScrapeProgress) => void

  /** @param delayMs Value handed to the `RateLimiter` constructor (default 5000). */
  constructor(delayMs: number = 5000) {
    this.rateLimiter = new RateLimiter(delayMs)
  }

  /** Registers the progress listener, replacing any previously registered one. */
  setProgressCallback(callback: (progress: ScrapeProgress) => void): void {
    this.onProgress = callback
  }

  /** Forwards a progress report to the listener, if one is registered. */
  private emitProgress(progress: ScrapeProgress): void {
    this.onProgress?.(progress)
  }

  /**
   * Fetches up to `options.pages` pages of `options.postsPerPage` posts each.
   *
   * Paging follows the listing's `after` cursor and stops early when the
   * listing reports no further page. Only `t3` children are kept.
   *
   * Each post is labelled with the subreddit reported by Reddit for that post;
   * the name parsed from `options.url` is used only when the post carries none.
   * This keeps aggregate listings (e.g. `r/all` or `r/a+b`) correctly labelled.
   *
   * @param options Only `url`, `pages` and `postsPerPage` are read here.
   * @returns The collected posts in listing order.
   * @throws Whatever `fetchWithRetry` throws; errors are not caught here.
   */
  async fetchPosts(options: ScrapeOptions): Promise<RedditPost[]> {
    const baseUrl = normalizeRedditUrl(options.url)
    const fallbackSubreddit = parseSubredditFromUrl(options.url)
    const posts: RedditPost[] = []
    let after: string | null = null

    for (let page = 0; page < options.pages; page++) {
      const url = new URL(baseUrl)
      url.searchParams.set('limit', String(options.postsPerPage))
      if (after) {
        url.searchParams.set('after', after)
      }

      this.emitProgress({
        phase: 'posts',
        current: posts.length,
        total: options.pages * options.postsPerPage,
        message: `Fetching page ${page + 1}/${options.pages}...`,
      })

      const listing = await fetchWithRetry<RedditListing<RedditPostData>>(
        url.toString(),
        {
          headers: {
            'User-Agent': USER_AGENT,
            'Accept': 'application/json',
          },
        },
        this.rateLimiter,
        3
      )

      for (const child of listing.data.children) {
        if (child.kind !== 't3') continue
        const data = child.data

        posts.push({
          id: data.id,
          name: data.name,
          title: data.title,
          selftext: data.selftext || '',
          author: data.author,
          score: data.score,
          upvote_ratio: data.upvote_ratio,
          num_comments: data.num_comments,
          created_utc: data.created_utc,
          permalink: data.permalink,
          subreddit: data.subreddit || fallbackSubreddit,
          url: data.url,
          is_self: data.is_self,
        })
      }

      after = listing.data.after
      if (!after) break
    }

    this.emitProgress({
      phase: 'posts',
      current: posts.length,
      total: posts.length,
      message: `Fetched ${posts.length} posts`,
    })

    return posts
  }
}
|
||||||
87
src/scraper/types.ts
Normal file
87
src/scraper/types.ts
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
/** Post record produced by the scraper and consumed by the embedding step. */
export interface RedditPost {
  /** Reddit id of the post; used as the key when grouping comments by post. */
  id: string
  name: string
  title: string
  /** Post body text; the scraper stores an empty string when there is none. */
  selftext: string
  author: string
  score: number
  upvote_ratio: number
  num_comments: number
  /** Creation timestamp as delivered by Reddit. */
  created_utc: number
  /** Site-relative path; consumers prefix it with the Reddit host. */
  permalink: string
  subreddit: string
  url: string
  is_self: boolean
}
|
||||||
|
|
||||||
|
/** Flattened comment record produced by the comment fetcher. */
export interface RedditComment {
  id: string
  name: string
  body: string
  author: string
  score: number
  /** Creation timestamp as delivered by Reddit. */
  created_utc: number
  /** Site-relative path; consumers prefix it with the Reddit host. */
  permalink: string
  parent_id: string
  /** Subreddit of the post the comment was fetched for. */
  subreddit: string
  /** Nesting level assigned while flattening the tree; 0 for top-level comments. */
  depth: number
}
|
||||||
|
|
||||||
|
/** The `data` member of a Reddit listing response. */
export interface RedditListingData<T> {
  /** Cursor for the next page, or `null` when there is none. */
  after: string | null
  before: string | null
  /** Listing entries; `kind` tells the entry type (the scrapers keep `t3` posts and `t1` comments). */
  children: Array<{ kind: string; data: T }>
  dist: number
}
|
||||||
|
|
||||||
|
/** Envelope of a Reddit listing response; the entries live under `data.children`. */
export interface RedditListing<T> {
  kind: string
  data: RedditListingData<T>
}
|
||||||
|
|
||||||
|
/** Fields read from a post (`t3`) entry of a Reddit listing response. */
export interface RedditPostData {
  id: string
  name: string
  title: string
  selftext: string
  selftext_html: string | null
  author: string
  score: number
  upvote_ratio: number
  num_comments: number
  created_utc: number
  permalink: string
  subreddit: string
  url: string
  is_self: boolean
}
|
||||||
|
|
||||||
|
/** Fields read from a comment (`t1`) entry of a Reddit listing response. */
export interface RedditCommentData {
  id: string
  name: string
  body: string
  body_html: string
  author: string
  score: number
  created_utc: number
  permalink: string
  parent_id: string
  subreddit: string
  depth: number
  /** Nested replies: a listing when present, otherwise an empty string or absent. */
  replies?: RedditListing<RedditCommentData> | ''
}
|
||||||
|
|
||||||
|
/** Options for one scrape run. */
export interface ScrapeOptions {
  /** Listing URL or path to scrape. */
  url: string
  /** Maximum number of listing pages to request. */
  pages: number
  /** Value of the `limit` query parameter for each page request. */
  postsPerPage: number
  // NOTE(review): the two fields below are not read by the visible scraper code — confirm usage.
  fetchComments: boolean
  delayMs: number
}
|
||||||
|
|
||||||
|
/** Progress report emitted by the post scraper and the comment fetcher. */
export interface ScrapeProgress {
  /** Stage of the scrape the report belongs to. */
  phase: 'posts' | 'comments' | 'done'
  /** Items handled so far in this phase. */
  current: number
  /** Expected number of items in this phase. */
  total: number
  /** Human-readable status line. */
  message: string
}
|
||||||
183
src/storage/qdrant.ts
Normal file
183
src/storage/qdrant.ts
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
import { QdrantClient } from '@qdrant/js-client-rest'
|
||||||
|
import type { EmbeddedPoint } from '../embeddings/ollama'
|
||||||
|
import type { PointPayload, SearchResult, CollectionStats } from './types'
|
||||||
|
import { VECTOR_DIM } from '../embeddings/ollama'
|
||||||
|
|
||||||
|
// Collection used when the QdrantStorage constructor is given no collection name.
const COLLECTION_NAME = 'reddit_trends'
|
||||||
|
// Number of points sent to Qdrant per upsert request.
const BATCH_SIZE = 100
|
||||||
|
|
||||||
|
/**
 * Thin wrapper around a Qdrant collection holding embedded posts and comments.
 */
export class QdrantStorage {
  private client: QdrantClient
  private collectionName: string

  /**
   * @param url Qdrant base URL. Falls back to `QDRANT_URL`, then to
   *   `http://localhost:6333`. Without an explicit port, 443 is used for https
   *   and 6333 otherwise.
   * @param apiKey API key. Falls back to `QDRANT_API_KEY`.
   * @param collectionName Collection to operate on. Defaults to `COLLECTION_NAME`.
   */
  constructor(url?: string, apiKey?: string, collectionName?: string) {
    const qdrantUrl = url || process.env.QDRANT_URL || 'http://localhost:6333'
    const parsedUrl = new URL(qdrantUrl)

    this.client = new QdrantClient({
      host: parsedUrl.hostname,
      port: parsedUrl.port ? parseInt(parsedUrl.port) : (parsedUrl.protocol === 'https:' ? 443 : 6333),
      https: parsedUrl.protocol === 'https:',
      apiKey: apiKey || process.env.QDRANT_API_KEY,
    })
    this.collectionName = collectionName || COLLECTION_NAME
  }

  /**
   * Creates the collection (cosine distance, `VECTOR_DIM` dimensions) and its
   * payload indexes on `subreddit`, `type`, `created` and `score` when the
   * collection does not exist yet. Does nothing otherwise.
   */
  async ensureCollection(): Promise<void> {
    const collections = await this.client.getCollections()
    const exists = collections.collections.some(c => c.name === this.collectionName)

    if (!exists) {
      await this.client.createCollection(this.collectionName, {
        vectors: {
          size: VECTOR_DIM,
          distance: 'Cosine',
        },
      })

      await this.client.createPayloadIndex(this.collectionName, {
        field_name: 'subreddit',
        field_schema: 'keyword',
      })

      await this.client.createPayloadIndex(this.collectionName, {
        field_name: 'type',
        field_schema: 'keyword',
      })

      await this.client.createPayloadIndex(this.collectionName, {
        field_name: 'created',
        field_schema: 'integer',
      })

      await this.client.createPayloadIndex(this.collectionName, {
        field_name: 'score',
        field_schema: 'integer',
      })
    }
  }

  /**
   * Writes points to the collection in batches of `BATCH_SIZE`.
   *
   * Each point gets a deterministic UUID derived from its payload type and
   * Reddit id. Positional ids would restart at 0 on every call, so a later
   * call (e.g. comments after posts) would overwrite earlier points. With
   * derived ids, separate calls never collide and re-upserting the same item
   * replaces it instead of duplicating it.
   *
   * @param points Points to write; an empty array is a no-op.
   */
  async upsertPoints(points: EmbeddedPoint[]): Promise<void> {
    if (points.length === 0) return

    const { createHash } = await import('node:crypto')

    // Qdrant ids must be unsigned integers or UUIDs, so the hash is laid out
    // as a UUID (version nibble 5, RFC 4122 variant bits).
    const toPointId = (point: EmbeddedPoint): string => {
      const hex = createHash('sha256')
        .update(`${point.payload.type}:${point.id}`)
        .digest('hex')
      const variant = ((parseInt(hex.charAt(16), 16) & 0x3) | 0x8).toString(16)
      return [
        hex.slice(0, 8),
        hex.slice(8, 12),
        `5${hex.slice(13, 16)}`,
        `${variant}${hex.slice(17, 20)}`,
        hex.slice(20, 32),
      ].join('-')
    }

    for (let i = 0; i < points.length; i += BATCH_SIZE) {
      const batch = points.slice(i, i + BATCH_SIZE)

      await this.client.upsert(this.collectionName, {
        wait: true,
        points: batch.map(point => ({
          id: toPointId(point),
          vector: point.vector,
          payload: point.payload as unknown as Record<string, unknown>,
        })),
      })
    }
  }

  /**
   * Runs a vector similarity search.
   *
   * @param vector Query embedding.
   * @param limit Maximum number of hits (default 10).
   * @param filters Optional payload constraints, all of which must hold:
   *   exact `subreddit`, exact `type`, `score >= minScore`, `created >= afterDate`.
   * @returns Hits with their similarity score and payload.
   */
  async search(
    vector: number[],
    limit: number = 10,
    filters?: {
      subreddit?: string
      type?: 'post' | 'comment'
      minScore?: number
      afterDate?: number
    }
  ): Promise<SearchResult[]> {
    // Kept loosely typed: conditions are handed to the client as-is.
    const must: any[] = []

    if (filters?.subreddit) {
      must.push({
        key: 'subreddit',
        match: { value: filters.subreddit },
      })
    }

    if (filters?.type) {
      must.push({
        key: 'type',
        match: { value: filters.type },
      })
    }

    if (filters?.minScore !== undefined) {
      must.push({
        key: 'score',
        range: { gte: filters.minScore },
      })
    }

    if (filters?.afterDate !== undefined) {
      must.push({
        key: 'created',
        range: { gte: filters.afterDate },
      })
    }

    const results = await this.client.search(this.collectionName, {
      vector,
      limit,
      with_payload: true,
      filter: must.length > 0 ? { must } : undefined,
    })

    return results.map(r => ({
      id: String(r.id),
      score: r.score,
      payload: r.payload as unknown as PointPayload,
    }))
  }

  /**
   * Collects post/comment counts and the subreddits present in the collection.
   *
   * The subreddit list is built from a single scroll of at most 1000 points,
   * so it may be incomplete for larger collections.
   *
   * @returns The statistics, or all-empty statistics when any request fails.
   */
  async getStats(): Promise<CollectionStats> {
    try {
      // Probe the collection first; a failure here lands in the catch below.
      await this.client.getCollection(this.collectionName)

      const postCount = await this.client.count(this.collectionName, {
        filter: { must: [{ key: 'type', match: { value: 'post' } }] },
      })

      const commentCount = await this.client.count(this.collectionName, {
        filter: { must: [{ key: 'type', match: { value: 'comment' } }] },
      })

      const scroll = await this.client.scroll(this.collectionName, {
        limit: 1000,
        with_payload: { include: ['subreddit'] },
      })

      const subreddits = new Set<string>()
      for (const point of scroll.points) {
        const payload = point.payload as { subreddit?: string }
        if (payload?.subreddit) {
          subreddits.add(payload.subreddit)
        }
      }

      return {
        posts: postCount.count,
        comments: commentCount.count,
        subreddits: Array.from(subreddits),
      }
    } catch {
      return { posts: 0, comments: 0, subreddits: [] }
    }
  }

  /** Deletes the collection; errors (e.g. a missing collection) are ignored. */
  async deleteCollection(): Promise<void> {
    try {
      await this.client.deleteCollection(this.collectionName)
    } catch {
      // collection might not exist
    }
  }

  /** @returns `true` when a collection-list request succeeds, `false` when it throws. */
  async checkConnection(): Promise<boolean> {
    try {
      await this.client.getCollections()
      return true
    } catch {
      return false
    }
  }
}
|
||||||
|
|
||||||
|
export { COLLECTION_NAME }
|
||||||
24
src/storage/types.ts
Normal file
24
src/storage/types.ts
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
/** Metadata stored with every vector; one shape for both posts and comments. */
export interface PointPayload {
  /** Reddit id of the post or comment. */
  id: string
  type: 'post' | 'comment'
  subreddit: string
  /** Post title; not set for comments. */
  title?: string
  author: string
  /** Post selftext or comment body. */
  body: string
  score: number
  /** Creation time in seconds since the Unix epoch (multiplied by 1000 when exported as a date). */
  created: number
  /** Absolute URL of the item on Reddit. */
  permalink: string
  /** Parent id; only set for comments. */
  parent_id?: string
}
|
||||||
|
|
||||||
|
/** One hit of a similarity search. */
export interface SearchResult {
  /** Storage id of the point, as a string. */
  id: string
  /** Similarity score reported by the vector store. */
  score: number
  payload: PointPayload
}
|
||||||
|
|
||||||
|
/** Summary of what a collection currently holds. */
export interface CollectionStats {
  /** Number of points whose payload type is `post`. */
  posts: number
  /** Number of points whose payload type is `comment`. */
  comments: number
  /** Distinct subreddit names found in the collection. */
  subreddits: string[]
}
|
||||||
268
src/tui/app.ts
Normal file
268
src/tui/app.ts
Normal file
@ -0,0 +1,268 @@
|
|||||||
|
import {
|
||||||
|
createCliRenderer,
|
||||||
|
BoxRenderable,
|
||||||
|
TextRenderable,
|
||||||
|
type KeyEvent,
|
||||||
|
} from '@opentui/core'
|
||||||
|
|
||||||
|
import { RedditScraper } from '../scraper/reddit'
|
||||||
|
import { CommentFetcher } from '../scraper/comments'
|
||||||
|
import { EmbeddingPipeline } from '../embeddings/ollama'
|
||||||
|
import { QdrantStorage } from '../storage/qdrant'
|
||||||
|
import type { RedditComment } from '../scraper/types'
|
||||||
|
import type { SearchResult } from '../storage/types'
|
||||||
|
|
||||||
|
import { createUrlInput, focusUrlInput } from './components/url-input'
|
||||||
|
import { createProgressPanel, updateProgress, resetProgress } from './components/progress'
|
||||||
|
import { createStatsPanel, updateStats } from './components/stats'
|
||||||
|
import { createTrendingPanel, updateTrending } from './components/trending'
|
||||||
|
import { createSearchPanel, updateSearchResults, focusSearch } from './components/search'
|
||||||
|
import { createExportBar, exportToJson, exportToCsv } from './components/export'
|
||||||
|
|
||||||
|
/** Mutable state shared by the TUI's panels and handlers. */
export interface AppState {
  /** Listing URL entered by the user; a scrape does not start while it is empty. */
  url: string
  /** Number of listing pages to scrape. */
  pages: number
  /** Number of posts requested per page. */
  postsPerPage: number
  /** True while a scrape is in progress; blocks new scrapes and searches. */
  isRunning: boolean
  /** Results of the most recent search. */
  lastResults: SearchResult[]
}
|
||||||
|
|
||||||
|
export async function createApp() {
|
||||||
|
const renderer = await createCliRenderer({ exitOnCtrlC: false })
|
||||||
|
|
||||||
|
const state: AppState = {
|
||||||
|
url: '',
|
||||||
|
pages: 5,
|
||||||
|
postsPerPage: 100,
|
||||||
|
isRunning: false,
|
||||||
|
lastResults: [],
|
||||||
|
}
|
||||||
|
|
||||||
|
const scraper = new RedditScraper(3000)
|
||||||
|
const commentFetcher = new CommentFetcher(3000)
|
||||||
|
const embeddings = new EmbeddingPipeline()
|
||||||
|
const storage = new QdrantStorage()
|
||||||
|
|
||||||
|
const root = new BoxRenderable(renderer, {
|
||||||
|
id: 'root',
|
||||||
|
flexDirection: 'column',
|
||||||
|
padding: 1,
|
||||||
|
})
|
||||||
|
|
||||||
|
const header = new BoxRenderable(renderer, {
|
||||||
|
id: 'header',
|
||||||
|
flexDirection: 'row',
|
||||||
|
justifyContent: 'space-between',
|
||||||
|
paddingBottom: 1,
|
||||||
|
})
|
||||||
|
|
||||||
|
header.add(new TextRenderable(renderer, {
|
||||||
|
id: 'title',
|
||||||
|
content: ' reddit trend analyzer',
|
||||||
|
}))
|
||||||
|
|
||||||
|
header.add(new TextRenderable(renderer, {
|
||||||
|
id: 'quit-hint-header',
|
||||||
|
content: '[q]uit ',
|
||||||
|
}))
|
||||||
|
|
||||||
|
root.add(header)
|
||||||
|
|
||||||
|
let progressPanel: BoxRenderable
|
||||||
|
let statsPanel: BoxRenderable
|
||||||
|
let trendingPanel: BoxRenderable
|
||||||
|
let searchPanel: BoxRenderable
|
||||||
|
|
||||||
|
const urlInput = createUrlInput(renderer, {
|
||||||
|
onUrlSubmit: (url) => { state.url = url },
|
||||||
|
onPagesChange: (pages) => { state.pages = pages },
|
||||||
|
onPostsPerPageChange: (count) => { state.postsPerPage = count },
|
||||||
|
onStartScrape: () => runScrape(),
|
||||||
|
})
|
||||||
|
root.add(urlInput)
|
||||||
|
|
||||||
|
progressPanel = createProgressPanel(renderer)
|
||||||
|
root.add(progressPanel)
|
||||||
|
|
||||||
|
const middleRow = new BoxRenderable(renderer, {
|
||||||
|
id: 'middle-row',
|
||||||
|
flexDirection: 'row',
|
||||||
|
gap: 2,
|
||||||
|
})
|
||||||
|
|
||||||
|
statsPanel = createStatsPanel(renderer)
|
||||||
|
middleRow.add(statsPanel)
|
||||||
|
|
||||||
|
trendingPanel = createTrendingPanel(renderer)
|
||||||
|
middleRow.add(trendingPanel)
|
||||||
|
|
||||||
|
root.add(middleRow)
|
||||||
|
|
||||||
|
searchPanel = createSearchPanel(renderer, {
|
||||||
|
onSearch: async (query) => {
|
||||||
|
if (state.isRunning) return
|
||||||
|
try {
|
||||||
|
const vector = await embeddings.embed(query)
|
||||||
|
const results = await storage.search(vector, 10)
|
||||||
|
state.lastResults = results
|
||||||
|
updateSearchResults(searchPanel, results)
|
||||||
|
} catch (err) {
|
||||||
|
console.error('Search error:', err)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
})
|
||||||
|
root.add(searchPanel)
|
||||||
|
|
||||||
|
const exportBar = createExportBar(renderer)
|
||||||
|
root.add(exportBar)
|
||||||
|
|
||||||
|
renderer.root.add(root)
|
||||||
|
|
||||||
|
async function runScrape() {
|
||||||
|
if (state.isRunning || !state.url) return
|
||||||
|
state.isRunning = true
|
||||||
|
resetProgress(progressPanel)
|
||||||
|
|
||||||
|
try {
|
||||||
|
await storage.ensureCollection()
|
||||||
|
|
||||||
|
scraper.setProgressCallback((p) => {
|
||||||
|
updateProgress(progressPanel, {
|
||||||
|
phase: p.phase,
|
||||||
|
current: p.current,
|
||||||
|
total: p.total,
|
||||||
|
message: p.message,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
const posts = await scraper.fetchPosts({
|
||||||
|
url: state.url,
|
||||||
|
pages: state.pages,
|
||||||
|
postsPerPage: state.postsPerPage,
|
||||||
|
fetchComments: true,
|
||||||
|
delayMs: 3000,
|
||||||
|
})
|
||||||
|
|
||||||
|
commentFetcher.setProgressCallback((p) => {
|
||||||
|
updateProgress(progressPanel, {
|
||||||
|
phase: 'comments',
|
||||||
|
current: p.current,
|
||||||
|
total: p.total,
|
||||||
|
message: p.message,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
const commentsByPost = await commentFetcher.fetchAllComments(posts)
|
||||||
|
|
||||||
|
embeddings.setProgressCallback((p) => {
|
||||||
|
updateProgress(progressPanel, {
|
||||||
|
phase: 'embedding',
|
||||||
|
current: p.current,
|
||||||
|
total: p.total,
|
||||||
|
message: p.message,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
const postPoints = await embeddings.embedPosts(posts, commentsByPost)
|
||||||
|
await storage.upsertPoints(postPoints)
|
||||||
|
|
||||||
|
const allComments: RedditComment[] = []
|
||||||
|
for (const comments of commentsByPost.values()) {
|
||||||
|
allComments.push(...comments)
|
||||||
|
}
|
||||||
|
|
||||||
|
const commentPoints = await embeddings.embedComments(allComments)
|
||||||
|
await storage.upsertPoints(commentPoints)
|
||||||
|
|
||||||
|
const stats = await storage.getStats()
|
||||||
|
updateStats(statsPanel, stats)
|
||||||
|
|
||||||
|
updateTrending(trendingPanel, [
|
||||||
|
{ title: 'scrape complete', count: postPoints.length, avgScore: 0 },
|
||||||
|
])
|
||||||
|
|
||||||
|
} catch (err) {
|
||||||
|
console.error('Scrape error:', err)
|
||||||
|
updateTrending(trendingPanel, [
|
||||||
|
{ title: `error: ${err instanceof Error ? err.message : 'unknown'}`, count: 0, avgScore: 0 },
|
||||||
|
])
|
||||||
|
} finally {
|
||||||
|
state.isRunning = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function refreshStats() {
|
||||||
|
try {
|
||||||
|
const stats = await storage.getStats()
|
||||||
|
updateStats(statsPanel, stats)
|
||||||
|
} catch (err) {
|
||||||
|
console.error('Stats refresh error:', err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
renderer.keyInput.on('keypress', async (key: KeyEvent) => {
|
||||||
|
const urlInputEl = urlInput.getRenderable('url-input')
|
||||||
|
const searchInputEl = searchPanel.getRenderable('search-input')
|
||||||
|
const inputFocused = urlInputEl?.focused || searchInputEl?.focused
|
||||||
|
|
||||||
|
// always allow quit
|
||||||
|
if (key.ctrl && key.name === 'c') {
|
||||||
|
renderer.destroy()
|
||||||
|
process.exit(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// tab always switches focus
|
||||||
|
if (key.name === 'tab') {
|
||||||
|
if (urlInputEl?.focused) {
|
||||||
|
searchInputEl?.focus()
|
||||||
|
} else {
|
||||||
|
urlInputEl?.focus()
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// escape unfocuses inputs
|
||||||
|
if (key.name === 'escape' && inputFocused) {
|
||||||
|
urlInputEl?.blur?.()
|
||||||
|
searchInputEl?.blur?.()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// only handle hotkeys when no input is focused
|
||||||
|
if (!inputFocused) {
|
||||||
|
if (key.name === 'q') {
|
||||||
|
renderer.destroy()
|
||||||
|
process.exit(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (key.name === 'e' && !state.isRunning && state.lastResults.length > 0) {
|
||||||
|
await exportToJson(state.lastResults)
|
||||||
|
updateTrending(trendingPanel, [
|
||||||
|
{ title: 'exported to reddit-trends.json', count: 0, avgScore: 0 },
|
||||||
|
])
|
||||||
|
}
|
||||||
|
|
||||||
|
if (key.name === 'c' && !state.isRunning && state.lastResults.length > 0) {
|
||||||
|
await exportToCsv(state.lastResults)
|
||||||
|
updateTrending(trendingPanel, [
|
||||||
|
{ title: 'exported to reddit-trends.csv', count: 0, avgScore: 0 },
|
||||||
|
])
|
||||||
|
}
|
||||||
|
|
||||||
|
if (key.name === 'r' && !state.isRunning) {
|
||||||
|
await refreshStats()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// enter starts scrape (works even with input focused if url is set)
|
||||||
|
if (key.name === 'return' && !state.isRunning && state.url) {
|
||||||
|
await runScrape()
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
focusUrlInput(urlInput)
|
||||||
|
|
||||||
|
await refreshStats()
|
||||||
|
|
||||||
|
return { renderer, state }
|
||||||
|
}
|
||||||
92
src/tui/components/export.ts
Normal file
92
src/tui/components/export.ts
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
import {
|
||||||
|
BoxRenderable,
|
||||||
|
TextRenderable,
|
||||||
|
type RenderContext,
|
||||||
|
} from '@opentui/core'
|
||||||
|
import type { SearchResult } from '../../storage/types'
|
||||||
|
|
||||||
|
/**
 * Builds the horizontal bar of text hints shown at the bottom of the layout.
 *
 * The bar only renders labels; it registers no input handlers itself.
 *
 * @param renderer - render context every child renderable is created with
 * @returns the populated `export-bar` container
 */
export function createExportBar(renderer: RenderContext): BoxRenderable {
  // [id, label] pairs, listed in the order they are appended to the bar.
  const hints: ReadonlyArray<readonly [string, string]> = [
    ['export-json', '[e]xport json'],
    ['export-csv', '[c]sv'],
    ['refresh-stats', '[r]efresh stats'],
    ['quit-hint', '[q]uit'],
  ]

  const bar = new BoxRenderable(renderer, {
    id: 'export-bar',
    flexDirection: 'row',
    gap: 2,
    padding: 1,
  })

  for (const [id, content] of hints) {
    bar.add(new TextRenderable(renderer, { id, content }))
  }

  return bar
}
|
||||||
|
|
||||||
|
/**
 * Writes search results to disk as pretty-printed (2-space) JSON.
 *
 * Each result is flattened into a single record: the payload fields, the
 * `created` value multiplied by 1000 and rendered as an ISO-8601 string, and
 * the result's own score stored under `similarity`.
 *
 * @param results - search hits to export
 * @param filename - destination path; defaults to `reddit-trends.json`
 */
export async function exportToJson(
  results: SearchResult[],
  filename: string = 'reddit-trends.json'
): Promise<void> {
  const toRecord = ({ payload, score }: SearchResult) => ({
    id: payload.id,
    type: payload.type,
    subreddit: payload.subreddit,
    title: payload.title,
    body: payload.body,
    author: payload.author,
    score: payload.score,
    created: new Date(payload.created * 1000).toISOString(),
    permalink: payload.permalink,
    similarity: score,
  })

  const records = results.map((result) => toRecord(result))
  const serialized = JSON.stringify(records, null, 2)

  await Bun.write(filename, serialized)
}
|
||||||
|
|
||||||
|
/**
 * Writes search results to disk as CSV with a header row.
 *
 * Columns: id, type, subreddit, title, body, author, score, created (ISO-8601,
 * from `created * 1000`), permalink, similarity (result score, 4 decimals).
 * Rows are joined with `\n`.
 *
 * @param results - search hits to export
 * @param filename - destination path; defaults to `reddit-trends.csv`
 */
export async function exportToCsv(
  results: SearchResult[],
  filename: string = 'reddit-trends.csv'
): Promise<void> {
  const headers = [
    'id', 'type', 'subreddit', 'title', 'body', 'author',
    'score', 'created', 'permalink', 'similarity'
  ]

  // A field must be quoted when it contains a comma, a double quote, or ANY
  // line-break character. Checking only `\n` lets a bare `\r` through unquoted,
  // which splits the row in most CSV readers.
  const needsQuoting = /[",\r\n]/

  const escape = (val: string | number | undefined): string => {
    // `== null` also covers a null payload value, which would otherwise be
    // written out as the literal text "null".
    if (val == null) return ''
    const str = String(val)
    if (needsQuoting.test(str)) {
      // Embedded quotes are escaped by doubling them.
      return `"${str.replace(/"/g, '""')}"`
    }
    return str
  }

  const rows = results.map(r => [
    r.payload.id,
    r.payload.type,
    r.payload.subreddit,
    r.payload.title || '',
    r.payload.body,
    r.payload.author,
    r.payload.score,
    new Date(r.payload.created * 1000).toISOString(),
    r.payload.permalink,
    r.score.toFixed(4),
  ].map(escape).join(','))

  const csv = [headers.join(','), ...rows].join('\n')
  await Bun.write(filename, csv)
}
|
||||||
77
src/tui/components/progress.ts
Normal file
77
src/tui/components/progress.ts
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
import {
|
||||||
|
BoxRenderable,
|
||||||
|
TextRenderable,
|
||||||
|
type RenderContext,
|
||||||
|
} from '@opentui/core'
|
||||||
|
|
||||||
|
/** Snapshot of one progress report, as consumed by `updateProgress`. */
export interface ProgressState {
  /**
   * Pipeline stage name. `updateProgress` routes `'posts'` and `'comments'`
   * to the fetch line and `'embedding'` to the embed line; other values are
   * not displayed.
   */
  phase: string
  /** Number of items processed so far. */
  current: number
  /** Total number of items expected; a value of 0 or less is shown as 0%. */
  total: number
  /** Free-form label placed at the start of the progress line. */
  message: string
}
|
||||||
|
|
||||||
|
/**
 * Renders a fixed-width text progress bar made of filled and empty cells.
 *
 * `percent` is clamped to the 0..100 range (NaN is treated as 0) so that an
 * out-of-range value can never yield a negative cell count, which would make
 * `String.prototype.repeat` throw a RangeError.
 *
 * @param percent - completion percentage; values outside 0..100 are clamped
 * @param width - total number of cells in the bar
 * @returns a string of `width` characters
 */
function createProgressBar(percent: number, width: number = 20): string {
  const clamped = Number.isNaN(percent) ? 0 : Math.min(100, Math.max(0, percent))
  const filled = Math.round((clamped / 100) * width)
  const empty = width - filled
  return '█'.repeat(filled) + '░'.repeat(empty)
}
|
||||||
|
|
||||||
|
/**
 * Builds the bordered "progress" panel containing two text lines:
 * `fetch-progress` (initially "waiting...") and `embed-progress` (initially
 * empty).
 *
 * @param renderer - render context every renderable is created with
 * @returns the `progress-panel` container
 */
export function createProgressPanel(renderer: RenderContext): BoxRenderable {
  const panel = new BoxRenderable(renderer, {
    id: 'progress-panel',
    border: true,
    title: ' progress ',
    flexDirection: 'column',
    padding: 1,
    gap: 0,
    height: 5,
  })

  // Lines are created and appended top to bottom.
  const lineSpecs = [
    { id: 'fetch-progress', content: 'waiting...' },
    { id: 'embed-progress', content: '' },
  ]
  for (const spec of lineSpecs) {
    panel.add(new TextRenderable(renderer, { id: spec.id, content: spec.content }))
  }

  return panel
}
|
||||||
|
|
||||||
|
/**
 * Writes one progress report into the matching line of the progress panel.
 *
 * The line has the form `<message> <current>/<total> <bar> <percent>%`.
 * Phases `'posts'` and `'comments'` update `fetch-progress`; `'embedding'`
 * updates `embed-progress`; any other phase is ignored.
 *
 * @param container - the panel holding the progress lines
 * @param state - the progress report to display
 */
export function updateProgress(
  container: BoxRenderable,
  state: ProgressState
): void {
  const rawPercent = state.total > 0 ? Math.round((state.current / state.total) * 100) : 0
  // Clamp to 0..100: a report with current > total (or a negative/NaN count)
  // would otherwise print a nonsensical percentage and hand the bar renderer
  // a value that produces a negative repeat count.
  const percent = Number.isFinite(rawPercent) ? Math.min(100, Math.max(0, rawPercent)) : 0
  const bar = createProgressBar(percent)
  const text = `${state.message} ${state.current}/${state.total} ${bar} ${percent}%`

  let targetId: string
  switch (state.phase) {
    case 'posts':
    case 'comments':
      targetId = 'fetch-progress'
      break
    case 'embedding':
      targetId = 'embed-progress'
      break
    default:
      return
  }

  const line = container.getRenderable(targetId) as TextRenderable
  if (line) {
    line.content = text
  }
}
|
||||||
|
|
||||||
|
/**
 * Restores both progress lines to their initial text: "waiting..." for
 * `fetch-progress` and an empty string for `embed-progress`. A line that is
 * not found in the container is left alone.
 *
 * @param container - the panel holding the progress lines
 */
export function resetProgress(container: BoxRenderable): void {
  // Look both lines up first, then assign.
  const [fetchLine, embedLine] = ['fetch-progress', 'embed-progress'].map(
    (id) => container.getRenderable(id) as TextRenderable
  )

  if (fetchLine) fetchLine.content = 'waiting...'
  if (embedLine) embedLine.content = ''
}
|
||||||
94
src/tui/components/search.ts
Normal file
94
src/tui/components/search.ts
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
import {
|
||||||
|
BoxRenderable,
|
||||||
|
TextRenderable,
|
||||||
|
InputRenderable,
|
||||||
|
InputRenderableEvents,
|
||||||
|
type RenderContext,
|
||||||
|
} from '@opentui/core'
|
||||||
|
import type { SearchResult } from '../../storage/types'
|
||||||
|
|
||||||
|
/** Callbacks accepted by `createSearchPanel`. */
export interface SearchConfig {
  /**
   * Invoked with the raw input value when the search input reports a change
   * and the value is not blank after trimming.
   */
  onSearch: (query: string) => Promise<void>
}
|
||||||
|
|
||||||
|
/**
 * Builds the bordered "search" panel: a query row (label + `search-input`)
 * followed by a "results:" label and the `results-content` text line.
 *
 * When the input emits `CHANGE` with a non-blank value, `config.onSearch` is
 * awaited with that (untrimmed) value.
 *
 * @param renderer - render context every renderable is created with
 * @param config - search callback
 * @returns the `search-panel` container
 */
export function createSearchPanel(
  renderer: RenderContext,
  config: SearchConfig
): BoxRenderable {
  const panel = new BoxRenderable(renderer, {
    id: 'search-panel',
    border: true,
    title: ' search ',
    flexDirection: 'column',
    padding: 1,
    gap: 1,
    height: 12,
  })

  const row = new BoxRenderable(renderer, {
    id: 'query-row',
    flexDirection: 'row',
    gap: 1,
  })

  const label = new TextRenderable(renderer, {
    id: 'query-label',
    content: 'query:',
    width: 7,
  })
  row.add(label)

  const input = new InputRenderable(renderer, {
    id: 'search-input',
    width: 45,
    placeholder: 'semantic search...',
    cursorColor: '#00FF00',
    focusedBackgroundColor: '#1a1a1a',
  })

  input.on(InputRenderableEvents.CHANGE, async (value: string) => {
    // Blank queries are dropped without calling the handler.
    if (!value.trim()) return
    await config.onSearch(value)
  })

  row.add(input)
  panel.add(row)

  const resultsLabel = new TextRenderable(renderer, {
    id: 'results-label',
    content: 'results:',
  })
  panel.add(resultsLabel)

  const resultsBody = new TextRenderable(renderer, {
    id: 'results-content',
    content: '',
  })
  panel.add(resultsBody)

  return panel
}
|
||||||
|
|
||||||
|
/**
 * Renders up to five search hits into the panel's `results-content` line.
 *
 * Each hit becomes `> "<preview>..." (<score>)`, where the preview is the
 * first 50 characters of the title (or the body when there is no title) with
 * newlines replaced by spaces, and the score is shown with two decimals.
 * An empty result list shows "no results". Nothing happens when the line is
 * not found in the container.
 *
 * @param container - the search panel
 * @param results - hits to display, best first
 */
export function updateSearchResults(
  container: BoxRenderable,
  results: SearchResult[]
): void {
  const target = container.getRenderable('results-content') as TextRenderable
  if (!target) return

  if (results.length === 0) {
    target.content = 'no results'
    return
  }

  const rendered: string[] = []
  for (const hit of results.slice(0, 5)) {
    const source = hit.payload.title || hit.payload.body || ''
    // Cut first, then flatten newlines.
    const preview = source.slice(0, 50).replace(/\n/g, ' ')
    rendered.push(`> "${preview}..." (${hit.score.toFixed(2)})`)
  }

  target.content = rendered.join('\n')
}
|
||||||
|
|
||||||
|
/**
 * Gives focus to the `search-input` element of the search panel, if the
 * container has one.
 *
 * @param container - the search panel
 */
export function focusSearch(container: BoxRenderable): void {
  const input = container.getRenderable('search-input')
  if (input != null) {
    input.focus()
  }
}
|
||||||
51
src/tui/components/stats.ts
Normal file
51
src/tui/components/stats.ts
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
import {
|
||||||
|
BoxRenderable,
|
||||||
|
TextRenderable,
|
||||||
|
type RenderContext,
|
||||||
|
} from '@opentui/core'
|
||||||
|
import type { CollectionStats } from '../../storage/types'
|
||||||
|
|
||||||
|
/**
 * Builds the bordered "stats" panel with three counter lines
 * (`posts-count`, `comments-count`, `subreddits-count`), all starting at 0.
 *
 * @param renderer - render context every renderable is created with
 * @returns the `stats-panel` container
 */
export function createStatsPanel(renderer: RenderContext): BoxRenderable {
  const panel = new BoxRenderable(renderer, {
    id: 'stats-panel',
    border: true,
    title: ' stats ',
    flexDirection: 'column',
    padding: 1,
    width: 20,
    height: 7,
  })

  // Counter lines, top to bottom.
  const counters = [
    { id: 'posts-count', content: 'posts: 0' },
    { id: 'comments-count', content: 'comments: 0' },
    { id: 'subreddits-count', content: 'subreddits: 0' },
  ]
  for (const counter of counters) {
    panel.add(new TextRenderable(renderer, { id: counter.id, content: counter.content }))
  }

  return panel
}
|
||||||
|
|
||||||
|
/**
 * Refreshes the three counter lines of the stats panel.
 *
 * Post and comment counts are formatted with `toLocaleString()`; the
 * subreddit line shows the length of `stats.subreddits`. A line that is not
 * found in the container is skipped.
 *
 * @param container - the stats panel
 * @param stats - current collection statistics
 */
export function updateStats(container: BoxRenderable, stats: CollectionStats): void {
  const find = (id: string) => container.getRenderable(id) as TextRenderable

  const postsLine = find('posts-count')
  const commentsLine = find('comments-count')
  const subredditsLine = find('subreddits-count')

  if (postsLine) postsLine.content = `posts: ${stats.posts.toLocaleString()}`
  if (commentsLine) commentsLine.content = `comments: ${stats.comments.toLocaleString()}`
  if (subredditsLine) subredditsLine.content = `subreddits: ${stats.subreddits.length}`
}
|
||||||
49
src/tui/components/trending.ts
Normal file
49
src/tui/components/trending.ts
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
import {
|
||||||
|
BoxRenderable,
|
||||||
|
TextRenderable,
|
||||||
|
type RenderContext,
|
||||||
|
} from '@opentui/core'
|
||||||
|
|
||||||
|
/** One entry shown in the trending / status panel. */
export interface TrendingTopic {
  /** Text displayed for the entry; the only field `updateTrending` renders. */
  title: string
  /** Number of items associated with the topic. */
  count: number
  /** Average score of the items associated with the topic. */
  avgScore: number
}
|
||||||
|
|
||||||
|
/**
 * Builds the bordered "trending / status" panel with a single
 * `trending-content` text line holding a placeholder message.
 *
 * @param renderer - render context every renderable is created with
 * @returns the `trending-panel` container
 */
export function createTrendingPanel(renderer: RenderContext): BoxRenderable {
  const panel = new BoxRenderable(renderer, {
    id: 'trending-panel',
    border: true,
    title: ' trending / status ',
    flexDirection: 'column',
    padding: 1,
    height: 10,
  })

  const body = new TextRenderable(renderer, {
    id: 'trending-content',
    content: 'scrape data to see trends',
  })
  panel.add(body)

  return panel
}
|
||||||
|
|
||||||
|
/**
 * Renders up to eight topics into the panel's `trending-content` line as a
 * numbered list (`1. <title>`, one per line). An empty list shows
 * "no trends found". Nothing happens when the line is not found.
 *
 * @param container - the trending panel
 * @param topics - entries to display, in display order
 */
export function updateTrending(
  container: BoxRenderable,
  topics: TrendingTopic[]
): void {
  const target = container.getRenderable('trending-content') as TextRenderable
  if (!target) return

  if (topics.length === 0) {
    target.content = 'no trends found'
    return
  }

  const numbered: string[] = []
  topics.slice(0, 8).forEach((topic, index) => {
    numbered.push(`${index + 1}. ${topic.title}`)
  })

  target.content = numbered.join('\n')
}
|
||||||
115
src/tui/components/url-input.ts
Normal file
115
src/tui/components/url-input.ts
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
import {
|
||||||
|
BoxRenderable,
|
||||||
|
TextRenderable,
|
||||||
|
InputRenderable,
|
||||||
|
InputRenderableEvents,
|
||||||
|
type RenderContext,
|
||||||
|
} from '@opentui/core'
|
||||||
|
|
||||||
|
/** Callbacks accepted by `createUrlInput`. */
export interface UrlInputConfig {
  /** Called with the URL field's value each time that input reports a change. */
  onUrlSubmit: (url: string) => void
  /** Called when the pages field changes to a value that parses as an integer > 0. */
  onPagesChange: (pages: number) => void
  /** Called when the posts/page field changes to a value that parses as an integer > 0. */
  onPostsPerPageChange: (count: number) => void
  /**
   * Request to start a scrape.
   * NOTE(review): `createUrlInput` never invokes this callback itself.
   */
  onStartScrape: () => void
}
|
||||||
|
|
||||||
|
/**
 * Builds the bordered "scrape" panel.
 *
 * Layout: a URL row (label + `url-input`) above an options row
 * (pages label + `pages-input`, posts/page label + `posts-input`, and an
 * "[enter to start]" hint).
 *
 * Wiring:
 * - every `CHANGE` of the URL input is forwarded to `config.onUrlSubmit`;
 * - `CHANGE` of either numeric input is parsed with `parseInt(value, 10)` and
 *   forwarded to the matching callback only when the result is a number > 0.
 *
 * @param renderer - render context every renderable is created with
 * @param config - callbacks receiving the edited values
 * @returns the `scrape-panel` container
 */
export function createUrlInput(renderer: RenderContext, config: UrlInputConfig): BoxRenderable {
  // All three inputs share the same cursor and focus colours.
  const makeInput = (id: string, width: number, placeholder: string): InputRenderable =>
    new InputRenderable(renderer, {
      id,
      width,
      placeholder,
      cursorColor: '#00FF00',
      focusedBackgroundColor: '#1a1a1a',
    })

  // Wraps a numeric callback so it only fires for positive integers.
  const whenPositiveInt = (notify: (n: number) => void) => (value: string) => {
    const parsed = parseInt(value, 10)
    if (isNaN(parsed) || parsed <= 0) return
    notify(parsed)
  }

  const panel = new BoxRenderable(renderer, {
    id: 'scrape-panel',
    border: true,
    title: ' scrape ',
    flexDirection: 'column',
    padding: 1,
    gap: 1,
  })

  // --- URL row ---
  const urlRow = new BoxRenderable(renderer, {
    id: 'url-row',
    flexDirection: 'row',
    gap: 1,
  })
  urlRow.add(new TextRenderable(renderer, {
    id: 'url-label',
    content: 'url:',
    width: 5,
  }))

  const urlField = makeInput('url-input', 50, 'https://reddit.com/r/________/best')
  urlField.on(InputRenderableEvents.CHANGE, (value: string) => {
    config.onUrlSubmit(value)
  })
  urlRow.add(urlField)
  panel.add(urlRow)

  // --- options row ---
  const optionsRow = new BoxRenderable(renderer, {
    id: 'options-row',
    flexDirection: 'row',
    gap: 2,
  })

  optionsRow.add(new TextRenderable(renderer, {
    id: 'pages-label',
    content: 'pages:',
  }))
  const pagesField = makeInput('pages-input', 5, '5')
  pagesField.on(
    InputRenderableEvents.CHANGE,
    whenPositiveInt((pages) => config.onPagesChange(pages))
  )
  optionsRow.add(pagesField)

  optionsRow.add(new TextRenderable(renderer, {
    id: 'posts-label',
    content: 'posts/page:',
  }))
  const postsField = makeInput('posts-input', 5, '100')
  postsField.on(
    InputRenderableEvents.CHANGE,
    whenPositiveInt((count) => config.onPostsPerPageChange(count))
  )
  optionsRow.add(postsField)

  optionsRow.add(new TextRenderable(renderer, {
    id: 'start-hint',
    content: '[enter to start]',
  }))

  panel.add(optionsRow)

  return panel
}
|
||||||
|
|
||||||
|
/**
 * Gives focus to the `url-input` element of the scrape panel, if the
 * container has one.
 *
 * @param container - the scrape panel
 */
export function focusUrlInput(container: BoxRenderable): void {
  const field = container.getRenderable('url-input')
  if (field != null) {
    field.focus()
  }
}
|
||||||
66
src/utils/rate-limit.ts
Normal file
66
src/utils/rate-limit.ts
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds,
 * scheduled with `setTimeout`.
 *
 * @param ms - time to wait, in milliseconds
 */
export function delay(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms)
  })
}
|
||||||
|
|
||||||
|
/**
 * Spaces out requests by a minimum delay, with a multiplicative backoff.
 *
 * The effective gap between two `wait()` completions is
 * `minDelay * backoffMultiplier`. `backoff()` doubles the multiplier (capped
 * at 10) and `reset()` returns it to 1.
 */
export class RateLimiter {
  /** Timestamp (ms since epoch) recorded when the last `wait()` finished; 0 before the first. */
  private lastRequest = 0
  /** Base gap between requests, in milliseconds. */
  private minDelay: number
  /** Current backoff factor, between 1 and 10. */
  private backoffMultiplier = 1

  /**
   * @param minDelayMs - base gap between requests in milliseconds (default 5000)
   */
  constructor(minDelayMs: number = 5000) {
    this.minDelay = minDelayMs
  }

  /**
   * Resolves once the effective delay has passed since the previous
   * `wait()` completed, then records the current time.
   */
  async wait(): Promise<void> {
    const sinceLast = Date.now() - this.lastRequest
    const remaining = this.minDelay * this.backoffMultiplier - sinceLast
    if (remaining > 0) {
      await delay(remaining)
    }
    this.lastRequest = Date.now()
  }

  /** Doubles the backoff factor (capped at 10) and logs the new effective delay. */
  backoff(): void {
    const doubled = this.backoffMultiplier * 2
    this.backoffMultiplier = doubled > 10 ? 10 : doubled
    console.log(` rate limited, backing off to ${this.minDelay * this.backoffMultiplier}ms`)
  }

  /** Returns the backoff factor to 1. */
  reset(): void {
    this.backoffMultiplier = 1
  }

  /**
   * Replaces the base gap between requests.
   *
   * @param ms - new base delay in milliseconds
   */
  setDelay(ms: number): void {
    this.minDelay = ms
  }
}
|
||||||
|
|
||||||
|
/**
 * Fetches a URL through a rate limiter and returns the parsed JSON body,
 * retrying when the server answers HTTP 429.
 *
 * Every attempt first awaits `rateLimiter.wait()`. A 429 response triggers
 * `rateLimiter.backoff()` and another attempt, up to `maxRetries` retries.
 * Any other non-OK status fails immediately. A successful response resets
 * the limiter's backoff.
 *
 * @param url - resource to request
 * @param options - options passed straight to `fetch`
 * @param rateLimiter - limiter pacing the requests
 * @param maxRetries - number of retries allowed after the first attempt
 * @returns the response body parsed as JSON (not validated against `T`)
 * @throws Error "Rate limited after max retries" when the last attempt is a 429
 * @throws Error "HTTP error: <status> <statusText>" on any other non-OK status
 */
export async function fetchWithRetry<T>(
  url: string,
  options: RequestInit,
  rateLimiter: RateLimiter,
  maxRetries: number = 3
): Promise<T> {
  let attempt = 0

  while (attempt <= maxRetries) {
    await rateLimiter.wait()
    const res = await fetch(url, options)

    if (res.status === 429) {
      rateLimiter.backoff()
      if (attempt >= maxRetries) {
        throw new Error('Rate limited after max retries')
      }
      attempt += 1
      continue
    }

    if (res.ok) {
      rateLimiter.reset()
      return res.json() as Promise<T>
    }

    throw new Error(`HTTP error: ${res.status} ${res.statusText}`)
  }

  // Only reachable when the loop never runs (negative maxRetries).
  throw new Error('Max retries exceeded')
}
|
||||||
69
src/utils/text.ts
Normal file
69
src/utils/text.ts
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
/**
 * HTML entities recognised by `decodeHtmlEntities`, mapped to the text they
 * stand for. The non-breaking space becomes a regular space and the
 * zero-width space is removed.
 */
const HTML_ENTITIES: Record<string, string> = {
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&#39;': "'",
  '&#x27;': "'",
  '&apos;': "'",
  '&nbsp;': ' ',
  '&#x200B;': '',
}

// Built from the table's keys so the pattern and the table cannot drift
// apart. The keys contain no regex metacharacters.
const HTML_ENTITY_PATTERN = new RegExp(Object.keys(HTML_ENTITIES).join('|'), 'g')

/**
 * Replaces the entities listed in `HTML_ENTITIES` with their plain-text
 * equivalents.
 *
 * Decoding is done in a single pass over the input, so text produced by one
 * replacement is never decoded again: `&amp;lt;` becomes `&lt;`, not `<`.
 * Entities not listed in the table are left untouched.
 *
 * @param text - text possibly containing HTML entities
 * @returns the text with known entities decoded
 */
export function decodeHtmlEntities(text: string): string {
  return text.replace(HTML_ENTITY_PATTERN, (entity) => HTML_ENTITIES[entity] ?? entity)
}
|
||||||
|
|
||||||
|
/**
 * Removes anything that looks like an HTML tag (`<...>`), collapses every run
 * of whitespace into a single space and trims both ends.
 *
 * @param text - text possibly containing markup
 * @returns the text without tags and with normalised whitespace
 */
export function stripHtml(text: string): string {
  const withoutTags = text.replace(/<[^>]+>/g, '')
  const singleSpaced = withoutTags.replace(/\s+/g, ' ')
  return singleSpaced.trim()
}
|
||||||
|
|
||||||
|
/**
 * Normalises a piece of text for downstream processing.
 *
 * Steps, in order: strip HTML tags, decode HTML entities, remove the
 * `[deleted]` and `[removed]` placeholders (case-insensitive), remove
 * http/https URLs, collapse whitespace runs into one space, trim.
 *
 * @param text - raw text
 * @returns the cleaned text (possibly empty)
 */
export function cleanText(text: string): string {
  const decoded = decodeHtmlEntities(stripHtml(text))

  const withoutPlaceholders = decoded
    .replace(/\[deleted\]/gi, '')
    .replace(/\[removed\]/gi, '')

  const withoutLinks = withoutPlaceholders.replace(/https?:\/\/\S+/g, '')

  return withoutLinks.replace(/\s+/g, ' ').trim()
}
|
||||||
|
|
||||||
|
/**
 * Cuts text that is longer than `maxLength` UTF-16 code units and appends
 * `...`. Shorter text is returned unchanged.
 *
 * The cut never lands between the two halves of a surrogate pair: if it
 * would, the whole character is dropped so that no lone surrogate is left in
 * front of the ellipsis.
 *
 * @param text - text to shorten
 * @param maxLength - maximum number of code units kept before the ellipsis
 * @returns the original text, or its prefix followed by `...`
 */
export function truncateText(text: string, maxLength: number = 8000): string {
  if (text.length <= maxLength) return text

  let end = maxLength
  const lastKept = text.charCodeAt(end - 1)
  const firstDropped = text.charCodeAt(end)
  const splitsPair =
    lastKept >= 0xd800 && lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 && firstDropped <= 0xdfff
  if (splitsPair) {
    end -= 1
  }

  return text.slice(0, end) + '...'
}
|
||||||
|
|
||||||
|
/**
 * Assembles the text that represents a post for embedding.
 *
 * The result contains up to three sections separated by blank lines:
 * `Title: …`, `Content: …` and `Discussion: …`. All pieces go through
 * `cleanText`. The discussion section joins, with ` | `, those of the first
 * five comments whose cleaned text is longer than 10 characters. Sections
 * that end up empty are omitted, and the final string is passed through
 * `truncateText`.
 *
 * @param title - post title; skipped when empty
 * @param body - post body; skipped when empty or empty after cleaning
 * @param comments - comment texts, in the order they should be considered
 * @returns the combined, truncated text
 */
export function prepareForEmbedding(
  title: string,
  body: string,
  comments: string[] = []
): string {
  const sections: string[] = []

  if (title) {
    sections.push(`Title: ${cleanText(title)}`)
  }

  const bodyText = body ? cleanText(body) : ''
  if (bodyText) {
    sections.push(`Content: ${bodyText}`)
  }

  if (comments.length > 0) {
    const discussion: string[] = []
    for (const raw of comments.slice(0, 5)) {
      const cleaned = cleanText(raw)
      // Very short comments are skipped.
      if (cleaned.length > 10) {
        discussion.push(cleaned)
      }
    }
    if (discussion.length > 0) {
      sections.push(`Discussion: ${discussion.join(' | ')}`)
    }
  }

  return truncateText(sections.join('\n\n'))
}
|
||||||
29
tsconfig.json
Normal file
29
tsconfig.json
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
// Environment setup & latest features
|
||||||
|
"lib": ["ESNext"],
|
||||||
|
"target": "ESNext",
|
||||||
|
"module": "Preserve",
|
||||||
|
"moduleDetection": "force",
|
||||||
|
"jsx": "react-jsx",
|
||||||
|
"allowJs": true,
|
||||||
|
|
||||||
|
// Bundler mode
|
||||||
|
"moduleResolution": "bundler",
|
||||||
|
"allowImportingTsExtensions": true,
|
||||||
|
"verbatimModuleSyntax": true,
|
||||||
|
"noEmit": true,
|
||||||
|
|
||||||
|
// Best practices
|
||||||
|
"strict": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"noFallthroughCasesInSwitch": true,
|
||||||
|
"noUncheckedIndexedAccess": true,
|
||||||
|
"noImplicitOverride": true,
|
||||||
|
|
||||||
|
// Some stricter flags (disabled by default)
|
||||||
|
"noUnusedLocals": false,
|
||||||
|
"noUnusedParameters": false,
|
||||||
|
"noPropertyAccessFromIndexSignature": false
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user