diff --git a/src/config.ts b/src/config.ts index b5f00d0..5a07d91 100644 --- a/src/config.ts +++ b/src/config.ts @@ -55,6 +55,24 @@ export const TWITTER_MAX_AGE_HOURS = 24; export const TWITTER_MAX_RESULTS = 15; export const TWITTER_SNIPPET_LENGTH = 280; +// --- Weekly Trends --- +export const WEEKLY_TRENDS_SUBREDDITS = [ + 'LocalLLaMA', + 'MachineLearning', + 'ClaudeAI', + 'ChatGPT', + 'artificial', + 'LangChain', + 'AutoGPT', +]; + +export const WEEKLY_TRENDS_SIMILARITY_THRESHOLD = 0.6; +export const WEEKLY_TRENDS_MIN_CLUSTER_SIZE = 3; +export const WEEKLY_TRENDS_MAX_CLUSTERS = 5; +export const WEEKLY_TRENDS_SNIPPET_LENGTH = 200; +export const WEEKLY_TRENDS_REDDIT_DELAY_MS = 2000; +export const WEEKLY_TRENDS_EMBED_DELAY_MS = 100; + // --- Claude Code Releases --- export const GITHUB_API = 'https://api.github.com'; export const NPM_REGISTRY = 'https://registry.npmjs.org'; diff --git a/src/feeds/weekly-trends.ts b/src/feeds/weekly-trends.ts new file mode 100644 index 0000000..4f6416f --- /dev/null +++ b/src/feeds/weekly-trends.ts @@ -0,0 +1,472 @@ +import { + WEEKLY_TRENDS_SUBREDDITS, + WEEKLY_TRENDS_SIMILARITY_THRESHOLD, + WEEKLY_TRENDS_MIN_CLUSTER_SIZE, + WEEKLY_TRENDS_MAX_CLUSTERS, + WEEKLY_TRENDS_SNIPPET_LENGTH, + WEEKLY_TRENDS_REDDIT_DELAY_MS, + WEEKLY_TRENDS_EMBED_DELAY_MS, + REDDIT_USER_AGENT, + REDDIT_POSTS_PER_PAGE, +} from '../config'; +import { log, sleep } from '../utils'; + +// --- Constants --- + +const OLLAMA_URL = process.env.OLLAMA_HOST || 'http://localhost:11434'; +const QDRANT_URL = process.env.QDRANT_URL || 'http://localhost:6333'; +const QDRANT_API_KEY = process.env.QDRANT_API_KEY || ''; +const COLLECTION = 'weekly-trends'; +const VECTOR_DIM = 768; +const EMBED_MODEL = 'nomic-embed-text'; +const SUMMARY_MODEL = 'glm-4.7-flash:latest'; +const FETCH_TIMEOUT_MS = 30_000; +const OLLAMA_TIMEOUT_MS = 120_000; + +// --- Types --- + +interface RedditPost { + id: string; + title: string; + selftext: string; + score: number; + num_comments: number; + 
created_utc: number; + permalink: string; + subreddit: string; +} + +interface EmbeddedPost { + id: string; + vector: number[]; + title: string; + subreddit: string; + score: number; + url: string; + text_snippet: string; +} + +interface ClusterResult { + posts: EmbeddedPost[]; + theme?: string; + description?: string; +} + +// --- Reddit Scraping --- + +async function fetchTopPosts( + subreddit: string +): Promise<RedditPost[]> { + const url = + `https://www.reddit.com/r/${subreddit}/top.json` + + `?sort=top&t=week&limit=${REDDIT_POSTS_PER_PAGE}&raw_json=1`; + + const res = await fetch(url, { + headers: { + 'User-Agent': REDDIT_USER_AGENT, + Accept: 'application/json', + }, + signal: AbortSignal.timeout(FETCH_TIMEOUT_MS), + }); + + if (!res.ok) { + log(` [warn] r/${subreddit}/top: ${res.status}`); + return []; + } + + const json = await res.json(); + return (json.data?.children ?? []) + .filter((c: any) => c.kind === 't3') + .map((c: any) => c.data as RedditPost); +} + +async function scrapeAll(): Promise<RedditPost[]> { + const allPosts: RedditPost[] = []; + + for (const sub of WEEKLY_TRENDS_SUBREDDITS) { + log(` r/${sub}...`); + try { + const posts = await fetchTopPosts(sub); + allPosts.push(...posts); + log(` ${posts.length} posts`); + } catch (e) { + log(` [error] r/${sub}: ${e}`); + } + await sleep(WEEKLY_TRENDS_REDDIT_DELAY_MS); + } + + return allPosts; +} + +// --- Ollama Embeddings --- + +async function embedText(text: string): Promise<number[]> { + const res = await fetch(`${OLLAMA_URL}/api/embeddings`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ model: EMBED_MODEL, prompt: text }), + signal: AbortSignal.timeout(OLLAMA_TIMEOUT_MS), + }); + + if (!res.ok) { + throw new Error(`Ollama embed failed: ${res.status}`); + } + + const json = await res.json(); + return json.embedding as number[]; +} + +async function embedPosts( + posts: RedditPost[] +): Promise<EmbeddedPost[]> { + const results: EmbeddedPost[] = []; + + for (let i = 0; i < posts.length; i++) { + 
const post = posts[i]; + const text = [post.title, post.selftext] + .filter(Boolean) + .join('\n\n') + .slice(0, 2000); + + try { + const vector = await embedText(text); + results.push({ + id: post.id, + vector, + title: post.title, + subreddit: post.subreddit, + score: post.score, + url: `https://reddit.com${post.permalink}`, + text_snippet: snippet(post.selftext), + }); + } catch (e) { + log(` [embed error] ${post.id}: ${e}`); + } + + if (i % 50 === 0 && i > 0) { + log(` embedded ${i}/${posts.length}`); + } + await sleep(WEEKLY_TRENDS_EMBED_DELAY_MS); + } + + return results; +} + +// --- Qdrant Storage --- + +async function qdrantRequest( + path: string, + method = 'GET', + body?: unknown +): Promise<unknown> { + const headers: Record<string, string> = { 'Content-Type': 'application/json' }; + if (QDRANT_API_KEY) headers['api-key'] = QDRANT_API_KEY; + const opts: RequestInit = { + method, + headers, + signal: AbortSignal.timeout(FETCH_TIMEOUT_MS), + }; + if (body) opts.body = JSON.stringify(body); + + const res = await fetch(`${QDRANT_URL}${path}`, opts); + if (!res.ok && res.status !== 404) { + const text = await res.text(); + throw new Error(`Qdrant ${method} ${path}: ${res.status} ${text}`); + } + return res.json(); +} + +async function createCollection(): Promise<void> { + // Delete if exists (fresh each run) + await qdrantRequest( + `/collections/${COLLECTION}`, + 'DELETE' + ).catch(() => {}); + + await qdrantRequest(`/collections/${COLLECTION}`, 'PUT', { + vectors: { size: VECTOR_DIM, distance: 'Cosine' }, + }); +} + +async function storePoints(posts: EmbeddedPost[]): Promise<void> { + const BATCH = 100; + for (let i = 0; i < posts.length; i += BATCH) { + const batch = posts.slice(i, i + BATCH); + await qdrantRequest( + `/collections/${COLLECTION}/points`, + 'PUT', + { + points: batch.map((p, idx) => ({ + id: i + idx, + vector: p.vector, + payload: { + title: p.title, + subreddit: p.subreddit, + score: p.score, + url: p.url, + text_snippet: p.text_snippet, + }, + })), + } + ); + } +} + +async 
function deleteCollection(): Promise<void> { + await qdrantRequest( + `/collections/${COLLECTION}`, + 'DELETE' + ).catch(() => {}); +} + +// --- Clustering --- + +function cosineSimilarity(a: number[], b: number[]): number { + let dot = 0; + let normA = 0; + let normB = 0; + + for (let i = 0; i < a.length; i++) { + const av = a[i] ?? 0; + const bv = b[i] ?? 0; + dot += av * bv; + normA += av * av; + normB += bv * bv; + } + + if (normA === 0 || normB === 0) return 0; + return dot / (Math.sqrt(normA) * Math.sqrt(normB)); +} + +function clusterPosts(posts: EmbeddedPost[]): EmbeddedPost[][] { + const n = posts.length; + const threshold = WEEKLY_TRENDS_SIMILARITY_THRESHOLD; + const minSize = WEEKLY_TRENDS_MIN_CLUSTER_SIZE; + + // Build adjacency via cosine similarity + const neighbors: number[][] = Array.from({ length: n }, () => []); + for (let i = 0; i < n; i++) { + for (let j = i + 1; j < n; j++) { + const sim = cosineSimilarity(posts[i].vector, posts[j].vector); + if (sim >= threshold) { + neighbors[i].push(j); + neighbors[j].push(i); + } + } + } + + // Connected components with BFS + const visited = new Set<number>(); + const clusters: EmbeddedPost[][] = []; + + for (let i = 0; i < n; i++) { + if (visited.has(i)) continue; + if (neighbors[i].length === 0) continue; + + const component: number[] = []; + const queue = [i]; + visited.add(i); + + while (queue.length > 0) { + const curr = queue.shift()!; + component.push(curr); + + for (const nb of neighbors[curr]) { + if (!visited.has(nb)) { + visited.add(nb); + queue.push(nb); + } + } + } + + if (component.length >= minSize) { + clusters.push(component.map((idx) => posts[idx])); + } + } + + // Sort by size descending, take top N + clusters.sort((a, b) => b.length - a.length); + return clusters.slice(0, WEEKLY_TRENDS_MAX_CLUSTERS); +} + +// --- Summarization --- + +async function summarizeCluster( + posts: EmbeddedPost[] +): Promise<{ theme: string; description: string }> { + const titles = posts + .slice(0, 15) + .map((p) => `- 
[r/${p.subreddit}] ${p.title}`) + .join('\n'); + + const prompt = + 'Summarize the common theme/problem discussed in ' + + 'these posts:\n\n' + + titles + + '\n\nRespond with:\n' + + '1) A short theme title (5-10 words)\n' + + '2) A 1-2 sentence description of what people are ' + + 'discussing/struggling with'; + + const res = await fetch(`${OLLAMA_URL}/api/generate`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + model: SUMMARY_MODEL, + prompt, + stream: false, + }), + signal: AbortSignal.timeout(OLLAMA_TIMEOUT_MS), + }); + + if (!res.ok) { + log(` [warn] summarize failed: ${res.status}`); + return { theme: 'Unknown Theme', description: '' }; + } + + const json = await res.json(); + const text = (json.response ?? '').trim(); + + // Parse response - look for numbered items or just split + const lines = text.split('\n').filter(Boolean); + const theme = lines[0] + ?.replace(/^1[.)]\s*/, '') + ?.replace(/\*+/g, '') + ?.trim() ?? 'Emerging Topic'; + const description = lines + .slice(1) + .join(' ') + .replace(/^2[.)]\s*/, '') + .replace(/\*+/g, '') + .trim(); + + return { theme, description }; +} + +// --- Formatting --- + +function snippet(text: string): string { + if (!text) return ''; + const clean = text.replace(/\n+/g, ' ').trim(); + if (clean.length <= WEEKLY_TRENDS_SNIPPET_LENGTH) return clean; + return clean.slice(0, WEEKLY_TRENDS_SNIPPET_LENGTH).trimEnd() + '…'; +} + +function formatDateRange(): string { + const end = new Date(); + const start = new Date(); + start.setDate(start.getDate() - 7); + + const fmt = (d: Date) => + d.toLocaleDateString('en-US', { month: 'short', day: 'numeric' }); + return `${fmt(start)} – ${fmt(end)}, ${end.getFullYear()}`; +} + +function formatOutput( + clusters: ClusterResult[], + totalPosts: number +): string { + if (clusters.length === 0) { + return '*No significant trends detected this week.*'; + } + + const lines: string[] = []; + lines.push( + `## 📊 Weekly AI Trend 
Analysis — ${formatDateRange()}\n` + ); + + for (let i = 0; i < clusters.length; i++) { + const cluster = clusters[i]; + const avgScore = Math.round( + cluster.posts.reduce((s, p) => s + p.score, 0) / + cluster.posts.length + ); + + const topPosts = [...cluster.posts] + .sort((a, b) => b.score - a.score) + .slice(0, 3); + + lines.push(`### ${i + 1}. ${cluster.theme ?? 'Emerging Topic'}`); + if (cluster.description) { + lines.push(`> ${cluster.description}\n`); + } + lines.push( + `📝 ${cluster.posts.length} posts | ` + + `⬆ ${avgScore} avg score\n` + ); + + for (const post of topPosts) { + lines.push( + `- [${post.title.slice(0, 80)}](${post.url}) ` + + `(r/${post.subreddit}, ⬆${post.score})` + ); + } + lines.push(''); + } + + lines.push('---'); + lines.push( + `*${totalPosts} posts analyzed across ` + + `${WEEKLY_TRENDS_SUBREDDITS.length} subreddits · ` + + `${clusters.length} trend clusters found*` + ); + + return lines.join('\n'); +} + +// --- Entry --- + +export async function run(): Promise<string> { + log('[weekly-trends] Starting weekly trend analysis...'); + + // 1. Scrape + log('[weekly-trends] Scraping top posts...'); + const posts = await scrapeAll(); + log(`[weekly-trends] ${posts.length} total posts scraped`); + + if (posts.length === 0) { + return '*No posts found for trend analysis.*'; + } + + // 2. Embed + log('[weekly-trends] Embedding posts...'); + const embedded = await embedPosts(posts); + log(`[weekly-trends] ${embedded.length} posts embedded`); + + if (embedded.length < WEEKLY_TRENDS_MIN_CLUSTER_SIZE) { + return '*Not enough posts to analyze trends.*'; + } + + // 3. Store in Qdrant + log('[weekly-trends] Storing in Qdrant...'); + await createCollection(); + await storePoints(embedded); + + // 4. Cluster + log('[weekly-trends] Clustering...'); + const clusters = clusterPosts(embedded); + log(`[weekly-trends] ${clusters.length} clusters found`); + + // 5. 
Summarize + log('[weekly-trends] Summarizing clusters...'); + const results: ClusterResult[] = []; + for (const cluster of clusters) { + const { theme, description } = await summarizeCluster(cluster); + results.push({ posts: cluster, theme, description }); + await sleep(WEEKLY_TRENDS_EMBED_DELAY_MS); + } + + // 6. Format + const output = formatOutput(results, posts.length); + + // 7. Cleanup + log('[weekly-trends] Cleaning up Qdrant collection...'); + await deleteCollection(); + + log('[weekly-trends] Done.'); + return output; +} + +if (import.meta.main) { + console.log(await run()); +} diff --git a/src/index.ts b/src/index.ts index ee376ae..95ee401 100644 --- a/src/index.ts +++ b/src/index.ts @@ -3,6 +3,7 @@ import { run as trending } from './feeds/github-trending'; import { run as newRepos } from './feeds/new-ai-repos'; import { run as claudeReleases } from './feeds/claude-code-releases'; import { run as twitter } from './feeds/twitter-ai'; +import { run as weeklyTrends } from './feeds/weekly-trends'; import { log } from './utils'; const COMMANDS: Record Promise> = { @@ -11,6 +12,7 @@ const COMMANDS: Record Promise> = { 'new-repos': newRepos, 'claude-releases': claudeReleases, twitter, + 'weekly-trends': weeklyTrends, }; async function main() { @@ -18,8 +20,8 @@ async function main() { if (!cmd || cmd === '--help' || cmd === '-h') { console.error( - 'Usage: bun run feed ' + - '' + 'Usage: bun run feed ' ); process.exit(1); } diff --git a/src/post.ts b/src/post.ts new file mode 100644 index 0000000..3804e98 --- /dev/null +++ b/src/post.ts @@ -0,0 +1,99 @@ +/** + * Runs a feed and posts output directly to Discord via webhook. + * Usage: bun run src/post.ts + * Zero LLM tokens - just fetch + format + post. 
+ */ + +import { run as reddit } from './feeds/reddit-digest'; +import { run as trending } from './feeds/github-trending'; +import { run as newRepos } from './feeds/new-ai-repos'; +import { run as claudeReleases } from './feeds/claude-code-releases'; +import { run as twitter } from './feeds/twitter-ai'; +import { run as weeklyTrends } from './feeds/weekly-trends'; + +const FEEDS: Record Promise; webhookEnv: string }> = { + reddit: { run: reddit, webhookEnv: 'WEBHOOK_REDDIT' }, + trending: { run: trending, webhookEnv: 'WEBHOOK_TRENDING' }, + 'new-repos': { run: newRepos, webhookEnv: 'WEBHOOK_NEW_REPOS' }, + 'claude-releases': { run: claudeReleases, webhookEnv: 'WEBHOOK_CLAUDE_RELEASES' }, + twitter: { run: twitter, webhookEnv: 'WEBHOOK_TWITTER' }, + 'weekly-trends': { run: weeklyTrends, webhookEnv: 'WEBHOOK_WEEKLY_TRENDS' }, +}; + +async function postToWebhook(webhookUrl: string, content: string) { + // Discord max message length is 2000 + const chunks: string[] = []; + const lines = content.split('\n'); + let current = ''; + + for (const line of lines) { + if ((current + '\n' + line).length > 1900) { + if (current) chunks.push(current); + current = line; + } else { + current = current ? 
current + '\n' + line : line; + } + } + if (current) chunks.push(current); + + for (const chunk of chunks) { + const res = await fetch(webhookUrl, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ content: chunk }), + }); + if (!res.ok) { + console.error(`Webhook post failed: ${res.status} ${await res.text()}`); + } + // Rate limit safety + await new Promise(r => setTimeout(r, 500)); + } +} + +async function runFeed(name: string) { + const feed = FEEDS[name]; + if (!feed) { + console.error(`Unknown feed: ${name}`); + return; + } + + const webhookUrl = process.env[feed.webhookEnv]; + if (!webhookUrl) { + console.error(`Missing env: ${feed.webhookEnv}`); + return; + } + + console.error(`Running ${name}...`); + const output = await feed.run(); + + if (!output) { + console.error(`${name}: no output (nothing new)`); + return; + } + + await postToWebhook(webhookUrl, output); + console.error(`${name}: posted to Discord`); +} + +async function main() { + const cmd = process.argv[2]; + + if (!cmd || cmd === '--help') { + console.error('Usage: bun run src/post.ts '); + process.exit(1); + } + + if (cmd === 'all') { + for (const name of Object.keys(FEEDS)) { + await runFeed(name); + } + return; + } + + await runFeed(cmd); +} + +main().catch(e => { + console.error(e); + process.exit(1); +});