feat(weekly-trends): add qdrant-powered trend analysis

pipeline: scrape → embed (nomic) → cluster → summarize (glm-4.7-flash)
posts weekly clustered themes to #weekly-trends channel.
uses remote qdrant at vectors.biohazardvfx.com.
also adds discord webhook posting support (src/post.ts).
This commit is contained in:
Nicholai Vogel 2026-01-24 01:57:18 -07:00
parent d13032d515
commit d048c1fb31
4 changed files with 593 additions and 2 deletions

View File

@ -55,6 +55,24 @@ export const TWITTER_MAX_AGE_HOURS = 24;
export const TWITTER_MAX_RESULTS = 15;
export const TWITTER_SNIPPET_LENGTH = 280;
// --- Weekly Trends ---
// Subreddits scraped by the weekly trend analysis (top posts of the past week).
export const WEEKLY_TRENDS_SUBREDDITS = [
'LocalLLaMA',
'MachineLearning',
'ClaudeAI',
'ChatGPT',
'artificial',
'LangChain',
'AutoGPT',
];
// Minimum cosine similarity for two posts to be linked into the same cluster.
export const WEEKLY_TRENDS_SIMILARITY_THRESHOLD = 0.6;
// Clusters (connected components) smaller than this are discarded as noise.
export const WEEKLY_TRENDS_MIN_CLUSTER_SIZE = 3;
// At most this many clusters (largest first) are summarized in the report.
export const WEEKLY_TRENDS_MAX_CLUSTERS = 5;
// Maximum characters kept when truncating a post's selftext snippet.
export const WEEKLY_TRENDS_SNIPPET_LENGTH = 200;
// Pause between Reddit requests (rate-limit courtesy).
export const WEEKLY_TRENDS_REDDIT_DELAY_MS = 2000;
// Pause between successive embedding calls.
export const WEEKLY_TRENDS_EMBED_DELAY_MS = 100;
// --- Claude Code Releases ---
export const GITHUB_API = 'https://api.github.com';
export const NPM_REGISTRY = 'https://registry.npmjs.org';

472
src/feeds/weekly-trends.ts Normal file
View File

@ -0,0 +1,472 @@
import {
WEEKLY_TRENDS_SUBREDDITS,
WEEKLY_TRENDS_SIMILARITY_THRESHOLD,
WEEKLY_TRENDS_MIN_CLUSTER_SIZE,
WEEKLY_TRENDS_MAX_CLUSTERS,
WEEKLY_TRENDS_SNIPPET_LENGTH,
WEEKLY_TRENDS_REDDIT_DELAY_MS,
WEEKLY_TRENDS_EMBED_DELAY_MS,
REDDIT_USER_AGENT,
REDDIT_POSTS_PER_PAGE,
} from '../config';
import { log, sleep } from '../utils';
// --- Constants ---
// Ollama server used for both embeddings and summarization (env override or local default).
const OLLAMA_URL = process.env.OLLAMA_HOST || 'http://localhost:11434';
// Qdrant vector store; API key is optional (empty string → no auth header sent).
const QDRANT_URL = process.env.QDRANT_URL || 'http://localhost:6333';
const QDRANT_API_KEY = process.env.QDRANT_API_KEY || '';
// Scratch collection: recreated at the start of each run and deleted at the end.
const COLLECTION = 'weekly-trends';
// Vector dimension declared to Qdrant — must match EMBED_MODEL's output size.
const VECTOR_DIM = 768;
const EMBED_MODEL = 'nomic-embed-text';
const SUMMARY_MODEL = 'glm-4.7-flash:latest';
// Timeouts: plain HTTP fetches vs. (slower) Ollama inference calls.
const FETCH_TIMEOUT_MS = 30_000;
const OLLAMA_TIMEOUT_MS = 120_000;
// --- Types ---
// Subset of the fields of a Reddit t3 (link) listing item that this module reads.
interface RedditPost {
id: string;
title: string;
selftext: string;
score: number;
num_comments: number;
created_utc: number;
permalink: string;
subreddit: string;
}
// A post paired with its embedding vector and display-ready metadata
// (this is also the payload shape stored in Qdrant).
interface EmbeddedPost {
id: string;
vector: number[];
title: string;
subreddit: string;
score: number;
url: string;
text_snippet: string;
}
// A group of similar posts; theme/description are filled in by the LLM pass.
interface ClusterResult {
posts: EmbeddedPost[];
theme?: string;
description?: string;
}
// --- Reddit Scraping ---
/**
 * Fetches this week's top posts for one subreddit via the public JSON API.
 * Returns an empty array (logging a warning) on any non-OK response.
 */
async function fetchTopPosts(subreddit: string): Promise<RedditPost[]> {
  const query = `?sort=top&t=week&limit=${REDDIT_POSTS_PER_PAGE}&raw_json=1`;
  const url = `https://www.reddit.com/r/${subreddit}/top.json` + query;
  const res = await fetch(url, {
    headers: { 'User-Agent': REDDIT_USER_AGENT, Accept: 'application/json' },
    signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
  });
  if (!res.ok) {
    log(` [warn] r/${subreddit}/top: ${res.status}`);
    return [];
  }
  const body = await res.json();
  const children = body.data?.children ?? [];
  // Only t3 ("link") items are actual posts.
  return children
    .filter((child: any) => child.kind === 't3')
    .map((child: any) => child.data as RedditPost);
}
/**
 * Scrapes every configured subreddit sequentially, pausing between requests.
 * A failure in one subreddit is logged and skipped, never fatal.
 */
async function scrapeAll(): Promise<RedditPost[]> {
  const collected: RedditPost[] = [];
  for (const sub of WEEKLY_TRENDS_SUBREDDITS) {
    log(` r/${sub}...`);
    try {
      const batch = await fetchTopPosts(sub);
      collected.push(...batch);
      log(` ${batch.length} posts`);
    } catch (err) {
      log(` [error] r/${sub}: ${err}`);
    }
    await sleep(WEEKLY_TRENDS_REDDIT_DELAY_MS);
  }
  return collected;
}
// --- Ollama Embeddings ---
/**
 * Requests an embedding vector for `text` from Ollama's embeddings endpoint.
 * @throws Error when Ollama responds with a non-OK status.
 */
async function embedText(text: string): Promise<number[]> {
  const payload = JSON.stringify({ model: EMBED_MODEL, prompt: text });
  const res = await fetch(`${OLLAMA_URL}/api/embeddings`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
    signal: AbortSignal.timeout(OLLAMA_TIMEOUT_MS),
  });
  if (!res.ok) throw new Error(`Ollama embed failed: ${res.status}`);
  const data = await res.json();
  return data.embedding as number[];
}
/**
 * Embeds each post's title + selftext (joined, truncated to 2000 chars).
 * Posts whose embedding call fails are logged and dropped from the result.
 */
async function embedPosts(posts: RedditPost[]): Promise<EmbeddedPost[]> {
  const embedded: EmbeddedPost[] = [];
  let index = 0;
  for (const post of posts) {
    const combined = [post.title, post.selftext].filter(Boolean).join('\n\n');
    try {
      const vector = await embedText(combined.slice(0, 2000));
      embedded.push({
        id: post.id,
        vector,
        title: post.title,
        subreddit: post.subreddit,
        score: post.score,
        url: `https://reddit.com${post.permalink}`,
        text_snippet: snippet(post.selftext),
      });
    } catch (err) {
      log(` [embed error] ${post.id}: ${err}`);
    }
    // Periodic progress report every 50 posts.
    if (index > 0 && index % 50 === 0) {
      log(` embedded ${index}/${posts.length}`);
    }
    index++;
    await sleep(WEEKLY_TRENDS_EMBED_DELAY_MS);
  }
  return embedded;
}
// --- Qdrant Storage ---
/**
 * Thin wrapper over Qdrant's REST API.
 * 404 responses are tolerated (callers may DELETE collections that do not
 * exist); every other non-OK status throws with the response body attached.
 */
async function qdrantRequest(
  path: string,
  method = 'GET',
  body?: unknown
): Promise<any> {
  const headers: Record<string, string> = { 'Content-Type': 'application/json' };
  if (QDRANT_API_KEY) headers['api-key'] = QDRANT_API_KEY;
  const init: RequestInit = {
    method,
    headers,
    signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
    ...(body ? { body: JSON.stringify(body) } : {}),
  };
  const res = await fetch(`${QDRANT_URL}${path}`, init);
  if (!res.ok && res.status !== 404) {
    throw new Error(`Qdrant ${method} ${path}: ${res.status} ${await res.text()}`);
  }
  return res.json();
}
/** Drops any leftover collection, then creates a fresh one for this run. */
async function createCollection(): Promise<void> {
  // Best-effort delete so each run starts from a clean collection.
  await qdrantRequest(`/collections/${COLLECTION}`, 'DELETE').catch(() => {});
  await qdrantRequest(`/collections/${COLLECTION}`, 'PUT', {
    vectors: { size: VECTOR_DIM, distance: 'Cosine' },
  });
}
/**
 * Upserts embedded posts into Qdrant in batches of 100.
 * Point ids are the posts' global positions in the input array.
 */
async function storePoints(posts: EmbeddedPost[]): Promise<void> {
  const BATCH_SIZE = 100;
  for (let offset = 0; offset < posts.length; offset += BATCH_SIZE) {
    const points = posts.slice(offset, offset + BATCH_SIZE).map((p, j) => ({
      id: offset + j,
      vector: p.vector,
      payload: {
        title: p.title,
        subreddit: p.subreddit,
        score: p.score,
        url: p.url,
        text_snippet: p.text_snippet,
      },
    }));
    await qdrantRequest(`/collections/${COLLECTION}/points`, 'PUT', { points });
  }
}
/** Best-effort drop of the scratch collection; any error is swallowed. */
async function deleteCollection(): Promise<void> {
  await qdrantRequest(`/collections/${COLLECTION}`, 'DELETE').catch(() => {});
}
// --- Clustering ---
/**
 * Cosine similarity of two equal-length vectors, in [-1, 1].
 * Returns 0 when either vector has zero magnitude (avoids division by zero).
 */
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  for (let i = 0; i < a.length; i++) {
    const x = a[i] ?? 0;
    const y = b[i] ?? 0;
    dot += x * y;
    sumSqA += x * x;
    sumSqB += y * y;
  }
  if (sumSqA === 0 || sumSqB === 0) return 0;
  return dot / (Math.sqrt(sumSqA) * Math.sqrt(sumSqB));
}
/**
 * Single-link clustering: connect every pair of posts whose cosine similarity
 * meets the threshold, take connected components of that graph (BFS), drop
 * components below the minimum size, and return the largest few.
 */
function clusterPosts(posts: EmbeddedPost[]): EmbeddedPost[][] {
  const count = posts.length;
  // Pairwise similarity → adjacency lists.
  const adjacency: number[][] = Array.from({ length: count }, () => []);
  for (let a = 0; a < count; a++) {
    for (let b = a + 1; b < count; b++) {
      const sim = cosineSimilarity(posts[a].vector, posts[b].vector);
      if (sim >= WEEKLY_TRENDS_SIMILARITY_THRESHOLD) {
        adjacency[a].push(b);
        adjacency[b].push(a);
      }
    }
  }
  // BFS from each unvisited, non-isolated node to collect its component.
  const seen = new Set<number>();
  const components: EmbeddedPost[][] = [];
  for (let start = 0; start < count; start++) {
    if (seen.has(start) || adjacency[start].length === 0) continue;
    const members: number[] = [];
    const frontier = [start];
    seen.add(start);
    while (frontier.length > 0) {
      const node = frontier.shift()!;
      members.push(node);
      for (const next of adjacency[node]) {
        if (!seen.has(next)) {
          seen.add(next);
          frontier.push(next);
        }
      }
    }
    if (members.length >= WEEKLY_TRENDS_MIN_CLUSTER_SIZE) {
      components.push(members.map((m) => posts[m]));
    }
  }
  // Largest clusters first, capped at the configured maximum.
  components.sort((x, y) => y.length - x.length);
  return components.slice(0, WEEKLY_TRENDS_MAX_CLUSTERS);
}
// --- Summarization ---
/**
 * Asks the local LLM for a short theme title plus a 1-2 sentence description
 * of a cluster, based on up to 15 post titles. Falls back to placeholder
 * values when the request fails or the response can't be parsed.
 */
async function summarizeCluster(
  posts: EmbeddedPost[]
): Promise<{ theme: string; description: string }> {
  const titleList = posts
    .slice(0, 15)
    .map((p) => `- [r/${p.subreddit}] ${p.title}`)
    .join('\n');
  const prompt =
    `Summarize the common theme/problem discussed in these posts:\n\n` +
    `${titleList}\n\nRespond with:\n` +
    `1) A short theme title (5-10 words)\n` +
    `2) A 1-2 sentence description of what people are discussing/struggling with`;
  const res = await fetch(`${OLLAMA_URL}/api/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: SUMMARY_MODEL, prompt, stream: false }),
    signal: AbortSignal.timeout(OLLAMA_TIMEOUT_MS),
  });
  if (!res.ok) {
    log(` [warn] summarize failed: ${res.status}`);
    return { theme: 'Unknown Theme', description: '' };
  }
  const data = await res.json();
  const raw: string = (data.response ?? '').trim();
  const nonEmpty = raw.split('\n').filter(Boolean);
  // First line is the theme; strip "1)"-style numbering and markdown asterisks.
  const theme =
    nonEmpty[0]?.replace(/^1[.)]\s*/, '')?.replace(/\*+/g, '')?.trim() ??
    'Emerging Topic';
  // Remaining lines form the description; strip "2)" numbering and asterisks.
  const description = nonEmpty
    .slice(1)
    .join(' ')
    .replace(/^2[.)]\s*/, '')
    .replace(/\*+/g, '')
    .trim();
  return { theme, description };
}
// --- Formatting ---
/** Collapses newlines and truncates selftext to the configured length (with ellipsis). */
function snippet(text: string): string {
  if (!text) return '';
  const flattened = text.replace(/\n+/g, ' ').trim();
  return flattened.length <= WEEKLY_TRENDS_SNIPPET_LENGTH
    ? flattened
    : flattened.slice(0, WEEKLY_TRENDS_SNIPPET_LENGTH).trimEnd() + '…';
}
/**
 * Human-readable range for the past 7 days, e.g. "Jan 17 – Jan 24, 2026".
 * Fixes: the original emitted no separator between the two dates
 * ("Jan 17 Jan 24, 2026") and always showed only the end year, which was
 * ambiguous for windows spanning a year boundary.
 */
function formatDateRange(): string {
  const end = new Date();
  const start = new Date();
  start.setDate(start.getDate() - 7);
  const fmt = (d: Date) =>
    d.toLocaleDateString('en-US', { month: 'short', day: 'numeric' });
  if (start.getFullYear() !== end.getFullYear()) {
    // Window crosses a year boundary: give each date its own year.
    return `${fmt(start)}, ${start.getFullYear()} – ${fmt(end)}, ${end.getFullYear()}`;
  }
  return `${fmt(start)} – ${fmt(end)}, ${end.getFullYear()}`;
}
/**
 * Renders clusters as a Markdown report: a header with the date range, one
 * section per cluster (theme, optional description, post count / average
 * score, top-3 posts by score), then a footer with overall stats.
 */
function formatOutput(clusters: ClusterResult[], totalPosts: number): string {
  if (clusters.length === 0) {
    return '*No significant trends detected this week.*';
  }
  const out: string[] = [
    `## 📊 Weekly AI Trend Analysis — ${formatDateRange()}\n`,
  ];
  clusters.forEach((cluster, idx) => {
    const members = cluster.posts;
    const avgScore = Math.round(
      members.reduce((sum, p) => sum + p.score, 0) / members.length
    );
    out.push(`### ${idx + 1}. ${cluster.theme ?? 'Emerging Topic'}`);
    if (cluster.description) {
      out.push(`> ${cluster.description}\n`);
    }
    out.push(`📝 ${members.length} posts | ${avgScore} avg score\n`);
    // Top three posts by score; copy first so the cluster isn't mutated.
    const top = [...members].sort((a, b) => b.score - a.score).slice(0, 3);
    for (const p of top) {
      out.push(
        `- [${p.title.slice(0, 80)}](${p.url}) (r/${p.subreddit}, ⬆${p.score})`
      );
    }
    out.push('');
  });
  out.push('---');
  out.push(
    `*${totalPosts} posts analyzed across ` +
    `${WEEKLY_TRENDS_SUBREDDITS.length} subreddits · ` +
    `${clusters.length} trend clusters found*`
  );
  return out.join('\n');
}
// --- Entry ---
/**
 * Full pipeline: scrape → embed → store in Qdrant → cluster → summarize →
 * format. The Qdrant collection is temporary: created for the run and
 * deleted before returning.
 * @returns Markdown report, or a placeholder message when there is no data.
 */
export async function run(): Promise<string> {
  log('[weekly-trends] Starting weekly trend analysis...');

  log('[weekly-trends] Scraping top posts...');
  const posts = await scrapeAll();
  log(`[weekly-trends] ${posts.length} total posts scraped`);
  if (posts.length === 0) {
    return '*No posts found for trend analysis.*';
  }

  log('[weekly-trends] Embedding posts...');
  const embedded = await embedPosts(posts);
  log(`[weekly-trends] ${embedded.length} posts embedded`);
  if (embedded.length < WEEKLY_TRENDS_MIN_CLUSTER_SIZE) {
    return '*Not enough posts to analyze trends.*';
  }

  log('[weekly-trends] Storing in Qdrant...');
  await createCollection();
  await storePoints(embedded);

  log('[weekly-trends] Clustering...');
  const clusters = clusterPosts(embedded);
  log(`[weekly-trends] ${clusters.length} clusters found`);

  log('[weekly-trends] Summarizing clusters...');
  const summarized: ClusterResult[] = [];
  for (const cluster of clusters) {
    const summary = await summarizeCluster(cluster);
    summarized.push({ posts: cluster, ...summary });
    await sleep(WEEKLY_TRENDS_EMBED_DELAY_MS);
  }

  const report = formatOutput(summarized, posts.length);

  log('[weekly-trends] Cleaning up Qdrant collection...');
  await deleteCollection();
  log('[weekly-trends] Done.');
  return report;
}
// Allow running this feed standalone (import.meta.main is set when this file
// is the entry point, e.g. `bun run src/feeds/weekly-trends.ts`).
if (import.meta.main) {
console.log(await run());
}

View File

@ -3,6 +3,7 @@ import { run as trending } from './feeds/github-trending';
import { run as newRepos } from './feeds/new-ai-repos';
import { run as claudeReleases } from './feeds/claude-code-releases';
import { run as twitter } from './feeds/twitter-ai';
import { run as weeklyTrends } from './feeds/weekly-trends';
import { log } from './utils';
const COMMANDS: Record<string, () => Promise<string>> = {
@ -11,6 +12,7 @@ const COMMANDS: Record<string, () => Promise<string>> = {
'new-repos': newRepos,
'claude-releases': claudeReleases,
twitter,
'weekly-trends': weeklyTrends,
};
async function main() {
@ -18,8 +20,8 @@ async function main() {
if (!cmd || cmd === '--help' || cmd === '-h') {
console.error(
'Usage: bun run feed ' +
'<reddit|trending|new-repos|claude-releases|twitter|all>'
'Usage: bun run feed <reddit|trending|' +
'new-repos|claude-releases|twitter|weekly-trends|all>'
);
process.exit(1);
}

99
src/post.ts Normal file
View File

@ -0,0 +1,99 @@
/**
* Runs a feed and posts output directly to Discord via webhook.
* Usage: bun run src/post.ts <reddit|trending|new-repos|claude-releases|twitter|weekly-trends|all>
* Zero LLM tokens - just fetch + format + post.
*/
import { run as reddit } from './feeds/reddit-digest';
import { run as trending } from './feeds/github-trending';
import { run as newRepos } from './feeds/new-ai-repos';
import { run as claudeReleases } from './feeds/claude-code-releases';
import { run as twitter } from './feeds/twitter-ai';
import { run as weeklyTrends } from './feeds/weekly-trends';
// Feed registry: command name → feed runner + the env var that holds its
// Discord webhook URL (posting is skipped with an error if the var is unset).
const FEEDS: Record<string, { run: () => Promise<string>; webhookEnv: string }> = {
reddit: { run: reddit, webhookEnv: 'WEBHOOK_REDDIT' },
trending: { run: trending, webhookEnv: 'WEBHOOK_TRENDING' },
'new-repos': { run: newRepos, webhookEnv: 'WEBHOOK_NEW_REPOS' },
'claude-releases': { run: claudeReleases, webhookEnv: 'WEBHOOK_CLAUDE_RELEASES' },
twitter: { run: twitter, webhookEnv: 'WEBHOOK_TWITTER' },
'weekly-trends': { run: weeklyTrends, webhookEnv: 'WEBHOOK_WEEKLY_TRENDS' },
};
/**
 * Posts `content` to a Discord webhook, splitting on line boundaries so each
 * message stays under Discord's 2000-character limit (1900 leaves headroom
 * for JSON escaping). Fix: the original kept any single line longer than the
 * limit as one oversized chunk, which Discord rejects — such lines are now
 * hard-split. Failed chunk posts are logged and remaining chunks still sent.
 */
async function postToWebhook(webhookUrl: string, content: string) {
  const MAX_CHUNK = 1900; // Discord hard limit is 2000; keep a safety margin.
  const chunks: string[] = [];
  let current = '';
  const flush = () => {
    if (current) chunks.push(current);
    current = '';
  };
  for (const rawLine of content.split('\n')) {
    let line = rawLine;
    // Hard-split a single line that alone exceeds the limit.
    while (line.length > MAX_CHUNK) {
      flush();
      chunks.push(line.slice(0, MAX_CHUNK));
      line = line.slice(MAX_CHUNK);
    }
    if ((current + '\n' + line).length > MAX_CHUNK) {
      flush();
      current = line;
    } else {
      current = current ? current + '\n' + line : line;
    }
  }
  flush();
  for (const chunk of chunks) {
    const res = await fetch(webhookUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ content: chunk }),
    });
    if (!res.ok) {
      console.error(`Webhook post failed: ${res.status} ${await res.text()}`);
    }
    // Rate limit safety
    await new Promise(r => setTimeout(r, 500));
  }
}
async function runFeed(name: string) {
const feed = FEEDS[name];
if (!feed) {
console.error(`Unknown feed: ${name}`);
return;
}
const webhookUrl = process.env[feed.webhookEnv];
if (!webhookUrl) {
console.error(`Missing env: ${feed.webhookEnv}`);
return;
}
console.error(`Running ${name}...`);
const output = await feed.run();
if (!output) {
console.error(`${name}: no output (nothing new)`);
return;
}
await postToWebhook(webhookUrl, output);
console.error(`${name}: posted to Discord`);
}
/** CLI entry point: run one named feed, or every registered feed for "all". */
async function main() {
  const cmd = process.argv[2];
  if (!cmd || cmd === '--help') {
    console.error('Usage: bun run src/post.ts <reddit|trending|new-repos|claude-releases|twitter|weekly-trends|all>');
    process.exit(1);
  }
  if (cmd !== 'all') {
    await runFeed(cmd);
    return;
  }
  // "all": run each registered feed in order, one at a time.
  for (const name of Object.keys(FEEDS)) {
    await runFeed(name);
  }
}

main().catch(e => {
  console.error(e);
  process.exit(1);
});