discord-tldr/standalone/discord_tldr.py

300 lines
10 KiB
Python

#!/usr/bin/env python3
"""
Discord TLDR Bot
Summarizes Discord server conversations and posts to a #tldr channel.
Standalone version - no Clawdbot required.
Uses Discord.py for Discord API and Anthropic/OpenAI for summarization.
"""
import os
import asyncio
import logging
from datetime import datetime, timedelta, timezone
from typing import Optional
import discord
from discord.ext import commands, tasks
# Optional: use anthropic or openai for summarization
try:
import anthropic
HAS_ANTHROPIC = True
except ImportError:
HAS_ANTHROPIC = False
try:
import openai
HAS_OPENAI = True
except ImportError:
HAS_OPENAI = False
# ---------------------------------------------------------------------------
# Configuration. Every value is read once, at import time, from the process
# environment; unset variables fall back to the defaults shown here.
# ---------------------------------------------------------------------------
DISCORD_TOKEN = os.environ.get("DISCORD_TOKEN")
GUILD_ID = int(os.environ.get("DISCORD_GUILD_ID", "0"))
TLDR_CHANNEL_ID = int(os.environ.get("DISCORD_TLDR_CHANNEL_ID", "0"))

# LLM configuration. LLM_PROVIDER is expected to be "anthropic" or "openai".
LLM_PROVIDER = os.environ.get("LLM_PROVIDER", "anthropic")
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Summary schedule: comma-separated hours in 24h format.
SUMMARY_HOURS = list(map(int, os.environ.get("SUMMARY_HOURS", "6,13,22").split(",")))

# Channels to exclude: comma-separated channel IDs. Empty fields are dropped
# before conversion, so an unset/empty variable yields an empty list.
EXCLUDE_CHANNELS = list(
    map(int, filter(None, os.environ.get("EXCLUDE_CHANNELS", "").split(",")))
)

# How far back (in hours) to look for messages when no summary has run yet.
LOOKBACK_HOURS = int(os.environ.get("LOOKBACK_HOURS", "8"))

# Logging: INFO-level root configuration plus a named logger for this bot.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("discord-tldr")
# Bot setup.
# Begin with discord.py's default intent set, then explicitly switch on the
# two intents this bot sets itself: guild data and message content (the
# collector reads ``msg.content`` from channel history).
intents = discord.Intents.default()
intents.guilds = True
intents.message_content = True

# Commands are invoked with a "!" prefix (e.g. "!tldr").
bot = commands.Bot(command_prefix="!", intents=intents)
class MessageCollector:
    """Collects recent, human-authored messages from a guild's text channels.

    Attributes:
        guild: The guild whose ``text_channels`` are scanned.
        exclude_channels: Channel IDs that are skipped during collection.
    """

    # Upper bound on the number of messages fetched per channel in one pass.
    HISTORY_LIMIT = 500

    def __init__(self, guild: discord.Guild, exclude_channels: list[int]):
        self.guild = guild
        self.exclude_channels = exclude_channels

    async def collect_messages(self, since: datetime) -> dict[str, list[dict]]:
        """Collect messages from all text channels since the given time.

        Args:
            since: Only messages created after this moment are fetched.

        Returns:
            A mapping of channel name to that channel's messages in
            chronological (oldest -> newest) order. Each message is a dict
            with the keys ``author``, ``content``, ``timestamp``,
            ``attachments`` and ``reactions``. Channels with no qualifying
            messages, excluded channels, the TLDR channel itself and channels
            that raised while being read are omitted.
        """
        # Build the skip-set once; the TLDR channel is never summarized.
        skipped_ids = set(self.exclude_channels)
        skipped_ids.add(TLDR_CHANNEL_ID)

        messages_by_channel: dict[str, list[dict]] = {}
        for channel in self.guild.text_channels:
            if channel.id in skipped_ids:
                continue
            try:
                newest_first = []
                # discord.py switches history() to oldest-first whenever
                # ``after`` is supplied. Left implicit, the reversal below
                # would hand the LLM the conversation backwards, and the limit
                # would keep the *oldest* messages of a busy channel. Asking
                # for newest-first explicitly keeps the most recent
                # HISTORY_LIMIT messages and makes the reversal correct.
                async for msg in channel.history(
                    after=since,
                    limit=self.HISTORY_LIMIT,
                    oldest_first=False,
                ):
                    if msg.author.bot:
                        continue
                    newest_first.append({
                        "author": msg.author.display_name,
                        "content": msg.content,
                        "timestamp": msg.created_at.isoformat(),
                        "attachments": len(msg.attachments),
                        "reactions": sum(r.count for r in msg.reactions) if msg.reactions else 0,
                    })
            except discord.Forbidden:
                logger.warning(f"No access to #{channel.name}")
            except Exception as e:
                # Best effort: one broken channel must not abort the digest.
                logger.error(f"Error collecting from #{channel.name}: {e}")
            else:
                if newest_first:
                    # Flip to chronological order for the prompt.
                    messages_by_channel[channel.name] = newest_first[::-1]
                    logger.info(f"Collected {len(newest_first)} messages from #{channel.name}")
        return messages_by_channel
class Summarizer:
    """Summarizes messages using an LLM.

    Attributes:
        provider: The requested provider name ("anthropic" or "openai").
        client: The provider's SDK client, or ``None`` when the provider is
            unknown or its package is not installed; in that case
            ``summarize`` falls back to a plain message-count listing.
    """

    def __init__(self, provider: str = "anthropic"):
        self.provider = provider
        if provider == "anthropic" and HAS_ANTHROPIC:
            self.client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
        elif provider == "openai" and HAS_OPENAI:
            self.client = openai.OpenAI(api_key=OPENAI_API_KEY)
        else:
            self.client = None
            logger.warning(f"LLM provider '{provider}' not available, using basic summarization")

    def _format_messages_for_prompt(self, messages_by_channel: dict) -> str:
        """Format collected messages into a prompt-friendly string.

        Each channel becomes a ``## #channel`` heading (preceded by a blank
        line) followed by one ``[author]: content`` line per message.
        """
        parts = []
        for channel, messages in messages_by_channel.items():
            parts.append(f"\n## #{channel}")
            parts.extend(f"[{msg['author']}]: {msg['content']}" for msg in messages)
        return "\n".join(parts)

    def _basic_summary(self, messages_by_channel: dict) -> str:
        """Return the no-LLM fallback: a per-channel message count listing."""
        lines = ["📋 **Activity Summary**\n"]
        for channel, messages in messages_by_channel.items():
            lines.append(f"• **#{channel}**: {len(messages)} messages")
        lines.append("\n*Install anthropic or openai package for AI summaries*")
        return "\n".join(lines)

    def _request_completion(self, prompt: str) -> str:
        """Send ``prompt`` to the configured provider and return its text.

        This is a blocking call into a synchronous SDK client and is meant to
        be run off the event loop (see ``summarize``).
        """
        if self.provider == "anthropic":
            response = self.client.messages.create(
                model="claude-sonnet-4-20250514",
                max_tokens=1500,
                messages=[{"role": "user", "content": prompt}],
            )
            return response.content[0].text
        response = self.client.chat.completions.create(
            model="gpt-4o",
            max_tokens=1500,
            messages=[{"role": "user", "content": prompt}],
        )
        # The OpenAI SDK types ``content`` as optional; never return None
        # from a method annotated ``-> str``.
        return response.choices[0].message.content or ""

    async def summarize(self, messages_by_channel: dict) -> str:
        """Generate a summary of the messages.

        Args:
            messages_by_channel: Mapping of channel name to a list of message
                dicts carrying at least ``author`` and ``content``.

        Returns:
            The LLM's digest, the basic count listing when no LLM client is
            available, or a fixed notice when there is nothing to summarize.

        Raises:
            Whatever the provider SDK raises for a failed request.
        """
        if not messages_by_channel:
            return "No new messages to summarize."
        if not self.client or self.provider not in ("anthropic", "openai"):
            # Basic fallback: just list message counts
            return self._basic_summary(messages_by_channel)

        formatted = self._format_messages_for_prompt(messages_by_channel)
        prompt = f"""Summarize the following Discord server conversations into a concise TLDR digest.
Format your response as:
1. A brief overview (1-2 sentences)
2. Bullet points organized by channel, highlighting:
- Key discussions and decisions
- Important ideas or proposals
- Action items or next steps
- Notable moments or highlights
Keep it scannable and useful for someone catching up.
---
MESSAGES:
{formatted}
---
Generate the TLDR summary:"""
        # Both SDK clients created in __init__ are synchronous. Calling them
        # directly inside this coroutine would freeze the event loop (and with
        # it the Discord gateway connection) for the whole LLM round trip, so
        # the request runs in a worker thread instead.
        return await asyncio.to_thread(self._request_completion, prompt)
class TLDRBot:
    """Main bot class that orchestrates collection and summarization.

    Attributes:
        collector: The ``MessageCollector`` used by the most recent run, or
            ``None`` before the first run.
        summarizer: The ``Summarizer`` built for the configured LLM provider.
        last_summary_time: Timezone-aware UTC start of the next collection
            window, or ``None`` until a summary has been posted (or a command
            has overridden it).
    """

    def __init__(self):
        self.collector = None
        self.summarizer = Summarizer(LLM_PROVIDER)
        self.last_summary_time = None

    async def generate_and_post_summary(self):
        """Generate a summary and post it to the TLDR channel.

        Logs and returns without posting when the guild or the TLDR channel
        cannot be resolved, or when no messages were collected. Exceptions
        raised by summarization or by sending propagate to the caller.
        """
        guild = bot.get_guild(GUILD_ID)
        if not guild:
            logger.error(f"Guild {GUILD_ID} not found")
            return
        tldr_channel = guild.get_channel(TLDR_CHANNEL_ID)
        if not tldr_channel:
            logger.error(f"TLDR channel {TLDR_CHANNEL_ID} not found")
            return

        # Determine lookback time: continue from the previous window, or fall
        # back to the configured lookback on the first run.
        since = self.last_summary_time or (
            datetime.now(timezone.utc) - timedelta(hours=LOOKBACK_HOURS)
        )

        # Fix the end of this window *before* collecting. Stamping it after
        # the (slow) collect/summarize/post steps would leave every message
        # written in between outside both this window and the next one. The
        # trade-off is that a message arriving while collection is still in
        # progress may appear in two consecutive digests.
        window_end = datetime.now(timezone.utc)

        # Collect messages
        self.collector = MessageCollector(guild, EXCLUDE_CHANNELS)
        messages = await self.collector.collect_messages(since)
        if not messages:
            logger.info("No new messages to summarize")
            return

        # Generate summary
        summary = await self.summarizer.summarize(messages)

        # Format and post (header timestamp uses the host's local clock)
        now = datetime.now()
        header = f"📋 **TLDR Summary** ({now.strftime('%b %d, %Y - %I:%M %p')})\n\n"
        full_message = header + summary

        # Discord has a 2000 char limit; longer digests go out as several
        # consecutive messages of at most 1990 characters each.
        if len(full_message) > 2000:
            chunks = [full_message[i:i + 1990] for i in range(0, len(full_message), 1990)]
        else:
            chunks = [full_message]
        for chunk in chunks:
            await tldr_channel.send(chunk)

        # Only advance the window once the digest has actually been posted.
        self.last_summary_time = window_end
        logger.info("Posted TLDR summary")
# Single shared TLDRBot instance; the scheduled task and the "!tldr" /
# "!tldr-status" commands in this module all operate on this object.
tldr_bot = TLDRBot()
@bot.event
async def on_ready():
    """Log the login and make sure the summary scheduler is running."""
    logger.info(f"Logged in as {bot.user}")
    # discord.py may dispatch on_ready more than once over the life of the
    # process (for example after a reconnect), and Loop.start() raises
    # RuntimeError when the task is already running. Start it only once.
    if not check_summary_time.is_running():
        check_summary_time.start()
@tasks.loop(minutes=1)
async def check_summary_time():
    """Check if it's time to post a summary.

    Runs once a minute and triggers a summary when the host's local time is
    at minute 0 of one of the hours listed in SUMMARY_HOURS.
    """
    now = datetime.now()
    if now.minute != 0 or now.hour not in SUMMARY_HOURS:
        return
    try:
        await tldr_bot.generate_and_post_summary()
    except Exception:
        # An exception escaping a tasks.loop body stops the loop for good, so
        # a single failed LLM call or Discord request would silently end all
        # future scheduled summaries. Log it and let the next tick run.
        logger.exception("Scheduled TLDR summary failed")
@bot.command(name="tldr")
async def manual_tldr(ctx, hours: int = None):
    """Manually trigger a TLDR summary. Usage: !tldr [hours]"""
    # NOTE: the docstring above doubles as the command's help text and the
    # ``int`` annotation drives discord.py's argument conversion, so both are
    # deliberately left exactly as they were.
    if hours is not None:
        # A zero or negative value would place the window start at or after
        # "now" and silently produce nothing; an absurdly large one overflows
        # the datetime arithmetic. Reject both with a usage hint instead.
        try:
            if hours <= 0:
                raise ValueError("hours must be positive")
            window_start = datetime.now(timezone.utc) - timedelta(hours=hours)
        except (ValueError, OverflowError):
            await ctx.send("Usage: `!tldr [hours]` (hours must be a positive whole number in a sensible range)")
            return
        # Override the start of the next collection window for this run.
        tldr_bot.last_summary_time = window_start
    await ctx.send("Generating TLDR summary...")
    await tldr_bot.generate_and_post_summary()
@bot.command(name="tldr-status")
async def tldr_status(ctx):
    """Check TLDR bot status."""
    # Describe when the last digest went out, if there has been one.
    previous_run = tldr_bot.last_summary_time
    if previous_run:
        last_str = previous_run.strftime('%Y-%m-%d %H:%M UTC')
    else:
        last_str = "Never"

    # Render the configured hours as "H:00" entries.
    schedule = ", ".join([f"{h}:00" for h in SUMMARY_HOURS])

    # Assemble the whole report first, then send it as one message.
    report = (
        f"**TLDR Bot Status**\n"
        f"• Last summary: {last_str}\n"
        f"• Schedule: {schedule}\n"
        f"• Lookback: {LOOKBACK_HOURS} hours\n"
        f"• LLM Provider: {LLM_PROVIDER}\n"
        f"• Excluded channels: {len(EXCLUDE_CHANNELS)}"
    )
    await ctx.send(report)
def main():
    """Validate the required configuration and run the bot.

    Prints an explanation and returns without starting the bot when the
    token, the guild ID or the TLDR channel ID is missing; otherwise blocks
    in ``bot.run`` until the bot shuts down.
    """
    if not DISCORD_TOKEN:
        # Full usage text, emitted one line per print() call.
        usage_lines = (
            "Error: DISCORD_TOKEN environment variable not set",
            "\nRequired environment variables:",
            " DISCORD_TOKEN - Your Discord bot token",
            " DISCORD_GUILD_ID - Your server's guild ID",
            " DISCORD_TLDR_CHANNEL_ID - Channel ID for posting summaries",
            "\nOptional:",
            " LLM_PROVIDER - 'anthropic' or 'openai' (default: anthropic)",
            " ANTHROPIC_API_KEY - Anthropic API key (if using Claude)",
            " OPENAI_API_KEY - OpenAI API key (if using GPT)",
            " SUMMARY_HOURS - Comma-separated hours (default: 6,13,22)",
            " LOOKBACK_HOURS - Hours to look back (default: 8)",
            " EXCLUDE_CHANNELS - Comma-separated channel IDs to skip",
        )
        for usage_line in usage_lines:
            print(usage_line)
        return

    # Both IDs default to 0 when unset, which is treated as "missing".
    if not (GUILD_ID and TLDR_CHANNEL_ID):
        print("Error: DISCORD_GUILD_ID and DISCORD_TLDR_CHANNEL_ID must be set")
        return

    bot.run(DISCORD_TOKEN)


# Run the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()