connectd/scoutd/discord.py

324 lines
9.3 KiB
Python
Raw Permalink Normal View History

"""
scoutd/discord.py - discord discovery
discord requires a bot token to read messages.
target servers: programming help, career transition, indie hackers, etc.
SETUP:
1. create discord app at discord.com/developers
2. add bot, get token
3. invite the bot to the target servers (a bot cannot join on its own)
4. set DISCORD_BOT_TOKEN env var
"""
import requests
import json
import time
import os
from datetime import datetime
from pathlib import Path
from .signals import analyze_text
from .lost import (
analyze_social_for_lost_signals,
classify_user,
)
# base url for every REST call made by this module
DISCORD_API = 'https://discord.com/api/v10'
# bot token from the environment; an empty string means "not configured"
DISCORD_BOT_TOKEN = os.environ.get('DISCORD_BOT_TOKEN', '')
# default server IDs - values-aligned communities
# bot must be invited to these servers to scout them
# invite links for reference (use numeric IDs below):
# - self-hosted: discord.gg/self-hosted
# - foss-dev: discord.gg/foss-developers-group
# - grapheneos: discord.gg/grapheneos
# - queer-coded: discord.me/queer-coded
# - homelab: discord.gg/homelab
# - esphome: discord.gg/n9sdw7pnsn
# - home-assistant: discord.gg/home-assistant
# - linuxserver: discord.gg/linuxserver
# - proxmox-scripts: discord.gg/jsYVk5JBxq
# guild (server) ids scouted by default; the large communities stay commented
# out until the bot has actually been invited to them
DEFAULT_SERVERS = [
    # self-hosted / foss / privacy
    '693469700109369394',  # self-hosted (selfhosted.show)
    '920089648842293248',  # foss developers group
    '1176414688112820234',  # grapheneos
    # queer tech
    '925804557001437184',  # queer coded
    # home automation / homelab
    # note: these are large servers, bot needs to be invited
    # '330944238910963714',  # home assistant (150k+ members)
    # '429907082951524364',  # esphome (35k members)
    # '478094546522079232',  # homelab (35k members)
    # '354974912613449730',  # linuxserver.io (41k members)
]

# merge env var servers with defaults
# DISCORD_TARGET_SERVERS is a comma-separated list of extra guild ids;
# blank entries and surrounding whitespace are dropped
_env_servers = [
    s.strip()
    for s in os.environ.get('DISCORD_TARGET_SERVERS', '').split(',')
    if s.strip()
]
# dict.fromkeys de-duplicates while keeping insertion order, so defaults are
# always scouted first and the order is the same on every run (a plain set
# would shuffle the ids depending on string hashing)
TARGET_SERVERS = list(dict.fromkeys(DEFAULT_SERVERS + _env_servers))
# channels to focus on (keywords in channel name)
# matched as substrings against the lower-cased channel name
TARGET_CHANNEL_KEYWORDS = [
    'help',
    'career',
    'jobs',
    'learning',
    'beginner',
    'general',
    'introductions',
    'showcase',
    'projects',
]
# <two levels above this file>/db/cache/discord, created at import time
_CACHE_BASE = Path(__file__).parent.parent
CACHE_DIR = _CACHE_BASE / 'db' / 'cache' / 'discord'
CACHE_DIR.mkdir(parents=True, exist_ok=True)
def get_headers():
    """build the auth headers for discord api calls

    returns None when DISCORD_BOT_TOKEN is empty so callers can bail out
    before making a request; otherwise a dict with the bot Authorization
    header and a json Content-Type.
    """
    if DISCORD_BOT_TOKEN:
        return {
            'Authorization': f'Bot {DISCORD_BOT_TOKEN}',
            'Content-Type': 'application/json',
        }
    return None
def get_guild_channels(guild_id):
    """fetch the channel list of a guild

    returns the decoded json body on HTTP 200. returns [] when no bot token
    is configured, on any other status code, or if the request raises.
    """
    auth = get_headers()
    if not auth:
        return []

    try:
        response = requests.get(
            f'{DISCORD_API}/guilds/{guild_id}/channels',
            headers=auth,
            timeout=30,
        )
        succeeded = response.status_code == 200
        return response.json() if succeeded else []
    except Exception:
        # best effort: any failure is reported as "no channels"
        return []
def get_channel_messages(channel_id, limit=100):
    """get recent messages from a channel

    args:
        channel_id: id of the channel to read.
        limit: max number of messages to request. discord only accepts
            1-100 for this parameter, so the value is clamped into that
            range instead of letting the api reject the whole call.
            None is passed through unchanged.

    returns:
        the decoded json body on HTTP 200; [] when no bot token is set,
        on any other status code, or if anything in the request raises
        (including a limit that cannot be converted to an int).
    """
    headers = get_headers()
    if not headers:
        return []

    try:
        if limit is not None:
            # out-of-range values used to produce a 400 and a silent []
            limit = max(1, min(int(limit), 100))
        resp = requests.get(
            f'{DISCORD_API}/channels/{channel_id}/messages',
            headers=headers,
            params={'limit': limit},
            timeout=30,
        )
        if resp.status_code == 200:
            return resp.json()
        return []
    except Exception:
        # best effort: any failure is reported as "no messages"
        return []
def get_user_info(user_id):
    """look up a discord user by id

    returns the decoded json body on HTTP 200. returns None when no bot
    token is configured, on any other status code, or if the request raises.
    """
    auth = get_headers()
    if not auth:
        return None

    try:
        response = requests.get(
            f'{DISCORD_API}/users/{user_id}',
            headers=auth,
            timeout=30,
        )
        if response.status_code != 200:
            return None
        return response.json()
    except Exception:
        # best effort: any failure is reported as "unknown user"
        return None
def analyze_discord_user(user_data, messages=None):
    """analyze a discord user for values alignment and lost signals

    args:
        user_data: discord user dict; reads `username`, `global_name`,
            `id` and `discriminator`.
        messages: optional list of message dicts written by this user.
            only the first 20 are looked at, and messages whose content is
            empty or shorter than 20 characters are skipped.

    returns:
        a dict describing the user (platform, username, url, score,
        confidence, signals, contact, lost_potential_score, lost_signals,
        user_type, ...), the shape scrape_discord hands to db.save_human.
    """
    username = user_data.get('username', '')
    display_name = user_data.get('global_name') or username
    user_id = user_data.get('id')
    message_count = len(messages) if messages else 0

    # analyze messages
    all_signals = []
    all_text = []
    total_score = 0
    for msg in (messages or [])[:20]:
        content = msg.get('content', '')
        if not content or len(content) < 20:
            continue
        all_text.append(content)
        score, signals, _ = analyze_text(content)
        all_signals.extend(signals)
        total_score += score
    # de-duplicate but keep first-seen order so the stored list is stable
    # across runs (a set would reorder it arbitrarily)
    all_signals = list(dict.fromkeys(all_signals))

    # lost builder detection
    profile_for_lost = {
        'bio': '',
        'message_count': message_count,
    }
    posts_for_lost = [{'text': t} for t in all_text]
    lost_signals, lost_weight = analyze_social_for_lost_signals(
        profile_for_lost, posts_for_lost
    )
    lost_potential_score = lost_weight
    # NOTE(review): the literal 50 is classify_user's second argument; its
    # meaning is defined in .lost and is not visible here
    user_type = classify_user(lost_potential_score, 50, total_score)

    # accounts on discord's current username system report discriminator '0';
    # appending it would store a bogus "name#0" handle, so only legacy
    # four-digit tags are kept
    discriminator = user_data.get('discriminator')
    if discriminator and str(discriminator) != '0':
        discord_handle = f"{username}#{discriminator}"
    else:
        discord_handle = username

    return {
        'platform': 'discord',
        'username': username,
        'url': f"https://discord.com/users/{user_id}",
        'name': display_name,
        'bio': '',
        'location': None,
        'score': total_score,
        # starts at 0.2, +0.1 per distinct signal, capped at 0.8
        'confidence': min(0.8, 0.2 + len(all_signals) * 0.1),
        'signals': all_signals,
        'negative_signals': [],
        'reasons': [],
        'contact': {'discord': discord_handle},
        'extra': {
            'user_id': user_id,
            'message_count': message_count,
        },
        'lost_potential_score': lost_potential_score,
        'lost_signals': lost_signals,
        'user_type': user_type,
    }
def scrape_discord(db, limit_per_channel=50):
    """scout the configured discord servers and save promising users

    walks every guild in TARGET_SERVERS, picks text channels whose name
    contains one of TARGET_CHANNEL_KEYWORDS (at most 5 per guild), groups the
    recent messages by author and runs analyze_discord_user on each author
    once per run. users with score >= 20 or lost_potential_score >= 30 are
    passed to db.save_human.

    args:
        db: object exposing save_human(result).
        limit_per_channel: how many messages to request per channel.

    returns:
        number of users saved (0 when the token or server list is missing).
    """
    if not DISCORD_BOT_TOKEN:
        print("discord: DISCORD_BOT_TOKEN not set, skipping")
        return 0
    if not TARGET_SERVERS or TARGET_SERVERS == ['']:
        print("discord: DISCORD_TARGET_SERVERS not set, skipping")
        return 0

    print("scouting discord...")

    def is_target_channel(ch):
        # text channels only (type 0) whose name contains a target keyword
        if ch.get('type') != 0:
            return False
        lowered = ch.get('name', '').lower()
        return any(keyword in lowered for keyword in TARGET_CHANNEL_KEYWORDS)

    saved = 0
    saved_lost = 0
    handled_ids = set()  # authors already analyzed during this run

    for raw_guild_id in TARGET_SERVERS:
        if not raw_guild_id:
            continue
        guild_id = raw_guild_id.strip()

        channels = get_guild_channels(guild_id)
        if not channels:
            print(f" guild {guild_id}: no access or no channels")
            continue

        relevant = [ch for ch in channels if is_target_channel(ch)]
        print(f" guild {guild_id}: {len(relevant)} relevant channels")

        # limit channels per server
        for channel in relevant[:5]:
            messages = get_channel_messages(channel['id'], limit=limit_per_channel)
            if not messages:
                continue

            # bucket the messages by author, skipping bots and known users
            by_author = {}
            for message in messages:
                sender = message.get('author', {})
                if sender.get('bot'):
                    continue
                sender_id = sender.get('id')
                if sender_id and sender_id not in handled_ids:
                    bucket = by_author.setdefault(
                        sender_id, {'user': sender, 'messages': []}
                    )
                    bucket['messages'].append(message)

            # analyze every new author found in this channel
            for sender_id, bucket in by_author.items():
                if sender_id in handled_ids:
                    continue
                handled_ids.add(sender_id)

                result = analyze_discord_user(bucket['user'], bucket['messages'])
                if not result:
                    continue

                keep = (
                    result['score'] >= 20
                    or result.get('lost_potential_score', 0) >= 30
                )
                if not keep:
                    continue

                db.save_human(result)
                saved += 1
                if result.get('user_type') in ('lost', 'both'):
                    saved_lost += 1

            time.sleep(1)  # rate limit between channels

        time.sleep(2)  # between guilds

    print(f"discord: found {saved} humans ({saved_lost} lost builders)")
    return saved
def send_discord_dm(user_id, message, dry_run=False):
    """DM a discord user through the bot account

    args:
        user_id: id of the recipient.
        message: text to send.
        dry_run: when true, only print what would happen and report success.

    returns:
        (ok, detail) tuple: ok is a bool, detail a short status or error
        string. exceptions from the requests are caught and returned as
        (False, str(exception)).
    """
    if not DISCORD_BOT_TOKEN:
        return False, "DISCORD_BOT_TOKEN not set"
    if dry_run:
        print(f" [dry run] would DM discord user {user_id}")
        return True, "dry run"

    auth = get_headers()
    ok_codes = (200, 201)
    try:
        # step 1: create the DM channel with the recipient
        channel_resp = requests.post(
            f'{DISCORD_API}/users/@me/channels',
            headers=auth,
            json={'recipient_id': user_id},
            timeout=30,
        )
        if channel_resp.status_code not in ok_codes:
            return False, f"couldn't create DM channel: {channel_resp.status_code}"
        dm_channel_id = channel_resp.json().get('id')

        # step 2: post the message into that channel
        send_resp = requests.post(
            f'{DISCORD_API}/channels/{dm_channel_id}/messages',
            headers=auth,
            json={'content': message},
            timeout=30,
        )
        if send_resp.status_code not in ok_codes:
            return False, f"send failed: {send_resp.status_code}"
        return True, f"sent to {user_id}"
    except Exception as err:
        return False, str(err)