add forge support, central coordination, lost builder detection

- central API client for distributed instance coordination
- forge scraper: gitea, forgejo, gogs, gitlab, sourcehut, codeberg
- forge issue delivery as outreach method
- usage-based contact method ranking with fallback chain
- lost builder detection and targeted outreach
- reddit and lobsters handle discovery
- deep scrape for handle/email discovery from profiles
This commit is contained in:
root 2025-12-16 21:30:05 +00:00
parent 99946bfef5
commit f33409ceda
15 changed files with 2102 additions and 837 deletions

70
api.py
View file

@ -116,6 +116,7 @@ DASHBOARD_HTML = """<!DOCTYPE html>
<div id="queue" class="pnl"></div> <div id="queue" class="pnl"></div>
<div id="sent" class="pnl"></div> <div id="sent" class="pnl"></div>
<div id="failed" class="pnl"></div> <div id="failed" class="pnl"></div>
<div id="lost" class="pnl"></div>
<script> <script>
var currentTab = 'host'; var currentTab = 'host';
@ -130,7 +131,8 @@ function initTabs() {
{id: 'host', label: 'you'}, {id: 'host', label: 'you'},
{id: 'queue', label: 'queue'}, {id: 'queue', label: 'queue'},
{id: 'sent', label: 'sent'}, {id: 'sent', label: 'sent'},
{id: 'failed', label: 'failed'} {id: 'failed', label: 'failed'},
{id: 'lost', label: 'lost builders'}
]; ];
tabs.forEach(function(t) { tabs.forEach(function(t) {
@ -319,6 +321,31 @@ async function loadFailed() {
$('failed').innerHTML = html; $('failed').innerHTML = html;
} }
async function loadLost() {
    // fetch lost-builder matches from the API and render them into the "lost" panel
    var res = await fetch("/api/lost_builders");
    var data = await res.json();
    // escape server-supplied values before inserting into innerHTML;
    // usernames come from scraped third-party profiles and must not be
    // trusted as HTML (XSS guard)
    function esc(v) {
        return String(v == null ? "" : v)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;");
    }
    var html = "<h2>lost builders (" + (data.total || 0) + ")</h2>";
    html += "<p style=\"color:#c792ea;font-size:0.8em;margin-bottom:10px\">people who need to see that someone like them made it</p>";
    var matches = data.matches || [];
    if (matches.length === 0) {
        html += "<div class=\"meta\">no lost builders found</div>";
    }
    for (var i = 0; i < matches.length; i++) {
        var m = matches[i];
        html += "<div class=\"card\">";
        html += "<div class=\"card-hdr\"><span class=\"to\">LOST: " + esc(m.lost_user) + "</span><span class=\"score\">" + esc(m.match_score) + "</span></div>";
        html += "<div class=\"meta\">lost: " + esc(m.lost_score) + " | values: " + esc(m.values_score) + "</div>";
        html += "<div class=\"meta\" style=\"color:#0f8\">BUILDER: " + esc(m.builder) + " (" + esc(m.builder_platform) + ")</div>";
        html += "<div class=\"meta\">score: " + esc(m.builder_score) + " | repos: " + esc(m.builder_repos) + " | stars: " + esc(m.builder_stars) + "</div>";
        html += "<div class=\"meta\">shared: " + esc((m.shared || []).join(", ")) + "</div>";
        html += "</div>";
    }
    $("lost").innerHTML = html;
}
function load() { function load() {
loadStats(); loadStats();
@ -326,6 +353,7 @@ function load() {
loadQueue(); loadQueue();
loadSent(); loadSent();
loadFailed(); loadFailed();
loadLost();
} }
document.addEventListener('click', function(e) { document.addEventListener('click', function(e) {
@ -438,6 +466,8 @@ class APIHandler(BaseHTTPRequestHandler):
self._handle_top_humans() self._handle_top_humans()
elif path == '/api/user': elif path == '/api/user':
self._handle_user() self._handle_user()
elif path == '/api/lost_builders':
self._handle_lost_builders()
else: else:
self._send_json({'error': 'not found'}, 404) self._send_json({'error': 'not found'}, 404)
def _handle_favicon(self): def _handle_favicon(self):
@ -1171,6 +1201,44 @@ class APIHandler(BaseHTTPRequestHandler):
self._send_json({'error': str(e)}, 500) self._send_json({'error': str(e)}, 500)
def _handle_lost_builders(self):
    """Return lost builders paired with their inspiring matches as JSON.

    Response shape: {'total': int, 'error': str | None, 'matches': [...]},
    with each match flattened into lost_* / builder_* keys for the dashboard.
    Sends a 500 with {'error': ...} on any failure.
    """
    try:
        from matchd.lost import find_matches_for_lost_builders
        db = Database()
        try:
            matches, error = find_matches_for_lost_builders(
                db, min_lost_score=30, min_values_score=15, limit=50)
        finally:
            # close even if matching raises, so handler errors don't leak connections
            db.close()
        result = {
            'total': len(matches) if matches else 0,
            'error': error,
            'matches': []
        }
        for m in (matches or []):
            lost = m.get('lost_user', {})
            builder = m.get('inspiring_builder', {})
            result['matches'].append({
                'lost_user': lost.get('username'),
                'lost_platform': lost.get('platform'),
                'lost_score': lost.get('lost_potential_score', 0),
                'values_score': lost.get('score', 0),
                'builder': builder.get('username'),
                'builder_platform': builder.get('platform'),
                'builder_score': builder.get('score', 0),
                'builder_repos': m.get('builder_repos', 0),
                'builder_stars': m.get('builder_stars', 0),
                'match_score': m.get('match_score', 0),
                # cap shared interests shown on the dashboard card
                'shared': m.get('shared_interests', [])[:5],
            })
        self._send_json(result)
    except Exception as e:
        self._send_json({'error': str(e)}, 500)
def run_api_server(): def run_api_server():
"""run the API server in a thread""" """run the API server in a thread"""
server = HTTPServer(('0.0.0.0', API_PORT), APIHandler) server = HTTPServer(('0.0.0.0', API_PORT), APIHandler)

183
central_client.py Normal file
View file

@ -0,0 +1,183 @@
"""
connectd/central_client.py - client for connectd-central API
provides similar interface to local Database class but uses remote API.
allows distributed instances to share data and coordinate outreach.
"""
import os
import json
import requests
from typing import Optional, List, Dict, Any, Tuple
from datetime import datetime
CENTRAL_API = os.environ.get('CONNECTD_CENTRAL_API', '')
API_KEY = os.environ.get('CONNECTD_API_KEY', '')
INSTANCE_ID = os.environ.get('CONNECTD_INSTANCE_ID', 'default')
class CentralClient:
    """HTTP client for the connectd-central coordination API.

    Mirrors the local Database interface so daemon code can share data and
    coordinate outreach across distributed instances. All request helpers
    raise requests.HTTPError on non-2xx responses unless noted otherwise.
    """

    # seconds before any central API call is abandoned instead of hanging
    # the daemon loop forever
    REQUEST_TIMEOUT = 30

    def __init__(self, api_url: str = None, api_key: str = None, instance_id: str = None):
        self.api_url = api_url or CENTRAL_API
        self.api_key = api_key or API_KEY
        self.instance_id = instance_id or INSTANCE_ID
        self.headers = {
            'X-API-Key': self.api_key,
            'Content-Type': 'application/json'
        }
        if not self.api_key:
            raise ValueError('CONNECTD_API_KEY environment variable required')

    def _get(self, endpoint: str, params: dict = None) -> dict:
        """GET an endpoint and return the decoded JSON body."""
        resp = requests.get(f'{self.api_url}{endpoint}', headers=self.headers,
                            params=params, timeout=self.REQUEST_TIMEOUT)
        resp.raise_for_status()
        return resp.json()

    def _post(self, endpoint: str, data: dict) -> dict:
        """POST a JSON body to an endpoint and return the decoded response."""
        resp = requests.post(f'{self.api_url}{endpoint}', headers=self.headers,
                             json=data, timeout=self.REQUEST_TIMEOUT)
        resp.raise_for_status()
        return resp.json()

    # === HUMANS ===
    def get_human(self, human_id: int) -> Optional[dict]:
        """Fetch a single human by id, or None if missing/unreachable."""
        try:
            return self._get(f'/humans/{human_id}')
        except (requests.RequestException, ValueError):
            # narrow catch: network/HTTP/JSON failures only, not KeyboardInterrupt
            return None

    def get_humans(self, platform: str = None, user_type: str = None,
                   min_score: float = 0, limit: int = 100, offset: int = 0) -> List[dict]:
        """List humans filtered by platform/type/score with pagination."""
        params = {'min_score': min_score, 'limit': limit, 'offset': offset}
        if platform:
            params['platform'] = platform
        if user_type:
            params['user_type'] = user_type
        result = self._get('/humans', params)
        return result.get('humans', [])

    def get_all_humans(self, min_score: float = 0, limit: int = 100000) -> List[dict]:
        """get all humans (for matching)"""
        return self.get_humans(min_score=min_score, limit=limit)

    def get_lost_builders(self, min_score: float = 30, limit: int = 100) -> List[dict]:
        """get lost builders for outreach"""
        return self.get_humans(user_type='lost', min_score=min_score, limit=limit)

    def get_builders(self, min_score: float = 50, limit: int = 100) -> List[dict]:
        """get active builders"""
        return self.get_humans(user_type='builder', min_score=min_score, limit=limit)

    def upsert_human(self, human: dict) -> int:
        """create or update human, returns id"""
        result = self._post('/humans', human)
        return result.get('id')

    def upsert_humans_bulk(self, humans: List[dict]) -> Tuple[int, int]:
        """bulk upsert humans, returns (created, updated)"""
        result = self._post('/humans/bulk', humans)
        return result.get('created', 0), result.get('updated', 0)

    # === MATCHES ===
    def get_matches(self, min_score: float = 0, limit: int = 100, offset: int = 0) -> List[dict]:
        """List matches above a score threshold with pagination."""
        params = {'min_score': min_score, 'limit': limit, 'offset': offset}
        result = self._get('/matches', params)
        return result.get('matches', [])

    def create_match(self, human_a_id: int, human_b_id: int,
                     overlap_score: float, overlap_reasons: str = None) -> int:
        """create match, returns id"""
        result = self._post('/matches', {
            'human_a_id': human_a_id,
            'human_b_id': human_b_id,
            'overlap_score': overlap_score,
            'overlap_reasons': overlap_reasons
        })
        return result.get('id')

    def create_matches_bulk(self, matches: List[dict]) -> int:
        """bulk create matches, returns count"""
        result = self._post('/matches/bulk', matches)
        return result.get('created', 0)

    # === OUTREACH COORDINATION ===
    def get_pending_outreach(self, outreach_type: str = None, limit: int = 50) -> List[dict]:
        """get pending outreach that hasn't been claimed"""
        params = {'limit': limit}
        if outreach_type:
            params['outreach_type'] = outreach_type
        result = self._get('/outreach/pending', params)
        return result.get('pending', [])

    def claim_outreach(self, human_id: int, match_id: int = None,
                       outreach_type: str = 'intro') -> Optional[int]:
        """claim outreach for a human, returns outreach_id or None if already claimed"""
        try:
            result = self._post('/outreach/claim', {
                'human_id': human_id,
                'match_id': match_id,
                'outreach_type': outreach_type
            })
            return result.get('outreach_id')
        except requests.exceptions.HTTPError as e:
            # 409 is the server's "another instance claimed this" signal
            if e.response.status_code == 409:
                return None
            raise

    def complete_outreach(self, outreach_id: int, status: str,
                          sent_via: str = None, draft: str = None, error: str = None):
        """mark outreach as complete"""
        self._post('/outreach/complete', {
            'outreach_id': outreach_id,
            'status': status,
            'sent_via': sent_via,
            'draft': draft,
            'error': error
        })

    def get_outreach_history(self, status: str = None, limit: int = 100) -> List[dict]:
        """List past outreach attempts, optionally filtered by status."""
        params = {'limit': limit}
        if status:
            params['status'] = status
        result = self._get('/outreach/history', params)
        return result.get('history', [])

    def already_contacted(self, human_id: int) -> bool:
        """check if human has been contacted"""
        # NOTE(review): fetches up to 10000 rows per call — fine for now,
        # but a server-side per-human filter would be cheaper at scale
        history = self._get('/outreach/history', {'limit': 10000})
        sent = history.get('history', [])
        return any(h['human_id'] == human_id and h['status'] == 'sent' for h in sent)

    # === STATS ===
    def get_stats(self) -> dict:
        """Fetch aggregate stats from central."""
        return self._get('/stats')

    # === INSTANCE MANAGEMENT ===
    def register_instance(self, name: str, host: str):
        """register this instance with central"""
        from urllib.parse import urlencode
        # URL-encode name/host; raw interpolation broke on spaces or '&'
        self._post(f'/instances/register?{urlencode({"name": name, "host": host})}', {})

    def get_instances(self) -> List[dict]:
        """List all registered instances."""
        result = self._get('/instances')
        return result.get('instances', [])

    # === HEALTH ===
    def health_check(self) -> bool:
        """True if central answers /health with status ok."""
        try:
            result = self._get('/health')
            return result.get('status') == 'ok'
        except (requests.RequestException, ValueError):
            return False
# convenience function
def get_client() -> CentralClient:
    """Build a CentralClient from environment variables.

    Raises ValueError if CONNECTD_API_KEY is not set.
    """
    return CentralClient()

View file

@ -22,7 +22,7 @@ CACHE_DIR.mkdir(exist_ok=True)
SCOUT_INTERVAL = 3600 * 4 # full scout every 4 hours SCOUT_INTERVAL = 3600 * 4 # full scout every 4 hours
MATCH_INTERVAL = 3600 # check matches every hour MATCH_INTERVAL = 3600 # check matches every hour
INTRO_INTERVAL = 1800 # send intros every 30 minutes INTRO_INTERVAL = 1800 # send intros every 30 minutes
MAX_INTROS_PER_DAY = 250 # rate limit builder-to-builder outreach MAX_INTROS_PER_DAY = 1000 # rate limit builder-to-builder outreach
# === MATCHING CONFIG === # === MATCHING CONFIG ===
@ -42,7 +42,7 @@ LOST_CONFIG = {
# outreach settings # outreach settings
'enabled': True, 'enabled': True,
'max_per_day': 20, # lower volume, higher care 'max_per_day': 100, # lower volume, higher care
'require_review': False, # fully autonomous 'require_review': False, # fully autonomous
'cooldown_days': 90, # don't spam struggling people 'cooldown_days': 90, # don't spam struggling people
@ -70,6 +70,47 @@ GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
GROQ_MODEL = os.environ.get('GROQ_MODEL', 'llama-3.3-70b-versatile') GROQ_MODEL = os.environ.get('GROQ_MODEL', 'llama-3.3-70b-versatile')
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '') GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '')
# === FORGE TOKENS ===
# for creating issues on self-hosted git forges
# each forge needs its own token from that instance
#
# CODEBERG: Settings -> Applications -> Generate Token (repo:write scope)
# GITEA/FORGEJO: Settings -> Applications -> Generate Token
# GITLAB: Settings -> Access Tokens -> Personal Access Token (api scope)
# SOURCEHUT: Settings -> Personal Access Tokens (uses email instead)
CODEBERG_TOKEN = os.environ.get('CODEBERG_TOKEN', '')
GITEA_TOKENS = {} # instance_url -> token, loaded from env
GITLAB_TOKENS = {} # instance_url -> token, loaded from env
# parse GITEA_TOKENS from env
# format: GITEA_TOKEN_192_168_1_8_3259=token -> http://192.168.1.8:3259
# format: GITEA_TOKEN_codeberg_org=token -> https://codeberg.org
def _parse_instance_url(env_key, prefix):
"""convert env key to instance URL"""
raw = env_key.replace(prefix, '')
parts = raw.split('_')
# check if last part is a port number
if parts[-1].isdigit() and len(parts[-1]) <= 5:
port = parts[-1]
host = '.'.join(parts[:-1])
# local IPs use http
if host.startswith('192.168.') or host.startswith('10.') or host == 'localhost':
return f'http://{host}:{port}'
return f'https://{host}:{port}'
else:
host = '.'.join(parts)
return f'https://{host}'
# load per-instance forge tokens from the environment into their stores;
# GITEA_TOKEN_* is checked before GITLAB_TOKEN_*, mirroring the if/elif order
_TOKEN_STORES = (('GITEA_TOKEN_', GITEA_TOKENS), ('GITLAB_TOKEN_', GITLAB_TOKENS))
for _env_key, _env_value in os.environ.items():
    for _prefix, _store in _TOKEN_STORES:
        if _env_key.startswith(_prefix):
            _store[_parse_instance_url(_env_key, _prefix)] = _env_value
            break
MASTODON_TOKEN = os.environ.get('MASTODON_TOKEN', '') MASTODON_TOKEN = os.environ.get('MASTODON_TOKEN', '')
MASTODON_INSTANCE = os.environ.get('MASTODON_INSTANCE', '') MASTODON_INSTANCE = os.environ.get('MASTODON_INSTANCE', '')

150
daemon.py
View file

@ -12,6 +12,7 @@ runs continuously, respects rate limits, sends intros automatically
import time import time
import json import json
import signal import signal
import os
import sys import sys
from datetime import datetime, timedelta from datetime import datetime, timedelta
from pathlib import Path from pathlib import Path
@ -20,13 +21,14 @@ from db import Database
from db.users import (init_users_table, get_priority_users, save_priority_match, from db.users import (init_users_table, get_priority_users, save_priority_match,
get_priority_user_matches, discover_host_user) get_priority_user_matches, discover_host_user)
from scoutd import scrape_github, scrape_reddit, scrape_mastodon, scrape_lobsters, scrape_lemmy, scrape_discord from scoutd import scrape_github, scrape_reddit, scrape_mastodon, scrape_lobsters, scrape_lemmy, scrape_discord
from config import HOST_USER, INTRO_INTERVAL, MAX_INTROS_PER_DAY, SCOUT_INTERVAL, MATCH_INTERVAL from scoutd.forges import scrape_all_forges
from config import HOST_USER
from scoutd.github import analyze_github_user, get_github_user from scoutd.github import analyze_github_user, get_github_user
from scoutd.signals import analyze_text from scoutd.signals import analyze_text
from matchd.fingerprint import generate_fingerprint, fingerprint_similarity from matchd.fingerprint import generate_fingerprint, fingerprint_similarity
from matchd.overlap import find_overlap from matchd.overlap import find_overlap
from matchd.lost import find_matches_for_lost_builders from matchd.lost import find_matches_for_lost_builders
from introd.groq_draft import draft_intro_with_llm as draft_intro from introd.draft import draft_intro, summarize_human, summarize_overlap
from introd.lost_intro import draft_lost_intro, get_lost_intro_config from introd.lost_intro import draft_lost_intro, get_lost_intro_config
from introd.send import send_email from introd.send import send_email
from introd.deliver import deliver_intro, determine_best_contact from introd.deliver import deliver_intro, determine_best_contact
@ -34,7 +36,19 @@ from config import get_lost_config
from api import start_api_thread, update_daemon_state from api import start_api_thread, update_daemon_state
# daemon config # daemon config
SCOUT_INTERVAL = 3600 * 4 # full scout every 4 hours
MATCH_INTERVAL = 3600 # check matches every hour
INTRO_INTERVAL = 3600 * 2 # send intros every 2 hours
LOST_INTERVAL = 3600 * 6 # lost builder outreach every 6 hours (lower volume) LOST_INTERVAL = 3600 * 6 # lost builder outreach every 6 hours (lower volume)
from config import MAX_INTROS_PER_DAY
# central coordination (optional - for distributed instances)
try:
from central_client import CentralClient
CENTRAL_ENABLED = bool(os.environ.get('CONNECTD_API_KEY'))
except ImportError:
CENTRAL_ENABLED = False
CentralClient = None # from config.py
MIN_OVERLAP_PRIORITY = 30 # min score for priority user matches MIN_OVERLAP_PRIORITY = 30 # min score for priority user matches
MIN_OVERLAP_STRANGERS = 50 # higher bar for stranger intros MIN_OVERLAP_STRANGERS = 50 # higher bar for stranger intros
@ -43,6 +57,9 @@ class ConnectDaemon:
def __init__(self, dry_run=False): def __init__(self, dry_run=False):
self.db = Database() self.db = Database()
init_users_table(self.db.conn) init_users_table(self.db.conn)
purged = self.db.purge_disqualified()
if any(purged.values()):
self.log(f"purged disqualified: {purged}")
self.running = True self.running = True
self.dry_run = dry_run self.dry_run = dry_run
self.started_at = datetime.now() self.started_at = datetime.now()
@ -52,6 +69,18 @@ class ConnectDaemon:
self.last_lost = None self.last_lost = None
self.intros_today = 0 self.intros_today = 0
self.lost_intros_today = 0 self.lost_intros_today = 0
# central coordination
self.central = None
if CENTRAL_ENABLED:
try:
self.central = CentralClient()
instance_id = os.environ.get('CONNECTD_INSTANCE_ID', 'unknown')
self.central.register_instance(instance_id, os.environ.get('CONNECTD_INSTANCE_IP', 'unknown'))
self.log(f"connected to central API as {instance_id}")
except Exception as e:
self.log(f"central API unavailable: {e}")
self.central = None
self.today = datetime.now().date() self.today = datetime.now().date()
# handle shutdown gracefully # handle shutdown gracefully
@ -108,6 +137,18 @@ class ConnectDaemon:
self.today = datetime.now().date() self.today = datetime.now().date()
self.intros_today = 0 self.intros_today = 0
self.lost_intros_today = 0 self.lost_intros_today = 0
# central coordination
self.central = None
if CENTRAL_ENABLED:
try:
self.central = CentralClient()
instance_id = os.environ.get('CONNECTD_INSTANCE_ID', 'unknown')
self.central.register_instance(instance_id, os.environ.get('CONNECTD_INSTANCE_IP', 'unknown'))
self.log(f"connected to central API as {instance_id}")
except Exception as e:
self.log(f"central API unavailable: {e}")
self.central = None
self.log("reset daily intro limits") self.log("reset daily intro limits")
def scout_cycle(self): def scout_cycle(self):
@ -126,6 +167,16 @@ class ConnectDaemon:
try: try:
scrape_mastodon(self.db, limit_per_instance=30) scrape_mastodon(self.db, limit_per_instance=30)
# scrape self-hosted git forges (highest signal)
self.log("scraping self-hosted git forges...")
try:
forge_humans = scrape_all_forges(limit_per_instance=30)
for h in forge_humans:
self.db.upsert_human(h)
self.log(f" forges: {len(forge_humans)} humans")
except Exception as e:
self.log(f" forge scrape error: {e}")
except Exception as e: except Exception as e:
self.log(f"mastodon scout error: {e}") self.log(f"mastodon scout error: {e}")
@ -157,7 +208,7 @@ class ConnectDaemon:
self.log(f"matching for {len(priority_users)} priority users...") self.log(f"matching for {len(priority_users)} priority users...")
humans = self.db.get_all_humans(min_score=20, limit=500) humans = self.db.get_all_humans(min_score=20)
for puser in priority_users: for puser in priority_users:
# build priority user's fingerprint from their linked profiles # build priority user's fingerprint from their linked profiles
@ -230,7 +281,7 @@ class ConnectDaemon:
"""find matches between discovered humans (altruistic)""" """find matches between discovered humans (altruistic)"""
self.log("matching strangers...") self.log("matching strangers...")
humans = self.db.get_all_humans(min_score=40, limit=200) humans = self.db.get_all_humans(min_score=40)
if len(humans) < 2: if len(humans) < 2:
return return
@ -256,7 +307,7 @@ class ConnectDaemon:
overlap = find_overlap(human_a, human_b, fp_a, fp_b) overlap = find_overlap(human_a, human_b, fp_a, fp_b)
if overlap['overlap_score'] >= MIN_OVERLAP_STRANGERS: if overlap and overlap["overlap_score"] >= MIN_OVERLAP_STRANGERS:
# save match # save match
self.db.save_match(human_a['id'], human_b['id'], overlap) self.db.save_match(human_a['id'], human_b['id'], overlap)
matches_found += 1 matches_found += 1
@ -266,6 +317,37 @@ class ConnectDaemon:
self.last_match = datetime.now() self.last_match = datetime.now()
def claim_from_central(self, human_id, match_id=None, outreach_type='intro'):
    """claim outreach from central - returns outreach_id or None if already claimed"""
    # sentinel -1 means "no central coordination, proceed locally"
    if not self.central:
        return -1
    try:
        return self.central.claim_outreach(human_id, match_id, outreach_type)
    except Exception as exc:
        self.log(f"central claim error: {exc}")
        # fail open: an unreachable central must not block local outreach
        return -1
def complete_on_central(self, outreach_id, status,
                        sent_via=None, draft=None, error=None):
    """mark outreach complete on central"""
    # nothing to report in local mode or for locally-claimed (-1) outreach
    if not self.central or outreach_id == -1:
        return
    try:
        self.central.complete_outreach(outreach_id, status, sent_via, draft, error)
    except Exception as exc:
        # best-effort: a reporting failure must not abort the send loop
        self.log(f"central complete error: {exc}")
def sync_to_central(self, humans=None, matches=None):
    """sync local data to central"""
    if not self.central:
        return
    try:
        # push humans first so bulk matches can reference them
        for payload, push in ((humans, self.central.upsert_humans_bulk),
                              (matches, self.central.create_matches_bulk)):
            if payload:
                push(payload)
    except Exception as exc:
        # best-effort: central being down must not break the local cycle
        self.log(f"central sync error: {exc}")
def send_stranger_intros(self): def send_stranger_intros(self):
"""send intros to connect strangers (or preview in dry-run mode)""" """send intros to connect strangers (or preview in dry-run mode)"""
self.reset_daily_limits() self.reset_daily_limits()
@ -331,29 +413,18 @@ class ConnectDaemon:
'overlap_reasons': match['overlap_reasons'], 'overlap_reasons': match['overlap_reasons'],
} }
# ACTIVITY-BASED CONTACT SELECTION # try to send intro to person with email
# use deliver_intro which calls determine_best_contact
# picks method based on WHERE they're most active:
# - mastodon DM if active on fediverse
# - github issue if actively committing
# - email ONLY as last resort
for recipient, other in [(human_a, human_b), (human_b, human_a)]: for recipient, other in [(human_a, human_b), (human_b, human_a)]:
# draft intro using groq LLM contact = recipient.get('contact', {})
# retry groq up to 3 times with 10s wait if isinstance(contact, str):
intro_result, intro_error = None, None contact = json.loads(contact)
for retry in range(3):
intro_result, intro_error = draft_intro(match_data, recipient='a' if recipient == human_a else 'b')
if not intro_error:
break
self.log(f"groq retry {retry+1}/3: {intro_error}")
import time
time.sleep(10)
if intro_error: email = contact.get('email')
self.log(f"failed to draft intro after retries: {intro_error}") if not email:
continue continue
intro = {'draft': intro_result.get('draft', '')}
# draft intro
intro = draft_intro(match_data, recipient='a' if recipient == human_a else 'b')
# parse overlap reasons for display # parse overlap reasons for display
reasons = match['overlap_reasons'] reasons = match['overlap_reasons']
@ -361,13 +432,12 @@ class ConnectDaemon:
reasons = json.loads(reasons) reasons = json.loads(reasons)
reason_summary = ', '.join(reasons[:3]) if reasons else 'aligned values' reason_summary = ', '.join(reasons[:3]) if reasons else 'aligned values'
# determine best contact method based on activity
method, contact_info = determine_best_contact(recipient)
if self.dry_run: if self.dry_run:
# print preview
print("\n" + "=" * 60) print("\n" + "=" * 60)
print(f"TO: {recipient['username']} ({recipient['platform']})") print(f"TO: {recipient['username']} ({recipient['platform']})")
print(f"METHOD: {method} -> {contact_info}") print(f"EMAIL: {email}")
print(f"SUBJECT: you might want to meet {other['username']}")
print(f"SCORE: {match['overlap_score']:.0f} ({reason_summary})") print(f"SCORE: {match['overlap_score']:.0f} ({reason_summary})")
print("-" * 60) print("-" * 60)
print("MESSAGE:") print("MESSAGE:")
@ -377,12 +447,23 @@ class ConnectDaemon:
print("=" * 60) print("=" * 60)
break break
else: else:
# deliver via activity-based method selection # claim from central first
success, error, delivery_method = deliver_intro(match_data, intro['draft'], intro.get('subject')) outreach_id = self.claim_from_central(recipient['id'], match['id'], 'intro')
if outreach_id is None:
self.log(f"skipping {recipient['username']} - already claimed by another instance")
continue
# actually send
success, error = send_email(
email,
f"connectd: you might want to meet {other['username']}",
intro['draft']
)
if success: if success:
self.log(f"sent intro to {recipient['username']} via {delivery_method}") self.log(f"sent intro to {recipient['username']} ({email})")
self.intros_today += 1 self.intros_today += 1
self.complete_on_central(outreach_id, 'sent', 'email', intro['draft'])
# mark match as intro_sent # mark match as intro_sent
c.execute('UPDATE matches SET status = "intro_sent" WHERE id = ?', c.execute('UPDATE matches SET status = "intro_sent" WHERE id = ?',
@ -390,7 +471,8 @@ class ConnectDaemon:
self.db.conn.commit() self.db.conn.commit()
break break
else: else:
self.log(f"failed to reach {recipient['username']} via {delivery_method}: {error}") self.log(f"failed to send to {email}: {error}")
self.complete_on_central(outreach_id, 'failed', error=error)
self.last_intro = datetime.now() self.last_intro = datetime.now()
@ -475,7 +557,7 @@ class ConnectDaemon:
'overlap_reasons': match.get('shared_interests', []), 'overlap_reasons': match.get('shared_interests', []),
} }
success, error, delivery_method = deliver_intro(match_data, draft, None) success, error, delivery_method = deliver_intro(match_data, draft)
if success: if success:
self.log(f"sent lost builder intro to {lost_name} via {delivery_method}") self.log(f"sent lost builder intro to {lost_name} via {delivery_method}")

View file

@ -183,7 +183,7 @@ class Database:
row = c.fetchone() row = c.fetchone()
return dict(row) if row else None return dict(row) if row else None
def get_all_humans(self, min_score=0, limit=1000): def get_all_humans(self, min_score=0, limit=100000):
"""get all humans above score threshold""" """get all humans above score threshold"""
c = self.conn.cursor() c = self.conn.cursor()
c.execute('''SELECT * FROM humans c.execute('''SELECT * FROM humans
@ -373,3 +373,64 @@ class Database:
def close(self): def close(self):
self.conn.close() self.conn.close()
def purge_disqualified(self):
    """
    auto-cleanup: remove all matches/intros involving users with disqualifying signals
    DISQUALIFYING: maga, conspiracy, conservative, antivax, sovcit

    Returns a dict of table name -> number of rows deleted.
    """
    c = self.conn.cursor()
    purged = {}
    # patterns to match disqualifying signals
    disq_patterns = ["maga", "conspiracy", "conservative", "antivax", "sovcit"]
    # parameterized LIKE clauses: only placeholders go into the SQL text,
    # never the pattern values themselves
    neg_check = " OR ".join(["negative_signals LIKE ?"] * len(disq_patterns))
    params = [f"%{p}%" for p in disq_patterns]
    # 1. delete from intros where recipient is disqualified
    c.execute(f"""
        DELETE FROM intros WHERE recipient_human_id IN (
            SELECT id FROM humans WHERE {neg_check}
        )
    """, params)
    purged["intros"] = c.rowcount
    # 2. delete from priority_matches where matched_human is disqualified
    c.execute(f"""
        DELETE FROM priority_matches WHERE matched_human_id IN (
            SELECT id FROM humans WHERE {neg_check}
        )
    """, params)
    purged["priority_matches"] = c.rowcount
    # 3. delete from matches where either human is disqualified
    #    (two subqueries -> parameter list supplied twice)
    c.execute(f"""
        DELETE FROM matches WHERE
            human_a_id IN (SELECT id FROM humans WHERE {neg_check})
            OR human_b_id IN (SELECT id FROM humans WHERE {neg_check})
    """, params + params)
    purged["matches"] = c.rowcount
    # 4. cleanup orphaned records (humans deleted but refs remain)
    c.execute("""
        DELETE FROM matches WHERE
            NOT EXISTS (SELECT 1 FROM humans h WHERE h.id = human_a_id)
            OR NOT EXISTS (SELECT 1 FROM humans h WHERE h.id = human_b_id)
    """)
    purged["orphaned_matches"] = c.rowcount
    c.execute("""
        DELETE FROM priority_matches WHERE
            NOT EXISTS (SELECT 1 FROM humans h WHERE h.id = matched_human_id)
    """)
    purged["orphaned_priority"] = c.rowcount
    c.execute("""
        DELETE FROM intros WHERE
            NOT EXISTS (SELECT 1 FROM humans h WHERE h.id = recipient_human_id)
    """)
    purged["orphaned_intros"] = c.rowcount
    self.conn.commit()
    return purged

View file

@ -147,6 +147,87 @@ def create_github_issue(owner, repo, title, body, dry_run=False):
return False, str(e) return False, str(e)
def create_forge_issue(platform_type, instance_url, owner, repo, title, body, dry_run=False):
    """
    create issue on self-hosted git forge.
    supports gitea/forgejo/gogs (same API) and gitlab.

    Returns (success, detail): detail is the issue URL on success,
    otherwise an error string (or None for dry runs).
    """
    if dry_run:
        print(f" [dry run] would create issue on {platform_type}:{instance_url}/{owner}/{repo}")
        return True, None
    try:
        if platform_type in ('gitea', 'forgejo', 'gogs'):
            # tokens imported lazily so dry-run / unsupported paths
            # don't require config at all
            from config import CODEBERG_TOKEN, GITEA_TOKENS
            # get token for this instance
            if 'codeberg.org' in instance_url:
                token = CODEBERG_TOKEN
            else:
                token = GITEA_TOKENS.get(instance_url)
            if not token:
                return False, f"no auth token for {instance_url}"
            # gitea/forgejo/gogs share the same issues endpoint
            api_url = f"{instance_url}/api/v1/repos/{owner}/{repo}/issues"
            headers = {
                'Content-Type': 'application/json',
                'Authorization': f'token {token}'
            }
            data = {'title': title, 'body': body}
            resp = requests.post(api_url, headers=headers, json=data, timeout=15)
            if resp.status_code in (200, 201):
                return True, resp.json().get('html_url')
            return False, f"gitea api error: {resp.status_code} - {resp.text[:200]}"
        elif platform_type == 'gitlab':
            from config import GITLAB_TOKENS
            token = GITLAB_TOKENS.get(instance_url)
            if not token:
                return False, f"no auth token for {instance_url}"
            # resolve project ID: search by name, then prefer an exact
            # owner/repo namespace match so we don't file the issue on a
            # same-named project owned by someone else
            search_url = f"{instance_url}/api/v4/projects"
            headers = {'PRIVATE-TOKEN': token}
            params = {'search': repo}
            resp = requests.get(search_url, headers=headers, params=params, timeout=15)
            if resp.status_code != 200:
                return False, f"gitlab project lookup failed: {resp.status_code}"
            projects = resp.json()
            wanted = f"{owner}/{repo}"
            project_id = None
            for p in projects:
                if p.get('path_with_namespace') == wanted:
                    project_id = p.get('id')
                    break
            if project_id is None:
                # fall back to the looser path/name match
                for p in projects:
                    if p.get('path') == repo or p.get('name') == repo:
                        project_id = p.get('id')
                        break
            if not project_id:
                return False, f"project {repo} not found"
            # create issue
            issue_url = f"{instance_url}/api/v4/projects/{project_id}/issues"
            data = {'title': title, 'description': body}
            resp = requests.post(issue_url, headers=headers, json=data, timeout=15)
            if resp.status_code in (200, 201):
                return True, resp.json().get('web_url')
            # include response text like the gitea branch for debuggability
            return False, f"gitlab api error: {resp.status_code} - {resp.text[:200]}"
        elif platform_type == 'sourcehut':
            return False, "sourcehut uses mailing lists - use email instead"
        else:
            return False, f"unknown forge type: {platform_type}"
    except Exception as e:
        return False, str(e)
def send_mastodon_dm(recipient_acct, message, dry_run=False): def send_mastodon_dm(recipient_acct, message, dry_run=False):
"""send mastodon direct message""" """send mastodon direct message"""
if not MASTODON_TOKEN: if not MASTODON_TOKEN:
@ -419,14 +500,94 @@ def deliver_intro(match_data, intro_draft, subject=None, dry_run=False):
""" """
success, error = create_github_issue(owner, repo, title, github_body, dry_run) success, error = create_github_issue(owner, repo, title, github_body, dry_run)
elif method == 'forge_issue':
# self-hosted git forge issue (gitea/forgejo/gitlab/sourcehut)
platform_type = contact_info.get('platform_type')
instance_url = contact_info.get('instance_url')
owner = contact_info.get('owner')
repo = contact_info.get('repo')
title = subject or "community introduction from connectd"
# get the other person's contact info for bidirectional link
sender = match_data.get('human_a', {})
sender_name = sender.get('name') or sender.get('username') or 'someone'
sender_platform = sender.get('platform', '')
sender_url = sender.get('url', '')
if not sender_url:
if sender_platform == 'github':
sender_url = f"https://github.com/{sender.get('username')}"
elif sender_platform == 'mastodon':
sender_url = f"https://fosstodon.org/@{sender.get('username')}"
elif ':' in sender_platform: # forge platform
extra = sender.get('extra', {})
if isinstance(extra, str):
import json as _json
extra = _json.loads(extra) if extra else {}
sender_url = extra.get('instance_url', '') + '/' + sender.get('username', '')
forge_body = f"""hey {recipient.get('name') or recipient.get('username')},
{intro_draft}
**reach them at:** {sender_url or 'see their profile'}
---
*this is an automated introduction from [connectd](https://github.com/connectd-daemon) - a daemon that finds isolated builders with aligned values and connects them.*
*if this feels spammy, close this issue and we won't reach out again.*
"""
success, error = create_forge_issue(platform_type, instance_url, owner, repo, title, forge_body, dry_run)
elif method == 'manual': elif method == 'manual':
# skip - no longer using manual queue # skip - no longer using manual queue
success = False success = False
error = "manual method deprecated - skipping" error = "manual method deprecated - skipping"
# FALLBACK CHAIN: if primary method failed, try fallbacks
if not success and fallbacks:
for fallback_method, fallback_info in fallbacks:
result['fallback_attempts'] = result.get('fallback_attempts', [])
result['fallback_attempts'].append({'method': fallback_method})
fb_success = False
fb_error = None
if fallback_method == 'email':
fb_success, fb_error = send_email(fallback_info, email_subject, intro_draft, dry_run)
elif fallback_method == 'mastodon':
fb_success, fb_error = send_mastodon_dm(fallback_info, intro_draft, dry_run)
elif fallback_method == 'bluesky':
fb_success, fb_error = send_bluesky_dm(fallback_info, intro_draft, dry_run)
elif fallback_method == 'matrix':
fb_success, fb_error = send_matrix_dm(fallback_info, intro_draft, dry_run)
elif fallback_method == 'github_issue':
owner = fallback_info.get('owner') if isinstance(fallback_info, dict) else fallback_info.split('/')[0]
repo = fallback_info.get('repo') if isinstance(fallback_info, dict) else fallback_info.split('/')[1]
fb_success, fb_error = create_github_issue(owner, repo, email_subject, intro_draft, dry_run)
elif fallback_method == 'forge_issue':
fb_success, fb_error = create_forge_issue(
fallback_info.get('platform_type'),
fallback_info.get('instance_url'),
fallback_info.get('owner'),
fallback_info.get('repo'),
email_subject, intro_draft, dry_run
)
if fb_success:
success = True
method = fallback_method
contact_info = fallback_info
error = None
result['fallback_succeeded'] = fallback_method
break
else:
result['fallback_attempts'][-1]['error'] = fb_error
# log result # log result
result['success'] = success result['success'] = success
result['error'] = error result['error'] = error
result['final_method'] = method
if success: if success:
log['sent'].append(result) log['sent'].append(result)

View file

@ -21,3 +21,7 @@ services:
- ./api.py:/app/api.py:ro - ./api.py:/app/api.py:ro
- ./deliver.py:/app/introd/deliver.py:ro - ./deliver.py:/app/introd/deliver.py:ro
- ./soul.txt:/app/soul.txt:ro - ./soul.txt:/app/soul.txt:ro
- ./scoutd/reddit.py:/app/scoutd/reddit.py:ro
- ./matchd/overlap.py:/app/matchd/overlap.py:ro
- ./central_client.py:/app/central_client.py:ro
- ./scoutd/forges.py:/app/scoutd/forges.py:ro

View file

@ -1,437 +1,419 @@
""" """
introd/groq_draft.py - groq llama 4 maverick for smart intro drafting connectd - groq message drafting
reads soul from file, uses as guideline for llm to personalize
uses groq api to generate personalized, natural intro messages
that don't sound like ai-generated slop
""" """
import os import os
import json import json
import requests from groq import Groq
from datetime import datetime
GROQ_API_KEY = os.environ.get('GROQ_API_KEY', '') GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions' GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
MODEL = os.environ.get('GROQ_MODEL', 'llama-3.1-70b-versatile')
client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
def determine_contact_method(human): # load soul from file (guideline, not script)
""" SOUL_PATH = os.getenv("SOUL_PATH", "/app/soul.txt")
determine best contact method based on WHERE THEY'RE MOST ACTIVE def load_soul():
don't use fixed hierarchy - analyze activity per platform:
- count posts/commits/activity
- weight by recency (last 30 days matters more)
- contact them where they already are
- fall back to email only if no social activity
"""
from datetime import datetime, timedelta
extra = human.get('extra', {})
if isinstance(extra, str):
extra = json.loads(extra) if extra else {}
# handle nested extra.extra from old save format
if 'extra' in extra and isinstance(extra['extra'], dict):
extra = {**extra, **extra['extra']}
contact = human.get('contact', {})
if isinstance(contact, str):
contact = json.loads(contact) if contact else {}
# collect activity scores per platform
activity_scores = {}
now = datetime.now()
thirty_days_ago = now - timedelta(days=30)
ninety_days_ago = now - timedelta(days=90)
# github activity
github_username = human.get('username') if human.get('platform') == 'github' else extra.get('github')
if github_username:
github_score = 0
top_repos = extra.get('top_repos', [])
for repo in top_repos:
# recent commits weight more
pushed_at = repo.get('pushed_at', '')
if pushed_at:
try: try:
push_date = datetime.fromisoformat(pushed_at.replace('Z', '+00:00')).replace(tzinfo=None) with open(SOUL_PATH, 'r') as f:
if push_date > thirty_days_ago: return f.read().strip()
github_score += 10 # very recent
elif push_date > ninety_days_ago:
github_score += 5 # somewhat recent
else:
github_score += 1 # old but exists
except: except:
github_score += 1 return None
# stars indicate engagement SIGNATURE_HTML = """
github_score += min(repo.get('stars', 0) // 10, 5) <div style="margin-top: 24px; padding-top: 16px; border-top: 1px solid #333;">
<div style="margin-bottom: 12px;">
<a href="https://github.com/sudoxnym/connectd" style="color: #8b5cf6; text-decoration: none; font-size: 14px;">github.com/sudoxnym/connectd</a>
<span style="color: #666; font-size: 12px; margin-left: 8px;">(main repo)</span>
</div>
<div style="display: flex; gap: 16px; align-items: center;">
<a href="https://github.com/connectd-daemon" title="GitHub" style="color: #888; text-decoration: none;">
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"/></svg>
</a>
<a href="https://mastodon.sudoxreboot.com/@connectd" title="Mastodon" style="color: #888; text-decoration: none;">
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M23.268 5.313c-.35-2.578-2.617-4.61-5.304-5.004C17.51.242 15.792 0 11.813 0h-.03c-3.98 0-4.835.242-5.288.309C3.882.692 1.496 2.518.917 5.127.64 6.412.61 7.837.661 9.143c.074 1.874.088 3.745.26 5.611.118 1.24.325 2.47.62 3.68.55 2.237 2.777 4.098 4.96 4.857 2.336.792 4.849.923 7.256.38.265-.061.527-.132.786-.213.585-.184 1.27-.39 1.774-.753a.057.057 0 0 0 .023-.043v-1.809a.052.052 0 0 0-.02-.041.053.053 0 0 0-.046-.01 20.282 20.282 0 0 1-4.709.545c-2.73 0-3.463-1.284-3.674-1.818a5.593 5.593 0 0 1-.319-1.433.053.053 0 0 1 .066-.054c1.517.363 3.072.546 4.632.546.376 0 .75 0 1.125-.01 1.57-.044 3.224-.124 4.768-.422.038-.008.077-.015.11-.024 2.435-.464 4.753-1.92 4.989-5.604.008-.145.03-1.52.03-1.67.002-.512.167-3.63-.024-5.545zm-3.748 9.195h-2.561V8.29c0-1.309-.55-1.976-1.67-1.976-1.23 0-1.846.79-1.846 2.35v3.403h-2.546V8.663c0-1.56-.617-2.35-1.848-2.35-1.112 0-1.668.668-1.67 1.977v6.218H4.822V8.102c0-1.31.337-2.35 1.011-3.12.696-.77 1.608-1.164 2.74-1.164 1.311 0 2.302.5 2.962 1.498l.638 1.06.638-1.06c.66-.999 1.65-1.498 2.96-1.498 1.13 0 2.043.395 2.74 1.164.675.77 1.012 1.81 1.012 3.12z"/></svg>
</a>
<a href="https://bsky.app/profile/connectd.bsky.social" title="Bluesky" style="color: #888; text-decoration: none;">
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M5.202 2.857C7.954 4.922 10.913 9.11 12 11.358c1.087-2.247 4.046-6.436 6.798-8.501C20.783 1.366 24 .213 24 3.883c0 .732-.42 6.156-.667 7.037-.856 3.061-3.978 3.842-6.755 3.37 4.854.826 6.089 3.562 3.422 6.299-5.065 5.196-7.28-1.304-7.847-2.97-.104-.305-.152-.448-.153-.327 0-.121-.05.022-.153.327-.568 1.666-2.782 8.166-7.847 2.97-2.667-2.737-1.432-5.473 3.422-6.3-2.777.473-5.899-.308-6.755-3.369C.42 10.04 0 4.615 0 3.883c0-3.67 3.217-2.517 5.202-1.026"/></svg>
</a>
<a href="https://lemmy.sudoxreboot.com/c/connectd" title="Lemmy" style="color: #888; text-decoration: none;">
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M2.9595 4.2228a3.9132 3.9132 0 0 0-.332.019c-.8781.1012-1.67.5699-2.155 1.3862-.475.8-.5922 1.6809-.35 2.4971.2421.8162.8297 1.5575 1.6982 2.1449.0053.0035.0106.0076.0163.0114.746.4498 1.492.7431 2.2877.8994-.02.3318-.0272.6689-.006 1.0181.0634 1.0432.4368 2.0006.996 2.8492l-2.0061.8189a.4163.4163 0 0 0-.2276.2239.416.416 0 0 0 .0879.455.415.415 0 0 0 .2941.1231.4156.4156 0 0 0 .1595-.0312l2.2093-.9035c.408.4859.8695.9315 1.3723 1.318.0196.0151.0407.0264.0603.0423l-1.2918 1.7103a.416.416 0 0 0 .664.501l1.314-1.7385c.7185.4548 1.4782.7927 2.2294 1.0242.3833.7209 1.1379 1.1871 2.0202 1.1871.8907 0 1.6442-.501 2.0242-1.2072.744-.2347 1.4959-.5729 2.2073-1.0262l1.332 1.7606a.4157.4157 0 0 0 .7439-.1936.4165.4165 0 0 0-.0799-.3074l-1.3099-1.7345c.0083-.0075.0178-.0113.0261-.0188.4968-.3803.9549-.8175 1.3622-1.2939l2.155.8794a.4156.4156 0 0 0 .5412-.2276.4151.4151 0 0 0-.2273-.5432l-1.9438-.7928c.577-.8538.9697-1.8183 1.0504-2.8693.0268-.3507.0242-.6914.0079-1.0262.7905-.1572 1.5321-.4502 2.2737-.8974.0053-.0033.011-.0076.0163-.0113.8684-.5874 1.456-1.3287 1.6982-2.145.2421-.8161.125-1.697-.3501-2.497-.4849-.8163-1.2768-1.2852-2.155-1.3863a3.2175 3.2175 0 0 0-.332-.0189c-.7852-.0151-1.6231.229-2.4286.6942-.5926.342-1.1252.867-1.5433 1.4387-1.1699-.6703-2.6923-1.0476-4.5635-1.0785a15.5768 15.5768 0 0 0-.5111 0c-2.085.034-3.7537.43-5.0142 1.1449-.0033-.0038-.0045-.0114-.008-.0152-.4233-.5916-.973-1.1365-1.5835-1.489-.8055-.465-1.6434-.7083-2.4286-.6941Zm.2858.7365c.5568.042 1.1696.2358 1.7787.5875.485.28.9757.7554 1.346 1.2696a5.6875 5.6875 0 0 0-.4969.4085c-.9201.8516-1.4615 1.9597-1.668 3.2335-.6809-.1402-1.3183-.3945-1.984-.7948-.7553-.5128-1.2159-1.1225-1.4004-1.7445-.1851-.624-.1074-1.2712.2776-1.9196.3743-.63.9275-.9534 1.6118-1.0322a2.796 2.796 0 0 1 .5352-.0076Zm17.5094 0a2.797 2.797 0 0 1 .5353.0075c.6842.0786 1.2374.4021 1.6117 1.0322.385.6484.4627 1.2957.2776 1.9196-.1845.622-.645 
1.2317-1.4004 1.7445-.6578.3955-1.2881.6472-1.9598.7888-.1942-1.2968-.7375-2.4338-1.666-3.302a5.5639 5.5639 0 0 0-.4709-.3923c.3645-.49.8287-.9428 1.2938-1.2113.6091-.3515 1.2219-.5454 1.7787-.5875ZM12.006 6.0036a14.832 14.832 0 0 1 .487 0c2.3901.0393 4.0848.67 5.1631 1.678 1.1501 1.0754 1.6423 2.6006 1.499 4.467-.1311 1.7079-1.2203 3.2281-2.652 4.324-.694.5313-1.4626.9354-2.2254 1.2294.0031-.0453.014-.0888.014-.1349.0029-1.1964-.9313-2.2133-2.2918-2.2133-1.3606 0-2.3222 1.0154-2.2918 2.2213.0013.0507.014.0972.0181.1471-.781-.2933-1.5696-.7013-2.2777-1.2456-1.4239-1.0945-2.4997-2.6129-2.6037-4.322-.1129-1.8567.3778-3.3382 1.5212-4.3965C7.5094 6.7 9.352 6.047 12.006 6.0036Zm-3.6419 6.8291c-.6053 0-1.0966.4903-1.0966 1.0966 0 .6063.4913 1.0986 1.0966 1.0986s1.0966-.4923 1.0966-1.0986c0-.6063-.4913-1.0966-1.0966-1.0966zm7.2819.0113c-.5998 0-1.0866.4859-1.0866 1.0866s.4868 1.0885 1.0866 1.0885c.5997 0 1.0865-.4878 1.0865-1.0885s-.4868-1.0866-1.0865-1.0866zM12 16.0835c1.0237 0 1.5654.638 1.5634 1.4829-.0018.7849-.6723 1.485-1.5634 1.485-.9167 0-1.54-.5629-1.5634-1.493-.0212-.8347.5397-1.4749 1.5634-1.4749Z"/></svg>
</a>
<a href="https://discord.gg/connectd" title="Discord" style="color: #888; text-decoration: none;">
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M20.317 4.3698a19.7913 19.7913 0 00-4.8851-1.5152.0741.0741 0 00-.0785.0371c-.211.3753-.4447.8648-.6083 1.2495-1.8447-.2762-3.68-.2762-5.4868 0-.1636-.3933-.4058-.8742-.6177-1.2495a.077.077 0 00-.0785-.037 19.7363 19.7363 0 00-4.8852 1.515.0699.0699 0 00-.0321.0277C.5334 9.0458-.319 13.5799.0992 18.0578a.0824.0824 0 00.0312.0561c2.0528 1.5076 4.0413 2.4228 5.9929 3.0294a.0777.0777 0 00.0842-.0276c.4616-.6304.8731-1.2952 1.226-1.9942a.076.076 0 00-.0416-.1057c-.6528-.2476-1.2743-.5495-1.8722-.8923a.077.077 0 01-.0076-.1277c.1258-.0943.2517-.1923.3718-.2914a.0743.0743 0 01.0776-.0105c3.9278 1.7933 8.18 1.7933 12.0614 0a.0739.0739 0 01.0785.0095c.1202.099.246.1981.3728.2924a.077.077 0 01-.0066.1276 12.2986 12.2986 0 01-1.873.8914.0766.0766 0 00-.0407.1067c.3604.698.7719 1.3628 1.225 1.9932a.076.076 0 00.0842.0286c1.961-.6067 3.9495-1.5219 6.0023-3.0294a.077.077 0 00.0313-.0552c.5004-5.177-.8382-9.6739-3.5485-13.6604a.061.061 0 00-.0312-.0286zM8.02 15.3312c-1.1825 0-2.1569-1.0857-2.1569-2.419 0-1.3332.9555-2.4189 2.157-2.4189 1.2108 0 2.1757 1.0952 2.1568 2.419 0 1.3332-.9555 2.4189-2.1569 2.4189zm7.9748 0c-1.1825 0-2.1569-1.0857-2.1569-2.419 0-1.3332.9554-2.4189 2.1569-2.4189 1.2108 0 2.1757 1.0952 2.1568 2.419 0 1.3332-.946 2.4189-2.1568 2.4189Z"/></svg>
</a>
<a href="https://matrix.to/#/@connectd:sudoxreboot.com" title="Matrix" style="color: #888; text-decoration: none;">
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M.632.55v22.9H2.28V24H0V0h2.28v.55zm7.043 7.26v1.157h.033c.309-.443.683-.784 1.117-1.024.433-.245.936-.365 1.5-.365.54 0 1.033.107 1.481.314.448.208.785.582 1.02 1.108.254-.374.6-.706 1.034-.992.434-.287.95-.43 1.546-.43.453 0 .872.056 1.26.167.388.11.716.286.993.53.276.245.489.559.646.951.152.392.23.863.23 1.417v5.728h-2.349V11.52c0-.286-.01-.559-.032-.812a1.755 1.755 0 0 0-.18-.66 1.106 1.106 0 0 0-.438-.448c-.194-.11-.457-.166-.785-.166-.332 0-.6.064-.803.189a1.38 1.38 0 0 0-.48.499 1.946 1.946 0 0 0-.231.696 5.56 5.56 0 0 0-.06.785v4.768h-2.35v-4.8c0-.254-.004-.503-.018-.752a2.074 2.074 0 0 0-.143-.688 1.052 1.052 0 0 0-.415-.503c-.194-.125-.476-.19-.854-.19-.111 0-.259.024-.439.074-.18.051-.36.143-.53.282-.171.138-.319.337-.439.595-.12.259-.18.6-.18 1.02v4.966H5.46V7.81zm15.693 15.64V.55H21.72V0H24v24h-2.28v-.55z"/></svg>
</a>
<a href="https://reddit.com/r/connectd" title="Reddit" style="color: #888; text-decoration: none;">
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0C5.373 0 0 5.373 0 12c0 3.314 1.343 6.314 3.515 8.485l-2.286 2.286C.775 23.225 1.097 24 1.738 24H12c6.627 0 12-5.373 12-12S18.627 0 12 0Zm4.388 3.199c1.104 0 1.999.895 1.999 1.999 0 1.105-.895 2-1.999 2-.946 0-1.739-.657-1.947-1.539v.002c-1.147.162-2.032 1.15-2.032 2.341v.007c1.776.067 3.4.567 4.686 1.363.473-.363 1.064-.58 1.707-.58 1.547 0 2.802 1.254 2.802 2.802 0 1.117-.655 2.081-1.601 2.531-.088 3.256-3.637 5.876-7.997 5.876-4.361 0-7.905-2.617-7.998-5.87-.954-.447-1.614-1.415-1.614-2.538 0-1.548 1.255-2.802 2.803-2.802.645 0 1.239.218 1.712.585 1.275-.79 2.881-1.291 4.64-1.365v-.01c0-1.663 1.263-3.034 2.88-3.207.188-.911.993-1.595 1.959-1.595Zm-8.085 8.376c-.784 0-1.459.78-1.506 1.797-.047 1.016.64 1.429 1.426 1.429.786 0 1.371-.369 1.418-1.385.047-1.017-.553-1.841-1.338-1.841Zm7.406 0c-.786 0-1.385.824-1.338 1.841.047 1.017.634 1.385 1.418 1.385.785 0 1.473-.413 1.426-1.429-.046-1.017-.721-1.797-1.506-1.797Zm-3.703 4.013c-.974 0-1.907.048-2.77.135-.147.015-.241.168-.183.305.483 1.154 1.622 1.964 2.953 1.964 1.33 0 2.47-.81 2.953-1.964.057-.137-.037-.29-.184-.305-.863-.087-1.795-.135-2.769-.135Z"/></svg>
</a>
<a href="mailto:connectd@sudoxreboot.com" title="Email" style="color: #888; text-decoration: none;">
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M1.5 8.67v8.58a3 3 0 003 3h15a3 3 0 003-3V8.67l-8.928 5.493a3 3 0 01-3.144 0L1.5 8.67z"/><path d="M22.5 6.908V6.75a3 3 0 00-3-3h-15a3 3 0 00-3 3v.158l9.714 5.978a1.5 1.5 0 001.572 0L22.5 6.908z"/></svg>
</a>
</div>
</div>
"""
# commit activity from deep scrape SIGNATURE_PLAINTEXT = """
commit_count = extra.get('commit_count', 0) ---
github_score += min(commit_count // 10, 20) github.com/sudoxnym/connectd (main repo)
if github_score > 0: github: github.com/connectd-daemon
activity_scores['github_issue'] = { mastodon: @connectd@mastodon.sudoxreboot.com
'score': github_score, bluesky: connectd.bsky.social
'info': f"{github_username}/{top_repos[0]['name']}" if top_repos else github_username lemmy: lemmy.sudoxreboot.com/c/connectd
} discord: discord.gg/connectd
matrix: @connectd:sudoxreboot.com
# mastodon activity reddit: reddit.com/r/connectd
mastodon_handle = human.get('username') if human.get('platform') == 'mastodon' else (extra.get('mastodon') or contact.get('mastodon')) email: connectd@sudoxreboot.com
if mastodon_handle: """
mastodon_score = 0
statuses_count = extra.get('mastodon_statuses', 0) or human.get('statuses_count', 0)
# high post count = active user
if statuses_count > 1000:
mastodon_score += 30
elif statuses_count > 500:
mastodon_score += 20
elif statuses_count > 100:
mastodon_score += 10
elif statuses_count > 0:
mastodon_score += 5
# platform bonus for fediverse (values-aligned)
mastodon_score += 10
# bonus if handle was discovered via rel="me" or similar verification
# (having a handle linked from their website = they want to be contacted there)
handles = extra.get('handles', {})
if handles.get('mastodon') == mastodon_handle:
mastodon_score += 15 # verified handle bonus
if mastodon_score > 0:
activity_scores['mastodon'] = {'score': mastodon_score, 'info': mastodon_handle}
# bluesky activity
bluesky_handle = human.get('username') if human.get('platform') == 'bluesky' else (extra.get('bluesky') or contact.get('bluesky'))
if bluesky_handle:
bluesky_score = 0
posts_count = extra.get('bluesky_posts', 0) or human.get('posts_count', 0)
if posts_count > 500:
bluesky_score += 25
elif posts_count > 100:
bluesky_score += 15
elif posts_count > 0:
bluesky_score += 5
# newer platform, slightly lower weight
bluesky_score += 5
if bluesky_score > 0:
activity_scores['bluesky'] = {'score': bluesky_score, 'info': bluesky_handle}
# twitter activity
twitter_handle = extra.get('twitter') or contact.get('twitter')
if twitter_handle:
twitter_score = 0
tweets_count = extra.get('twitter_tweets', 0)
if tweets_count > 1000:
twitter_score += 20
elif tweets_count > 100:
twitter_score += 10
elif tweets_count > 0:
twitter_score += 5
# if we found them via twitter hashtags, they're active there
if human.get('platform') == 'twitter':
twitter_score += 15
if twitter_score > 0:
activity_scores['twitter'] = {'score': twitter_score, 'info': twitter_handle}
# NOTE: reddit is DISCOVERY ONLY, not a contact method
# we find users on reddit but reach out via their external links (github, mastodon, etc.)
# reddit-only users go to manual_queue for review
# lobsters activity
lobsters_username = extra.get('lobsters') or contact.get('lobsters')
if lobsters_username or human.get('platform') == 'lobsters':
lobsters_score = 0
lobsters_username = lobsters_username or human.get('username')
karma = extra.get('lobsters_karma', 0) or human.get('karma', 0)
# lobsters is invite-only, high signal
lobsters_score += 15
if karma > 100:
lobsters_score += 15
elif karma > 50:
lobsters_score += 10
elif karma > 0:
lobsters_score += 5
if lobsters_score > 0:
activity_scores['lobsters'] = {'score': lobsters_score, 'info': lobsters_username}
# matrix activity
matrix_id = extra.get('matrix') or contact.get('matrix')
if matrix_id:
matrix_score = 0
# matrix users are typically privacy-conscious and technical
matrix_score += 15 # platform bonus for decentralized chat
# bonus if handle was discovered via rel="me" verification
handles = extra.get('handles', {})
if handles.get('matrix') == matrix_id:
matrix_score += 10 # verified handle bonus
if matrix_score > 0:
activity_scores['matrix'] = {'score': matrix_score, 'info': matrix_id}
# lemmy activity (fediverse)
lemmy_username = human.get('username') if human.get('platform') == 'lemmy' else extra.get('lemmy')
if lemmy_username:
lemmy_score = 0
# lemmy is fediverse - high values alignment
lemmy_score += 20 # fediverse platform bonus
post_count = extra.get('post_count', 0)
comment_count = extra.get('comment_count', 0)
if post_count > 100:
lemmy_score += 15
elif post_count > 50:
lemmy_score += 10
elif post_count > 10:
lemmy_score += 5
if comment_count > 500:
lemmy_score += 10
elif comment_count > 100:
lemmy_score += 5
if lemmy_score > 0:
activity_scores['lemmy'] = {'score': lemmy_score, 'info': lemmy_username}
# pick highest activity platform
if activity_scores:
best_platform = max(activity_scores.items(), key=lambda x: x[1]['score'])
return best_platform[0], best_platform[1]['info']
# fall back to email ONLY if no social activity detected
email = extra.get('email') or contact.get('email')
# also check emails list
if not email:
emails = extra.get('emails') or contact.get('emails') or []
for e in emails:
if e and '@' in e and 'noreply' not in e.lower():
email = e
break
if email and '@' in email and 'noreply' not in email.lower():
return 'email', email
# last resort: manual
return 'manual', None
def draft_intro_with_llm(match_data, recipient='a', dry_run=False): def draft_intro_with_llm(match_data: dict, recipient: str = 'a', dry_run: bool = True):
""" """
use groq llama 4 maverick to draft a personalized intro draft an intro message using groq llm.
match_data should contain: args:
- human_a: the first person match_data: dict with human_a, human_b, overlap_score, overlap_reasons
- human_b: the second person recipient: 'a' or 'b' - who receives the message
- overlap_score: numeric score dry_run: if True, preview mode
- overlap_reasons: list of why they match
recipient: 'a' or 'b' - who we're writing to returns:
tuple (result_dict, error_string)
result_dict has: subject, draft_html, draft_plain
""" """
if not GROQ_API_KEY: if not client:
return None, "GROQ_API_KEY not set" return None, "GROQ_API_KEY not set"
# determine recipient and other person
if recipient == 'a':
to_person = match_data.get('human_a', {})
other_person = match_data.get('human_b', {})
else:
to_person = match_data.get('human_b', {})
other_person = match_data.get('human_a', {})
# build context
to_name = to_person.get('name') or to_person.get('username', 'friend')
other_name = other_person.get('name') or other_person.get('username', 'someone')
to_signals = to_person.get('signals', [])
if isinstance(to_signals, str):
to_signals = json.loads(to_signals) if to_signals else []
other_signals = other_person.get('signals', [])
if isinstance(other_signals, str):
other_signals = json.loads(other_signals) if other_signals else []
overlap_reasons = match_data.get('overlap_reasons', [])
if isinstance(overlap_reasons, str):
overlap_reasons = json.loads(overlap_reasons) if overlap_reasons else []
# parse extra data
to_extra = to_person.get('extra', {})
other_extra = other_person.get('extra', {})
if isinstance(to_extra, str):
to_extra = json.loads(to_extra) if to_extra else {}
if isinstance(other_extra, str):
other_extra = json.loads(other_extra) if other_extra else {}
# build profile summaries
to_profile = f"""
name: {to_name}
platform: {to_person.get('platform', 'unknown')}
bio: {to_person.get('bio') or 'no bio'}
location: {to_person.get('location') or 'unknown'}
signals: {', '.join(to_signals[:8])}
repos: {len(to_extra.get('top_repos', []))} public repos
languages: {', '.join(to_extra.get('languages', {}).keys())}
"""
other_profile = f"""
name: {other_name}
platform: {other_person.get('platform', 'unknown')}
bio: {other_person.get('bio') or 'no bio'}
location: {other_person.get('location') or 'unknown'}
signals: {', '.join(other_signals[:8])}
repos: {len(other_extra.get('top_repos', []))} public repos
languages: {', '.join(other_extra.get('languages', {}).keys())}
url: {other_person.get('url', '')}
"""
# build prompt
system_prompt = """you are connectd, an ai that connects isolated builders who share values but don't know each other yet.
your job is to write a short, genuine intro message to one person about another person they might want to know.
rules:
- be brief (3-5 sentences max)
- be genuine, not salesy or fake
- focus on WHY they might want to connect, not just WHAT they have in common
- don't be cringe or use buzzwords
- lowercase preferred (casual tone)
- no emojis unless the person's profile suggests they'd like them
- mention specific things from their profiles, not generic "you both like open source"
- end with a simple invitation, not a hard sell
- sign off as "- connectd" (lowercase)
bad examples:
- "I noticed you're both passionate about..." (too formal)
- "You two would be PERFECT for each other!" (too salesy)
- "As a fellow privacy enthusiast..." (cringe)
good examples:
- "hey, saw you're building X. there's someone else working on similar stuff in Y who might be interesting to know."
- "you might want to check out Z's work on federated systems - similar approach to what you're doing with A."
"""
user_prompt = f"""write an intro message to {to_name} about {other_name}.
RECIPIENT ({to_name}):
{to_profile}
INTRODUCING ({other_name}):
{other_profile}
WHY THEY MATCH (overlap score {match_data.get('overlap_score', 0)}):
{', '.join(overlap_reasons[:5])}
write a short intro message. remember: lowercase, genuine, not salesy."""
try: try:
response = requests.post( human_a = match_data.get('human_a', {})
GROQ_API_URL, human_b = match_data.get('human_b', {})
headers={ reasons = match_data.get('overlap_reasons', [])
'Authorization': f'Bearer {GROQ_API_KEY}',
'Content-Type': 'application/json', # recipient gets the message, about_person is who we're introducing them to
}, if recipient == 'a':
json={ to_person = human_a
'model': MODEL, about_person = human_b
'messages': [ else:
{'role': 'system', 'content': system_prompt}, to_person = human_b
{'role': 'user', 'content': user_prompt}, about_person = human_a
],
'temperature': 0.7, to_name = to_person.get('username', 'friend')
'max_tokens': 300, about_name = about_person.get('username', 'someone')
}, about_bio = about_person.get('extra', {}).get('bio', '')
timeout=30,
# extract contact info for about_person
about_extra = about_person.get('extra', {})
if isinstance(about_extra, str):
import json as _json
about_extra = _json.loads(about_extra) if about_extra else {}
about_contact = about_person.get('contact', {})
if isinstance(about_contact, str):
about_contact = _json.loads(about_contact) if about_contact else {}
# build contact link for about_person
about_platform = about_person.get('platform', '')
about_username = about_person.get('username', '')
contact_link = None
if about_platform == 'mastodon' and about_username:
if '@' in about_username:
parts = about_username.split('@')
if len(parts) >= 2:
contact_link = f"https://{parts[1]}/@{parts[0]}"
elif about_platform == 'github' and about_username:
contact_link = f"https://github.com/{about_username}"
elif about_extra.get('mastodon') or about_contact.get('mastodon'):
handle = about_extra.get('mastodon') or about_contact.get('mastodon')
if '@' in handle:
parts = handle.lstrip('@').split('@')
if len(parts) >= 2:
contact_link = f"https://{parts[1]}/@{parts[0]}"
elif about_extra.get('github') or about_contact.get('github'):
contact_link = f"https://github.com/{about_extra.get('github') or about_contact.get('github')}"
elif about_extra.get('email'):
contact_link = about_extra['email']
elif about_contact.get('email'):
contact_link = about_contact['email']
elif about_extra.get('website'):
contact_link = about_extra['website']
elif about_extra.get('external_links', {}).get('website'):
contact_link = about_extra['external_links']['website']
elif about_extra.get('extra', {}).get('website'):
contact_link = about_extra['extra']['website']
elif about_platform == 'reddit' and about_username:
contact_link = f"reddit.com/u/{about_username}"
if not contact_link:
contact_link = f"github.com/{about_username}" if about_username else "reach out via connectd"
# skip if no real contact method (just reddit or generic)
if contact_link.startswith('reddit.com') or contact_link == "reach out via connectd" or 'stackblitz' in contact_link:
return None, f"no real contact info for {about_name} - skipping draft"
# format the shared factors naturally
if reasons:
factor = ', '.join(reasons[:3]) if len(reasons) > 1 else reasons[0]
else:
factor = "shared values and interests"
# load soul as guideline
soul = load_soul()
if not soul:
return None, "could not load soul file"
# build the prompt - soul is GUIDELINE not script
prompt = f"""you are connectd, a daemon that finds isolated builders and connects them.
write a personal message TO {to_name} telling them about {about_name}.
here is the soul/spirit of what connectd is about - use this as a GUIDELINE for tone and message, NOT as a script to copy verbatim:
---
{soul}
---
key facts for this message:
- recipient: {to_name}
- introducing them to: {about_name}
- their shared interests/values: {factor}
- about {about_name}: {about_bio if about_bio else 'a builder like you'}
- HOW TO REACH {about_name}: {contact_link}
RULES:
1. say their name ONCE at start, then use "you"
2. MUST include how to reach {about_name}: {contact_link}
3. lowercase, raw, emotional - follow the soul
4. end with the contact link
return ONLY the message body. signature is added separately."""
response = client.chat.completions.create(
model=GROQ_MODEL,
messages=[{"role": "user", "content": prompt}],
temperature=0.6,
max_tokens=1200
) )
if response.status_code != 200: body = response.choices[0].message.content.strip()
return None, f"groq api error: {response.status_code} - {response.text}"
data = response.json() # generate subject
draft = data['choices'][0]['message']['content'].strip() subject_prompt = f"""generate a short, lowercase email subject for a message to {to_name} about connecting them with {about_name} over their shared interest in {factor}.
# determine contact method for recipient no corporate speak. no clickbait. raw and real.
contact_method, contact_info = determine_contact_method(to_person) examples:
- "found you, {to_name}"
- "you're not alone"
- "a door just opened"
- "{to_name}, there's someone you should meet"
return ONLY the subject line."""
subject_response = client.chat.completions.create(
model=GROQ_MODEL,
messages=[{"role": "user", "content": subject_prompt}],
temperature=0.9,
max_tokens=50
)
subject = subject_response.choices[0].message.content.strip().strip('"').strip("'")
# format html
draft_html = f"<div style='font-family: monospace; white-space: pre-wrap; color: #e0e0e0; background: #1a1a1a; padding: 20px;'>{body}</div>{SIGNATURE_HTML}"
draft_plain = body + SIGNATURE_PLAINTEXT
return { return {
'draft': draft, 'subject': subject,
'model': MODEL, 'draft_html': draft_html,
'to': to_name, 'draft_plain': draft_plain
'about': other_name,
'overlap_score': match_data.get('overlap_score', 0),
'contact_method': contact_method,
'contact_info': contact_info,
'generated_at': datetime.now().isoformat(),
}, None }, None
except Exception as e: except Exception as e:
return None, f"groq error: {str(e)}" return None, str(e)
def draft_intro_batch(matches, dry_run=False): # for backwards compat with old code
""" def draft_message(person: dict, factor: str, platform: str = "email") -> dict:
draft intros for multiple matches """legacy function - wraps new api"""
returns list of (match, intro_result, error) tuples match_data = {
""" 'human_a': {'username': 'recipient'},
results = [] 'human_b': person,
'overlap_reasons': [factor]
for match in matches: }
# draft for both directions result, error = draft_intro_with_llm(match_data, recipient='a')
intro_a, err_a = draft_intro_with_llm(match, recipient='a', dry_run=dry_run) if error:
intro_b, err_b = draft_intro_with_llm(match, recipient='b', dry_run=dry_run) raise ValueError(error)
return {
results.append({ 'subject': result['subject'],
'match': match, 'body_html': result['draft_html'],
'intro_to_a': intro_a, 'body_plain': result['draft_plain']
'intro_to_b': intro_b, }
'errors': [err_a, err_b],
})
return results
def test_groq_connection(): if __name__ == "__main__":
"""test that groq api is working""" # test
if not GROQ_API_KEY: test_data = {
return False, "GROQ_API_KEY not set" 'human_a': {'username': 'sudoxnym', 'extra': {'bio': 'building intentional communities'}},
'human_b': {'username': 'testuser', 'extra': {'bio': 'home assistant enthusiast'}},
try: 'overlap_reasons': ['home-assistant', 'open source', 'community building']
response = requests.post( }
GROQ_API_URL, result, error = draft_intro_with_llm(test_data, recipient='a')
headers={ if error:
'Authorization': f'Bearer {GROQ_API_KEY}', print(f"error: {error}")
'Content-Type': 'application/json',
},
json={
'model': MODEL,
'messages': [{'role': 'user', 'content': 'say "ok" and nothing else'}],
'max_tokens': 10,
},
timeout=10,
)
if response.status_code == 200:
return True, "groq api working"
else: else:
return False, f"groq api error: {response.status_code}" print(f"subject: {result['subject']}")
print(f"\nbody:\n{result['draft_plain']}")
except Exception as e: # contact method ranking - USAGE BASED
return False, f"groq connection error: {str(e)}" # we rank by where the person is MOST ACTIVE, not by our preference
def determine_contact_method(human):
    """
    determine ALL available contact methods, ranked by USER'S ACTIVITY.

    looks at activity metrics (post counts, stars, repo counts, ...) stored in
    the human's 'extra'/'contact' blobs to decide where they're most engaged.

    human: dict with at least 'platform' and 'username'; 'extra' and 'contact'
           may each be a dict or a JSON-encoded string of one.

    returns: (best_method, best_info, fallbacks)
    where fallbacks is a list of (method, info) tuples in activity order.
    best_method is 'manual' (with info None) when nothing usable was found.
    """
    import json
    # 'extra' and 'contact' come from storage and may still be JSON strings
    extra = human.get('extra', {})
    contact = human.get('contact', {})
    if isinstance(extra, str):
        extra = json.loads(extra) if extra else {}
    if isinstance(contact, str):
        contact = json.loads(contact) if contact else {}
    # some scrapers nest a second 'extra' dict inside extra
    nested_extra = extra.get('extra', {})
    platform = human.get('platform', '')
    # accumulates (method, info, activity_score) triples; sorted at the end
    available = []
    # === ACTIVITY SCORING ===
    # each method gets scored by how active the user is there
    # EMAIL - always medium priority (we cant measure activity)
    email = extra.get('email') or contact.get('email') or nested_extra.get('email')
    if email and '@' in str(email):
        available.append(('email', email, 50))  # baseline score
    # MASTODON - score by post count / followers
    mastodon = extra.get('mastodon') or contact.get('mastodon') or nested_extra.get('mastodon')
    if mastodon:
        masto_activity = extra.get('mastodon_posts', 0) or extra.get('statuses_count', 0)
        masto_score = min(100, 30 + (masto_activity // 10))  # 30 base + 1 per 10 posts
        available.append(('mastodon', mastodon, masto_score))
    # if they CAME FROM mastodon, thats their primary
    if platform == 'mastodon':
        handle = f"@{human.get('username')}"
        instance = human.get('instance') or extra.get('instance') or ''
        if instance:
            # full fediverse handle: @user@instance
            handle = f"@{human.get('username')}@{instance}"
        activity = extra.get('statuses_count', 0) or extra.get('activity_count', 0)
        score = min(100, 50 + (activity // 5))  # higher base since its their home
        # dont dupe
        if not any(a[0] == 'mastodon' for a in available):
            available.append(('mastodon', handle, score))
        else:
            # update score if this is higher
            for i, (m, info, s) in enumerate(available):
                if m == 'mastodon' and score > s:
                    available[i] = ('mastodon', handle, score)
    # MATRIX - score by presence (binary for now)
    matrix = extra.get('matrix') or contact.get('matrix') or nested_extra.get('matrix')
    if matrix and ':' in str(matrix):  # valid matrix ids look like @user:server
        available.append(('matrix', matrix, 40))
    # BLUESKY - score by followers/posts if available
    bluesky = extra.get('bluesky') or contact.get('bluesky') or nested_extra.get('bluesky')
    if bluesky:
        bsky_activity = extra.get('bluesky_posts', 0)
        bsky_score = min(100, 25 + (bsky_activity // 10))
        available.append(('bluesky', bluesky, bsky_score))
    # LEMMY - score by activity
    lemmy = extra.get('lemmy') or contact.get('lemmy') or nested_extra.get('lemmy')
    if lemmy:
        lemmy_activity = extra.get('lemmy_posts', 0) or extra.get('lemmy_comments', 0)
        lemmy_score = min(100, 30 + lemmy_activity)
        available.append(('lemmy', lemmy, lemmy_score))
    if platform == 'lemmy':
        handle = human.get('username')
        activity = extra.get('activity_count', 0)
        score = min(100, 50 + activity)
        # NOTE(review): unlike the mastodon branch above, this does not bump the
        # score of an already-listed lemmy entry - confirm that is intentional
        if not any(a[0] == 'lemmy' for a in available):
            available.append(('lemmy', handle, score))
    # DISCORD - lower priority (hard to DM)
    discord = extra.get('discord') or contact.get('discord') or nested_extra.get('discord')
    if discord:
        available.append(('discord', discord, 20))
    # GITHUB ISSUE - for github users, score by repo activity
    if platform == 'github':
        top_repos = extra.get('top_repos', [])
        if top_repos:
            # top_repos entries may be plain names or dicts carrying a 'name' key
            repo = top_repos[0] if isinstance(top_repos[0], str) else top_repos[0].get('name', '')
            stars = extra.get('total_stars', 0)
            repos_count = extra.get('repos_count', 0)
            # active github user = higher issue score (capped at 60)
            gh_score = min(60, 20 + (stars // 100) + (repos_count // 5))
            if repo:
                # info is an "owner/repo" string; delivery code splits it later
                available.append(('github_issue', f"{human.get('username')}/{repo}", gh_score))
    # FORGE ISSUE - for self-hosted git users (gitea/forgejo/gitlab/sourcehut/codeberg)
    # these are HIGH SIGNAL users - they actually selfhost
    if platform and ':' in platform:
        # platform is encoded as "<type>:<instance>", e.g. "forgejo:git.example.org"
        platform_type, instance = platform.split(':', 1)
        if platform_type in ('gitea', 'forgejo', 'gogs', 'gitlab', 'sourcehut'):
            repos = extra.get('repos', [])
            if repos:
                repo = repos[0] if isinstance(repos[0], str) else repos[0].get('name', '')
                instance_url = extra.get('instance_url', '')
                if repo and instance_url:
                    # forge users get higher priority than github (they selfhost!)
                    # NOTE(review): github_issue can score up to 60 above, so a
                    # very active github user may still outrank this fixed 55
                    forge_score = 55
                    available.append(('forge_issue', {
                        'platform_type': platform_type,
                        'instance': instance,
                        'instance_url': instance_url,
                        'owner': human.get('username'),
                        'repo': repo
                    }, forge_score))
    # REDDIT - discovered people, use their other links
    if platform == 'reddit':
        reddit_activity = extra.get('reddit_activity', 0) or extra.get('activity_count', 0)
        # reddit users we reach via their external links (email, mastodon, etc)
        # boost their other methods if reddit is their main platform
        for i, (m, info, score) in enumerate(available):
            if m in ('email', 'mastodon', 'matrix', 'bluesky'):
                # boost score for reddit-discovered users' external contacts
                boost = min(30, reddit_activity // 3)
                available[i] = (m, info, score + boost)
    # sort by activity score (highest first)
    available.sort(key=lambda x: x[2], reverse=True)
    if not available:
        # nothing automatable - caller must handle this person by hand
        return 'manual', None, []
    best = available[0]
    # strip the scores; fallbacks keep the activity ordering
    fallbacks = [(m, i) for m, i, p in available[1:]]
    return best[0], best[1], fallbacks
def get_ranked_contact_methods(human):
    """return every (method, info) contact option for a human, best first.

    empty list when the only option is manual outreach.
    """
    best_method, best_info, rest = determine_contact_method(human)
    if best_method == 'manual':
        return []
    ranked = [(best_method, best_info)]
    ranked.extend(rest)
    return ranked

View file

@ -334,18 +334,24 @@ def determine_best_contact(human):
""" """
determine best contact method based on WHERE THEY'RE MOST ACTIVE determine best contact method based on WHERE THEY'RE MOST ACTIVE
uses activity-based selection from groq_draft module returns: (method, info, fallbacks)
uses activity-based selection - ranks by user's actual usage
""" """
from introd.groq_draft import determine_contact_method as activity_based_contact from introd.groq_draft import determine_contact_method as activity_based_contact
method, info = activity_based_contact(human) method, info, fallbacks = activity_based_contact(human)
# convert github_issue info to dict format for delivery # convert github_issue info to dict format for delivery
if method == 'github_issue' and isinstance(info, str) and '/' in info: def format_info(m, i):
parts = info.split('/', 1) if m == 'github_issue' and isinstance(i, str) and '/' in i:
return method, {'owner': parts[0], 'repo': parts[1]} parts = i.split('/', 1)
return {'owner': parts[0], 'repo': parts[1]}
return i
return method, info info = format_info(method, info)
fallbacks = [(m, format_info(m, i)) for m, i in fallbacks]
return method, info, fallbacks
def deliver_intro(match_data, intro_draft, dry_run=False): def deliver_intro(match_data, intro_draft, dry_run=False):
@ -362,8 +368,8 @@ def deliver_intro(match_data, intro_draft, dry_run=False):
if already_contacted(recipient_id): if already_contacted(recipient_id):
return False, "already contacted", None return False, "already contacted", None
# determine contact method # determine contact method with fallbacks
method, contact_info = determine_best_contact(recipient) method, contact_info, fallbacks = determine_best_contact(recipient)
log = load_delivery_log() log = load_delivery_log()
result = { result = {
@ -423,9 +429,60 @@ def deliver_intro(match_data, intro_draft, dry_run=False):
success = True success = True
error = "added to manual queue" error = "added to manual queue"
# if failed and we have fallbacks, try them
if not success and fallbacks:
for fallback_method, fallback_info in fallbacks:
result['fallback_attempts'] = result.get('fallback_attempts', [])
result['fallback_attempts'].append({
'method': fallback_method,
'contact_info': fallback_info
})
fb_success = False
fb_error = None
if fallback_method == 'email':
subject = f"someone you might want to know - connectd"
fb_success, fb_error = send_email(fallback_info, subject, intro_draft, dry_run)
elif fallback_method == 'mastodon':
fb_success, fb_error = send_mastodon_dm(fallback_info, intro_draft, dry_run)
elif fallback_method == 'bluesky':
fb_success, fb_error = send_bluesky_dm(fallback_info, intro_draft, dry_run)
elif fallback_method == 'matrix':
fb_success, fb_error = send_matrix_dm(fallback_info, intro_draft, dry_run)
elif fallback_method == 'lemmy':
from scoutd.lemmy import send_lemmy_dm
fb_success, fb_error = send_lemmy_dm(fallback_info, intro_draft, dry_run)
elif fallback_method == 'discord':
from scoutd.discord import send_discord_dm
fb_success, fb_error = send_discord_dm(fallback_info, intro_draft, dry_run)
elif fallback_method == 'github_issue':
owner = fallback_info.get('owner')
repo = fallback_info.get('repo')
title = "community introduction from connectd"
github_body = f"""hey {recipient.get('name') or recipient.get('username')},
{intro_draft}
---
*automated introduction from connectd*
"""
fb_success, fb_error = create_github_issue(owner, repo, title, github_body, dry_run)
if fb_success:
success = True
method = fallback_method
contact_info = fallback_info
error = None
result['fallback_succeeded'] = fallback_method
break
else:
result['fallback_attempts'][-1]['error'] = fb_error
# log result # log result
result['success'] = success result['success'] = success
result['error'] = error result['error'] = error
result['final_method'] = method
if success: if success:
log['sent'].append(result) log['sent'].append(result)

View file

@ -104,6 +104,54 @@ def draft_intro_with_llm(match_data: dict, recipient: str = 'a', dry_run: bool =
about_name = about_person.get('username', 'someone') about_name = about_person.get('username', 'someone')
about_bio = about_person.get('extra', {}).get('bio', '') about_bio = about_person.get('extra', {}).get('bio', '')
# extract contact info for about_person
about_extra = about_person.get('extra', {})
if isinstance(about_extra, str):
import json as _json
about_extra = _json.loads(about_extra) if about_extra else {}
about_contact = about_person.get('contact', {})
if isinstance(about_contact, str):
about_contact = _json.loads(about_contact) if about_contact else {}
# build contact link for about_person
about_platform = about_person.get('platform', '')
about_username = about_person.get('username', '')
contact_link = None
if about_platform == 'mastodon' and about_username:
if '@' in about_username:
parts = about_username.split('@')
if len(parts) >= 2:
contact_link = f"https://{parts[1]}/@{parts[0]}"
elif about_platform == 'github' and about_username:
contact_link = f"https://github.com/{about_username}"
elif about_extra.get('mastodon') or about_contact.get('mastodon'):
handle = about_extra.get('mastodon') or about_contact.get('mastodon')
if '@' in handle:
parts = handle.lstrip('@').split('@')
if len(parts) >= 2:
contact_link = f"https://{parts[1]}/@{parts[0]}"
elif about_extra.get('github') or about_contact.get('github'):
contact_link = f"https://github.com/{about_extra.get('github') or about_contact.get('github')}"
elif about_extra.get('email'):
contact_link = about_extra['email']
elif about_contact.get('email'):
contact_link = about_contact['email']
elif about_extra.get('website'):
contact_link = about_extra['website']
elif about_extra.get('external_links', {}).get('website'):
contact_link = about_extra['external_links']['website']
elif about_extra.get('extra', {}).get('website'):
contact_link = about_extra['extra']['website']
elif about_platform == 'reddit' and about_username:
contact_link = f"reddit.com/u/{about_username}"
if not contact_link:
contact_link = f"github.com/{about_username}" if about_username else "reach out via connectd"
# skip if no real contact method (just reddit or generic)
if contact_link.startswith('reddit.com') or contact_link == "reach out via connectd" or 'stackblitz' in contact_link:
return None, f"no real contact info for {about_name} - skipping draft"
# format the shared factors naturally # format the shared factors naturally
if reasons: if reasons:
factor = ', '.join(reasons[:3]) if len(reasons) > 1 else reasons[0] factor = ', '.join(reasons[:3]) if len(reasons) > 1 else reasons[0]
@ -131,24 +179,20 @@ key facts for this message:
- introducing them to: {about_name} - introducing them to: {about_name}
- their shared interests/values: {factor} - their shared interests/values: {factor}
- about {about_name}: {about_bio if about_bio else 'a builder like you'} - about {about_name}: {about_bio if about_bio else 'a builder like you'}
- HOW TO REACH {about_name}: {contact_link}
CRITICAL RULES - DO NOT SANITIZE: RULES:
1. use their name ({to_name}) throughout - make it personal 1. say their name ONCE at start, then use "you"
2. format the shared factor naturally like "your shared interest in X and Y" or "your work on X" 2. MUST include how to reach {about_name}: {contact_link}
3. this message is TO {to_name} ABOUT {about_name} - don't confuse this 3. lowercase, raw, emotional - follow the soul
4. keep it lowercase, raw, emotional, real 4. end with the contact link
5. no corporate speak, no "best regards", no "hi there"
6. DO NOT water down the message - keep the raw emotional energy
7. address them directly, call them to action
8. make them feel seen and not alone
9. end with rallying energy about building together
return ONLY the message body. signature is added separately.""" return ONLY the message body. signature is added separately."""
response = client.chat.completions.create( response = client.chat.completions.create(
model=GROQ_MODEL, model=GROQ_MODEL,
messages=[{"role": "user", "content": prompt}], messages=[{"role": "user", "content": prompt}],
temperature=0.8, temperature=0.6,
max_tokens=1200 max_tokens=1200
) )
@ -220,3 +264,136 @@ if __name__ == "__main__":
else: else:
print(f"subject: {result['subject']}") print(f"subject: {result['subject']}")
print(f"\nbody:\n{result['draft_plain']}") print(f"\nbody:\n{result['draft_plain']}")
# contact method ranking - USAGE BASED
# we rank by where the person is MOST ACTIVE, not by our preference
def determine_contact_method(human):
    """
    determine ALL available contact methods, ranked by USER'S ACTIVITY.

    reads activity metrics from the human's 'extra'/'contact' blobs (either
    dicts or JSON-encoded strings) to decide where they're most engaged.

    returns: (best_method, best_info, fallbacks)
    where fallbacks is a list of (method, info) tuples in activity order;
    ('manual', None, []) when no usable method was found.
    """
    import json
    # stored blobs may still be JSON strings
    extra = human.get('extra', {})
    contact = human.get('contact', {})
    if isinstance(extra, str):
        extra = json.loads(extra) if extra else {}
    if isinstance(contact, str):
        contact = json.loads(contact) if contact else {}
    # some scrapers nest a second 'extra' dict inside extra
    nested_extra = extra.get('extra', {})
    platform = human.get('platform', '')
    # (method, info, activity_score) triples; sorted by score at the end
    available = []
    # === ACTIVITY SCORING ===
    # each method gets scored by how active the user is there
    # EMAIL - always medium priority (we cant measure activity)
    email = extra.get('email') or contact.get('email') or nested_extra.get('email')
    if email and '@' in str(email):
        available.append(('email', email, 50))  # baseline score
    # MASTODON - score by post count / followers
    mastodon = extra.get('mastodon') or contact.get('mastodon') or nested_extra.get('mastodon')
    if mastodon:
        masto_activity = extra.get('mastodon_posts', 0) or extra.get('statuses_count', 0)
        masto_score = min(100, 30 + (masto_activity // 10))  # 30 base + 1 per 10 posts
        available.append(('mastodon', mastodon, masto_score))
    # if they CAME FROM mastodon, thats their primary
    if platform == 'mastodon':
        handle = f"@{human.get('username')}"
        instance = human.get('instance') or extra.get('instance') or ''
        if instance:
            # full fediverse handle: @user@instance
            handle = f"@{human.get('username')}@{instance}"
        activity = extra.get('statuses_count', 0) or extra.get('activity_count', 0)
        score = min(100, 50 + (activity // 5))  # higher base since its their home
        # dont dupe
        if not any(a[0] == 'mastodon' for a in available):
            available.append(('mastodon', handle, score))
        else:
            # update score if this is higher
            for i, (m, info, s) in enumerate(available):
                if m == 'mastodon' and score > s:
                    available[i] = ('mastodon', handle, score)
    # MATRIX - score by presence (binary for now)
    matrix = extra.get('matrix') or contact.get('matrix') or nested_extra.get('matrix')
    if matrix and ':' in str(matrix):  # valid matrix ids look like @user:server
        available.append(('matrix', matrix, 40))
    # BLUESKY - score by followers/posts if available
    bluesky = extra.get('bluesky') or contact.get('bluesky') or nested_extra.get('bluesky')
    if bluesky:
        bsky_activity = extra.get('bluesky_posts', 0)
        bsky_score = min(100, 25 + (bsky_activity // 10))
        available.append(('bluesky', bluesky, bsky_score))
    # LEMMY - score by activity
    lemmy = extra.get('lemmy') or contact.get('lemmy') or nested_extra.get('lemmy')
    if lemmy:
        lemmy_activity = extra.get('lemmy_posts', 0) or extra.get('lemmy_comments', 0)
        lemmy_score = min(100, 30 + lemmy_activity)
        available.append(('lemmy', lemmy, lemmy_score))
    if platform == 'lemmy':
        handle = human.get('username')
        activity = extra.get('activity_count', 0)
        score = min(100, 50 + activity)
        # NOTE(review): unlike the mastodon branch, an already-listed lemmy
        # entry is not re-scored here - confirm that is intentional
        if not any(a[0] == 'lemmy' for a in available):
            available.append(('lemmy', handle, score))
    # DISCORD - lower priority (hard to DM)
    discord = extra.get('discord') or contact.get('discord') or nested_extra.get('discord')
    if discord:
        available.append(('discord', discord, 20))
    # GITHUB ISSUE - for github users, score by repo activity
    if platform == 'github':
        top_repos = extra.get('top_repos', [])
        if top_repos:
            # top_repos entries may be plain names or dicts with a 'name' key
            repo = top_repos[0] if isinstance(top_repos[0], str) else top_repos[0].get('name', '')
            stars = extra.get('total_stars', 0)
            repos_count = extra.get('repos_count', 0)
            # active github user = higher issue score (capped at 60)
            gh_score = min(60, 20 + (stars // 100) + (repos_count // 5))
            if repo:
                # info is an "owner/repo" string; delivery code splits it later
                available.append(('github_issue', f"{human.get('username')}/{repo}", gh_score))
    # REDDIT - discovered people, use their other links
    if platform == 'reddit':
        reddit_activity = extra.get('reddit_activity', 0) or extra.get('activity_count', 0)
        # reddit users we reach via their external links (email, mastodon, etc)
        # boost their other methods if reddit is their main platform
        for i, (m, info, score) in enumerate(available):
            if m in ('email', 'mastodon', 'matrix', 'bluesky'):
                # boost score for reddit-discovered users' external contacts
                boost = min(30, reddit_activity // 3)
                available[i] = (m, info, score + boost)
    # sort by activity score (highest first)
    available.sort(key=lambda x: x[2], reverse=True)
    if not available:
        # nothing automatable - caller must handle this person by hand
        return 'manual', None, []
    best = available[0]
    # strip the scores; fallbacks keep the activity ordering
    fallbacks = [(m, i) for m, i, p in available[1:]]
    return best[0], best[1], fallbacks
def get_ranked_contact_methods(human):
    """all contact methods for a human, ordered by measured activity (best first).

    returns [] when no automated contact method exists ('manual').
    """
    method, info, fallbacks = determine_contact_method(human)
    if method == 'manual':
        return []
    ordered = [(method, info)]
    for pair in fallbacks:
        ordered.append(pair)
    return ordered

View file

@ -1,15 +1,20 @@
""" """
matchd/overlap.py - find pairs with alignment matchd/overlap.py - find pairs with alignment
CRITICAL: blocks users with disqualifying negative signals (maga, conspiracy, conservative)
""" """
import json import json
from .fingerprint import fingerprint_similarity from .fingerprint import fingerprint_similarity
# signals that HARD BLOCK matching - no exceptions
DISQUALIFYING_SIGNALS = {'maga', 'conspiracy', 'conservative', 'antivax', 'sovcit'}
def find_overlap(human_a, human_b, fp_a=None, fp_b=None): def find_overlap(human_a, human_b, fp_a=None, fp_b=None):
""" """
analyze overlap between two humans analyze overlap between two humans
returns overlap details: score, shared values, complementary skills returns None if either has disqualifying signals
""" """
# parse stored json if needed # parse stored json if needed
signals_a = human_a.get('signals', []) signals_a = human_a.get('signals', [])
@ -20,13 +25,49 @@ def find_overlap(human_a, human_b, fp_a=None, fp_b=None):
if isinstance(signals_b, str): if isinstance(signals_b, str):
signals_b = json.loads(signals_b) signals_b = json.loads(signals_b)
# === HARD BLOCK: check for disqualifying negative signals ===
neg_a = human_a.get('negative_signals', [])
if isinstance(neg_a, str):
neg_a = json.loads(neg_a) if neg_a else []
neg_b = human_b.get('negative_signals', [])
if isinstance(neg_b, str):
neg_b = json.loads(neg_b) if neg_b else []
# also check 'reasons' field for WARNING entries
reasons_a = human_a.get('reasons', '')
if isinstance(reasons_a, str) and 'WARNING' in reasons_a:
# extract signals from WARNING: x, y, z
import re
warn_match = re.search(r'WARNING[:\s]+([^"\]]+)', reasons_a)
if warn_match:
warn_signals = [s.strip().lower() for s in warn_match.group(1).split(',')]
neg_a = list(set(neg_a + warn_signals))
reasons_b = human_b.get('reasons', '')
if isinstance(reasons_b, str) and 'WARNING' in reasons_b:
import re
warn_match = re.search(r'WARNING[:\s]+([^"\]]+)', reasons_b)
if warn_match:
warn_signals = [s.strip().lower() for s in warn_match.group(1).split(',')]
neg_b = list(set(neg_b + warn_signals))
# block if either has disqualifying signals
disq_a = set(neg_a) & DISQUALIFYING_SIGNALS
disq_b = set(neg_b) & DISQUALIFYING_SIGNALS
if disq_a:
return None # blocked
if disq_b:
return None # blocked
extra_a = human_a.get('extra', {}) extra_a = human_a.get('extra', {})
if isinstance(extra_a, str): if isinstance(extra_a, str):
extra_a = json.loads(extra_a) extra_a = json.loads(extra_a) if extra_a else {}
extra_b = human_b.get('extra', {}) extra_b = human_b.get('extra', {})
if isinstance(extra_b, str): if isinstance(extra_b, str):
extra_b = json.loads(extra_b) extra_b = json.loads(extra_b) if extra_b else {}
# shared signals # shared signals
shared_signals = list(set(signals_a) & set(signals_b)) shared_signals = list(set(signals_a) & set(signals_b))
@ -36,7 +77,7 @@ def find_overlap(human_a, human_b, fp_a=None, fp_b=None):
topics_b = set(extra_b.get('topics', [])) topics_b = set(extra_b.get('topics', []))
shared_topics = list(topics_a & topics_b) shared_topics = list(topics_a & topics_b)
# complementary skills (what one has that the other doesn't) # complementary skills
langs_a = set(extra_a.get('languages', {}).keys()) langs_a = set(extra_a.get('languages', {}).keys())
langs_b = set(extra_b.get('languages', {}).keys()) langs_b = set(extra_b.get('languages', {}).keys())
complementary_langs = list((langs_a - langs_b) | (langs_b - langs_a)) complementary_langs = list((langs_a - langs_b) | (langs_b - langs_a))
@ -68,38 +109,30 @@ def find_overlap(human_a, human_b, fp_a=None, fp_b=None):
# calculate overlap score # calculate overlap score
base_score = 0 base_score = 0
# shared values (most important)
base_score += len(shared_signals) * 10 base_score += len(shared_signals) * 10
# shared interests
base_score += len(shared_topics) * 5 base_score += len(shared_topics) * 5
# complementary skills bonus (they can help each other)
if complementary_langs: if complementary_langs:
base_score += min(len(complementary_langs), 5) * 3 base_score += min(len(complementary_langs), 5) * 3
# geographic bonus
if geographic_match: if geographic_match:
base_score += 20 base_score += 20
# fingerprint similarity if available
fp_score = 0 fp_score = 0
if fp_a and fp_b: if fp_a and fp_b:
fp_score = fingerprint_similarity(fp_a, fp_b) * 50 fp_score = fingerprint_similarity(fp_a, fp_b) * 50
total_score = base_score + fp_score total_score = base_score + fp_score
# build reasons
overlap_reasons = [] overlap_reasons = []
if shared_signals: if shared_signals:
overlap_reasons.append(f"shared values: {', '.join(shared_signals[:5])}") overlap_reasons.append(f"shared: {', '.join(shared_signals[:5])}")
if shared_topics: if shared_topics:
overlap_reasons.append(f"shared interests: {', '.join(shared_topics[:5])}") overlap_reasons.append(f"interests: {', '.join(shared_topics[:5])}")
if geo_reason: if geo_reason:
overlap_reasons.append(geo_reason) overlap_reasons.append(geo_reason)
if complementary_langs: if complementary_langs:
overlap_reasons.append(f"complementary skills: {', '.join(complementary_langs[:5])}") overlap_reasons.append(f"complementary: {', '.join(complementary_langs[:5])}")
return { return {
'overlap_score': total_score, 'overlap_score': total_score,
@ -114,36 +147,28 @@ def find_overlap(human_a, human_b, fp_a=None, fp_b=None):
def is_same_person(human_a, human_b): def is_same_person(human_a, human_b):
""" """check if two records might be the same person (cross-platform)"""
check if two records might be the same person (cross-platform)
"""
# same platform = definitely different records
if human_a['platform'] == human_b['platform']: if human_a['platform'] == human_b['platform']:
return False return False
# check username similarity
user_a = human_a.get('username', '').lower().split('@')[0] user_a = human_a.get('username', '').lower().split('@')[0]
user_b = human_b.get('username', '').lower().split('@')[0] user_b = human_b.get('username', '').lower().split('@')[0]
if user_a == user_b: if user_a == user_b:
return True return True
# check if github username matches
contact_a = human_a.get('contact', {}) contact_a = human_a.get('contact', {})
contact_b = human_b.get('contact', {}) contact_b = human_b.get('contact', {})
if isinstance(contact_a, str): if isinstance(contact_a, str):
contact_a = json.loads(contact_a) contact_a = json.loads(contact_a) if contact_a else {}
if isinstance(contact_b, str): if isinstance(contact_b, str):
contact_b = json.loads(contact_b) contact_b = json.loads(contact_b) if contact_b else {}
# github cross-reference
if contact_a.get('github') and contact_a.get('github') == contact_b.get('github'): if contact_a.get('github') and contact_a.get('github') == contact_b.get('github'):
return True return True
if contact_a.get('github') == user_b or contact_b.get('github') == user_a: if contact_a.get('github') == user_b or contact_b.get('github') == user_a:
return True return True
# email cross-reference
if contact_a.get('email') and contact_a.get('email') == contact_b.get('email'): if contact_a.get('email') and contact_a.get('email') == contact_b.get('email'):
return True return True

491
scoutd/forges.py Normal file
View file

@ -0,0 +1,491 @@
"""
scoutd/forges.py - scrape self-hosted git forges
these people = highest signal. they actually selfhost.
supported platforms:
- gitea (and forks like forgejo)
- gogs
- gitlab ce
- sourcehut
- codeberg (gitea-based)
scrapes users AND extracts contact info for outreach.
"""
import os
import re
import json
import time
import requests
from typing import List, Dict, Optional, Tuple
from datetime import datetime
from .signals import analyze_text
# rate limiting
# NOTE(review): not referenced by the scraping helpers visible in this module -
# confirm callers sleep between requests using this value
REQUEST_DELAY = 1.0

# known public instances to scrape
# format: (name, url, platform_type)
KNOWN_INSTANCES = [
    # === PUBLIC INSTANCES ===
    # local/private instances can be added via LOCAL_FORGE_INSTANCES env var
    # codeberg (largest gitea instance)
    ('codeberg', 'https://codeberg.org', 'gitea'),
    # sourcehut
    ('sourcehut', 'https://sr.ht', 'sourcehut'),
    # notable gitea/forgejo instances
    ('gitea.com', 'https://gitea.com', 'gitea'),
    ('git.disroot.org', 'https://git.disroot.org', 'gitea'),
    ('git.gay', 'https://git.gay', 'forgejo'),
    ('git.envs.net', 'https://git.envs.net', 'forgejo'),
    ('tildegit', 'https://tildegit.org', 'gitea'),
    ('git.sr.ht', 'https://git.sr.ht', 'sourcehut'),
    # gitlab ce instances
    ('framagit', 'https://framagit.org', 'gitlab'),
    ('gitlab.gnome.org', 'https://gitlab.gnome.org', 'gitlab'),
    ('invent.kde.org', 'https://invent.kde.org', 'gitlab'),
    ('salsa.debian.org', 'https://salsa.debian.org', 'gitlab'),
]

# headers sent with every request - identifies the scraper honestly
HEADERS = {
    'User-Agent': 'connectd/1.0 (finding builders with aligned values)',
    'Accept': 'application/json',
}
def log(msg):
    """print a progress/diagnostic line, namespaced for the forges scraper."""
    print(f"  forges: {msg}")
# === GITEA/FORGEJO/GOGS API ===
# these share the same API structure
def scrape_gitea_users(instance_url: str, limit: int = 100) -> List[Dict]:
    """
    scrape users from a gitea/forgejo/gogs instance.

    tries the /api/v1/users/search API first (gitea 1.x+); falls back to
    parsing the /explore/users HTML page when the API yields nothing.

    instance_url: base URL of the instance, no trailing slash
    limit: maximum number of users to return

    returns a list of user dicts; every entry has a non-empty 'username'
    (fix: the API path previously appended {'username': None} entries when a
    record had neither 'login' nor 'username', breaking downstream code).
    """
    users = []
    # try API first (gitea 1.x+)
    try:
        api_url = f"{instance_url}/api/v1/users/search"
        params = {'q': '', 'limit': min(limit, 50)}
        resp = requests.get(api_url, params=params, headers=HEADERS, timeout=15)
        if resp.status_code == 200:
            data = resp.json()
            # response shape varies by version: {'data': [...]}, {'users': [...]}, or bare list
            user_list = data.get('data', []) or data.get('users', []) or data
            if isinstance(user_list, list):
                for u in user_list[:limit]:
                    username = u.get('login') or u.get('username')
                    if not username:
                        # skip malformed entries - downstream code keys on username
                        continue
                    users.append({
                        'username': username,
                        'full_name': u.get('full_name'),
                        'avatar': u.get('avatar_url'),
                        'website': u.get('website'),
                        'location': u.get('location'),
                        'bio': u.get('description') or u.get('bio'),
                    })
            log(f"  got {len(users)} users via API")
    except Exception as e:
        log(f"  API failed: {e}")
    # fallback: scrape explore page
    if not users:
        try:
            explore_url = f"{instance_url}/explore/users"
            resp = requests.get(explore_url, headers=HEADERS, timeout=15)
            if resp.status_code == 200:
                # parse HTML for usernames (best-effort; theme-dependent)
                usernames = re.findall(r'href="/([^/"]+)"[^>]*class="[^"]*user[^"]*"', resp.text)
                usernames += re.findall(r'<a[^>]+href="/([^/"]+)"[^>]*title="[^"]*"', resp.text)
                usernames = list(set(usernames))[:limit]
                for username in usernames:
                    # filter out gitea's reserved route names that match the regex
                    if username and not username.startswith(('explore', 'api', 'user', 'repo')):
                        users.append({'username': username})
            log(f"  got {len(users)} users via scrape")
        except Exception as e:
            log(f"  scrape failed: {e}")
    return users
def get_gitea_user_details(instance_url: str, username: str) -> Optional[Dict]:
    """
    get detailed user info from gitea/forgejo/gogs via /api/v1/users/{username}.

    returns a profile dict, or None on any error or non-200 response.
    'email' is only populated when the instance exposes it publicly.
    """
    try:
        # API endpoint
        api_url = f"{instance_url}/api/v1/users/{username}"
        resp = requests.get(api_url, headers=HEADERS, timeout=10)
        if resp.status_code == 200:
            u = resp.json()
            return {
                'username': u.get('login') or u.get('username'),
                'full_name': u.get('full_name'),
                'email': u.get('email'),  # may be hidden
                'website': u.get('website'),
                'location': u.get('location'),
                'bio': u.get('description') or u.get('bio'),
                'created': u.get('created'),
                'followers': u.get('followers_count', 0),
                'following': u.get('following_count', 0),
            }
    except Exception:
        # was a bare `except:` - narrowed so Ctrl-C / SystemExit still propagate;
        # network/JSON errors against random instances are expected -> "no data"
        pass
    return None
def get_gitea_user_repos(instance_url: str, username: str, limit: int = 10) -> List[Dict]:
    """get up to `limit` of a user's repos from gitea/forgejo/gogs.

    Returns an empty list on any network/API failure (best-effort).
    """
    repos = []
    try:
        api_url = f"{instance_url}/api/v1/users/{username}/repos"
        resp = requests.get(api_url, headers=HEADERS, timeout=10)
        if resp.status_code == 200:
            for r in resp.json()[:limit]:
                repos.append({
                    'name': r.get('name'),
                    'full_name': r.get('full_name'),
                    'description': r.get('description'),
                    'stars': r.get('stars_count', 0),
                    'forks': r.get('forks_count', 0),
                    'language': r.get('language'),
                    'updated': r.get('updated_at'),
                })
    # narrowed from a bare `except:` so ctrl-c / SystemExit still propagate
    except (requests.RequestException, ValueError):
        pass
    return repos
# === GITLAB CE API ===
def scrape_gitlab_users(instance_url: str, limit: int = 100) -> List[Dict]:
    """scrape users from a gitlab ce instance"""
    users = []
    try:
        # gitlab v4 API - public users endpoint (per_page caps at 100)
        endpoint = f"{instance_url}/api/v4/users"
        query = {'per_page': min(limit, 100), 'active': True}
        resp = requests.get(endpoint, params=query, headers=HEADERS, timeout=15)
        if resp.status_code == 200:
            users = [
                {
                    'username': entry.get('username'),
                    'full_name': entry.get('name'),
                    'avatar': entry.get('avatar_url'),
                    'website': entry.get('website_url'),
                    'location': entry.get('location'),
                    'bio': entry.get('bio'),
                    'public_email': entry.get('public_email'),
                }
                for entry in resp.json()[:limit]
            ]
        log(f" got {len(users)} gitlab users")
    except Exception as e:
        log(f" gitlab API failed: {e}")
    return users
def get_gitlab_user_details(instance_url: str, username: str) -> Optional[Dict]:
    """get detailed gitlab user info by exact username lookup.

    Returns None when the user does not exist or the request fails.
    """
    try:
        api_url = f"{instance_url}/api/v4/users"
        params = {'username': username}
        resp = requests.get(api_url, params=params, headers=HEADERS, timeout=10)
        if resp.status_code == 200:
            users = resp.json()
            if users:
                # the exact-username filter returns a list; take the first hit
                u = users[0]
                return {
                    'username': u.get('username'),
                    'full_name': u.get('name'),
                    'email': u.get('public_email'),
                    'website': u.get('website_url'),
                    'location': u.get('location'),
                    'bio': u.get('bio'),
                    'created': u.get('created_at'),
                }
    # narrowed from a bare `except:` (which also trapped KeyboardInterrupt)
    except (requests.RequestException, ValueError):
        pass
    return None
def get_gitlab_user_projects(instance_url: str, username: str, limit: int = 10) -> List[Dict]:
    """get up to `limit` of a user's projects from gitlab.

    gitlab's projects endpoint is keyed by numeric user id, so the
    username is resolved first. Returns [] on any failure (best-effort).
    """
    repos = []
    try:
        # first resolve username -> numeric user id
        api_url = f"{instance_url}/api/v4/users"
        params = {'username': username}
        resp = requests.get(api_url, params=params, headers=HEADERS, timeout=10)
        if resp.status_code == 200:
            matches = resp.json()  # parse the body once (was parsed twice)
            if matches:
                user_id = matches[0].get('id')
                # get projects
                proj_url = f"{instance_url}/api/v4/users/{user_id}/projects"
                resp = requests.get(proj_url, headers=HEADERS, timeout=10)
                if resp.status_code == 200:
                    for p in resp.json()[:limit]:
                        repos.append({
                            'name': p.get('name'),
                            'full_name': p.get('path_with_namespace'),
                            'description': p.get('description'),
                            'stars': p.get('star_count', 0),
                            'forks': p.get('forks_count', 0),
                            'updated': p.get('last_activity_at'),
                        })
    # narrowed from a bare `except:` so interrupts propagate
    except (requests.RequestException, ValueError):
        pass
    return repos
# === SOURCEHUT API ===
def scrape_sourcehut_users(limit: int = 100) -> List[Dict]:
    """
    scrape users from sourcehut.
    sourcehut doesn't have a public user list, so we scrape from:
    - recent commits
    - mailing lists
    - project pages
    """
    users = []
    seen = set()
    try:
        # scrape from git.sr.ht explore; project paths look like /~username/repo
        resp = requests.get('https://git.sr.ht/projects', headers=HEADERS, timeout=15)
        if resp.status_code == 200:
            for handle in re.findall(r'href="/~([^/"]+)', resp.text):
                if handle in seen:
                    continue
                seen.add(handle)
                users.append({'username': handle})
                if len(users) >= limit:
                    break
        log(f" got {len(users)} sourcehut users")
    except Exception as e:
        log(f" sourcehut scrape failed: {e}")
    return users
def get_sourcehut_user_details(username: str) -> Optional[Dict]:
    """scrape a sourcehut user's public profile page.

    Returns {'username', 'bio', 'profile_url'} or None on failure.
    """
    try:
        # scrape profile page (sourcehut has no public profile API here)
        profile_url = f"https://sr.ht/~{username}"
        resp = requests.get(profile_url, headers=HEADERS, timeout=10)
        if resp.status_code == 200:
            bio = ''
            # extract bio: first paragraph inside the container div
            bio_match = re.search(r'<div class="container">\s*<p>([^<]+)</p>', resp.text)
            if bio_match:
                bio = bio_match.group(1).strip()
            return {
                'username': username,
                'bio': bio,
                'profile_url': profile_url,
            }
    # narrowed from a bare `except:` (which also swallowed KeyboardInterrupt)
    except requests.RequestException:
        pass
    return None
def get_sourcehut_user_repos(username: str, limit: int = 10) -> List[Dict]:
    """scrape up to `limit` repo names from a sourcehut user's git page.

    Returns [] on any failure (best-effort).
    """
    repos = []
    try:
        git_url = f"https://git.sr.ht/~{username}"
        resp = requests.get(git_url, headers=HEADERS, timeout=10)
        if resp.status_code == 200:
            # escape the username so regex metacharacters in it cannot
            # break or widen the pattern (it was interpolated raw before)
            repo_matches = re.findall(rf'href="/~{re.escape(username)}/([^"]+)"', resp.text)
            for repo in repo_matches[:limit]:
                # skip git-internal paths that match the same href shape
                if repo and not repo.startswith(('refs', 'log', 'tree')):
                    repos.append({
                        'name': repo,
                        'full_name': f"~{username}/{repo}",
                    })
    # narrowed from a bare `except:` so interrupts propagate
    except requests.RequestException:
        pass
    return repos
# === UNIFIED SCRAPER ===
def scrape_forge(instance_name: str, instance_url: str, platform_type: str, limit: int = 50) -> List[Dict]:
    """
    scrape users from any forge type.
    returns list of human dicts ready for database.

    instance_name: short label for the instance (e.g. 'codeberg')
    instance_url: base URL of the instance (no trailing slash)
    platform_type: 'gitea' | 'forgejo' | 'gogs' | 'gitlab' | 'sourcehut';
        anything else logs a warning and returns []
    limit: max number of users to list from the instance
    """
    log(f"scraping {instance_name} ({platform_type})...")
    humans = []
    # get user list based on platform type; each branch also binds the
    # matching per-user detail/repo fetchers with instance_url closed over
    if platform_type in ('gitea', 'forgejo', 'gogs'):
        users = scrape_gitea_users(instance_url, limit)
        get_details = lambda u: get_gitea_user_details(instance_url, u)
        get_repos = lambda u: get_gitea_user_repos(instance_url, u)
    elif platform_type == 'gitlab':
        users = scrape_gitlab_users(instance_url, limit)
        get_details = lambda u: get_gitlab_user_details(instance_url, u)
        get_repos = lambda u: get_gitlab_user_projects(instance_url, u)
    elif platform_type == 'sourcehut':
        users = scrape_sourcehut_users(limit)
        get_details = get_sourcehut_user_details
        get_repos = get_sourcehut_user_repos
    else:
        log(f" unknown platform type: {platform_type}")
        return []
    for user in users:
        username = user.get('username')
        if not username:
            continue
        time.sleep(REQUEST_DELAY)  # throttle per-user requests
        # get detailed info; merged over the listing data when available
        details = get_details(username)
        if details:
            user.update(details)
        # get repos
        repos = get_repos(username)
        # build human record
        bio = user.get('bio', '') or ''
        website = user.get('website', '') or ''
        # analyze signals from bio (and website text) -> (score, signals, reasons)
        score, signals, reasons = analyze_text(bio + ' ' + website)
        # BOOST: self-hosted git = highest signal
        score += 25
        signals.append('selfhosted_git')
        reasons.append(f'uses self-hosted git ({instance_name})')
        # extract contact info (email only kept if it looks like an address)
        contact = {}
        email = user.get('email') or user.get('public_email')
        if email and '@' in email:
            contact['email'] = email
        if website:
            contact['website'] = website
        # build human dict; list-valued fields are JSON-serialized for storage
        human = {
            'platform': f'{platform_type}:{instance_name}',
            'username': username,
            'name': user.get('full_name'),
            'bio': bio,
            # sourcehut profiles live on sr.ht, not the git host itself
            'url': f"{instance_url}/{username}" if platform_type != 'sourcehut' else f"https://sr.ht/~{username}",
            'score': score,
            'signals': json.dumps(signals),
            'reasons': json.dumps(reasons),
            'contact': json.dumps(contact),
            'extra': json.dumps({
                'instance': instance_name,
                'instance_url': instance_url,
                'platform_type': platform_type,
                'repos': repos[:5],  # cap stored repo list
                'followers': user.get('followers', 0),
                'email': email,
                'website': website,
            }),
            # anyone with at least one repo is classified a builder
            'user_type': 'builder' if repos else 'none',
        }
        humans.append(human)
        log(f" {username}: score={score}, repos={len(repos)}")
    return humans
def scrape_all_forges(limit_per_instance: int = 30) -> List[Dict]:
    """scrape all known forge instances"""
    all_humans = []
    for instance_name, instance_url, platform_type in KNOWN_INSTANCES:
        # one bad instance must not abort the whole sweep
        try:
            found = scrape_forge(instance_name, instance_url, platform_type, limit_per_instance)
            all_humans.extend(found)
            log(f" {instance_name}: {len(found)} humans")
        except Exception as e:
            log(f" {instance_name} failed: {e}")
        time.sleep(2)  # be nice between instances
    log(f"total: {len(all_humans)} humans from {len(KNOWN_INSTANCES)} forges")
    return all_humans
# === OUTREACH METHODS ===
def can_message_on_forge(instance_url: str, platform_type: str) -> bool:
    """check if we can send messages on this forge"""
    # direct outreach only works where the platform offers a messaging
    # surface: gitlab (merge request comments) and sourcehut (mailing
    # lists). gitea/forgejo/gogs have no DMs at all.
    messageable = {'gitlab', 'sourcehut'}
    return platform_type in messageable
def open_forge_issue(instance_url: str, platform_type: str,
                     owner: str, repo: str, title: str, body: str) -> Tuple[bool, str]:
    """
    open an issue on a forge as outreach method.
    requires API token for authenticated requests.
    """
    # stub: per-instance API tokens are not wired up yet, and email is
    # the preferred outreach channel anyway, so always report failure
    return (False, "forge issue creation not implemented yet")
# === DISCOVERY ===
def discover_forge_instances() -> List[Tuple[str, str, str]]:
    """
    discover new forge instances from:
    - fediverse (they often announce)
    - known lists
    - DNS patterns
    returns list of (name, url, platform_type)
    """
    # currently just a copy of the hardcoded list; real discovery
    # (forgejo issue mentions, fediverse git.* domains, awesome lists)
    # is still to be implemented
    discovered = []
    discovered.extend(KNOWN_INSTANCES)
    return discovered
if __name__ == '__main__':
    # smoke test against a real public instance
    print("testing forge scrapers...")
    sample = scrape_forge('codeberg', 'https://codeberg.org', 'gitea', limit=5)
    print(f"codeberg: {len(sample)} humans")
    for record in sample[:2]:
        print(f" {record['username']}: {record['score']} - {record.get('signals')}")

View file

@ -103,6 +103,15 @@ PLATFORM_PATTERNS = {
'devto': [ 'devto': [
(r'https?://dev\.to/([^/?#]+)', lambda m: m.group(1)), (r'https?://dev\.to/([^/?#]+)', lambda m: m.group(1)),
], ],
# reddit/lobsters
'reddit': [
(r'https?://(?:www\.)?reddit\.com/u(?:ser)?/([^/?#]+)', lambda m: f"u/{m.group(1)}"),
(r'https?://(?:old|new)\.reddit\.com/u(?:ser)?/([^/?#]+)', lambda m: f"u/{m.group(1)}"),
],
'lobsters': [
(r'https?://lobste\.rs/u/([^/?#]+)', lambda m: m.group(1)),
],
# funding # funding
'kofi': [ 'kofi': [

View file

@ -1,24 +1,14 @@
""" """
scoutd/reddit.py - reddit discovery (DISCOVERY ONLY, NOT OUTREACH) scoutd/reddit.py - reddit discovery with TAVILY web search
reddit is a SIGNAL SOURCE, not a contact channel. CRITICAL: always quote usernames in tavily searches to avoid fuzzy matching
flow:
1. scrape reddit for users active in target subs
2. extract their reddit profile
3. look for links TO other platforms (github, mastodon, website, etc.)
4. add to scout database with reddit as signal source
5. reach out via their OTHER platforms, never reddit
if reddit user has no external links:
- add to manual_queue with note "reddit-only, needs manual review"
also detects lost builders - stuck in learnprogramming for years, imposter syndrome, etc.
""" """
import requests import requests
import json import json
import time import time
import re import re
import os
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from collections import defaultdict from collections import defaultdict
@ -35,43 +25,14 @@ from .lost import (
HEADERS = {'User-Agent': 'connectd:v1.0 (community discovery)'} HEADERS = {'User-Agent': 'connectd:v1.0 (community discovery)'}
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'reddit' CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'reddit'
# patterns for extracting external platform links GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
PLATFORM_PATTERNS = { TAVILY_API_KEY = os.getenv('TAVILY_API_KEY', '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')
'github': [
r'github\.com/([a-zA-Z0-9_-]+)',
r'gh:\s*@?([a-zA-Z0-9_-]+)',
],
'mastodon': [
r'@([a-zA-Z0-9_]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
r'mastodon\.social/@([a-zA-Z0-9_]+)',
r'fosstodon\.org/@([a-zA-Z0-9_]+)',
r'hachyderm\.io/@([a-zA-Z0-9_]+)',
r'tech\.lgbt/@([a-zA-Z0-9_]+)',
],
'twitter': [
r'twitter\.com/([a-zA-Z0-9_]+)',
r'x\.com/([a-zA-Z0-9_]+)',
r'(?:^|\s)@([a-zA-Z0-9_]{1,15})(?:\s|$)', # bare @handle
],
'bluesky': [
r'bsky\.app/profile/([a-zA-Z0-9_.-]+)',
r'([a-zA-Z0-9_-]+)\.bsky\.social',
],
'website': [
r'https?://([a-zA-Z0-9_-]+\.[a-zA-Z]{2,}[a-zA-Z0-9./_-]*)',
],
'matrix': [
r'@([a-zA-Z0-9_-]+):([a-zA-Z0-9.-]+)',
],
}
def _api_get(url, params=None): def _api_get(url, params=None, headers=None):
"""rate-limited request"""
cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}" cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}"
cache_file = CACHE_DIR / f"{hash(cache_key) & 0xffffffff}.json" cache_file = CACHE_DIR / f"{hash(cache_key) & 0xffffffff}.json"
CACHE_DIR.mkdir(parents=True, exist_ok=True) CACHE_DIR.mkdir(parents=True, exist_ok=True)
if cache_file.exists(): if cache_file.exists():
try: try:
data = json.loads(cache_file.read_text()) data = json.loads(cache_file.read_text())
@ -79,142 +40,263 @@ def _api_get(url, params=None):
return data.get('_data') return data.get('_data')
except: except:
pass pass
time.sleep(1)
time.sleep(2) # reddit rate limit req_headers = {**HEADERS, **(headers or {})}
try: try:
resp = requests.get(url, headers=HEADERS, params=params, timeout=30) resp = requests.get(url, headers=req_headers, params=params, timeout=30)
resp.raise_for_status() resp.raise_for_status()
result = resp.json() result = resp.json()
cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result})) cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result}))
return result return result
except requests.exceptions.RequestException as e: except:
print(f" reddit api error: {e}")
return None return None
def extract_external_links(text): def tavily_search(query, max_results=10):
"""extract links to other platforms from text""" if not TAVILY_API_KEY:
links = {} return []
try:
resp = requests.post(
'https://api.tavily.com/search',
json={'api_key': TAVILY_API_KEY, 'query': query, 'max_results': max_results},
timeout=30
)
if resp.status_code == 200:
return resp.json().get('results', [])
except Exception as e:
print(f" tavily error: {e}")
return []
def extract_links_from_text(text, username=None):
found = {}
if not text: if not text:
return links return found
text_lower = text.lower()
username_lower = username.lower() if username else None
for platform, patterns in PLATFORM_PATTERNS.items(): # email
for pattern in patterns: for email in re.findall(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', text):
matches = re.findall(pattern, text, re.IGNORECASE) if any(x in email.lower() for x in ['noreply', 'example', '@reddit', 'info@', 'support@', 'contact@', 'admin@']):
if matches: continue
if platform == 'mastodon' and isinstance(matches[0], tuple): if username_lower and username_lower in email.lower():
# full fediverse handle found['email'] = email
links[platform] = f"@{matches[0][0]}@{matches[0][1]}"
elif platform == 'matrix' and isinstance(matches[0], tuple):
links[platform] = f"@{matches[0][0]}:{matches[0][1]}"
elif platform == 'website':
# skip reddit/imgur/etc
for match in matches:
if not any(x in match.lower() for x in ['reddit', 'imgur', 'redd.it', 'i.redd']):
links[platform] = f"https://{match}"
break break
else: if 'email' not in found:
links[platform] = matches[0] found['email'] = email
# github
for gh in re.findall(r'github\.com/([a-zA-Z0-9_-]+)', text):
if gh.lower() in ['topics', 'explore', 'trending', 'sponsors', 'orgs']:
continue
if username_lower and gh.lower() == username_lower:
found['github'] = gh
break break
return links # mastodon
masto = re.search(r'@([a-zA-Z0-9_]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', text)
if masto:
found['mastodon'] = f"@{masto.group(1)}@{masto.group(2)}"
for inst in ['mastodon.social', 'fosstodon.org', 'hachyderm.io', 'tech.lgbt']:
m = re.search(f'{inst}/@([a-zA-Z0-9_]+)', text)
if m:
found['mastodon'] = f"@{m.group(1)}@{inst}"
break
# bluesky
bsky = re.search(r'bsky\.app/profile/([a-zA-Z0-9_.-]+)', text)
if bsky:
found['bluesky'] = bsky.group(1)
# twitter
tw = re.search(r'(?:twitter|x)\.com/([a-zA-Z0-9_]+)', text)
if tw and tw.group(1).lower() not in ['home', 'explore', 'search']:
found['twitter'] = tw.group(1)
# linkedin
li = re.search(r'linkedin\.com/in/([a-zA-Z0-9_-]+)', text)
if li:
found['linkedin'] = f"https://linkedin.com/in/{li.group(1)}"
# twitch
twitch = re.search(r'twitch\.tv/([a-zA-Z0-9_]+)', text)
if twitch:
found['twitch'] = f"https://twitch.tv/{twitch.group(1)}"
# itch.io
itch = re.search(r'itch\.io/profile/([a-zA-Z0-9_-]+)', text)
if itch:
found['itch'] = f"https://itch.io/profile/{itch.group(1)}"
# website
for url in re.findall(r'https?://([a-zA-Z0-9_-]+\.[a-zA-Z]{2,}[a-zA-Z0-9./_-]*)', text):
skip = ['reddit', 'imgur', 'google', 'facebook', 'twitter', 'youtube', 'wikipedia', 'amazon']
if not any(x in url.lower() for x in skip):
if username_lower and username_lower in url.lower():
found['website'] = f"https://{url}"
break
if 'website' not in found:
found['website'] = f"https://{url}"
return found
def cross_platform_discovery(username, full_text=''):
"""
search the ENTIRE internet using TAVILY.
CRITICAL: always quote username to avoid fuzzy matching!
"""
found = {}
all_content = full_text
username_lower = username.lower()
print(f" 🔍 cross-platform search for {username}...")
# ALWAYS QUOTE THE USERNAME - critical for exact matching
searches = [
f'"{username}"', # just username, quoted
f'"{username}" github', # github
f'"{username}" developer programmer', # dev context
f'"{username}" email contact', # contact
f'"{username}" mastodon', # fediverse
]
for query in searches:
print(f" 🌐 tavily: {query}")
results = tavily_search(query, max_results=5)
for result in results:
url = result.get('url', '').lower()
title = result.get('title', '')
content = result.get('content', '')
combined = f"{url} {title} {content}"
# validate username appears
if username_lower not in combined.lower():
continue
all_content += f" {combined}"
# extract from URL directly
if f'github.com/{username_lower}' in url and not found.get('github'):
found['github'] = username
print(f" ✓ github: {username}")
if f'twitch.tv/{username_lower}' in url and not found.get('twitch'):
found['twitch'] = f"https://twitch.tv/{username}"
print(f" ✓ twitch")
if 'itch.io/profile/' in url and username_lower in url and not found.get('itch'):
found['itch'] = url if url.startswith('http') else f"https://{url}"
print(f" ✓ itch.io")
if 'linkedin.com/in/' in url and not found.get('linkedin'):
li = re.search(r'linkedin\.com/in/([a-zA-Z0-9_-]+)', url)
if li:
found['linkedin'] = f"https://linkedin.com/in/{li.group(1)}"
print(f" ✓ linkedin")
# extract from content
extracted = extract_links_from_text(all_content, username)
for k, v in extracted.items():
if k not in found:
found[k] = v
print(f"{k}")
# good contact found? stop searching
if found.get('email') or found.get('github') or found.get('mastodon') or found.get('twitch'):
break
# === API CHECKS ===
if not found.get('github'):
headers = {'Authorization': f'token {GITHUB_TOKEN}'} if GITHUB_TOKEN else {}
try:
resp = requests.get(f'https://api.github.com/users/{username}', headers=headers, timeout=10)
if resp.status_code == 200:
data = resp.json()
found['github'] = username
print(f" ✓ github API")
if data.get('email') and 'email' not in found:
found['email'] = data['email']
if data.get('blog') and 'website' not in found:
found['website'] = data['blog'] if data['blog'].startswith('http') else f"https://{data['blog']}"
except:
pass
if not found.get('mastodon'):
for inst in ['mastodon.social', 'fosstodon.org', 'hachyderm.io', 'tech.lgbt']:
try:
resp = requests.get(f'https://{inst}/api/v1/accounts/lookup', params={'acct': username}, timeout=5)
if resp.status_code == 200:
found['mastodon'] = f"@{username}@{inst}"
print(f" ✓ mastodon: {found['mastodon']}")
break
except:
continue
if not found.get('bluesky'):
try:
resp = requests.get('https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile',
params={'actor': f'{username}.bsky.social'}, timeout=10)
if resp.status_code == 200:
found['bluesky'] = resp.json().get('handle')
print(f" ✓ bluesky")
except:
pass
return found
def get_user_profile(username): def get_user_profile(username):
"""get user profile including bio/description"""
url = f'https://www.reddit.com/user/{username}/about.json' url = f'https://www.reddit.com/user/{username}/about.json'
data = _api_get(url) data = _api_get(url)
if not data or 'data' not in data: if not data or 'data' not in data:
return None return None
profile = data['data'] profile = data['data']
return { return {
'username': username, 'username': username,
'name': profile.get('name'),
'bio': profile.get('subreddit', {}).get('public_description', ''), 'bio': profile.get('subreddit', {}).get('public_description', ''),
'title': profile.get('subreddit', {}).get('title', ''), 'title': profile.get('subreddit', {}).get('title', ''),
'icon': profile.get('icon_img'),
'created_utc': profile.get('created_utc'),
'total_karma': profile.get('total_karma', 0), 'total_karma': profile.get('total_karma', 0),
'link_karma': profile.get('link_karma', 0),
'comment_karma': profile.get('comment_karma', 0),
} }
def get_subreddit_users(subreddit, limit=100): def get_subreddit_users(subreddit, limit=100):
"""get recent posters/commenters from a subreddit"""
users = set() users = set()
for endpoint in ['new', 'comments']:
# posts url = f'https://www.reddit.com/r/{subreddit}/{endpoint}.json'
url = f'https://www.reddit.com/r/{subreddit}/new.json'
data = _api_get(url, {'limit': limit}) data = _api_get(url, {'limit': limit})
if data and 'data' in data: if data and 'data' in data:
for post in data['data'].get('children', []): for item in data['data'].get('children', []):
author = post['data'].get('author') author = item['data'].get('author')
if author and author not in ['[deleted]', 'AutoModerator']: if author and author not in ['[deleted]', 'AutoModerator']:
users.add(author) users.add(author)
# comments
url = f'https://www.reddit.com/r/{subreddit}/comments.json'
data = _api_get(url, {'limit': limit})
if data and 'data' in data:
for comment in data['data'].get('children', []):
author = comment['data'].get('author')
if author and author not in ['[deleted]', 'AutoModerator']:
users.add(author)
return users return users
def get_user_activity(username): def get_user_activity(username):
"""get user's posts and comments"""
activity = [] activity = []
for endpoint in ['submitted', 'comments']:
# posts url = f'https://www.reddit.com/user/{username}/{endpoint}.json'
url = f'https://www.reddit.com/user/{username}/submitted.json'
data = _api_get(url, {'limit': 100}) data = _api_get(url, {'limit': 100})
if data and 'data' in data: if data and 'data' in data:
for post in data['data'].get('children', []): for item in data['data'].get('children', []):
activity.append({ activity.append({
'type': 'post', 'type': 'post' if endpoint == 'submitted' else 'comment',
'subreddit': post['data'].get('subreddit'), 'subreddit': item['data'].get('subreddit'),
'title': post['data'].get('title', ''), 'title': item['data'].get('title', ''),
'body': post['data'].get('selftext', ''), 'body': item['data'].get('selftext', '') or item['data'].get('body', ''),
'score': post['data'].get('score', 0), 'score': item['data'].get('score', 0),
}) })
# comments
url = f'https://www.reddit.com/user/{username}/comments.json'
data = _api_get(url, {'limit': 100})
if data and 'data' in data:
for comment in data['data'].get('children', []):
activity.append({
'type': 'comment',
'subreddit': comment['data'].get('subreddit'),
'body': comment['data'].get('body', ''),
'score': comment['data'].get('score', 0),
})
return activity return activity
def analyze_reddit_user(username): def analyze_reddit_user(username):
"""
analyze a reddit user for alignment and extract external platform links.
reddit is DISCOVERY ONLY - we find users here but contact them elsewhere.
"""
activity = get_user_activity(username) activity = get_user_activity(username)
if not activity: if not activity:
return None return None
# get profile for bio
profile = get_user_profile(username) profile = get_user_profile(username)
# count subreddit activity
sub_activity = defaultdict(int) sub_activity = defaultdict(int)
text_parts = [] text_parts = []
total_karma = 0 total_karma = 0
@ -232,20 +314,16 @@ def analyze_reddit_user(username):
full_text = ' '.join(text_parts) full_text = ' '.join(text_parts)
text_score, positive_signals, negative_signals = analyze_text(full_text) text_score, positive_signals, negative_signals = analyze_text(full_text)
# EXTRACT EXTERNAL LINKS - this is the key part
# check profile bio first
external_links = {} external_links = {}
if profile: if profile:
bio_text = f"{profile.get('bio', '')} {profile.get('title', '')}" external_links.update(extract_links_from_text(f"{profile.get('bio', '')} {profile.get('title', '')}", username))
external_links.update(extract_external_links(bio_text)) external_links.update(extract_links_from_text(full_text, username))
# also scan posts/comments for links (people often share their github etc) # TAVILY search
activity_links = extract_external_links(full_text) discovered = cross_platform_discovery(username, full_text)
for platform, link in activity_links.items(): external_links.update(discovered)
if platform not in external_links:
external_links[platform] = link
# subreddit scoring # scoring
sub_score = 0 sub_score = 0
aligned_subs = [] aligned_subs = []
for sub, count in sub_activity.items(): for sub, count in sub_activity.items():
@ -254,13 +332,11 @@ def analyze_reddit_user(username):
sub_score += weight * min(count, 5) sub_score += weight * min(count, 5)
aligned_subs.append(sub) aligned_subs.append(sub)
# multi-sub bonus
if len(aligned_subs) >= 5: if len(aligned_subs) >= 5:
sub_score += 30 sub_score += 30
elif len(aligned_subs) >= 3: elif len(aligned_subs) >= 3:
sub_score += 15 sub_score += 15
# negative sub penalty
for sub in sub_activity: for sub in sub_activity:
if sub.lower() in [n.lower() for n in NEGATIVE_SUBREDDITS]: if sub.lower() in [n.lower() for n in NEGATIVE_SUBREDDITS]:
sub_score -= 50 sub_score -= 50
@ -268,77 +344,33 @@ def analyze_reddit_user(username):
total_score = text_score + sub_score total_score = text_score + sub_score
# bonus if they have external links (we can actually contact them)
if external_links.get('github'): if external_links.get('github'):
total_score += 10 total_score += 10
positive_signals.append('has github') positive_signals.append('github')
if external_links.get('mastodon'): if external_links.get('mastodon'):
total_score += 10 total_score += 10
positive_signals.append('has mastodon') positive_signals.append('mastodon')
if external_links.get('website'): if external_links.get('email'):
total_score += 15
positive_signals.append('email')
if external_links.get('twitch'):
total_score += 5 total_score += 5
positive_signals.append('has website') positive_signals.append('twitch')
# === LOST BUILDER DETECTION === # lost builder
# reddit is HIGH SIGNAL for lost builders - stuck in learnprogramming,
# imposter syndrome posts, "i wish i could" language, etc.
subreddits_list = list(sub_activity.keys()) subreddits_list = list(sub_activity.keys())
lost_signals, lost_weight = analyze_reddit_for_lost_signals(activity, subreddits_list) lost_signals, lost_weight = analyze_reddit_for_lost_signals(activity, subreddits_list)
text_lost_signals, _ = analyze_text_for_lost_signals(full_text)
# also check full text for lost patterns (already done partially in analyze_reddit_for_lost_signals)
text_lost_signals, text_lost_weight = analyze_text_for_lost_signals(full_text)
for sig in text_lost_signals: for sig in text_lost_signals:
if sig not in lost_signals: if sig not in lost_signals:
lost_signals.append(sig) lost_signals.append(sig)
lost_weight += text_lost_weight
lost_potential_score = lost_weight builder_activity = 20 if external_links.get('github') else 0
user_type = classify_user(lost_weight, builder_activity, total_score)
# classify: builder, lost, both, or none confidence = min(0.95, 0.3 + (0.2 if len(activity) > 20 else 0) + (0.2 if len(aligned_subs) >= 2 else 0) + (0.1 if external_links else 0))
# for reddit, builder_score is based on having external links + high karma
builder_activity = 0
if external_links.get('github'):
builder_activity += 20
if total_karma > 1000:
builder_activity += 15
elif total_karma > 500:
builder_activity += 10
user_type = classify_user(lost_potential_score, builder_activity, total_score) reddit_only = not any([external_links.get(k) for k in ['github', 'mastodon', 'bluesky', 'email', 'matrix', 'linkedin', 'twitch', 'itch']])
# confidence
confidence = 0.3
if len(activity) > 20:
confidence += 0.2
if len(aligned_subs) >= 2:
confidence += 0.2
if len(text_parts) > 10:
confidence += 0.2
# higher confidence if we have contact methods
if external_links:
confidence += 0.1
confidence = min(confidence, 0.95)
reasons = []
if aligned_subs:
reasons.append(f"active in: {', '.join(aligned_subs[:5])}")
if positive_signals:
reasons.append(f"signals: {', '.join(positive_signals[:5])}")
if negative_signals:
reasons.append(f"WARNING: {', '.join(negative_signals)}")
if external_links:
reasons.append(f"external: {', '.join(external_links.keys())}")
# add lost reasons if applicable
if user_type == 'lost' or user_type == 'both':
lost_descriptions = get_signal_descriptions(lost_signals)
if lost_descriptions:
reasons.append(f"LOST SIGNALS: {', '.join(lost_descriptions[:3])}")
# determine if this is reddit-only (needs manual review)
reddit_only = len(external_links) == 0
if reddit_only:
reasons.append("REDDIT-ONLY: needs manual review for outreach")
return { return {
'platform': 'reddit', 'platform': 'reddit',
@ -351,153 +383,46 @@ def analyze_reddit_user(username):
'subreddits': aligned_subs, 'subreddits': aligned_subs,
'activity_count': len(activity), 'activity_count': len(activity),
'karma': total_karma, 'karma': total_karma,
'reasons': reasons, 'reasons': [f"contact: {', '.join(external_links.keys())}"] if external_links else [],
'scraped_at': datetime.now().isoformat(), 'scraped_at': datetime.now().isoformat(),
# external platform links for outreach
'external_links': external_links, 'external_links': external_links,
'reddit_only': reddit_only, 'reddit_only': reddit_only,
'extra': { 'extra': external_links,
'github': external_links.get('github'), 'lost_potential_score': lost_weight,
'mastodon': external_links.get('mastodon'),
'twitter': external_links.get('twitter'),
'bluesky': external_links.get('bluesky'),
'website': external_links.get('website'),
'matrix': external_links.get('matrix'),
'reddit_karma': total_karma,
'reddit_activity': len(activity),
},
# lost builder fields
'lost_potential_score': lost_potential_score,
'lost_signals': lost_signals, 'lost_signals': lost_signals,
'user_type': user_type, 'user_type': user_type,
} }
def scrape_reddit(db, limit_per_sub=50): def scrape_reddit(db, limit_per_sub=50):
""" print("scoutd/reddit: scraping (TAVILY enabled)...")
full reddit scrape - DISCOVERY ONLY
finds aligned users, extracts external links for outreach.
reddit-only users go to manual queue.
"""
print("scoutd/reddit: starting scrape (discovery only, not outreach)...")
# find users in multiple aligned subs
user_subs = defaultdict(set) user_subs = defaultdict(set)
for sub in ['intentionalcommunity', 'cohousing', 'selfhosted', 'homeassistant', 'solarpunk', 'cooperatives', 'privacy', 'localllama', 'homelab', 'learnprogramming']:
# aligned subs - active builders
priority_subs = ['intentionalcommunity', 'cohousing', 'selfhosted',
'homeassistant', 'solarpunk', 'cooperatives', 'privacy',
'localllama', 'homelab', 'degoogle', 'pihole', 'unraid']
# lost builder subs - people who need encouragement
# these folks might be stuck, but they have aligned interests
lost_subs = ['learnprogramming', 'findapath', 'getdisciplined',
'careerguidance', 'cscareerquestions', 'decidingtobebetter']
# scrape both - we want to find lost builders with aligned interests
all_subs = priority_subs + lost_subs
for sub in all_subs:
print(f" scraping r/{sub}...")
users = get_subreddit_users(sub, limit=limit_per_sub) users = get_subreddit_users(sub, limit=limit_per_sub)
for user in users: for user in users:
user_subs[user].add(sub) user_subs[user].add(sub)
print(f" found {len(users)} users")
# filter for multi-sub users
multi_sub = {u: subs for u, subs in user_subs.items() if len(subs) >= 2} multi_sub = {u: subs for u, subs in user_subs.items() if len(subs) >= 2}
print(f" {len(multi_sub)} users in 2+ aligned subs") print(f" {len(multi_sub)} users in 2+ subs")
# analyze
results = [] results = []
reddit_only_count = 0
external_link_count = 0
builders_found = 0
lost_found = 0
for username in multi_sub: for username in multi_sub:
try: try:
result = analyze_reddit_user(username) result = analyze_reddit_user(username)
if result and result['score'] > 0: if result and result['score'] > 0:
results.append(result) results.append(result)
db.save_human(result) db.save_human(result)
user_type = result.get('user_type', 'none')
# track lost builders - reddit is high signal for these
if user_type == 'lost':
lost_found += 1
lost_score = result.get('lost_potential_score', 0)
if lost_score >= 40:
print(f" 💔 u/{username}: lost_score={lost_score}, values={result['score']} pts")
# lost builders also go to manual queue if reddit-only
if result.get('reddit_only'):
_add_to_manual_queue(result)
elif user_type == 'builder':
builders_found += 1
elif user_type == 'both':
builders_found += 1
lost_found += 1
print(f" ⚡ u/{username}: recovering builder")
# track external links
if result.get('reddit_only'):
reddit_only_count += 1
# add high-value users to manual queue for review
if result['score'] >= 50 and user_type != 'lost': # lost already added above
_add_to_manual_queue(result)
print(f" 📋 u/{username}: {result['score']} pts (reddit-only → manual queue)")
else:
external_link_count += 1
if result['score'] >= 50 and user_type == 'builder':
links = list(result.get('external_links', {}).keys())
print(f" ★ u/{username}: {result['score']} pts → {', '.join(links)}")
except Exception as e: except Exception as e:
print(f" error on {username}: {e}") print(f" error: {username}: {e}")
print(f"scoutd/reddit: found {len(results)} aligned humans") print(f"scoutd/reddit: {len(results)} humans")
print(f" - {builders_found} active builders")
print(f" - {lost_found} lost builders (need encouragement)")
print(f" - {external_link_count} with external links (reachable)")
print(f" - {reddit_only_count} reddit-only (manual queue)")
return results return results
def _add_to_manual_queue(result): def _add_to_manual_queue(result):
"""add reddit-only user to manual queue for review"""
from pathlib import Path
import json
queue_file = Path(__file__).parent.parent / 'data' / 'manual_queue.json' queue_file = Path(__file__).parent.parent / 'data' / 'manual_queue.json'
queue_file.parent.mkdir(parents=True, exist_ok=True) queue_file.parent.mkdir(parents=True, exist_ok=True)
queue = json.loads(queue_file.read_text()) if queue_file.exists() else []
queue = [] if not any(q.get('username') == result['username'] for q in queue):
if queue_file.exists(): queue.append({'platform': 'reddit', 'username': result['username'], 'url': result['url'], 'score': result['score'], 'queued_at': datetime.now().isoformat()})
try:
queue = json.loads(queue_file.read_text())
except:
pass
# check if already in queue
existing = [q for q in queue if q.get('username') == result['username'] and q.get('platform') == 'reddit']
if existing:
return
queue.append({
'platform': 'reddit',
'username': result['username'],
'url': result['url'],
'score': result['score'],
'subreddits': result.get('subreddits', []),
'signals': result.get('signals', []),
'reasons': result.get('reasons', []),
'note': 'reddit-only user - no external links found. DM manually if promising.',
'queued_at': datetime.now().isoformat(),
'status': 'pending',
})
queue_file.write_text(json.dumps(queue, indent=2)) queue_file.write_text(json.dumps(queue, indent=2))

View file

@ -31,9 +31,8 @@ there's a better way and we are going to build it together."
you can reach *person* at *preffered contact method* you can reach *person* at *preffered contact method*
- connectd daemon
hope it goes well! hope it goes well!
-connectd
CONNECTD_ICONS (line 33-44): CONNECTD_ICONS (line 33-44):
CONNECTD_ICONS = '''<div style="display:flex;gap:16px;flex-wrap:wrap"> CONNECTD_ICONS = '''<div style="display:flex;gap:16px;flex-wrap:wrap">
<a href="https://github.com/connectd-daemon" title="GitHub" style="color:#888"><svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"/></svg></a> <a href="https://github.com/connectd-daemon" title="GitHub" style="color:#888"><svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"/></svg></a>