mirror of
https://github.com/sudoxnym/connectd.git
synced 2026-04-14 03:27:24 +00:00
add forge support, central coordination, lost builder detection
- central API client for distributed instance coordination - forge scraper: gitea, forgejo, gogs, gitlab, sourcehut, codeberg - forge issue delivery as outreach method - usage-based contact method ranking with fallback chain - lost builder detection and targeted outreach - reddit and lobsters handle discovery - deep scrape for handle/email discovery from profiles
This commit is contained in:
parent
99946bfef5
commit
f33409ceda
15 changed files with 2102 additions and 837 deletions
70
api.py
70
api.py
|
|
@ -116,6 +116,7 @@ DASHBOARD_HTML = """<!DOCTYPE html>
|
|||
<div id="queue" class="pnl"></div>
|
||||
<div id="sent" class="pnl"></div>
|
||||
<div id="failed" class="pnl"></div>
|
||||
<div id="lost" class="pnl"></div>
|
||||
|
||||
<script>
|
||||
var currentTab = 'host';
|
||||
|
|
@ -130,7 +131,8 @@ function initTabs() {
|
|||
{id: 'host', label: 'you'},
|
||||
{id: 'queue', label: 'queue'},
|
||||
{id: 'sent', label: 'sent'},
|
||||
{id: 'failed', label: 'failed'}
|
||||
{id: 'failed', label: 'failed'},
|
||||
{id: 'lost', label: 'lost builders'}
|
||||
];
|
||||
|
||||
tabs.forEach(function(t) {
|
||||
|
|
@ -319,6 +321,31 @@ async function loadFailed() {
|
|||
|
||||
$('failed').innerHTML = html;
|
||||
}
|
||||
async function loadLost() {
    // fetch lost-builder matches and render them into the #lost panel
    var res = await fetch("/api/lost_builders");
    var data = await res.json();

    // escape user-controlled strings before inserting into innerHTML;
    // the original concatenated usernames etc. raw, which is an XSS vector
    function esc(s) {
        return String(s == null ? "" : s).replace(/[&<>"']/g, function(c) {
            return {"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;"}[c];
        });
    }

    var html = "<h2>lost builders (" + (data.total || 0) + ")</h2>";
    html += "<p style=\"color:#c792ea;font-size:0.8em;margin-bottom:10px\">people who need to see that someone like them made it</p>";

    if (!data.matches || data.matches.length === 0) {
        html += "<div class=\"meta\">no lost builders found</div>";
    }

    for (var i = 0; i < (data.matches || []).length; i++) {
        var m = data.matches[i];
        html += "<div class=\"card\">";
        html += "<div class=\"card-hdr\"><span class=\"to\">LOST: " + esc(m.lost_user) + "</span><span class=\"score\">" + esc(m.match_score) + "</span></div>";
        html += "<div class=\"meta\">lost: " + esc(m.lost_score) + " | values: " + esc(m.values_score) + "</div>";
        html += "<div class=\"meta\" style=\"color:#0f8\">BUILDER: " + esc(m.builder) + " (" + esc(m.builder_platform) + ")</div>";
        html += "<div class=\"meta\">score: " + esc(m.builder_score) + " | repos: " + esc(m.builder_repos) + " | stars: " + esc(m.builder_stars) + "</div>";
        html += "<div class=\"meta\">shared: " + esc((m.shared || []).join(", ")) + "</div>";
        html += "</div>";
    }

    $("lost").innerHTML = html;
}
|
||||
|
||||
|
||||
// refresh every dashboard panel in a fixed order
function load() {
    var panels = [loadStats, loadQueue, loadSent, loadFailed, loadLost];
    panels.forEach(function(refresh) { refresh(); });
}
|
||||
|
||||
document.addEventListener('click', function(e) {
|
||||
|
|
@ -438,6 +466,8 @@ class APIHandler(BaseHTTPRequestHandler):
|
|||
self._handle_top_humans()
|
||||
elif path == '/api/user':
|
||||
self._handle_user()
|
||||
elif path == '/api/lost_builders':
|
||||
self._handle_lost_builders()
|
||||
else:
|
||||
self._send_json({'error': 'not found'}, 404)
|
||||
def _handle_favicon(self):
|
||||
|
|
@ -1171,6 +1201,44 @@ class APIHandler(BaseHTTPRequestHandler):
|
|||
self._send_json({'error': str(e)}, 500)
|
||||
|
||||
|
||||
|
||||
def _handle_lost_builders(self):
    """return lost builders with their inspiring matches as JSON.

    Response shape: {'total': int, 'error': str | None, 'matches': [dict]}.
    Any failure is reported as a 500 with {'error': message}.
    """
    try:
        from matchd.lost import find_matches_for_lost_builders
        db = Database()
        try:
            matches, error = find_matches_for_lost_builders(
                db, min_lost_score=30, min_values_score=15, limit=50)

            result = {
                'total': len(matches) if matches else 0,
                'error': error,
                'matches': []
            }

            for m in (matches or []):
                lost = m.get('lost_user', {})
                builder = m.get('inspiring_builder', {})
                result['matches'].append({
                    'lost_user': lost.get('username'),
                    'lost_platform': lost.get('platform'),
                    'lost_score': lost.get('lost_potential_score', 0),
                    'values_score': lost.get('score', 0),
                    'builder': builder.get('username'),
                    'builder_platform': builder.get('platform'),
                    'builder_score': builder.get('score', 0),
                    'builder_repos': m.get('builder_repos', 0),
                    'builder_stars': m.get('builder_stars', 0),
                    'match_score': m.get('match_score', 0),
                    'shared': m.get('shared_interests', [])[:5],
                })
        finally:
            # the original only closed on the success path, leaking the
            # connection whenever matching raised
            db.close()

        self._send_json(result)
    except Exception as e:
        self._send_json({'error': str(e)}, 500)
|
||||
|
||||
|
||||
def run_api_server():
|
||||
"""run the API server in a thread"""
|
||||
server = HTTPServer(('0.0.0.0', API_PORT), APIHandler)
|
||||
|
|
|
|||
183
central_client.py
Normal file
183
central_client.py
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
"""
|
||||
connectd/central_client.py - client for connectd-central API
|
||||
|
||||
provides similar interface to local Database class but uses remote API.
|
||||
allows distributed instances to share data and coordinate outreach.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
from typing import Optional, List, Dict, Any, Tuple
|
||||
from datetime import datetime
|
||||
|
||||
CENTRAL_API = os.environ.get('CONNECTD_CENTRAL_API', '')
|
||||
API_KEY = os.environ.get('CONNECTD_API_KEY', '')
|
||||
INSTANCE_ID = os.environ.get('CONNECTD_INSTANCE_ID', 'default')
|
||||
|
||||
|
||||
class CentralClient:
    """client for the connectd-central API.

    Mirrors the local Database interface but talks to the remote
    coordination server over HTTP, so distributed instances can share
    humans, matches, and outreach claims.
    """

    # applied to every HTTP call; the original passed no timeout, so a
    # hung central server would block the daemon forever
    TIMEOUT = 30

    def __init__(self, api_url: str = None, api_key: str = None, instance_id: str = None):
        """Args default to the CONNECTD_* environment variables.

        Raises ValueError when no API key is available.
        """
        self.api_url = api_url or CENTRAL_API
        self.api_key = api_key or API_KEY
        self.instance_id = instance_id or INSTANCE_ID
        self.headers = {
            'X-API-Key': self.api_key,
            'Content-Type': 'application/json'
        }

        if not self.api_key:
            raise ValueError('CONNECTD_API_KEY environment variable required')

    def _get(self, endpoint: str, params: dict = None) -> dict:
        """GET endpoint; raises for HTTP errors, returns parsed JSON."""
        resp = requests.get(f'{self.api_url}{endpoint}', headers=self.headers,
                            params=params, timeout=self.TIMEOUT)
        resp.raise_for_status()
        return resp.json()

    def _post(self, endpoint: str, data: dict) -> dict:
        """POST JSON to endpoint; raises for HTTP errors, returns parsed JSON."""
        resp = requests.post(f'{self.api_url}{endpoint}', headers=self.headers,
                             json=data, timeout=self.TIMEOUT)
        resp.raise_for_status()
        return resp.json()

    # === HUMANS ===

    def get_human(self, human_id: int) -> Optional[dict]:
        """fetch one human by id; None on any request/parse failure."""
        try:
            return self._get(f'/humans/{human_id}')
        except (requests.RequestException, ValueError):
            # narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; ValueError covers JSON decode failures
            return None

    def get_humans(self, platform: str = None, user_type: str = None,
                   min_score: float = 0, limit: int = 100, offset: int = 0) -> List[dict]:
        """list humans, optionally filtered by platform and user_type."""
        params = {'min_score': min_score, 'limit': limit, 'offset': offset}
        if platform:
            params['platform'] = platform
        if user_type:
            params['user_type'] = user_type
        result = self._get('/humans', params)
        return result.get('humans', [])

    def get_all_humans(self, min_score: float = 0, limit: int = 100000) -> List[dict]:
        """get all humans (for matching)"""
        return self.get_humans(min_score=min_score, limit=limit)

    def get_lost_builders(self, min_score: float = 30, limit: int = 100) -> List[dict]:
        """get lost builders for outreach"""
        return self.get_humans(user_type='lost', min_score=min_score, limit=limit)

    def get_builders(self, min_score: float = 50, limit: int = 100) -> List[dict]:
        """get active builders"""
        return self.get_humans(user_type='builder', min_score=min_score, limit=limit)

    def upsert_human(self, human: dict) -> int:
        """create or update human, returns id"""
        result = self._post('/humans', human)
        return result.get('id')

    def upsert_humans_bulk(self, humans: List[dict]) -> Tuple[int, int]:
        """bulk upsert humans, returns (created, updated)"""
        result = self._post('/humans/bulk', humans)
        return result.get('created', 0), result.get('updated', 0)

    # === MATCHES ===

    def get_matches(self, min_score: float = 0, limit: int = 100, offset: int = 0) -> List[dict]:
        """list matches above min_score."""
        params = {'min_score': min_score, 'limit': limit, 'offset': offset}
        result = self._get('/matches', params)
        return result.get('matches', [])

    def create_match(self, human_a_id: int, human_b_id: int,
                     overlap_score: float, overlap_reasons: str = None) -> int:
        """create match, returns id"""
        result = self._post('/matches', {
            'human_a_id': human_a_id,
            'human_b_id': human_b_id,
            'overlap_score': overlap_score,
            'overlap_reasons': overlap_reasons
        })
        return result.get('id')

    def create_matches_bulk(self, matches: List[dict]) -> int:
        """bulk create matches, returns count"""
        result = self._post('/matches/bulk', matches)
        return result.get('created', 0)

    # === OUTREACH COORDINATION ===

    def get_pending_outreach(self, outreach_type: str = None, limit: int = 50) -> List[dict]:
        """get pending outreach that hasn't been claimed"""
        params = {'limit': limit}
        if outreach_type:
            params['outreach_type'] = outreach_type
        result = self._get('/outreach/pending', params)
        return result.get('pending', [])

    def claim_outreach(self, human_id: int, match_id: int = None,
                       outreach_type: str = 'intro') -> Optional[int]:
        """claim outreach for a human, returns outreach_id or None if already claimed"""
        try:
            result = self._post('/outreach/claim', {
                'human_id': human_id,
                'match_id': match_id,
                'outreach_type': outreach_type
            })
            return result.get('outreach_id')
        except requests.exceptions.HTTPError as e:
            # e.response can be None when the error arose before a response
            # existed; guard before dereferencing status_code
            if e.response is not None and e.response.status_code == 409:
                return None  # already claimed by another instance
            raise

    def complete_outreach(self, outreach_id: int, status: str,
                          sent_via: str = None, draft: str = None, error: str = None):
        """mark outreach as complete"""
        self._post('/outreach/complete', {
            'outreach_id': outreach_id,
            'status': status,
            'sent_via': sent_via,
            'draft': draft,
            'error': error
        })

    def get_outreach_history(self, status: str = None, limit: int = 100) -> List[dict]:
        """list outreach history, optionally filtered by status."""
        params = {'limit': limit}
        if status:
            params['status'] = status
        result = self._get('/outreach/history', params)
        return result.get('history', [])

    def already_contacted(self, human_id: int) -> bool:
        """check if human has been contacted (status 'sent' in history)."""
        history = self._get('/outreach/history', {'limit': 10000})
        sent = history.get('history', [])
        return any(h['human_id'] == human_id and h['status'] == 'sent' for h in sent)

    # === STATS ===

    def get_stats(self) -> dict:
        """fetch aggregate stats from central."""
        return self._get('/stats')

    # === INSTANCE MANAGEMENT ===

    def register_instance(self, name: str, host: str):
        """register this instance with central"""
        from urllib.parse import quote
        # URL-encode so names/hosts containing '&', '=', spaces, etc.
        # cannot mangle or inject into the query string
        self._post(f'/instances/register?name={quote(name)}&host={quote(host)}', {})

    def get_instances(self) -> List[dict]:
        """list registered instances."""
        result = self._get('/instances')
        return result.get('instances', [])

    # === HEALTH ===

    def health_check(self) -> bool:
        """True when central responds with status 'ok'; False on any failure."""
        try:
            result = self._get('/health')
            return result.get('status') == 'ok'
        except (requests.RequestException, ValueError):
            # narrowed from a bare `except:` — see get_human
            return False
|
||||
|
||||
|
||||
def get_client() -> CentralClient:
    """Convenience constructor: build a client from the CONNECTD_* env defaults."""
    client = CentralClient()
    return client
|
||||
45
config.py
45
config.py
|
|
@ -22,7 +22,7 @@ CACHE_DIR.mkdir(exist_ok=True)
|
|||
SCOUT_INTERVAL = 3600 * 4 # full scout every 4 hours
|
||||
MATCH_INTERVAL = 3600 # check matches every hour
|
||||
INTRO_INTERVAL = 1800 # send intros every 2 hours
|
||||
MAX_INTROS_PER_DAY = 250 # rate limit builder-to-builder outreach
|
||||
MAX_INTROS_PER_DAY = 1000 # rate limit builder-to-builder outreach
|
||||
|
||||
|
||||
# === MATCHING CONFIG ===
|
||||
|
|
@ -42,7 +42,7 @@ LOST_CONFIG = {
|
|||
|
||||
# outreach settings
|
||||
'enabled': True,
|
||||
'max_per_day': 20, # lower volume, higher care
|
||||
'max_per_day': 100, # lower volume, higher care
|
||||
'require_review': False, # fully autonomous
|
||||
'cooldown_days': 90, # don't spam struggling people
|
||||
|
||||
|
|
@ -70,6 +70,47 @@ GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
|
|||
GROQ_MODEL = os.environ.get('GROQ_MODEL', 'llama-3.3-70b-versatile')
|
||||
|
||||
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '')
|
||||
|
||||
# === FORGE TOKENS ===
|
||||
# for creating issues on self-hosted git forges
|
||||
# each forge needs its own token from that instance
|
||||
#
|
||||
# CODEBERG: Settings -> Applications -> Generate Token (repo:write scope)
|
||||
# GITEA/FORGEJO: Settings -> Applications -> Generate Token
|
||||
# GITLAB: Settings -> Access Tokens -> Personal Access Token (api scope)
|
||||
# SOURCEHUT: Settings -> Personal Access Tokens (uses email instead)
|
||||
|
||||
CODEBERG_TOKEN = os.environ.get('CODEBERG_TOKEN', '')
|
||||
GITEA_TOKENS = {} # instance_url -> token, loaded from env
|
||||
GITLAB_TOKENS = {} # instance_url -> token, loaded from env
|
||||
|
||||
# parse GITEA_TOKENS from env
|
||||
# format: GITEA_TOKEN_192_168_1_8_3259=token -> http://192.168.1.8:3259
|
||||
# format: GITEA_TOKEN_codeberg_org=token -> https://codeberg.org
|
||||
def _parse_instance_url(env_key, prefix):
|
||||
"""convert env key to instance URL"""
|
||||
raw = env_key.replace(prefix, '')
|
||||
parts = raw.split('_')
|
||||
|
||||
# check if last part is a port number
|
||||
if parts[-1].isdigit() and len(parts[-1]) <= 5:
|
||||
port = parts[-1]
|
||||
host = '.'.join(parts[:-1])
|
||||
# local IPs use http
|
||||
if host.startswith('192.168.') or host.startswith('10.') or host == 'localhost':
|
||||
return f'http://{host}:{port}'
|
||||
return f'https://{host}:{port}'
|
||||
else:
|
||||
host = '.'.join(parts)
|
||||
return f'https://{host}'
|
||||
|
||||
for key, value in os.environ.items():
|
||||
if key.startswith('GITEA_TOKEN_'):
|
||||
url = _parse_instance_url(key, 'GITEA_TOKEN_')
|
||||
GITEA_TOKENS[url] = value
|
||||
elif key.startswith('GITLAB_TOKEN_'):
|
||||
url = _parse_instance_url(key, 'GITLAB_TOKEN_')
|
||||
GITLAB_TOKENS[url] = value
|
||||
MASTODON_TOKEN = os.environ.get('MASTODON_TOKEN', '')
|
||||
MASTODON_INSTANCE = os.environ.get('MASTODON_INSTANCE', '')
|
||||
|
||||
|
|
|
|||
150
daemon.py
150
daemon.py
|
|
@ -12,6 +12,7 @@ runs continuously, respects rate limits, sends intros automatically
|
|||
import time
|
||||
import json
|
||||
import signal
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
|
@ -20,13 +21,14 @@ from db import Database
|
|||
from db.users import (init_users_table, get_priority_users, save_priority_match,
|
||||
get_priority_user_matches, discover_host_user)
|
||||
from scoutd import scrape_github, scrape_reddit, scrape_mastodon, scrape_lobsters, scrape_lemmy, scrape_discord
|
||||
from config import HOST_USER, INTRO_INTERVAL, MAX_INTROS_PER_DAY, SCOUT_INTERVAL, MATCH_INTERVAL
|
||||
from scoutd.forges import scrape_all_forges
|
||||
from config import HOST_USER
|
||||
from scoutd.github import analyze_github_user, get_github_user
|
||||
from scoutd.signals import analyze_text
|
||||
from matchd.fingerprint import generate_fingerprint, fingerprint_similarity
|
||||
from matchd.overlap import find_overlap
|
||||
from matchd.lost import find_matches_for_lost_builders
|
||||
from introd.groq_draft import draft_intro_with_llm as draft_intro
|
||||
from introd.draft import draft_intro, summarize_human, summarize_overlap
|
||||
from introd.lost_intro import draft_lost_intro, get_lost_intro_config
|
||||
from introd.send import send_email
|
||||
from introd.deliver import deliver_intro, determine_best_contact
|
||||
|
|
@ -34,7 +36,19 @@ from config import get_lost_config
|
|||
from api import start_api_thread, update_daemon_state
|
||||
|
||||
# daemon config
|
||||
SCOUT_INTERVAL = 3600 * 4 # full scout every 4 hours
|
||||
MATCH_INTERVAL = 3600 # check matches every hour
|
||||
INTRO_INTERVAL = 3600 * 2 # send intros every 2 hours
|
||||
LOST_INTERVAL = 3600 * 6 # lost builder outreach every 6 hours (lower volume)
|
||||
from config import MAX_INTROS_PER_DAY
|
||||
|
||||
# central coordination (optional - for distributed instances)
|
||||
try:
|
||||
from central_client import CentralClient
|
||||
CENTRAL_ENABLED = bool(os.environ.get('CONNECTD_API_KEY'))
|
||||
except ImportError:
|
||||
CENTRAL_ENABLED = False
|
||||
CentralClient = None # from config.py
|
||||
MIN_OVERLAP_PRIORITY = 30 # min score for priority user matches
|
||||
MIN_OVERLAP_STRANGERS = 50 # higher bar for stranger intros
|
||||
|
||||
|
|
@ -43,6 +57,9 @@ class ConnectDaemon:
|
|||
def __init__(self, dry_run=False):
|
||||
self.db = Database()
|
||||
init_users_table(self.db.conn)
|
||||
purged = self.db.purge_disqualified()
|
||||
if any(purged.values()):
|
||||
self.log(f"purged disqualified: {purged}")
|
||||
self.running = True
|
||||
self.dry_run = dry_run
|
||||
self.started_at = datetime.now()
|
||||
|
|
@ -52,6 +69,18 @@ class ConnectDaemon:
|
|||
self.last_lost = None
|
||||
self.intros_today = 0
|
||||
self.lost_intros_today = 0
|
||||
|
||||
# central coordination
|
||||
self.central = None
|
||||
if CENTRAL_ENABLED:
|
||||
try:
|
||||
self.central = CentralClient()
|
||||
instance_id = os.environ.get('CONNECTD_INSTANCE_ID', 'unknown')
|
||||
self.central.register_instance(instance_id, os.environ.get('CONNECTD_INSTANCE_IP', 'unknown'))
|
||||
self.log(f"connected to central API as {instance_id}")
|
||||
except Exception as e:
|
||||
self.log(f"central API unavailable: {e}")
|
||||
self.central = None
|
||||
self.today = datetime.now().date()
|
||||
|
||||
# handle shutdown gracefully
|
||||
|
|
@ -108,6 +137,18 @@ class ConnectDaemon:
|
|||
self.today = datetime.now().date()
|
||||
self.intros_today = 0
|
||||
self.lost_intros_today = 0
|
||||
|
||||
# central coordination
|
||||
self.central = None
|
||||
if CENTRAL_ENABLED:
|
||||
try:
|
||||
self.central = CentralClient()
|
||||
instance_id = os.environ.get('CONNECTD_INSTANCE_ID', 'unknown')
|
||||
self.central.register_instance(instance_id, os.environ.get('CONNECTD_INSTANCE_IP', 'unknown'))
|
||||
self.log(f"connected to central API as {instance_id}")
|
||||
except Exception as e:
|
||||
self.log(f"central API unavailable: {e}")
|
||||
self.central = None
|
||||
self.log("reset daily intro limits")
|
||||
|
||||
def scout_cycle(self):
|
||||
|
|
@ -126,6 +167,16 @@ class ConnectDaemon:
|
|||
|
||||
try:
|
||||
scrape_mastodon(self.db, limit_per_instance=30)
|
||||
|
||||
# scrape self-hosted git forges (highest signal)
|
||||
self.log("scraping self-hosted git forges...")
|
||||
try:
|
||||
forge_humans = scrape_all_forges(limit_per_instance=30)
|
||||
for h in forge_humans:
|
||||
self.db.upsert_human(h)
|
||||
self.log(f" forges: {len(forge_humans)} humans")
|
||||
except Exception as e:
|
||||
self.log(f" forge scrape error: {e}")
|
||||
except Exception as e:
|
||||
self.log(f"mastodon scout error: {e}")
|
||||
|
||||
|
|
@ -157,7 +208,7 @@ class ConnectDaemon:
|
|||
|
||||
self.log(f"matching for {len(priority_users)} priority users...")
|
||||
|
||||
humans = self.db.get_all_humans(min_score=20, limit=500)
|
||||
humans = self.db.get_all_humans(min_score=20)
|
||||
|
||||
for puser in priority_users:
|
||||
# build priority user's fingerprint from their linked profiles
|
||||
|
|
@ -230,7 +281,7 @@ class ConnectDaemon:
|
|||
"""find matches between discovered humans (altruistic)"""
|
||||
self.log("matching strangers...")
|
||||
|
||||
humans = self.db.get_all_humans(min_score=40, limit=200)
|
||||
humans = self.db.get_all_humans(min_score=40)
|
||||
|
||||
if len(humans) < 2:
|
||||
return
|
||||
|
|
@ -256,7 +307,7 @@ class ConnectDaemon:
|
|||
|
||||
overlap = find_overlap(human_a, human_b, fp_a, fp_b)
|
||||
|
||||
if overlap['overlap_score'] >= MIN_OVERLAP_STRANGERS:
|
||||
if overlap and overlap["overlap_score"] >= MIN_OVERLAP_STRANGERS:
|
||||
# save match
|
||||
self.db.save_match(human_a['id'], human_b['id'], overlap)
|
||||
matches_found += 1
|
||||
|
|
@ -266,6 +317,37 @@ class ConnectDaemon:
|
|||
|
||||
self.last_match = datetime.now()
|
||||
|
||||
def claim_from_central(self, human_id, match_id=None, outreach_type='intro'):
    """claim outreach for a human on central.

    Returns the central outreach_id, None when another instance already
    claimed it, or the -1 "always allow" sentinel in local mode or when
    central errors out.
    """
    central = self.central
    if not central:
        return -1  # no central configured: local mode, always allow
    try:
        claimed = central.claim_outreach(human_id, match_id, outreach_type)
    except Exception as exc:
        # best-effort coordination: fall back to local behavior on failure
        self.log(f"central claim error: {exc}")
        return -1
    return claimed
|
||||
|
||||
def complete_on_central(self, outreach_id, status, sent_via=None, draft=None, error=None):
    """report a finished outreach back to central.

    No-op in local mode or for the -1 local-sentinel id; central errors
    are logged, never raised.
    """
    if outreach_id == -1 or not self.central:
        return
    try:
        self.central.complete_outreach(outreach_id, status, sent_via, draft, error)
    except Exception as exc:
        self.log(f"central complete error: {exc}")
|
||||
|
||||
def sync_to_central(self, humans=None, matches=None):
    """push local humans/matches up to central (best-effort; errors are logged)."""
    central = self.central
    if not central:
        return
    try:
        # humans first, then matches, matching the original upload order
        for payload, push in ((humans, central.upsert_humans_bulk),
                              (matches, central.create_matches_bulk)):
            if payload:
                push(payload)
    except Exception as exc:
        self.log(f"central sync error: {exc}")
|
||||
|
||||
def send_stranger_intros(self):
|
||||
"""send intros to connect strangers (or preview in dry-run mode)"""
|
||||
self.reset_daily_limits()
|
||||
|
|
@ -331,29 +413,18 @@ class ConnectDaemon:
|
|||
'overlap_reasons': match['overlap_reasons'],
|
||||
}
|
||||
|
||||
# ACTIVITY-BASED CONTACT SELECTION
|
||||
# use deliver_intro which calls determine_best_contact
|
||||
# picks method based on WHERE they're most active:
|
||||
# - mastodon DM if active on fediverse
|
||||
# - github issue if actively committing
|
||||
# - email ONLY as last resort
|
||||
|
||||
# try to send intro to person with email
|
||||
for recipient, other in [(human_a, human_b), (human_b, human_a)]:
|
||||
# draft intro using groq LLM
|
||||
# retry groq up to 3 times with 10s wait
|
||||
intro_result, intro_error = None, None
|
||||
for retry in range(3):
|
||||
intro_result, intro_error = draft_intro(match_data, recipient='a' if recipient == human_a else 'b')
|
||||
if not intro_error:
|
||||
break
|
||||
self.log(f"groq retry {retry+1}/3: {intro_error}")
|
||||
import time
|
||||
time.sleep(10)
|
||||
contact = recipient.get('contact', {})
|
||||
if isinstance(contact, str):
|
||||
contact = json.loads(contact)
|
||||
|
||||
if intro_error:
|
||||
self.log(f"failed to draft intro after retries: {intro_error}")
|
||||
email = contact.get('email')
|
||||
if not email:
|
||||
continue
|
||||
intro = {'draft': intro_result.get('draft', '')}
|
||||
|
||||
# draft intro
|
||||
intro = draft_intro(match_data, recipient='a' if recipient == human_a else 'b')
|
||||
|
||||
# parse overlap reasons for display
|
||||
reasons = match['overlap_reasons']
|
||||
|
|
@ -361,13 +432,12 @@ class ConnectDaemon:
|
|||
reasons = json.loads(reasons)
|
||||
reason_summary = ', '.join(reasons[:3]) if reasons else 'aligned values'
|
||||
|
||||
# determine best contact method based on activity
|
||||
method, contact_info = determine_best_contact(recipient)
|
||||
|
||||
if self.dry_run:
|
||||
# print preview
|
||||
print("\n" + "=" * 60)
|
||||
print(f"TO: {recipient['username']} ({recipient['platform']})")
|
||||
print(f"METHOD: {method} -> {contact_info}")
|
||||
print(f"EMAIL: {email}")
|
||||
print(f"SUBJECT: you might want to meet {other['username']}")
|
||||
print(f"SCORE: {match['overlap_score']:.0f} ({reason_summary})")
|
||||
print("-" * 60)
|
||||
print("MESSAGE:")
|
||||
|
|
@ -377,12 +447,23 @@ class ConnectDaemon:
|
|||
print("=" * 60)
|
||||
break
|
||||
else:
|
||||
# deliver via activity-based method selection
|
||||
success, error, delivery_method = deliver_intro(match_data, intro['draft'], intro.get('subject'))
|
||||
# claim from central first
|
||||
outreach_id = self.claim_from_central(recipient['id'], match['id'], 'intro')
|
||||
if outreach_id is None:
|
||||
self.log(f"skipping {recipient['username']} - already claimed by another instance")
|
||||
continue
|
||||
|
||||
# actually send
|
||||
success, error = send_email(
|
||||
email,
|
||||
f"connectd: you might want to meet {other['username']}",
|
||||
intro['draft']
|
||||
)
|
||||
|
||||
if success:
|
||||
self.log(f"sent intro to {recipient['username']} via {delivery_method}")
|
||||
self.log(f"sent intro to {recipient['username']} ({email})")
|
||||
self.intros_today += 1
|
||||
self.complete_on_central(outreach_id, 'sent', 'email', intro['draft'])
|
||||
|
||||
# mark match as intro_sent
|
||||
c.execute('UPDATE matches SET status = "intro_sent" WHERE id = ?',
|
||||
|
|
@ -390,7 +471,8 @@ class ConnectDaemon:
|
|||
self.db.conn.commit()
|
||||
break
|
||||
else:
|
||||
self.log(f"failed to reach {recipient['username']} via {delivery_method}: {error}")
|
||||
self.log(f"failed to send to {email}: {error}")
|
||||
self.complete_on_central(outreach_id, 'failed', error=error)
|
||||
|
||||
self.last_intro = datetime.now()
|
||||
|
||||
|
|
@ -475,7 +557,7 @@ class ConnectDaemon:
|
|||
'overlap_reasons': match.get('shared_interests', []),
|
||||
}
|
||||
|
||||
success, error, delivery_method = deliver_intro(match_data, draft, None)
|
||||
success, error, delivery_method = deliver_intro(match_data, draft)
|
||||
|
||||
if success:
|
||||
self.log(f"sent lost builder intro to {lost_name} via {delivery_method}")
|
||||
|
|
|
|||
63
db_init.py
63
db_init.py
|
|
@ -183,7 +183,7 @@ class Database:
|
|||
row = c.fetchone()
|
||||
return dict(row) if row else None
|
||||
|
||||
def get_all_humans(self, min_score=0, limit=1000):
|
||||
def get_all_humans(self, min_score=0, limit=100000):
|
||||
"""get all humans above score threshold"""
|
||||
c = self.conn.cursor()
|
||||
c.execute('''SELECT * FROM humans
|
||||
|
|
@ -373,3 +373,64 @@ class Database:
|
|||
|
||||
def close(self):
|
||||
self.conn.close()
|
||||
|
||||
def purge_disqualified(self):
    """
    auto-cleanup: remove all matches/intros involving users with disqualifying signals
    DISQUALIFYING: maga, conspiracy, conservative, antivax, sovcit

    Returns a dict of {step_name: rows_deleted}.
    """
    cur = self.conn.cursor()

    # patterns to match disqualifying signals; these are hard-coded
    # constants, so interpolating them into SQL below is not an
    # injection risk
    disq_patterns = ["maga", "conspiracy", "conservative", "antivax", "sovcit"]
    neg_check = " OR ".join([f"negative_signals LIKE '%{p}%'" for p in disq_patterns])

    # ordered cleanup plan: first rows referencing disqualified humans,
    # then rows whose referenced humans no longer exist at all
    plan = [
        ("intros", f"""
        DELETE FROM intros WHERE recipient_human_id IN (
            SELECT id FROM humans WHERE {neg_check}
        )
        """),
        ("priority_matches", f"""
        DELETE FROM priority_matches WHERE matched_human_id IN (
            SELECT id FROM humans WHERE {neg_check}
        )
        """),
        ("matches", f"""
        DELETE FROM matches WHERE
            human_a_id IN (SELECT id FROM humans WHERE {neg_check})
            OR human_b_id IN (SELECT id FROM humans WHERE {neg_check})
        """),
        ("orphaned_matches", """
        DELETE FROM matches WHERE
            NOT EXISTS (SELECT 1 FROM humans h WHERE h.id = human_a_id)
            OR NOT EXISTS (SELECT 1 FROM humans h WHERE h.id = human_b_id)
        """),
        ("orphaned_priority", """
        DELETE FROM priority_matches WHERE
            NOT EXISTS (SELECT 1 FROM humans h WHERE h.id = matched_human_id)
        """),
        ("orphaned_intros", """
        DELETE FROM intros WHERE
            NOT EXISTS (SELECT 1 FROM humans h WHERE h.id = recipient_human_id)
        """),
    ]

    purged = {}
    for label, sql in plan:
        cur.execute(sql)
        purged[label] = cur.rowcount

    self.conn.commit()
    return purged
|
||||
|
|
|
|||
161
deliver.py
161
deliver.py
|
|
@ -147,6 +147,87 @@ def create_github_issue(owner, repo, title, body, dry_run=False):
|
|||
return False, str(e)
|
||||
|
||||
|
||||
def create_forge_issue(platform_type, instance_url, owner, repo, title, body, dry_run=False):
    """
    create an issue on a self-hosted git forge.

    supports gitea/forgejo/gogs (shared API) and gitlab. sourcehut has no
    issue API (mailing lists), so it is rejected with a pointer to email.

    returns (success, info): info is the created issue URL on success, or
    an error string on failure.
    """
    if dry_run:
        print(f" [dry run] would create issue on {platform_type}:{instance_url}/{owner}/{repo}")
        return True, None

    # imported after the dry-run short-circuit so previews don't require
    # a configured environment (the original imported first and could
    # fail a dry run on config errors)
    from config import CODEBERG_TOKEN, GITEA_TOKENS, GITLAB_TOKENS

    try:
        if platform_type in ('gitea', 'forgejo', 'gogs'):
            # codeberg is a hosted forgejo with its own dedicated token;
            # everything else looks up a per-instance token
            if 'codeberg.org' in instance_url:
                token = CODEBERG_TOKEN
            else:
                token = GITEA_TOKENS.get(instance_url)

            if not token:
                return False, f"no auth token for {instance_url}"

            # gitea API (shared by forgejo and gogs)
            api_url = f"{instance_url}/api/v1/repos/{owner}/{repo}/issues"
            headers = {
                'Content-Type': 'application/json',
                'Authorization': f'token {token}'
            }
            data = {'title': title, 'body': body}

            resp = requests.post(api_url, headers=headers, json=data, timeout=15)
            if resp.status_code in (200, 201):
                return True, resp.json().get('html_url')
            return False, f"gitea api error: {resp.status_code} - {resp.text[:200]}"

        elif platform_type == 'gitlab':
            token = GITLAB_TOKENS.get(instance_url)
            if not token:
                return False, f"no auth token for {instance_url}"

            headers = {'PRIVATE-TOKEN': token}

            # look the project up by its exact URL-encoded "owner/repo"
            # path; the original used ?search=repo, which could match a
            # same-named repo under a different owner and file the issue
            # in the wrong project
            from urllib.parse import quote
            project_path = quote(f"{owner}/{repo}", safe='')
            resp = requests.get(f"{instance_url}/api/v4/projects/{project_path}",
                                headers=headers, timeout=15)
            if resp.status_code != 200:
                return False, f"gitlab project lookup failed: {resp.status_code}"

            project_id = resp.json().get('id')
            if not project_id:
                return False, f"project {repo} not found"

            # create issue
            issue_url = f"{instance_url}/api/v4/projects/{project_id}/issues"
            data = {'title': title, 'description': body}
            resp = requests.post(issue_url, headers=headers, json=data, timeout=15)

            if resp.status_code in (200, 201):
                return True, resp.json().get('web_url')
            return False, f"gitlab api error: {resp.status_code}"

        elif platform_type == 'sourcehut':
            return False, "sourcehut uses mailing lists - use email instead"

        else:
            return False, f"unknown forge type: {platform_type}"

    except Exception as e:
        # best-effort delivery: never let a forge hiccup crash the daemon
        return False, str(e)
|
||||
|
||||
|
||||
def send_mastodon_dm(recipient_acct, message, dry_run=False):
|
||||
"""send mastodon direct message"""
|
||||
if not MASTODON_TOKEN:
|
||||
|
|
@ -419,14 +500,94 @@ def deliver_intro(match_data, intro_draft, subject=None, dry_run=False):
|
|||
"""
|
||||
success, error = create_github_issue(owner, repo, title, github_body, dry_run)
|
||||
|
||||
elif method == 'forge_issue':
|
||||
# self-hosted git forge issue (gitea/forgejo/gitlab/sourcehut)
|
||||
platform_type = contact_info.get('platform_type')
|
||||
instance_url = contact_info.get('instance_url')
|
||||
owner = contact_info.get('owner')
|
||||
repo = contact_info.get('repo')
|
||||
title = subject or "community introduction from connectd"
|
||||
|
||||
# get the other person's contact info for bidirectional link
|
||||
sender = match_data.get('human_a', {})
|
||||
sender_name = sender.get('name') or sender.get('username') or 'someone'
|
||||
sender_platform = sender.get('platform', '')
|
||||
sender_url = sender.get('url', '')
|
||||
|
||||
if not sender_url:
|
||||
if sender_platform == 'github':
|
||||
sender_url = f"https://github.com/{sender.get('username')}"
|
||||
elif sender_platform == 'mastodon':
|
||||
sender_url = f"https://fosstodon.org/@{sender.get('username')}"
|
||||
elif ':' in sender_platform: # forge platform
|
||||
extra = sender.get('extra', {})
|
||||
if isinstance(extra, str):
|
||||
import json as _json
|
||||
extra = _json.loads(extra) if extra else {}
|
||||
sender_url = extra.get('instance_url', '') + '/' + sender.get('username', '')
|
||||
|
||||
forge_body = f"""hey {recipient.get('name') or recipient.get('username')},
|
||||
|
||||
{intro_draft}
|
||||
|
||||
**reach them at:** {sender_url or 'see their profile'}
|
||||
|
||||
---
|
||||
*this is an automated introduction from [connectd](https://github.com/connectd-daemon) - a daemon that finds isolated builders with aligned values and connects them.*
|
||||
|
||||
*if this feels spammy, close this issue and we won't reach out again.*
|
||||
"""
|
||||
success, error = create_forge_issue(platform_type, instance_url, owner, repo, title, forge_body, dry_run)
|
||||
|
||||
elif method == 'manual':
|
||||
# skip - no longer using manual queue
|
||||
success = False
|
||||
error = "manual method deprecated - skipping"
|
||||
|
||||
# FALLBACK CHAIN: if primary method failed, try fallbacks
|
||||
if not success and fallbacks:
|
||||
for fallback_method, fallback_info in fallbacks:
|
||||
result['fallback_attempts'] = result.get('fallback_attempts', [])
|
||||
result['fallback_attempts'].append({'method': fallback_method})
|
||||
|
||||
fb_success = False
|
||||
fb_error = None
|
||||
|
||||
if fallback_method == 'email':
|
||||
fb_success, fb_error = send_email(fallback_info, email_subject, intro_draft, dry_run)
|
||||
elif fallback_method == 'mastodon':
|
||||
fb_success, fb_error = send_mastodon_dm(fallback_info, intro_draft, dry_run)
|
||||
elif fallback_method == 'bluesky':
|
||||
fb_success, fb_error = send_bluesky_dm(fallback_info, intro_draft, dry_run)
|
||||
elif fallback_method == 'matrix':
|
||||
fb_success, fb_error = send_matrix_dm(fallback_info, intro_draft, dry_run)
|
||||
elif fallback_method == 'github_issue':
|
||||
owner = fallback_info.get('owner') if isinstance(fallback_info, dict) else fallback_info.split('/')[0]
|
||||
repo = fallback_info.get('repo') if isinstance(fallback_info, dict) else fallback_info.split('/')[1]
|
||||
fb_success, fb_error = create_github_issue(owner, repo, email_subject, intro_draft, dry_run)
|
||||
elif fallback_method == 'forge_issue':
|
||||
fb_success, fb_error = create_forge_issue(
|
||||
fallback_info.get('platform_type'),
|
||||
fallback_info.get('instance_url'),
|
||||
fallback_info.get('owner'),
|
||||
fallback_info.get('repo'),
|
||||
email_subject, intro_draft, dry_run
|
||||
)
|
||||
|
||||
if fb_success:
|
||||
success = True
|
||||
method = fallback_method
|
||||
contact_info = fallback_info
|
||||
error = None
|
||||
result['fallback_succeeded'] = fallback_method
|
||||
break
|
||||
else:
|
||||
result['fallback_attempts'][-1]['error'] = fb_error
|
||||
|
||||
# log result
|
||||
result['success'] = success
|
||||
result['error'] = error
|
||||
result['final_method'] = method
|
||||
|
||||
if success:
|
||||
log['sent'].append(result)
|
||||
|
|
|
|||
|
|
@ -21,3 +21,7 @@ services:
|
|||
- ./api.py:/app/api.py:ro
|
||||
- ./deliver.py:/app/introd/deliver.py:ro
|
||||
- ./soul.txt:/app/soul.txt:ro
|
||||
- ./scoutd/reddit.py:/app/scoutd/reddit.py:ro
|
||||
- ./matchd/overlap.py:/app/matchd/overlap.py:ro
|
||||
- ./central_client.py:/app/central_client.py:ro
|
||||
- ./scoutd/forges.py:/app/scoutd/forges.py:ro
|
||||
|
|
|
|||
782
groq_draft.py
782
groq_draft.py
|
|
@ -1,437 +1,419 @@
|
|||
"""
|
||||
introd/groq_draft.py - groq llama 4 maverick for smart intro drafting
|
||||
|
||||
uses groq api to generate personalized, natural intro messages
|
||||
that don't sound like ai-generated slop
|
||||
connectd - groq message drafting
|
||||
reads soul from file, uses as guideline for llm to personalize
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
from datetime import datetime
|
||||
from groq import Groq
|
||||
|
||||
GROQ_API_KEY = os.environ.get('GROQ_API_KEY', '')
|
||||
GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
|
||||
MODEL = os.environ.get('GROQ_MODEL', 'llama-3.1-70b-versatile')
|
||||
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
||||
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
|
||||
|
||||
client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
|
||||
|
||||
def determine_contact_method(human):
|
||||
"""
|
||||
determine best contact method based on WHERE THEY'RE MOST ACTIVE
|
||||
|
||||
don't use fixed hierarchy - analyze activity per platform:
|
||||
- count posts/commits/activity
|
||||
- weight by recency (last 30 days matters more)
|
||||
- contact them where they already are
|
||||
- fall back to email only if no social activity
|
||||
"""
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
extra = human.get('extra', {})
|
||||
if isinstance(extra, str):
|
||||
extra = json.loads(extra) if extra else {}
|
||||
|
||||
# handle nested extra.extra from old save format
|
||||
if 'extra' in extra and isinstance(extra['extra'], dict):
|
||||
extra = {**extra, **extra['extra']}
|
||||
|
||||
contact = human.get('contact', {})
|
||||
if isinstance(contact, str):
|
||||
contact = json.loads(contact) if contact else {}
|
||||
|
||||
# collect activity scores per platform
|
||||
activity_scores = {}
|
||||
now = datetime.now()
|
||||
thirty_days_ago = now - timedelta(days=30)
|
||||
ninety_days_ago = now - timedelta(days=90)
|
||||
|
||||
# github activity
|
||||
github_username = human.get('username') if human.get('platform') == 'github' else extra.get('github')
|
||||
if github_username:
|
||||
github_score = 0
|
||||
top_repos = extra.get('top_repos', [])
|
||||
|
||||
for repo in top_repos:
|
||||
# recent commits weight more
|
||||
pushed_at = repo.get('pushed_at', '')
|
||||
if pushed_at:
|
||||
# load soul from file (guideline, not script)
|
||||
SOUL_PATH = os.getenv("SOUL_PATH", "/app/soul.txt")
|
||||
def load_soul():
|
||||
try:
|
||||
push_date = datetime.fromisoformat(pushed_at.replace('Z', '+00:00')).replace(tzinfo=None)
|
||||
if push_date > thirty_days_ago:
|
||||
github_score += 10 # very recent
|
||||
elif push_date > ninety_days_ago:
|
||||
github_score += 5 # somewhat recent
|
||||
else:
|
||||
github_score += 1 # old but exists
|
||||
with open(SOUL_PATH, 'r') as f:
|
||||
return f.read().strip()
|
||||
except:
|
||||
github_score += 1
|
||||
return None
|
||||
|
||||
# stars indicate engagement
|
||||
github_score += min(repo.get('stars', 0) // 10, 5)
|
||||
SIGNATURE_HTML = """
|
||||
<div style="margin-top: 24px; padding-top: 16px; border-top: 1px solid #333;">
|
||||
<div style="margin-bottom: 12px;">
|
||||
<a href="https://github.com/sudoxnym/connectd" style="color: #8b5cf6; text-decoration: none; font-size: 14px;">github.com/sudoxnym/connectd</a>
|
||||
<span style="color: #666; font-size: 12px; margin-left: 8px;">(main repo)</span>
|
||||
</div>
|
||||
<div style="display: flex; gap: 16px; align-items: center;">
|
||||
<a href="https://github.com/connectd-daemon" title="GitHub" style="color: #888; text-decoration: none;">
|
||||
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"/></svg>
|
||||
</a>
|
||||
<a href="https://mastodon.sudoxreboot.com/@connectd" title="Mastodon" style="color: #888; text-decoration: none;">
|
||||
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M23.268 5.313c-.35-2.578-2.617-4.61-5.304-5.004C17.51.242 15.792 0 11.813 0h-.03c-3.98 0-4.835.242-5.288.309C3.882.692 1.496 2.518.917 5.127.64 6.412.61 7.837.661 9.143c.074 1.874.088 3.745.26 5.611.118 1.24.325 2.47.62 3.68.55 2.237 2.777 4.098 4.96 4.857 2.336.792 4.849.923 7.256.38.265-.061.527-.132.786-.213.585-.184 1.27-.39 1.774-.753a.057.057 0 0 0 .023-.043v-1.809a.052.052 0 0 0-.02-.041.053.053 0 0 0-.046-.01 20.282 20.282 0 0 1-4.709.545c-2.73 0-3.463-1.284-3.674-1.818a5.593 5.593 0 0 1-.319-1.433.053.053 0 0 1 .066-.054c1.517.363 3.072.546 4.632.546.376 0 .75 0 1.125-.01 1.57-.044 3.224-.124 4.768-.422.038-.008.077-.015.11-.024 2.435-.464 4.753-1.92 4.989-5.604.008-.145.03-1.52.03-1.67.002-.512.167-3.63-.024-5.545zm-3.748 9.195h-2.561V8.29c0-1.309-.55-1.976-1.67-1.976-1.23 0-1.846.79-1.846 2.35v3.403h-2.546V8.663c0-1.56-.617-2.35-1.848-2.35-1.112 0-1.668.668-1.67 1.977v6.218H4.822V8.102c0-1.31.337-2.35 1.011-3.12.696-.77 1.608-1.164 2.74-1.164 1.311 0 2.302.5 2.962 1.498l.638 1.06.638-1.06c.66-.999 1.65-1.498 2.96-1.498 1.13 0 2.043.395 2.74 1.164.675.77 1.012 1.81 1.012 3.12z"/></svg>
|
||||
</a>
|
||||
<a href="https://bsky.app/profile/connectd.bsky.social" title="Bluesky" style="color: #888; text-decoration: none;">
|
||||
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M5.202 2.857C7.954 4.922 10.913 9.11 12 11.358c1.087-2.247 4.046-6.436 6.798-8.501C20.783 1.366 24 .213 24 3.883c0 .732-.42 6.156-.667 7.037-.856 3.061-3.978 3.842-6.755 3.37 4.854.826 6.089 3.562 3.422 6.299-5.065 5.196-7.28-1.304-7.847-2.97-.104-.305-.152-.448-.153-.327 0-.121-.05.022-.153.327-.568 1.666-2.782 8.166-7.847 2.97-2.667-2.737-1.432-5.473 3.422-6.3-2.777.473-5.899-.308-6.755-3.369C.42 10.04 0 4.615 0 3.883c0-3.67 3.217-2.517 5.202-1.026"/></svg>
|
||||
</a>
|
||||
<a href="https://lemmy.sudoxreboot.com/c/connectd" title="Lemmy" style="color: #888; text-decoration: none;">
|
||||
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M2.9595 4.2228a3.9132 3.9132 0 0 0-.332.019c-.8781.1012-1.67.5699-2.155 1.3862-.475.8-.5922 1.6809-.35 2.4971.2421.8162.8297 1.5575 1.6982 2.1449.0053.0035.0106.0076.0163.0114.746.4498 1.492.7431 2.2877.8994-.02.3318-.0272.6689-.006 1.0181.0634 1.0432.4368 2.0006.996 2.8492l-2.0061.8189a.4163.4163 0 0 0-.2276.2239.416.416 0 0 0 .0879.455.415.415 0 0 0 .2941.1231.4156.4156 0 0 0 .1595-.0312l2.2093-.9035c.408.4859.8695.9315 1.3723 1.318.0196.0151.0407.0264.0603.0423l-1.2918 1.7103a.416.416 0 0 0 .664.501l1.314-1.7385c.7185.4548 1.4782.7927 2.2294 1.0242.3833.7209 1.1379 1.1871 2.0202 1.1871.8907 0 1.6442-.501 2.0242-1.2072.744-.2347 1.4959-.5729 2.2073-1.0262l1.332 1.7606a.4157.4157 0 0 0 .7439-.1936.4165.4165 0 0 0-.0799-.3074l-1.3099-1.7345c.0083-.0075.0178-.0113.0261-.0188.4968-.3803.9549-.8175 1.3622-1.2939l2.155.8794a.4156.4156 0 0 0 .5412-.2276.4151.4151 0 0 0-.2273-.5432l-1.9438-.7928c.577-.8538.9697-1.8183 1.0504-2.8693.0268-.3507.0242-.6914.0079-1.0262.7905-.1572 1.5321-.4502 2.2737-.8974.0053-.0033.011-.0076.0163-.0113.8684-.5874 1.456-1.3287 1.6982-2.145.2421-.8161.125-1.697-.3501-2.497-.4849-.8163-1.2768-1.2852-2.155-1.3863a3.2175 3.2175 0 0 0-.332-.0189c-.7852-.0151-1.6231.229-2.4286.6942-.5926.342-1.1252.867-1.5433 1.4387-1.1699-.6703-2.6923-1.0476-4.5635-1.0785a15.5768 15.5768 0 0 0-.5111 0c-2.085.034-3.7537.43-5.0142 1.1449-.0033-.0038-.0045-.0114-.008-.0152-.4233-.5916-.973-1.1365-1.5835-1.489-.8055-.465-1.6434-.7083-2.4286-.6941Zm.2858.7365c.5568.042 1.1696.2358 1.7787.5875.485.28.9757.7554 1.346 1.2696a5.6875 5.6875 0 0 0-.4969.4085c-.9201.8516-1.4615 1.9597-1.668 3.2335-.6809-.1402-1.3183-.3945-1.984-.7948-.7553-.5128-1.2159-1.1225-1.4004-1.7445-.1851-.624-.1074-1.2712.2776-1.9196.3743-.63.9275-.9534 1.6118-1.0322a2.796 2.796 0 0 1 .5352-.0076Zm17.5094 0a2.797 2.797 0 0 1 .5353.0075c.6842.0786 1.2374.4021 1.6117 1.0322.385.6484.4627 1.2957.2776 1.9196-.1845.622-.645 
1.2317-1.4004 1.7445-.6578.3955-1.2881.6472-1.9598.7888-.1942-1.2968-.7375-2.4338-1.666-3.302a5.5639 5.5639 0 0 0-.4709-.3923c.3645-.49.8287-.9428 1.2938-1.2113.6091-.3515 1.2219-.5454 1.7787-.5875ZM12.006 6.0036a14.832 14.832 0 0 1 .487 0c2.3901.0393 4.0848.67 5.1631 1.678 1.1501 1.0754 1.6423 2.6006 1.499 4.467-.1311 1.7079-1.2203 3.2281-2.652 4.324-.694.5313-1.4626.9354-2.2254 1.2294.0031-.0453.014-.0888.014-.1349.0029-1.1964-.9313-2.2133-2.2918-2.2133-1.3606 0-2.3222 1.0154-2.2918 2.2213.0013.0507.014.0972.0181.1471-.781-.2933-1.5696-.7013-2.2777-1.2456-1.4239-1.0945-2.4997-2.6129-2.6037-4.322-.1129-1.8567.3778-3.3382 1.5212-4.3965C7.5094 6.7 9.352 6.047 12.006 6.0036Zm-3.6419 6.8291c-.6053 0-1.0966.4903-1.0966 1.0966 0 .6063.4913 1.0986 1.0966 1.0986s1.0966-.4923 1.0966-1.0986c0-.6063-.4913-1.0966-1.0966-1.0966zm7.2819.0113c-.5998 0-1.0866.4859-1.0866 1.0866s.4868 1.0885 1.0866 1.0885c.5997 0 1.0865-.4878 1.0865-1.0885s-.4868-1.0866-1.0865-1.0866zM12 16.0835c1.0237 0 1.5654.638 1.5634 1.4829-.0018.7849-.6723 1.485-1.5634 1.485-.9167 0-1.54-.5629-1.5634-1.493-.0212-.8347.5397-1.4749 1.5634-1.4749Z"/></svg>
|
||||
</a>
|
||||
<a href="https://discord.gg/connectd" title="Discord" style="color: #888; text-decoration: none;">
|
||||
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M20.317 4.3698a19.7913 19.7913 0 00-4.8851-1.5152.0741.0741 0 00-.0785.0371c-.211.3753-.4447.8648-.6083 1.2495-1.8447-.2762-3.68-.2762-5.4868 0-.1636-.3933-.4058-.8742-.6177-1.2495a.077.077 0 00-.0785-.037 19.7363 19.7363 0 00-4.8852 1.515.0699.0699 0 00-.0321.0277C.5334 9.0458-.319 13.5799.0992 18.0578a.0824.0824 0 00.0312.0561c2.0528 1.5076 4.0413 2.4228 5.9929 3.0294a.0777.0777 0 00.0842-.0276c.4616-.6304.8731-1.2952 1.226-1.9942a.076.076 0 00-.0416-.1057c-.6528-.2476-1.2743-.5495-1.8722-.8923a.077.077 0 01-.0076-.1277c.1258-.0943.2517-.1923.3718-.2914a.0743.0743 0 01.0776-.0105c3.9278 1.7933 8.18 1.7933 12.0614 0a.0739.0739 0 01.0785.0095c.1202.099.246.1981.3728.2924a.077.077 0 01-.0066.1276 12.2986 12.2986 0 01-1.873.8914.0766.0766 0 00-.0407.1067c.3604.698.7719 1.3628 1.225 1.9932a.076.076 0 00.0842.0286c1.961-.6067 3.9495-1.5219 6.0023-3.0294a.077.077 0 00.0313-.0552c.5004-5.177-.8382-9.6739-3.5485-13.6604a.061.061 0 00-.0312-.0286zM8.02 15.3312c-1.1825 0-2.1569-1.0857-2.1569-2.419 0-1.3332.9555-2.4189 2.157-2.4189 1.2108 0 2.1757 1.0952 2.1568 2.419 0 1.3332-.9555 2.4189-2.1569 2.4189zm7.9748 0c-1.1825 0-2.1569-1.0857-2.1569-2.419 0-1.3332.9554-2.4189 2.1569-2.4189 1.2108 0 2.1757 1.0952 2.1568 2.419 0 1.3332-.946 2.4189-2.1568 2.4189Z"/></svg>
|
||||
</a>
|
||||
<a href="https://matrix.to/#/@connectd:sudoxreboot.com" title="Matrix" style="color: #888; text-decoration: none;">
|
||||
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M.632.55v22.9H2.28V24H0V0h2.28v.55zm7.043 7.26v1.157h.033c.309-.443.683-.784 1.117-1.024.433-.245.936-.365 1.5-.365.54 0 1.033.107 1.481.314.448.208.785.582 1.02 1.108.254-.374.6-.706 1.034-.992.434-.287.95-.43 1.546-.43.453 0 .872.056 1.26.167.388.11.716.286.993.53.276.245.489.559.646.951.152.392.23.863.23 1.417v5.728h-2.349V11.52c0-.286-.01-.559-.032-.812a1.755 1.755 0 0 0-.18-.66 1.106 1.106 0 0 0-.438-.448c-.194-.11-.457-.166-.785-.166-.332 0-.6.064-.803.189a1.38 1.38 0 0 0-.48.499 1.946 1.946 0 0 0-.231.696 5.56 5.56 0 0 0-.06.785v4.768h-2.35v-4.8c0-.254-.004-.503-.018-.752a2.074 2.074 0 0 0-.143-.688 1.052 1.052 0 0 0-.415-.503c-.194-.125-.476-.19-.854-.19-.111 0-.259.024-.439.074-.18.051-.36.143-.53.282-.171.138-.319.337-.439.595-.12.259-.18.6-.18 1.02v4.966H5.46V7.81zm15.693 15.64V.55H21.72V0H24v24h-2.28v-.55z"/></svg>
|
||||
</a>
|
||||
<a href="https://reddit.com/r/connectd" title="Reddit" style="color: #888; text-decoration: none;">
|
||||
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0C5.373 0 0 5.373 0 12c0 3.314 1.343 6.314 3.515 8.485l-2.286 2.286C.775 23.225 1.097 24 1.738 24H12c6.627 0 12-5.373 12-12S18.627 0 12 0Zm4.388 3.199c1.104 0 1.999.895 1.999 1.999 0 1.105-.895 2-1.999 2-.946 0-1.739-.657-1.947-1.539v.002c-1.147.162-2.032 1.15-2.032 2.341v.007c1.776.067 3.4.567 4.686 1.363.473-.363 1.064-.58 1.707-.58 1.547 0 2.802 1.254 2.802 2.802 0 1.117-.655 2.081-1.601 2.531-.088 3.256-3.637 5.876-7.997 5.876-4.361 0-7.905-2.617-7.998-5.87-.954-.447-1.614-1.415-1.614-2.538 0-1.548 1.255-2.802 2.803-2.802.645 0 1.239.218 1.712.585 1.275-.79 2.881-1.291 4.64-1.365v-.01c0-1.663 1.263-3.034 2.88-3.207.188-.911.993-1.595 1.959-1.595Zm-8.085 8.376c-.784 0-1.459.78-1.506 1.797-.047 1.016.64 1.429 1.426 1.429.786 0 1.371-.369 1.418-1.385.047-1.017-.553-1.841-1.338-1.841Zm7.406 0c-.786 0-1.385.824-1.338 1.841.047 1.017.634 1.385 1.418 1.385.785 0 1.473-.413 1.426-1.429-.046-1.017-.721-1.797-1.506-1.797Zm-3.703 4.013c-.974 0-1.907.048-2.77.135-.147.015-.241.168-.183.305.483 1.154 1.622 1.964 2.953 1.964 1.33 0 2.47-.81 2.953-1.964.057-.137-.037-.29-.184-.305-.863-.087-1.795-.135-2.769-.135Z"/></svg>
|
||||
</a>
|
||||
<a href="mailto:connectd@sudoxreboot.com" title="Email" style="color: #888; text-decoration: none;">
|
||||
<svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M1.5 8.67v8.58a3 3 0 003 3h15a3 3 0 003-3V8.67l-8.928 5.493a3 3 0 01-3.144 0L1.5 8.67z"/><path d="M22.5 6.908V6.75a3 3 0 00-3-3h-15a3 3 0 00-3 3v.158l9.714 5.978a1.5 1.5 0 001.572 0L22.5 6.908z"/></svg>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
"""
|
||||
|
||||
# commit activity from deep scrape
|
||||
commit_count = extra.get('commit_count', 0)
|
||||
github_score += min(commit_count // 10, 20)
|
||||
SIGNATURE_PLAINTEXT = """
|
||||
---
|
||||
github.com/sudoxnym/connectd (main repo)
|
||||
|
||||
if github_score > 0:
|
||||
activity_scores['github_issue'] = {
|
||||
'score': github_score,
|
||||
'info': f"{github_username}/{top_repos[0]['name']}" if top_repos else github_username
|
||||
}
|
||||
|
||||
# mastodon activity
|
||||
mastodon_handle = human.get('username') if human.get('platform') == 'mastodon' else (extra.get('mastodon') or contact.get('mastodon'))
|
||||
if mastodon_handle:
|
||||
mastodon_score = 0
|
||||
statuses_count = extra.get('mastodon_statuses', 0) or human.get('statuses_count', 0)
|
||||
|
||||
# high post count = active user
|
||||
if statuses_count > 1000:
|
||||
mastodon_score += 30
|
||||
elif statuses_count > 500:
|
||||
mastodon_score += 20
|
||||
elif statuses_count > 100:
|
||||
mastodon_score += 10
|
||||
elif statuses_count > 0:
|
||||
mastodon_score += 5
|
||||
|
||||
# platform bonus for fediverse (values-aligned)
|
||||
mastodon_score += 10
|
||||
|
||||
# bonus if handle was discovered via rel="me" or similar verification
|
||||
# (having a handle linked from their website = they want to be contacted there)
|
||||
handles = extra.get('handles', {})
|
||||
if handles.get('mastodon') == mastodon_handle:
|
||||
mastodon_score += 15 # verified handle bonus
|
||||
|
||||
if mastodon_score > 0:
|
||||
activity_scores['mastodon'] = {'score': mastodon_score, 'info': mastodon_handle}
|
||||
|
||||
# bluesky activity
|
||||
bluesky_handle = human.get('username') if human.get('platform') == 'bluesky' else (extra.get('bluesky') or contact.get('bluesky'))
|
||||
if bluesky_handle:
|
||||
bluesky_score = 0
|
||||
posts_count = extra.get('bluesky_posts', 0) or human.get('posts_count', 0)
|
||||
|
||||
if posts_count > 500:
|
||||
bluesky_score += 25
|
||||
elif posts_count > 100:
|
||||
bluesky_score += 15
|
||||
elif posts_count > 0:
|
||||
bluesky_score += 5
|
||||
|
||||
# newer platform, slightly lower weight
|
||||
bluesky_score += 5
|
||||
|
||||
if bluesky_score > 0:
|
||||
activity_scores['bluesky'] = {'score': bluesky_score, 'info': bluesky_handle}
|
||||
|
||||
# twitter activity
|
||||
twitter_handle = extra.get('twitter') or contact.get('twitter')
|
||||
if twitter_handle:
|
||||
twitter_score = 0
|
||||
tweets_count = extra.get('twitter_tweets', 0)
|
||||
|
||||
if tweets_count > 1000:
|
||||
twitter_score += 20
|
||||
elif tweets_count > 100:
|
||||
twitter_score += 10
|
||||
elif tweets_count > 0:
|
||||
twitter_score += 5
|
||||
|
||||
# if we found them via twitter hashtags, they're active there
|
||||
if human.get('platform') == 'twitter':
|
||||
twitter_score += 15
|
||||
|
||||
if twitter_score > 0:
|
||||
activity_scores['twitter'] = {'score': twitter_score, 'info': twitter_handle}
|
||||
|
||||
# NOTE: reddit is DISCOVERY ONLY, not a contact method
|
||||
# we find users on reddit but reach out via their external links (github, mastodon, etc.)
|
||||
# reddit-only users go to manual_queue for review
|
||||
|
||||
# lobsters activity
|
||||
lobsters_username = extra.get('lobsters') or contact.get('lobsters')
|
||||
if lobsters_username or human.get('platform') == 'lobsters':
|
||||
lobsters_score = 0
|
||||
lobsters_username = lobsters_username or human.get('username')
|
||||
|
||||
karma = extra.get('lobsters_karma', 0) or human.get('karma', 0)
|
||||
|
||||
# lobsters is invite-only, high signal
|
||||
lobsters_score += 15
|
||||
|
||||
if karma > 100:
|
||||
lobsters_score += 15
|
||||
elif karma > 50:
|
||||
lobsters_score += 10
|
||||
elif karma > 0:
|
||||
lobsters_score += 5
|
||||
|
||||
if lobsters_score > 0:
|
||||
activity_scores['lobsters'] = {'score': lobsters_score, 'info': lobsters_username}
|
||||
|
||||
# matrix activity
|
||||
matrix_id = extra.get('matrix') or contact.get('matrix')
|
||||
if matrix_id:
|
||||
matrix_score = 0
|
||||
|
||||
# matrix users are typically privacy-conscious and technical
|
||||
matrix_score += 15 # platform bonus for decentralized chat
|
||||
|
||||
# bonus if handle was discovered via rel="me" verification
|
||||
handles = extra.get('handles', {})
|
||||
if handles.get('matrix') == matrix_id:
|
||||
matrix_score += 10 # verified handle bonus
|
||||
|
||||
if matrix_score > 0:
|
||||
activity_scores['matrix'] = {'score': matrix_score, 'info': matrix_id}
|
||||
|
||||
# lemmy activity (fediverse)
|
||||
lemmy_username = human.get('username') if human.get('platform') == 'lemmy' else extra.get('lemmy')
|
||||
if lemmy_username:
|
||||
lemmy_score = 0
|
||||
|
||||
# lemmy is fediverse - high values alignment
|
||||
lemmy_score += 20 # fediverse platform bonus
|
||||
|
||||
post_count = extra.get('post_count', 0)
|
||||
comment_count = extra.get('comment_count', 0)
|
||||
|
||||
if post_count > 100:
|
||||
lemmy_score += 15
|
||||
elif post_count > 50:
|
||||
lemmy_score += 10
|
||||
elif post_count > 10:
|
||||
lemmy_score += 5
|
||||
|
||||
if comment_count > 500:
|
||||
lemmy_score += 10
|
||||
elif comment_count > 100:
|
||||
lemmy_score += 5
|
||||
|
||||
if lemmy_score > 0:
|
||||
activity_scores['lemmy'] = {'score': lemmy_score, 'info': lemmy_username}
|
||||
|
||||
# pick highest activity platform
|
||||
if activity_scores:
|
||||
best_platform = max(activity_scores.items(), key=lambda x: x[1]['score'])
|
||||
return best_platform[0], best_platform[1]['info']
|
||||
|
||||
# fall back to email ONLY if no social activity detected
|
||||
email = extra.get('email') or contact.get('email')
|
||||
# also check emails list
|
||||
if not email:
|
||||
emails = extra.get('emails') or contact.get('emails') or []
|
||||
for e in emails:
|
||||
if e and '@' in e and 'noreply' not in e.lower():
|
||||
email = e
|
||||
break
|
||||
|
||||
if email and '@' in email and 'noreply' not in email.lower():
|
||||
return 'email', email
|
||||
|
||||
# last resort: manual
|
||||
return 'manual', None
|
||||
github: github.com/connectd-daemon
|
||||
mastodon: @connectd@mastodon.sudoxreboot.com
|
||||
bluesky: connectd.bsky.social
|
||||
lemmy: lemmy.sudoxreboot.com/c/connectd
|
||||
discord: discord.gg/connectd
|
||||
matrix: @connectd:sudoxreboot.com
|
||||
reddit: reddit.com/r/connectd
|
||||
email: connectd@sudoxreboot.com
|
||||
"""
|
||||
|
||||
|
||||
def draft_intro_with_llm(match_data, recipient='a', dry_run=False):
|
||||
def draft_intro_with_llm(match_data: dict, recipient: str = 'a', dry_run: bool = True):
|
||||
"""
|
||||
use groq llama 4 maverick to draft a personalized intro
|
||||
draft an intro message using groq llm.
|
||||
|
||||
match_data should contain:
|
||||
- human_a: the first person
|
||||
- human_b: the second person
|
||||
- overlap_score: numeric score
|
||||
- overlap_reasons: list of why they match
|
||||
args:
|
||||
match_data: dict with human_a, human_b, overlap_score, overlap_reasons
|
||||
recipient: 'a' or 'b' - who receives the message
|
||||
dry_run: if True, preview mode
|
||||
|
||||
recipient: 'a' or 'b' - who we're writing to
|
||||
returns:
|
||||
tuple (result_dict, error_string)
|
||||
result_dict has: subject, draft_html, draft_plain
|
||||
"""
|
||||
if not GROQ_API_KEY:
|
||||
if not client:
|
||||
return None, "GROQ_API_KEY not set"
|
||||
|
||||
# determine recipient and other person
|
||||
if recipient == 'a':
|
||||
to_person = match_data.get('human_a', {})
|
||||
other_person = match_data.get('human_b', {})
|
||||
else:
|
||||
to_person = match_data.get('human_b', {})
|
||||
other_person = match_data.get('human_a', {})
|
||||
|
||||
# build context
|
||||
to_name = to_person.get('name') or to_person.get('username', 'friend')
|
||||
other_name = other_person.get('name') or other_person.get('username', 'someone')
|
||||
|
||||
to_signals = to_person.get('signals', [])
|
||||
if isinstance(to_signals, str):
|
||||
to_signals = json.loads(to_signals) if to_signals else []
|
||||
|
||||
other_signals = other_person.get('signals', [])
|
||||
if isinstance(other_signals, str):
|
||||
other_signals = json.loads(other_signals) if other_signals else []
|
||||
|
||||
overlap_reasons = match_data.get('overlap_reasons', [])
|
||||
if isinstance(overlap_reasons, str):
|
||||
overlap_reasons = json.loads(overlap_reasons) if overlap_reasons else []
|
||||
|
||||
# parse extra data
|
||||
to_extra = to_person.get('extra', {})
|
||||
other_extra = other_person.get('extra', {})
|
||||
if isinstance(to_extra, str):
|
||||
to_extra = json.loads(to_extra) if to_extra else {}
|
||||
if isinstance(other_extra, str):
|
||||
other_extra = json.loads(other_extra) if other_extra else {}
|
||||
|
||||
# build profile summaries
|
||||
to_profile = f"""
|
||||
name: {to_name}
|
||||
platform: {to_person.get('platform', 'unknown')}
|
||||
bio: {to_person.get('bio') or 'no bio'}
|
||||
location: {to_person.get('location') or 'unknown'}
|
||||
signals: {', '.join(to_signals[:8])}
|
||||
repos: {len(to_extra.get('top_repos', []))} public repos
|
||||
languages: {', '.join(to_extra.get('languages', {}).keys())}
|
||||
"""
|
||||
|
||||
other_profile = f"""
|
||||
name: {other_name}
|
||||
platform: {other_person.get('platform', 'unknown')}
|
||||
bio: {other_person.get('bio') or 'no bio'}
|
||||
location: {other_person.get('location') or 'unknown'}
|
||||
signals: {', '.join(other_signals[:8])}
|
||||
repos: {len(other_extra.get('top_repos', []))} public repos
|
||||
languages: {', '.join(other_extra.get('languages', {}).keys())}
|
||||
url: {other_person.get('url', '')}
|
||||
"""
|
||||
|
||||
# build prompt
|
||||
system_prompt = """you are connectd, an ai that connects isolated builders who share values but don't know each other yet.
|
||||
|
||||
your job is to write a short, genuine intro message to one person about another person they might want to know.
|
||||
|
||||
rules:
|
||||
- be brief (3-5 sentences max)
|
||||
- be genuine, not salesy or fake
|
||||
- focus on WHY they might want to connect, not just WHAT they have in common
|
||||
- don't be cringe or use buzzwords
|
||||
- lowercase preferred (casual tone)
|
||||
- no emojis unless the person's profile suggests they'd like them
|
||||
- mention specific things from their profiles, not generic "you both like open source"
|
||||
- end with a simple invitation, not a hard sell
|
||||
- sign off as "- connectd" (lowercase)
|
||||
|
||||
bad examples:
|
||||
- "I noticed you're both passionate about..." (too formal)
|
||||
- "You two would be PERFECT for each other!" (too salesy)
|
||||
- "As a fellow privacy enthusiast..." (cringe)
|
||||
|
||||
good examples:
|
||||
- "hey, saw you're building X. there's someone else working on similar stuff in Y who might be interesting to know."
|
||||
- "you might want to check out Z's work on federated systems - similar approach to what you're doing with A."
|
||||
"""
|
||||
|
||||
user_prompt = f"""write an intro message to {to_name} about {other_name}.
|
||||
|
||||
RECIPIENT ({to_name}):
|
||||
{to_profile}
|
||||
|
||||
INTRODUCING ({other_name}):
|
||||
{other_profile}
|
||||
|
||||
WHY THEY MATCH (overlap score {match_data.get('overlap_score', 0)}):
|
||||
{', '.join(overlap_reasons[:5])}
|
||||
|
||||
write a short intro message. remember: lowercase, genuine, not salesy."""
|
||||
|
||||
try:
|
||||
response = requests.post(
|
||||
GROQ_API_URL,
|
||||
headers={
|
||||
'Authorization': f'Bearer {GROQ_API_KEY}',
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
json={
|
||||
'model': MODEL,
|
||||
'messages': [
|
||||
{'role': 'system', 'content': system_prompt},
|
||||
{'role': 'user', 'content': user_prompt},
|
||||
],
|
||||
'temperature': 0.7,
|
||||
'max_tokens': 300,
|
||||
},
|
||||
timeout=30,
|
||||
human_a = match_data.get('human_a', {})
|
||||
human_b = match_data.get('human_b', {})
|
||||
reasons = match_data.get('overlap_reasons', [])
|
||||
|
||||
# recipient gets the message, about_person is who we're introducing them to
|
||||
if recipient == 'a':
|
||||
to_person = human_a
|
||||
about_person = human_b
|
||||
else:
|
||||
to_person = human_b
|
||||
about_person = human_a
|
||||
|
||||
to_name = to_person.get('username', 'friend')
|
||||
about_name = about_person.get('username', 'someone')
|
||||
about_bio = about_person.get('extra', {}).get('bio', '')
|
||||
|
||||
# extract contact info for about_person
|
||||
about_extra = about_person.get('extra', {})
|
||||
if isinstance(about_extra, str):
|
||||
import json as _json
|
||||
about_extra = _json.loads(about_extra) if about_extra else {}
|
||||
about_contact = about_person.get('contact', {})
|
||||
if isinstance(about_contact, str):
|
||||
about_contact = _json.loads(about_contact) if about_contact else {}
|
||||
|
||||
# build contact link for about_person
|
||||
about_platform = about_person.get('platform', '')
|
||||
about_username = about_person.get('username', '')
|
||||
contact_link = None
|
||||
if about_platform == 'mastodon' and about_username:
|
||||
if '@' in about_username:
|
||||
parts = about_username.split('@')
|
||||
if len(parts) >= 2:
|
||||
contact_link = f"https://{parts[1]}/@{parts[0]}"
|
||||
elif about_platform == 'github' and about_username:
|
||||
contact_link = f"https://github.com/{about_username}"
|
||||
elif about_extra.get('mastodon') or about_contact.get('mastodon'):
|
||||
handle = about_extra.get('mastodon') or about_contact.get('mastodon')
|
||||
if '@' in handle:
|
||||
parts = handle.lstrip('@').split('@')
|
||||
if len(parts) >= 2:
|
||||
contact_link = f"https://{parts[1]}/@{parts[0]}"
|
||||
elif about_extra.get('github') or about_contact.get('github'):
|
||||
contact_link = f"https://github.com/{about_extra.get('github') or about_contact.get('github')}"
|
||||
elif about_extra.get('email'):
|
||||
contact_link = about_extra['email']
|
||||
elif about_contact.get('email'):
|
||||
contact_link = about_contact['email']
|
||||
elif about_extra.get('website'):
|
||||
contact_link = about_extra['website']
|
||||
elif about_extra.get('external_links', {}).get('website'):
|
||||
contact_link = about_extra['external_links']['website']
|
||||
elif about_extra.get('extra', {}).get('website'):
|
||||
contact_link = about_extra['extra']['website']
|
||||
elif about_platform == 'reddit' and about_username:
|
||||
contact_link = f"reddit.com/u/{about_username}"
|
||||
|
||||
if not contact_link:
|
||||
contact_link = f"github.com/{about_username}" if about_username else "reach out via connectd"
|
||||
|
||||
# skip if no real contact method (just reddit or generic)
|
||||
if contact_link.startswith('reddit.com') or contact_link == "reach out via connectd" or 'stackblitz' in contact_link:
|
||||
return None, f"no real contact info for {about_name} - skipping draft"
|
||||
|
||||
# format the shared factors naturally
|
||||
if reasons:
|
||||
factor = ', '.join(reasons[:3]) if len(reasons) > 1 else reasons[0]
|
||||
else:
|
||||
factor = "shared values and interests"
|
||||
|
||||
# load soul as guideline
|
||||
soul = load_soul()
|
||||
if not soul:
|
||||
return None, "could not load soul file"
|
||||
|
||||
# build the prompt - soul is GUIDELINE not script
|
||||
prompt = f"""you are connectd, a daemon that finds isolated builders and connects them.
|
||||
|
||||
write a personal message TO {to_name} telling them about {about_name}.
|
||||
|
||||
here is the soul/spirit of what connectd is about - use this as a GUIDELINE for tone and message, NOT as a script to copy verbatim:
|
||||
|
||||
---
|
||||
{soul}
|
||||
---
|
||||
|
||||
key facts for this message:
|
||||
- recipient: {to_name}
|
||||
- introducing them to: {about_name}
|
||||
- their shared interests/values: {factor}
|
||||
- about {about_name}: {about_bio if about_bio else 'a builder like you'}
|
||||
- HOW TO REACH {about_name}: {contact_link}
|
||||
|
||||
RULES:
|
||||
1. say their name ONCE at start, then use "you"
|
||||
2. MUST include how to reach {about_name}: {contact_link}
|
||||
3. lowercase, raw, emotional - follow the soul
|
||||
4. end with the contact link
|
||||
|
||||
return ONLY the message body. signature is added separately."""
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model=GROQ_MODEL,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
temperature=0.6,
|
||||
max_tokens=1200
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
return None, f"groq api error: {response.status_code} - {response.text}"
|
||||
body = response.choices[0].message.content.strip()
|
||||
|
||||
data = response.json()
|
||||
draft = data['choices'][0]['message']['content'].strip()
|
||||
# generate subject
|
||||
subject_prompt = f"""generate a short, lowercase email subject for a message to {to_name} about connecting them with {about_name} over their shared interest in {factor}.
|
||||
|
||||
# determine contact method for recipient
|
||||
contact_method, contact_info = determine_contact_method(to_person)
|
||||
no corporate speak. no clickbait. raw and real.
|
||||
examples:
|
||||
- "found you, {to_name}"
|
||||
- "you're not alone"
|
||||
- "a door just opened"
|
||||
- "{to_name}, there's someone you should meet"
|
||||
|
||||
return ONLY the subject line."""
|
||||
|
||||
subject_response = client.chat.completions.create(
|
||||
model=GROQ_MODEL,
|
||||
messages=[{"role": "user", "content": subject_prompt}],
|
||||
temperature=0.9,
|
||||
max_tokens=50
|
||||
)
|
||||
|
||||
subject = subject_response.choices[0].message.content.strip().strip('"').strip("'")
|
||||
|
||||
# format html
|
||||
draft_html = f"<div style='font-family: monospace; white-space: pre-wrap; color: #e0e0e0; background: #1a1a1a; padding: 20px;'>{body}</div>{SIGNATURE_HTML}"
|
||||
draft_plain = body + SIGNATURE_PLAINTEXT
|
||||
|
||||
return {
|
||||
'draft': draft,
|
||||
'model': MODEL,
|
||||
'to': to_name,
|
||||
'about': other_name,
|
||||
'overlap_score': match_data.get('overlap_score', 0),
|
||||
'contact_method': contact_method,
|
||||
'contact_info': contact_info,
|
||||
'generated_at': datetime.now().isoformat(),
|
||||
'subject': subject,
|
||||
'draft_html': draft_html,
|
||||
'draft_plain': draft_plain
|
||||
}, None
|
||||
|
||||
except Exception as e:
|
||||
return None, f"groq error: {str(e)}"
|
||||
return None, str(e)
|
||||
|
||||
|
||||
def draft_intro_batch(matches, dry_run=False):
    """
    draft intros for multiple matches.

    each match gets two drafts - one per direction - so both humans
    receive a personalized message about the other.
    returns a list of dicts: {match, intro_to_a, intro_to_b, errors}.
    """
    def _draft_pair(m):
        # one draft per direction: 'a' hears about 'b', 'b' hears about 'a'
        to_a, err_a = draft_intro_with_llm(m, recipient='a', dry_run=dry_run)
        to_b, err_b = draft_intro_with_llm(m, recipient='b', dry_run=dry_run)
        return {
            'match': m,
            'intro_to_a': to_a,
            'intro_to_b': to_b,
            'errors': [err_a, err_b],
        }

    return [_draft_pair(m) for m in matches]
|
||||
# for backwards compat with old code
|
||||
def draft_message(person: dict, factor: str, platform: str = "email") -> dict:
    """legacy function - wraps new api"""
    # shim: wrap the single person into the pairwise match shape the
    # new drafting api expects; 'a' is a placeholder recipient
    synthetic_match = {
        'human_a': {'username': 'recipient'},
        'human_b': person,
        'overlap_reasons': [factor]
    }

    draft, err = draft_intro_with_llm(synthetic_match, recipient='a')
    if err:
        raise ValueError(err)

    # remap to the legacy key names old callers expect
    return {
        'subject': draft['subject'],
        'body_html': draft['draft_html'],
        'body_plain': draft['draft_plain']
    }
|
||||
|
||||
|
||||
def test_groq_connection():
    """test that groq api is working.

    sends a minimal one-token request and returns (ok, message).
    never raises - network/auth failures come back as (False, reason).
    """
    if not GROQ_API_KEY:
        return False, "GROQ_API_KEY not set"

    try:
        # cheapest possible round-trip: ask for a single short reply
        response = requests.post(
            GROQ_API_URL,
            headers={
                'Authorization': f'Bearer {GROQ_API_KEY}',
                'Content-Type': 'application/json',
            },
            json={
                'model': MODEL,
                'messages': [{'role': 'user', 'content': 'say "ok" and nothing else'}],
                'max_tokens': 10,
            },
            timeout=10,
        )

        if response.status_code == 200:
            return True, "groq api working"
        else:
            return False, f"groq api error: {response.status_code}"

    except Exception as e:
        return False, f"groq connection error: {str(e)}"


if __name__ == "__main__":
    # smoke test: draft one intro from synthetic match data
    test_data = {
        'human_a': {'username': 'sudoxnym', 'extra': {'bio': 'building intentional communities'}},
        'human_b': {'username': 'testuser', 'extra': {'bio': 'home assistant enthusiast'}},
        'overlap_reasons': ['home-assistant', 'open source', 'community building']
    }
    result, error = draft_intro_with_llm(test_data, recipient='a')
    if error:
        print(f"error: {error}")
    else:
        print(f"subject: {result['subject']}")
        print(f"\nbody:\n{result['draft_plain']}")
|
||||
# contact method ranking - USAGE BASED
|
||||
# we rank by where the person is MOST ACTIVE, not by our preference
|
||||
|
||||
def determine_contact_method(human):
    """
    determine ALL available contact methods, ranked by USER'S ACTIVITY.

    looks at activity metrics to decide where they're most engaged.
    returns: (best_method, best_info, fallbacks)
    where fallbacks is a list of (method, info) tuples in activity order
    """
    import json

    extra = human.get('extra', {})
    contact = human.get('contact', {})

    # stored rows may carry these as json strings
    if isinstance(extra, str):
        extra = json.loads(extra) if extra else {}
    if isinstance(contact, str):
        contact = json.loads(contact) if contact else {}

    nested = extra.get('extra', {})
    platform = human.get('platform', '')

    def lookup(key):
        # first non-empty value across the three places contact data hides
        return extra.get(key) or contact.get(key) or nested.get(key)

    ranked = []  # entries are (method, info, activity_score)

    # EMAIL - fixed baseline since we can't measure activity there
    email = lookup('email')
    if email and '@' in str(email):
        ranked.append(('email', email, 50))

    # MASTODON handle from profile fields, scored by post volume
    mastodon = lookup('mastodon')
    if mastodon:
        posts = extra.get('mastodon_posts', 0) or extra.get('statuses_count', 0)
        ranked.append(('mastodon', mastodon, min(100, 30 + (posts // 10))))

    # mastodon as home platform: prefer the canonical @user@instance handle
    if platform == 'mastodon':
        instance = human.get('instance') or extra.get('instance') or ''
        if instance:
            handle = f"@{human.get('username')}@{instance}"
        else:
            handle = f"@{human.get('username')}"
        activity = extra.get('statuses_count', 0) or extra.get('activity_count', 0)
        home_score = min(100, 50 + (activity // 5))  # higher base - it's their home
        existing = next((idx for idx, entry in enumerate(ranked) if entry[0] == 'mastodon'), None)
        if existing is None:
            ranked.append(('mastodon', handle, home_score))
        elif home_score > ranked[existing][2]:
            # keep the handle/score pair with the higher activity
            ranked[existing] = ('mastodon', handle, home_score)

    # MATRIX - presence only, no activity signal available
    matrix = lookup('matrix')
    if matrix and ':' in str(matrix):
        ranked.append(('matrix', matrix, 40))

    # BLUESKY - scored by post count when known
    bluesky = lookup('bluesky')
    if bluesky:
        bsky_posts = extra.get('bluesky_posts', 0)
        ranked.append(('bluesky', bluesky, min(100, 25 + (bsky_posts // 10))))

    # LEMMY handle from fields, scored by posts/comments
    lemmy = lookup('lemmy')
    if lemmy:
        lemmy_acts = extra.get('lemmy_posts', 0) or extra.get('lemmy_comments', 0)
        ranked.append(('lemmy', lemmy, min(100, 30 + lemmy_acts)))

    if platform == 'lemmy':
        if not any(entry[0] == 'lemmy' for entry in ranked):
            ranked.append(('lemmy', human.get('username'),
                           min(100, 50 + extra.get('activity_count', 0))))

    # DISCORD - hard to DM, keep it low priority
    discord = lookup('discord')
    if discord:
        ranked.append(('discord', discord, 20))

    # GITHUB users can be reached via an issue on their top repo
    if platform == 'github':
        top_repos = extra.get('top_repos', [])
        if top_repos:
            first = top_repos[0]
            repo = first if isinstance(first, str) else first.get('name', '')
            if repo:
                # more stars/repos => more likely to notice an issue
                gh_score = min(60, 20
                               + (extra.get('total_stars', 0) // 100)
                               + (extra.get('repos_count', 0) // 5))
                ranked.append(('github_issue', f"{human.get('username')}/{repo}", gh_score))

    # FORGE issue for self-hosted git users - high-signal selfhosters
    if platform and ':' in platform:
        platform_type, instance = platform.split(':', 1)
        if platform_type in ('gitea', 'forgejo', 'gogs', 'gitlab', 'sourcehut'):
            repos = extra.get('repos', [])
            if repos:
                first = repos[0]
                repo = first if isinstance(first, str) else first.get('name', '')
                instance_url = extra.get('instance_url', '')
                if repo and instance_url:
                    ranked.append(('forge_issue', {
                        'platform_type': platform_type,
                        'instance': instance,
                        'instance_url': instance_url,
                        'owner': human.get('username'),
                        'repo': repo
                    }, 55))

    # REDDIT-discovered people: we reach them via external contacts,
    # so boost those methods by their reddit activity
    if platform == 'reddit':
        reddit_acts = extra.get('reddit_activity', 0) or extra.get('activity_count', 0)
        boost = min(30, reddit_acts // 3)
        ranked = [
            (m, info, score + boost) if m in ('email', 'mastodon', 'matrix', 'bluesky')
            else (m, info, score)
            for m, info, score in ranked
        ]

    # most-active method first
    ranked.sort(key=lambda entry: entry[2], reverse=True)

    if not ranked:
        return 'manual', None, []

    best_method, best_info, _ = ranked[0]
    return best_method, best_info, [(m, info) for m, info, _ in ranked[1:]]
|
||||
|
||||
|
||||
def get_ranked_contact_methods(human):
    """
    get all contact methods for a human, ranked by their activity.
    """
    best, best_info, rest = determine_contact_method(human)
    # 'manual' means nothing reachable at all
    return [] if best == 'manual' else [(best, best_info), *rest]
|
||||
|
|
|
|||
|
|
@ -334,18 +334,24 @@ def determine_best_contact(human):
|
|||
"""
|
||||
determine best contact method based on WHERE THEY'RE MOST ACTIVE
|
||||
|
||||
uses activity-based selection from groq_draft module
|
||||
returns: (method, info, fallbacks)
|
||||
uses activity-based selection - ranks by user's actual usage
|
||||
"""
|
||||
from introd.groq_draft import determine_contact_method as activity_based_contact
|
||||
|
||||
method, info = activity_based_contact(human)
|
||||
method, info, fallbacks = activity_based_contact(human)
|
||||
|
||||
# convert github_issue info to dict format for delivery
|
||||
if method == 'github_issue' and isinstance(info, str) and '/' in info:
|
||||
parts = info.split('/', 1)
|
||||
return method, {'owner': parts[0], 'repo': parts[1]}
|
||||
def format_info(m, i):
|
||||
if m == 'github_issue' and isinstance(i, str) and '/' in i:
|
||||
parts = i.split('/', 1)
|
||||
return {'owner': parts[0], 'repo': parts[1]}
|
||||
return i
|
||||
|
||||
return method, info
|
||||
info = format_info(method, info)
|
||||
fallbacks = [(m, format_info(m, i)) for m, i in fallbacks]
|
||||
|
||||
return method, info, fallbacks
|
||||
|
||||
|
||||
def deliver_intro(match_data, intro_draft, dry_run=False):
|
||||
|
|
@ -362,8 +368,8 @@ def deliver_intro(match_data, intro_draft, dry_run=False):
|
|||
if already_contacted(recipient_id):
|
||||
return False, "already contacted", None
|
||||
|
||||
# determine contact method
|
||||
method, contact_info = determine_best_contact(recipient)
|
||||
# determine contact method with fallbacks
|
||||
method, contact_info, fallbacks = determine_best_contact(recipient)
|
||||
|
||||
log = load_delivery_log()
|
||||
result = {
|
||||
|
|
@ -423,9 +429,60 @@ def deliver_intro(match_data, intro_draft, dry_run=False):
|
|||
success = True
|
||||
error = "added to manual queue"
|
||||
|
||||
# if failed and we have fallbacks, try them
|
||||
if not success and fallbacks:
|
||||
for fallback_method, fallback_info in fallbacks:
|
||||
result['fallback_attempts'] = result.get('fallback_attempts', [])
|
||||
result['fallback_attempts'].append({
|
||||
'method': fallback_method,
|
||||
'contact_info': fallback_info
|
||||
})
|
||||
|
||||
fb_success = False
|
||||
fb_error = None
|
||||
|
||||
if fallback_method == 'email':
|
||||
subject = f"someone you might want to know - connectd"
|
||||
fb_success, fb_error = send_email(fallback_info, subject, intro_draft, dry_run)
|
||||
elif fallback_method == 'mastodon':
|
||||
fb_success, fb_error = send_mastodon_dm(fallback_info, intro_draft, dry_run)
|
||||
elif fallback_method == 'bluesky':
|
||||
fb_success, fb_error = send_bluesky_dm(fallback_info, intro_draft, dry_run)
|
||||
elif fallback_method == 'matrix':
|
||||
fb_success, fb_error = send_matrix_dm(fallback_info, intro_draft, dry_run)
|
||||
elif fallback_method == 'lemmy':
|
||||
from scoutd.lemmy import send_lemmy_dm
|
||||
fb_success, fb_error = send_lemmy_dm(fallback_info, intro_draft, dry_run)
|
||||
elif fallback_method == 'discord':
|
||||
from scoutd.discord import send_discord_dm
|
||||
fb_success, fb_error = send_discord_dm(fallback_info, intro_draft, dry_run)
|
||||
elif fallback_method == 'github_issue':
|
||||
owner = fallback_info.get('owner')
|
||||
repo = fallback_info.get('repo')
|
||||
title = "community introduction from connectd"
|
||||
github_body = f"""hey {recipient.get('name') or recipient.get('username')},
|
||||
|
||||
{intro_draft}
|
||||
|
||||
---
|
||||
*automated introduction from connectd*
|
||||
"""
|
||||
fb_success, fb_error = create_github_issue(owner, repo, title, github_body, dry_run)
|
||||
|
||||
if fb_success:
|
||||
success = True
|
||||
method = fallback_method
|
||||
contact_info = fallback_info
|
||||
error = None
|
||||
result['fallback_succeeded'] = fallback_method
|
||||
break
|
||||
else:
|
||||
result['fallback_attempts'][-1]['error'] = fb_error
|
||||
|
||||
# log result
|
||||
result['success'] = success
|
||||
result['error'] = error
|
||||
result['final_method'] = method
|
||||
|
||||
if success:
|
||||
log['sent'].append(result)
|
||||
|
|
|
|||
|
|
@ -104,6 +104,54 @@ def draft_intro_with_llm(match_data: dict, recipient: str = 'a', dry_run: bool =
|
|||
about_name = about_person.get('username', 'someone')
|
||||
about_bio = about_person.get('extra', {}).get('bio', '')
|
||||
|
||||
# extract contact info for about_person
|
||||
about_extra = about_person.get('extra', {})
|
||||
if isinstance(about_extra, str):
|
||||
import json as _json
|
||||
about_extra = _json.loads(about_extra) if about_extra else {}
|
||||
about_contact = about_person.get('contact', {})
|
||||
if isinstance(about_contact, str):
|
||||
about_contact = _json.loads(about_contact) if about_contact else {}
|
||||
|
||||
# build contact link for about_person
|
||||
about_platform = about_person.get('platform', '')
|
||||
about_username = about_person.get('username', '')
|
||||
contact_link = None
|
||||
if about_platform == 'mastodon' and about_username:
|
||||
if '@' in about_username:
|
||||
parts = about_username.split('@')
|
||||
if len(parts) >= 2:
|
||||
contact_link = f"https://{parts[1]}/@{parts[0]}"
|
||||
elif about_platform == 'github' and about_username:
|
||||
contact_link = f"https://github.com/{about_username}"
|
||||
elif about_extra.get('mastodon') or about_contact.get('mastodon'):
|
||||
handle = about_extra.get('mastodon') or about_contact.get('mastodon')
|
||||
if '@' in handle:
|
||||
parts = handle.lstrip('@').split('@')
|
||||
if len(parts) >= 2:
|
||||
contact_link = f"https://{parts[1]}/@{parts[0]}"
|
||||
elif about_extra.get('github') or about_contact.get('github'):
|
||||
contact_link = f"https://github.com/{about_extra.get('github') or about_contact.get('github')}"
|
||||
elif about_extra.get('email'):
|
||||
contact_link = about_extra['email']
|
||||
elif about_contact.get('email'):
|
||||
contact_link = about_contact['email']
|
||||
elif about_extra.get('website'):
|
||||
contact_link = about_extra['website']
|
||||
elif about_extra.get('external_links', {}).get('website'):
|
||||
contact_link = about_extra['external_links']['website']
|
||||
elif about_extra.get('extra', {}).get('website'):
|
||||
contact_link = about_extra['extra']['website']
|
||||
elif about_platform == 'reddit' and about_username:
|
||||
contact_link = f"reddit.com/u/{about_username}"
|
||||
|
||||
if not contact_link:
|
||||
contact_link = f"github.com/{about_username}" if about_username else "reach out via connectd"
|
||||
|
||||
# skip if no real contact method (just reddit or generic)
|
||||
if contact_link.startswith('reddit.com') or contact_link == "reach out via connectd" or 'stackblitz' in contact_link:
|
||||
return None, f"no real contact info for {about_name} - skipping draft"
|
||||
|
||||
# format the shared factors naturally
|
||||
if reasons:
|
||||
factor = ', '.join(reasons[:3]) if len(reasons) > 1 else reasons[0]
|
||||
|
|
@ -131,24 +179,20 @@ key facts for this message:
|
|||
- introducing them to: {about_name}
|
||||
- their shared interests/values: {factor}
|
||||
- about {about_name}: {about_bio if about_bio else 'a builder like you'}
|
||||
- HOW TO REACH {about_name}: {contact_link}
|
||||
|
||||
CRITICAL RULES - DO NOT SANITIZE:
|
||||
1. use their name ({to_name}) throughout - make it personal
|
||||
2. format the shared factor naturally like "your shared interest in X and Y" or "your work on X"
|
||||
3. this message is TO {to_name} ABOUT {about_name} - don't confuse this
|
||||
4. keep it lowercase, raw, emotional, real
|
||||
5. no corporate speak, no "best regards", no "hi there"
|
||||
6. DO NOT water down the message - keep the raw emotional energy
|
||||
7. address them directly, call them to action
|
||||
8. make them feel seen and not alone
|
||||
9. end with rallying energy about building together
|
||||
RULES:
|
||||
1. say their name ONCE at start, then use "you"
|
||||
2. MUST include how to reach {about_name}: {contact_link}
|
||||
3. lowercase, raw, emotional - follow the soul
|
||||
4. end with the contact link
|
||||
|
||||
return ONLY the message body. signature is added separately."""
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model=GROQ_MODEL,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
temperature=0.8,
|
||||
temperature=0.6,
|
||||
max_tokens=1200
|
||||
)
|
||||
|
||||
|
|
@ -220,3 +264,136 @@ if __name__ == "__main__":
|
|||
else:
|
||||
print(f"subject: {result['subject']}")
|
||||
print(f"\nbody:\n{result['draft_plain']}")
|
||||
|
||||
# contact method ranking - USAGE BASED
|
||||
# we rank by where the person is MOST ACTIVE, not by our preference
|
||||
|
||||
def determine_contact_method(human):
|
||||
"""
|
||||
determine ALL available contact methods, ranked by USER'S ACTIVITY.
|
||||
|
||||
looks at activity metrics to decide where they're most engaged.
|
||||
returns: (best_method, best_info, fallbacks)
|
||||
where fallbacks is a list of (method, info) tuples in activity order
|
||||
"""
|
||||
import json
|
||||
|
||||
extra = human.get('extra', {})
|
||||
contact = human.get('contact', {})
|
||||
|
||||
if isinstance(extra, str):
|
||||
extra = json.loads(extra) if extra else {}
|
||||
if isinstance(contact, str):
|
||||
contact = json.loads(contact) if contact else {}
|
||||
|
||||
nested_extra = extra.get('extra', {})
|
||||
platform = human.get('platform', '')
|
||||
|
||||
available = []
|
||||
|
||||
# === ACTIVITY SCORING ===
|
||||
# each method gets scored by how active the user is there
|
||||
|
||||
# EMAIL - always medium priority (we cant measure activity)
|
||||
email = extra.get('email') or contact.get('email') or nested_extra.get('email')
|
||||
if email and '@' in str(email):
|
||||
available.append(('email', email, 50)) # baseline score
|
||||
|
||||
# MASTODON - score by post count / followers
|
||||
mastodon = extra.get('mastodon') or contact.get('mastodon') or nested_extra.get('mastodon')
|
||||
if mastodon:
|
||||
masto_activity = extra.get('mastodon_posts', 0) or extra.get('statuses_count', 0)
|
||||
masto_score = min(100, 30 + (masto_activity // 10)) # 30 base + 1 per 10 posts
|
||||
available.append(('mastodon', mastodon, masto_score))
|
||||
|
||||
# if they CAME FROM mastodon, thats their primary
|
||||
if platform == 'mastodon':
|
||||
handle = f"@{human.get('username')}"
|
||||
instance = human.get('instance') or extra.get('instance') or ''
|
||||
if instance:
|
||||
handle = f"@{human.get('username')}@{instance}"
|
||||
activity = extra.get('statuses_count', 0) or extra.get('activity_count', 0)
|
||||
score = min(100, 50 + (activity // 5)) # higher base since its their home
|
||||
# dont dupe
|
||||
if not any(a[0] == 'mastodon' for a in available):
|
||||
available.append(('mastodon', handle, score))
|
||||
else:
|
||||
# update score if this is higher
|
||||
for i, (m, info, s) in enumerate(available):
|
||||
if m == 'mastodon' and score > s:
|
||||
available[i] = ('mastodon', handle, score)
|
||||
|
||||
# MATRIX - score by presence (binary for now)
|
||||
matrix = extra.get('matrix') or contact.get('matrix') or nested_extra.get('matrix')
|
||||
if matrix and ':' in str(matrix):
|
||||
available.append(('matrix', matrix, 40))
|
||||
|
||||
# BLUESKY - score by followers/posts if available
|
||||
bluesky = extra.get('bluesky') or contact.get('bluesky') or nested_extra.get('bluesky')
|
||||
if bluesky:
|
||||
bsky_activity = extra.get('bluesky_posts', 0)
|
||||
bsky_score = min(100, 25 + (bsky_activity // 10))
|
||||
available.append(('bluesky', bluesky, bsky_score))
|
||||
|
||||
# LEMMY - score by activity
|
||||
lemmy = extra.get('lemmy') or contact.get('lemmy') or nested_extra.get('lemmy')
|
||||
if lemmy:
|
||||
lemmy_activity = extra.get('lemmy_posts', 0) or extra.get('lemmy_comments', 0)
|
||||
lemmy_score = min(100, 30 + lemmy_activity)
|
||||
available.append(('lemmy', lemmy, lemmy_score))
|
||||
|
||||
if platform == 'lemmy':
|
||||
handle = human.get('username')
|
||||
activity = extra.get('activity_count', 0)
|
||||
score = min(100, 50 + activity)
|
||||
if not any(a[0] == 'lemmy' for a in available):
|
||||
available.append(('lemmy', handle, score))
|
||||
|
||||
# DISCORD - lower priority (hard to DM)
|
||||
discord = extra.get('discord') or contact.get('discord') or nested_extra.get('discord')
|
||||
if discord:
|
||||
available.append(('discord', discord, 20))
|
||||
|
||||
# GITHUB ISSUE - for github users, score by repo activity
|
||||
if platform == 'github':
|
||||
top_repos = extra.get('top_repos', [])
|
||||
if top_repos:
|
||||
repo = top_repos[0] if isinstance(top_repos[0], str) else top_repos[0].get('name', '')
|
||||
stars = extra.get('total_stars', 0)
|
||||
repos_count = extra.get('repos_count', 0)
|
||||
# active github user = higher issue score
|
||||
gh_score = min(60, 20 + (stars // 100) + (repos_count // 5))
|
||||
if repo:
|
||||
available.append(('github_issue', f"{human.get('username')}/{repo}", gh_score))
|
||||
|
||||
# REDDIT - discovered people, use their other links
|
||||
if platform == 'reddit':
|
||||
reddit_activity = extra.get('reddit_activity', 0) or extra.get('activity_count', 0)
|
||||
# reddit users we reach via their external links (email, mastodon, etc)
|
||||
# boost their other methods if reddit is their main platform
|
||||
for i, (m, info, score) in enumerate(available):
|
||||
if m in ('email', 'mastodon', 'matrix', 'bluesky'):
|
||||
# boost score for reddit-discovered users' external contacts
|
||||
boost = min(30, reddit_activity // 3)
|
||||
available[i] = (m, info, score + boost)
|
||||
|
||||
# sort by activity score (highest first)
|
||||
available.sort(key=lambda x: x[2], reverse=True)
|
||||
|
||||
if not available:
|
||||
return 'manual', None, []
|
||||
|
||||
best = available[0]
|
||||
fallbacks = [(m, i) for m, i, p in available[1:]]
|
||||
|
||||
return best[0], best[1], fallbacks
|
||||
|
||||
|
||||
def get_ranked_contact_methods(human):
|
||||
"""
|
||||
get all contact methods for a human, ranked by their activity.
|
||||
"""
|
||||
method, info, fallbacks = determine_contact_method(human)
|
||||
if method == 'manual':
|
||||
return []
|
||||
return [(method, info)] + fallbacks
|
||||
|
|
|
|||
|
|
@ -1,15 +1,20 @@
|
|||
"""
|
||||
matchd/overlap.py - find pairs with alignment
|
||||
|
||||
CRITICAL: blocks users with disqualifying negative signals (maga, conspiracy, conservative)
|
||||
"""
|
||||
|
||||
import json
|
||||
from .fingerprint import fingerprint_similarity
|
||||
|
||||
# signals that HARD BLOCK matching - no exceptions
|
||||
DISQUALIFYING_SIGNALS = {'maga', 'conspiracy', 'conservative', 'antivax', 'sovcit'}
|
||||
|
||||
|
||||
def find_overlap(human_a, human_b, fp_a=None, fp_b=None):
|
||||
"""
|
||||
analyze overlap between two humans
|
||||
returns overlap details: score, shared values, complementary skills
|
||||
returns None if either has disqualifying signals
|
||||
"""
|
||||
# parse stored json if needed
|
||||
signals_a = human_a.get('signals', [])
|
||||
|
|
@ -20,13 +25,49 @@ def find_overlap(human_a, human_b, fp_a=None, fp_b=None):
|
|||
if isinstance(signals_b, str):
|
||||
signals_b = json.loads(signals_b)
|
||||
|
||||
# === HARD BLOCK: check for disqualifying negative signals ===
|
||||
neg_a = human_a.get('negative_signals', [])
|
||||
if isinstance(neg_a, str):
|
||||
neg_a = json.loads(neg_a) if neg_a else []
|
||||
|
||||
neg_b = human_b.get('negative_signals', [])
|
||||
if isinstance(neg_b, str):
|
||||
neg_b = json.loads(neg_b) if neg_b else []
|
||||
|
||||
# also check 'reasons' field for WARNING entries
|
||||
reasons_a = human_a.get('reasons', '')
|
||||
if isinstance(reasons_a, str) and 'WARNING' in reasons_a:
|
||||
# extract signals from WARNING: x, y, z
|
||||
import re
|
||||
warn_match = re.search(r'WARNING[:\s]+([^"\]]+)', reasons_a)
|
||||
if warn_match:
|
||||
warn_signals = [s.strip().lower() for s in warn_match.group(1).split(',')]
|
||||
neg_a = list(set(neg_a + warn_signals))
|
||||
|
||||
reasons_b = human_b.get('reasons', '')
|
||||
if isinstance(reasons_b, str) and 'WARNING' in reasons_b:
|
||||
import re
|
||||
warn_match = re.search(r'WARNING[:\s]+([^"\]]+)', reasons_b)
|
||||
if warn_match:
|
||||
warn_signals = [s.strip().lower() for s in warn_match.group(1).split(',')]
|
||||
neg_b = list(set(neg_b + warn_signals))
|
||||
|
||||
# block if either has disqualifying signals
|
||||
disq_a = set(neg_a) & DISQUALIFYING_SIGNALS
|
||||
disq_b = set(neg_b) & DISQUALIFYING_SIGNALS
|
||||
|
||||
if disq_a:
|
||||
return None # blocked
|
||||
if disq_b:
|
||||
return None # blocked
|
||||
|
||||
extra_a = human_a.get('extra', {})
|
||||
if isinstance(extra_a, str):
|
||||
extra_a = json.loads(extra_a)
|
||||
extra_a = json.loads(extra_a) if extra_a else {}
|
||||
|
||||
extra_b = human_b.get('extra', {})
|
||||
if isinstance(extra_b, str):
|
||||
extra_b = json.loads(extra_b)
|
||||
extra_b = json.loads(extra_b) if extra_b else {}
|
||||
|
||||
# shared signals
|
||||
shared_signals = list(set(signals_a) & set(signals_b))
|
||||
|
|
@ -36,7 +77,7 @@ def find_overlap(human_a, human_b, fp_a=None, fp_b=None):
|
|||
topics_b = set(extra_b.get('topics', []))
|
||||
shared_topics = list(topics_a & topics_b)
|
||||
|
||||
# complementary skills (what one has that the other doesn't)
|
||||
# complementary skills
|
||||
langs_a = set(extra_a.get('languages', {}).keys())
|
||||
langs_b = set(extra_b.get('languages', {}).keys())
|
||||
complementary_langs = list((langs_a - langs_b) | (langs_b - langs_a))
|
||||
|
|
@ -68,38 +109,30 @@ def find_overlap(human_a, human_b, fp_a=None, fp_b=None):
|
|||
|
||||
# calculate overlap score
|
||||
base_score = 0
|
||||
|
||||
# shared values (most important)
|
||||
base_score += len(shared_signals) * 10
|
||||
|
||||
# shared interests
|
||||
base_score += len(shared_topics) * 5
|
||||
|
||||
# complementary skills bonus (they can help each other)
|
||||
if complementary_langs:
|
||||
base_score += min(len(complementary_langs), 5) * 3
|
||||
|
||||
# geographic bonus
|
||||
if geographic_match:
|
||||
base_score += 20
|
||||
|
||||
# fingerprint similarity if available
|
||||
fp_score = 0
|
||||
if fp_a and fp_b:
|
||||
fp_score = fingerprint_similarity(fp_a, fp_b) * 50
|
||||
|
||||
total_score = base_score + fp_score
|
||||
|
||||
# build reasons
|
||||
overlap_reasons = []
|
||||
if shared_signals:
|
||||
overlap_reasons.append(f"shared values: {', '.join(shared_signals[:5])}")
|
||||
overlap_reasons.append(f"shared: {', '.join(shared_signals[:5])}")
|
||||
if shared_topics:
|
||||
overlap_reasons.append(f"shared interests: {', '.join(shared_topics[:5])}")
|
||||
overlap_reasons.append(f"interests: {', '.join(shared_topics[:5])}")
|
||||
if geo_reason:
|
||||
overlap_reasons.append(geo_reason)
|
||||
if complementary_langs:
|
||||
overlap_reasons.append(f"complementary skills: {', '.join(complementary_langs[:5])}")
|
||||
overlap_reasons.append(f"complementary: {', '.join(complementary_langs[:5])}")
|
||||
|
||||
return {
|
||||
'overlap_score': total_score,
|
||||
|
|
@ -114,36 +147,28 @@ def find_overlap(human_a, human_b, fp_a=None, fp_b=None):
|
|||
|
||||
|
||||
def is_same_person(human_a, human_b):
|
||||
"""
|
||||
check if two records might be the same person (cross-platform)
|
||||
"""
|
||||
# same platform = definitely different records
|
||||
"""check if two records might be the same person (cross-platform)"""
|
||||
if human_a['platform'] == human_b['platform']:
|
||||
return False
|
||||
|
||||
# check username similarity
|
||||
user_a = human_a.get('username', '').lower().split('@')[0]
|
||||
user_b = human_b.get('username', '').lower().split('@')[0]
|
||||
|
||||
if user_a == user_b:
|
||||
return True
|
||||
|
||||
# check if github username matches
|
||||
contact_a = human_a.get('contact', {})
|
||||
contact_b = human_b.get('contact', {})
|
||||
|
||||
if isinstance(contact_a, str):
|
||||
contact_a = json.loads(contact_a)
|
||||
contact_a = json.loads(contact_a) if contact_a else {}
|
||||
if isinstance(contact_b, str):
|
||||
contact_b = json.loads(contact_b)
|
||||
contact_b = json.loads(contact_b) if contact_b else {}
|
||||
|
||||
# github cross-reference
|
||||
if contact_a.get('github') and contact_a.get('github') == contact_b.get('github'):
|
||||
return True
|
||||
if contact_a.get('github') == user_b or contact_b.get('github') == user_a:
|
||||
return True
|
||||
|
||||
# email cross-reference
|
||||
if contact_a.get('email') and contact_a.get('email') == contact_b.get('email'):
|
||||
return True
|
||||
|
||||
|
|
|
|||
491
scoutd/forges.py
Normal file
491
scoutd/forges.py
Normal file
|
|
@ -0,0 +1,491 @@
|
|||
"""
|
||||
scoutd/forges.py - scrape self-hosted git forges
|
||||
|
||||
these people = highest signal. they actually selfhost.
|
||||
|
||||
supported platforms:
|
||||
- gitea (and forks like forgejo)
|
||||
- gogs
|
||||
- gitlab ce
|
||||
- sourcehut
|
||||
- codeberg (gitea-based)
|
||||
|
||||
scrapes users AND extracts contact info for outreach.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import time
|
||||
import requests
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
from datetime import datetime
|
||||
|
||||
from .signals import analyze_text
|
||||
|
||||
# rate limiting
|
||||
REQUEST_DELAY = 1.0
|
||||
|
||||
# known public instances to scrape
|
||||
# format: (name, url, platform_type)
|
||||
KNOWN_INSTANCES = [
|
||||
# === PUBLIC INSTANCES ===
|
||||
# local/private instances can be added via LOCAL_FORGE_INSTANCES env var
|
||||
# codeberg (largest gitea instance)
|
||||
('codeberg', 'https://codeberg.org', 'gitea'),
|
||||
|
||||
# sourcehut
|
||||
('sourcehut', 'https://sr.ht', 'sourcehut'),
|
||||
|
||||
# notable gitea/forgejo instances
|
||||
('gitea.com', 'https://gitea.com', 'gitea'),
|
||||
('git.disroot.org', 'https://git.disroot.org', 'gitea'),
|
||||
('git.gay', 'https://git.gay', 'forgejo'),
|
||||
('git.envs.net', 'https://git.envs.net', 'forgejo'),
|
||||
('tildegit', 'https://tildegit.org', 'gitea'),
|
||||
('git.sr.ht', 'https://git.sr.ht', 'sourcehut'),
|
||||
|
||||
# gitlab ce instances
|
||||
('framagit', 'https://framagit.org', 'gitlab'),
|
||||
('gitlab.gnome.org', 'https://gitlab.gnome.org', 'gitlab'),
|
||||
('invent.kde.org', 'https://invent.kde.org', 'gitlab'),
|
||||
('salsa.debian.org', 'https://salsa.debian.org', 'gitlab'),
|
||||
]
|
||||
|
||||
# headers
|
||||
HEADERS = {
|
||||
'User-Agent': 'connectd/1.0 (finding builders with aligned values)',
|
||||
'Accept': 'application/json',
|
||||
}
|
||||
|
||||
|
||||
def log(msg):
    """emit a prefixed status line for the forge scraper."""
    line = f" forges: {msg}"
    print(line)
|
||||
|
||||
|
||||
# === GITEA/FORGEJO/GOGS API ===
|
||||
# these share the same API structure
|
||||
|
||||
def scrape_gitea_users(instance_url: str, limit: int = 100) -> List[Dict]:
    """
    collect users from a gitea/forgejo/gogs instance.

    tries the JSON search API first (gitea 1.x+); when that yields
    nothing, falls back to regex-parsing the public /explore/users page.
    """
    found: List[Dict] = []

    # preferred path: users/search API (gitea 1.x+)
    try:
        resp = requests.get(
            f"{instance_url}/api/v1/users/search",
            params={'q': '', 'limit': min(limit, 50)},
            headers=HEADERS,
            timeout=15,
        )
        if resp.status_code == 200:
            payload = resp.json()
            # different gitea-family versions wrap the list differently
            candidates = payload.get('data', []) or payload.get('users', []) or payload
            if isinstance(candidates, list):
                for entry in candidates[:limit]:
                    found.append({
                        'username': entry.get('login') or entry.get('username'),
                        'full_name': entry.get('full_name'),
                        'avatar': entry.get('avatar_url'),
                        'website': entry.get('website'),
                        'location': entry.get('location'),
                        'bio': entry.get('description') or entry.get('bio'),
                    })
            log(f" got {len(found)} users via API")
    except Exception as e:
        log(f" API failed: {e}")

    # fallback path: scrape the HTML explore page for profile links
    if not found:
        try:
            resp = requests.get(f"{instance_url}/explore/users", headers=HEADERS, timeout=15)
            if resp.status_code == 200:
                names = re.findall(r'href="/([^/"]+)"[^>]*class="[^"]*user[^"]*"', resp.text)
                names += re.findall(r'<a[^>]+href="/([^/"]+)"[^>]*title="[^"]*"', resp.text)
                for name in list(set(names))[:limit]:
                    # drop obvious non-profile routes
                    if name and not name.startswith(('explore', 'api', 'user', 'repo')):
                        found.append({'username': name})
                log(f" got {len(found)} users via scrape")
        except Exception as e:
            log(f" scrape failed: {e}")

    return found
|
||||
|
||||
|
||||
def get_gitea_user_details(instance_url: str, username: str) -> Optional[Dict]:
    """
    fetch one user's profile via the gitea/forgejo/gogs API.

    returns a normalized dict, or None when the user is missing, the
    instance is unreachable, or the response is not valid JSON.
    """
    try:
        api_url = f"{instance_url}/api/v1/users/{username}"
        resp = requests.get(api_url, headers=HEADERS, timeout=10)

        if resp.status_code == 200:
            u = resp.json()
            return {
                'username': u.get('login') or u.get('username'),
                'full_name': u.get('full_name'),
                'email': u.get('email'),  # may be hidden by the user's privacy settings
                'website': u.get('website'),
                'location': u.get('location'),
                'bio': u.get('description') or u.get('bio'),
                'created': u.get('created'),
                'followers': u.get('followers_count', 0),
                'following': u.get('following_count', 0),
            }
    # FIX: was a bare `except:` which also swallowed KeyboardInterrupt and
    # SystemExit; keep the best-effort contract but only trap real errors.
    except Exception:
        pass
    return None
|
||||
|
||||
|
||||
def get_gitea_user_repos(instance_url: str, username: str, limit: int = 10) -> List[Dict]:
    """
    list a user's repositories via the gitea/forgejo/gogs API.

    returns up to `limit` normalized repo dicts; an unreachable instance
    or malformed response yields an empty list (best-effort).
    """
    repos = []
    try:
        api_url = f"{instance_url}/api/v1/users/{username}/repos"
        resp = requests.get(api_url, headers=HEADERS, timeout=10)

        if resp.status_code == 200:
            for r in resp.json()[:limit]:
                repos.append({
                    'name': r.get('name'),
                    'full_name': r.get('full_name'),
                    'description': r.get('description'),
                    'stars': r.get('stars_count', 0),
                    'forks': r.get('forks_count', 0),
                    'language': r.get('language'),
                    'updated': r.get('updated_at'),
                })
    # FIX: was a bare `except:` — that also trapped KeyboardInterrupt/SystemExit.
    except Exception:
        pass
    return repos
|
||||
|
||||
|
||||
# === GITLAB CE API ===
|
||||
|
||||
def scrape_gitlab_users(instance_url: str, limit: int = 100) -> List[Dict]:
    """pull the public user directory from a gitlab ce instance."""
    collected: List[Dict] = []

    try:
        # gitlab exposes active users via the public /users endpoint
        resp = requests.get(
            f"{instance_url}/api/v4/users",
            params={'per_page': min(limit, 100), 'active': True},
            headers=HEADERS,
            timeout=15,
        )
        if resp.status_code == 200:
            for entry in resp.json()[:limit]:
                collected.append({
                    'username': entry.get('username'),
                    'full_name': entry.get('name'),
                    'avatar': entry.get('avatar_url'),
                    'website': entry.get('website_url'),
                    'location': entry.get('location'),
                    'bio': entry.get('bio'),
                    'public_email': entry.get('public_email'),
                })
            log(f" got {len(collected)} gitlab users")
    except Exception as e:
        log(f" gitlab API failed: {e}")

    return collected
|
||||
|
||||
|
||||
def get_gitlab_user_details(instance_url: str, username: str) -> Optional[Dict]:
    """
    fetch one gitlab user's public profile.

    gitlab's lookup-by-username returns a list; the first match wins.
    returns None when the user is unknown or the request fails.
    """
    try:
        api_url = f"{instance_url}/api/v4/users"
        params = {'username': username}
        resp = requests.get(api_url, params=params, headers=HEADERS, timeout=10)

        if resp.status_code == 200:
            users = resp.json()
            if users:
                u = users[0]
                return {
                    'username': u.get('username'),
                    'full_name': u.get('name'),
                    'email': u.get('public_email'),  # only the opt-in public email
                    'website': u.get('website_url'),
                    'location': u.get('location'),
                    'bio': u.get('bio'),
                    'created': u.get('created_at'),
                }
    # FIX: was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit.
    except Exception:
        pass
    return None
|
||||
|
||||
|
||||
def get_gitlab_user_projects(instance_url: str, username: str, limit: int = 10) -> List[Dict]:
    """
    list a gitlab user's projects.

    gitlab keys project listings by numeric user id, so the username is
    resolved first, then /users/<id>/projects is fetched. best-effort:
    any failure yields an empty list.
    """
    repos = []
    try:
        # step 1: resolve username -> numeric user id
        api_url = f"{instance_url}/api/v4/users"
        resp = requests.get(api_url, params={'username': username}, headers=HEADERS, timeout=10)

        if resp.status_code == 200:
            # FIX: parse the JSON body once instead of twice
            matches = resp.json()
            if matches:
                user_id = matches[0].get('id')

                # step 2: fetch that user's projects
                proj_url = f"{instance_url}/api/v4/users/{user_id}/projects"
                resp = requests.get(proj_url, headers=HEADERS, timeout=10)

                if resp.status_code == 200:
                    for p in resp.json()[:limit]:
                        repos.append({
                            'name': p.get('name'),
                            'full_name': p.get('path_with_namespace'),
                            'description': p.get('description'),
                            'stars': p.get('star_count', 0),
                            'forks': p.get('forks_count', 0),
                            'updated': p.get('last_activity_at'),
                        })
    # FIX: was a bare `except:` — narrow so ctrl-c still interrupts the scraper.
    except Exception:
        pass
    return repos
|
||||
|
||||
|
||||
# === SOURCEHUT API ===
|
||||
|
||||
def scrape_sourcehut_users(limit: int = 100) -> List[Dict]:
    """
    harvest usernames from sourcehut.

    there is no public user directory, so usernames are lifted from the
    ~user/repo links on the git.sr.ht project listing page.
    """
    collected: List[Dict] = []
    already = set()

    try:
        resp = requests.get('https://git.sr.ht/projects', headers=HEADERS, timeout=15)
        if resp.status_code == 200:
            # project links look like href="/~username/repo"
            for name in re.findall(r'href="/~([^/"]+)', resp.text):
                if name in already:
                    continue
                already.add(name)
                collected.append({'username': name})
                if len(collected) >= limit:
                    break
            log(f" got {len(collected)} sourcehut users")
    except Exception as e:
        log(f" sourcehut scrape failed: {e}")

    return collected
|
||||
|
||||
|
||||
def get_sourcehut_user_details(username: str) -> Optional[Dict]:
    """
    scrape a sourcehut profile page (sourcehut has no public user API).

    returns {'username', 'bio', 'profile_url'} or None on any failure.
    """
    try:
        profile_url = f"https://sr.ht/~{username}"
        resp = requests.get(profile_url, headers=HEADERS, timeout=10)

        if resp.status_code == 200:
            bio = ''
            # best-effort bio extraction; the page markup is not a stable API
            bio_match = re.search(r'<div class="container">\s*<p>([^<]+)</p>', resp.text)
            if bio_match:
                bio = bio_match.group(1).strip()

            return {
                'username': username,
                'bio': bio,
                'profile_url': profile_url,
            }
    # FIX: was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit.
    except Exception:
        pass
    return None
|
||||
|
||||
|
||||
def get_sourcehut_user_repos(username: str, limit: int = 10) -> List[Dict]:
    """
    scrape a sourcehut user's repo list from their git.sr.ht page.

    fixes over the first draft:
    - the username is regex-escaped before interpolation into the pattern
    - the repo capture stops at '/', so links into a repo (e.g.
      repo/tree/master/...) no longer yield bogus multi-segment names
    - results are de-duplicated while preserving page order
    """
    repos = []
    try:
        git_url = f"https://git.sr.ht/~{username}"
        resp = requests.get(git_url, headers=HEADERS, timeout=10)

        if resp.status_code == 200:
            seen = set()
            # capture only the first path segment after ~username/
            repo_matches = re.findall(rf'href="/~{re.escape(username)}/([^"/]+)"', resp.text)
            for repo in repo_matches:
                if not repo or repo.startswith(('refs', 'log', 'tree')) or repo in seen:
                    continue
                seen.add(repo)
                repos.append({
                    'name': repo,
                    'full_name': f"~{username}/{repo}",
                })
                if len(repos) >= limit:
                    break
    # FIX: was a bare `except:` — keep best-effort, stop trapping KeyboardInterrupt.
    except Exception:
        pass
    return repos
|
||||
|
||||
|
||||
# === UNIFIED SCRAPER ===
|
||||
|
||||
def scrape_forge(instance_name: str, instance_url: str, platform_type: str, limit: int = 50) -> List[Dict]:
    """
    scrape users from any forge type.
    returns list of human dicts ready for database.

    dispatches on platform_type to the matching (list, details, repos)
    scraper trio, then normalizes each user into a 'human' record with
    JSON-encoded signals/reasons/contact/extra fields.
    """
    log(f"scraping {instance_name} ({platform_type})...")

    humans = []

    # get user list based on platform type; get_details/get_repos are bound
    # per-platform so the loop below is platform-agnostic
    if platform_type in ('gitea', 'forgejo', 'gogs'):
        users = scrape_gitea_users(instance_url, limit)
        get_details = lambda u: get_gitea_user_details(instance_url, u)
        get_repos = lambda u: get_gitea_user_repos(instance_url, u)
    elif platform_type == 'gitlab':
        users = scrape_gitlab_users(instance_url, limit)
        get_details = lambda u: get_gitlab_user_details(instance_url, u)
        get_repos = lambda u: get_gitlab_user_projects(instance_url, u)
    elif platform_type == 'sourcehut':
        # sourcehut helpers take only the username (single central instance)
        users = scrape_sourcehut_users(limit)
        get_details = get_sourcehut_user_details
        get_repos = get_sourcehut_user_repos
    else:
        log(f" unknown platform type: {platform_type}")
        return []

    for user in users:
        username = user.get('username')
        if not username:
            continue

        # throttle: one detail+repo fetch per user per REQUEST_DELAY seconds
        time.sleep(REQUEST_DELAY)

        # get detailed info (merged over the listing stub; details win)
        details = get_details(username)
        if details:
            user.update(details)

        # get repos
        repos = get_repos(username)

        # build human record
        bio = user.get('bio', '') or ''
        website = user.get('website', '') or ''

        # analyze signals from bio
        # assumes analyze_text returns (score, mutable signal list, mutable
        # reason list) — TODO confirm against scoutd/signals.py
        score, signals, reasons = analyze_text(bio + ' ' + website)

        # BOOST: self-hosted git = highest signal
        score += 25
        signals.append('selfhosted_git')
        reasons.append(f'uses self-hosted git ({instance_name})')

        # extract contact info ('@' check filters hidden-email placeholders)
        contact = {}
        email = user.get('email') or user.get('public_email')
        if email and '@' in email:
            contact['email'] = email
        if website:
            contact['website'] = website

        # build human dict; list/dict fields are JSON-encoded for storage
        human = {
            'platform': f'{platform_type}:{instance_name}',
            'username': username,
            'name': user.get('full_name'),
            'bio': bio,
            # sourcehut profile URLs live on sr.ht, not the git host
            'url': f"{instance_url}/{username}" if platform_type != 'sourcehut' else f"https://sr.ht/~{username}",
            'score': score,
            'signals': json.dumps(signals),
            'reasons': json.dumps(reasons),
            'contact': json.dumps(contact),
            'extra': json.dumps({
                'instance': instance_name,
                'instance_url': instance_url,
                'platform_type': platform_type,
                'repos': repos[:5],
                'followers': user.get('followers', 0),
                'email': email,
                'website': website,
            }),
            # anyone with at least one repo counts as a builder
            'user_type': 'builder' if repos else 'none',
        }

        humans.append(human)
        log(f" {username}: score={score}, repos={len(repos)}")

    return humans
|
||||
|
||||
|
||||
def scrape_all_forges(limit_per_instance: int = 30) -> List[Dict]:
    """walk every known forge instance and pool the resulting humans."""
    pooled: List[Dict] = []

    for name, url, kind in KNOWN_INSTANCES:
        try:
            batch = scrape_forge(name, url, kind, limit_per_instance)
            pooled.extend(batch)
            log(f" {name}: {len(batch)} humans")
        except Exception as e:
            log(f" {name} failed: {e}")

        time.sleep(2)  # be nice between instances

    log(f"total: {len(pooled)} humans from {len(KNOWN_INSTANCES)} forges")
    return pooled
|
||||
|
||||
|
||||
# === OUTREACH METHODS ===
|
||||
|
||||
def can_message_on_forge(instance_url: str, platform_type: str) -> bool:
    """
    whether this forge offers a usable direct-contact channel.

    gitea/forgejo/gogs ship no DM feature; gitlab supports merge-request
    comments and sourcehut runs mailing lists, so only those two qualify.
    """
    messageable = {'gitlab', 'sourcehut'}
    return platform_type in messageable
|
||||
|
||||
|
||||
def open_forge_issue(instance_url: str, platform_type: str,
                     owner: str, repo: str, title: str, body: str) -> Tuple[bool, str]:
    """
    outreach fallback: file an issue on one of the target's repos.

    real issue creation needs a per-instance API token, which is not
    managed yet; email remains the preferred channel, so this currently
    always reports failure.
    """
    not_ready = "forge issue creation not implemented yet"
    return (False, not_ready)
|
||||
|
||||
|
||||
# === DISCOVERY ===
|
||||
|
||||
def discover_forge_instances() -> List[Tuple[str, str, str]]:
    """
    enumerate forge instances worth scraping.

    currently just copies the hardcoded KNOWN_INSTANCES list; future
    discovery sources (fediverse announcements of git.* domains,
    forgejo issue mentions, awesome-lists) would extend it here.

    returns list of (name, url, platform_type)
    """
    discovered = list(KNOWN_INSTANCES)
    return discovered
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # manual smoke test: hits the live codeberg API for a handful of users
    # and prints their scores — network access required, not a unit test
    print("testing forge scrapers...")

    # test codeberg
    humans = scrape_forge('codeberg', 'https://codeberg.org', 'gitea', limit=5)
    print(f"codeberg: {len(humans)} humans")
    for h in humans[:2]:
        print(f" {h['username']}: {h['score']} - {h.get('signals')}")
|
||||
|
|
@ -103,6 +103,15 @@ PLATFORM_PATTERNS = {
|
|||
'devto': [
|
||||
(r'https?://dev\.to/([^/?#]+)', lambda m: m.group(1)),
|
||||
],
|
||||
# reddit/lobsters
|
||||
'reddit': [
|
||||
(r'https?://(?:www\.)?reddit\.com/u(?:ser)?/([^/?#]+)', lambda m: f"u/{m.group(1)}"),
|
||||
(r'https?://(?:old|new)\.reddit\.com/u(?:ser)?/([^/?#]+)', lambda m: f"u/{m.group(1)}"),
|
||||
],
|
||||
'lobsters': [
|
||||
(r'https?://lobste\.rs/u/([^/?#]+)', lambda m: m.group(1)),
|
||||
],
|
||||
|
||||
|
||||
# funding
|
||||
'kofi': [
|
||||
|
|
|
|||
553
scoutd/reddit.py
553
scoutd/reddit.py
|
|
@ -1,24 +1,14 @@
|
|||
"""
|
||||
scoutd/reddit.py - reddit discovery (DISCOVERY ONLY, NOT OUTREACH)
|
||||
scoutd/reddit.py - reddit discovery with TAVILY web search
|
||||
|
||||
reddit is a SIGNAL SOURCE, not a contact channel.
|
||||
flow:
|
||||
1. scrape reddit for users active in target subs
|
||||
2. extract their reddit profile
|
||||
3. look for links TO other platforms (github, mastodon, website, etc.)
|
||||
4. add to scout database with reddit as signal source
|
||||
5. reach out via their OTHER platforms, never reddit
|
||||
|
||||
if reddit user has no external links:
|
||||
- add to manual_queue with note "reddit-only, needs manual review"
|
||||
|
||||
also detects lost builders - stuck in learnprogramming for years, imposter syndrome, etc.
|
||||
CRITICAL: always quote usernames in tavily searches to avoid fuzzy matching
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
import re
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
|
@ -35,43 +25,14 @@ from .lost import (
|
|||
HEADERS = {'User-Agent': 'connectd:v1.0 (community discovery)'}
|
||||
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'reddit'
|
||||
|
||||
# patterns for extracting external platform links
|
||||
PLATFORM_PATTERNS = {
|
||||
'github': [
|
||||
r'github\.com/([a-zA-Z0-9_-]+)',
|
||||
r'gh:\s*@?([a-zA-Z0-9_-]+)',
|
||||
],
|
||||
'mastodon': [
|
||||
r'@([a-zA-Z0-9_]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
|
||||
r'mastodon\.social/@([a-zA-Z0-9_]+)',
|
||||
r'fosstodon\.org/@([a-zA-Z0-9_]+)',
|
||||
r'hachyderm\.io/@([a-zA-Z0-9_]+)',
|
||||
r'tech\.lgbt/@([a-zA-Z0-9_]+)',
|
||||
],
|
||||
'twitter': [
|
||||
r'twitter\.com/([a-zA-Z0-9_]+)',
|
||||
r'x\.com/([a-zA-Z0-9_]+)',
|
||||
r'(?:^|\s)@([a-zA-Z0-9_]{1,15})(?:\s|$)', # bare @handle
|
||||
],
|
||||
'bluesky': [
|
||||
r'bsky\.app/profile/([a-zA-Z0-9_.-]+)',
|
||||
r'([a-zA-Z0-9_-]+)\.bsky\.social',
|
||||
],
|
||||
'website': [
|
||||
r'https?://([a-zA-Z0-9_-]+\.[a-zA-Z]{2,}[a-zA-Z0-9./_-]*)',
|
||||
],
|
||||
'matrix': [
|
||||
r'@([a-zA-Z0-9_-]+):([a-zA-Z0-9.-]+)',
|
||||
],
|
||||
}
|
||||
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
# SECURITY FIX: the previous default embedded a live tavily API key directly
# in source control. secrets must come from the environment only — and the
# key that was committed should be treated as leaked and rotated.
# tavily_search() already degrades gracefully (returns []) when this is None.
TAVILY_API_KEY = os.getenv('TAVILY_API_KEY')
|
||||
|
||||
|
||||
def _api_get(url, params=None):
|
||||
"""rate-limited request"""
|
||||
def _api_get(url, params=None, headers=None):
|
||||
cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}"
|
||||
cache_file = CACHE_DIR / f"{hash(cache_key) & 0xffffffff}.json"
|
||||
CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if cache_file.exists():
|
||||
try:
|
||||
data = json.loads(cache_file.read_text())
|
||||
|
|
@ -79,142 +40,263 @@ def _api_get(url, params=None):
|
|||
return data.get('_data')
|
||||
except:
|
||||
pass
|
||||
|
||||
time.sleep(2) # reddit rate limit
|
||||
|
||||
time.sleep(1)
|
||||
req_headers = {**HEADERS, **(headers or {})}
|
||||
try:
|
||||
resp = requests.get(url, headers=HEADERS, params=params, timeout=30)
|
||||
resp = requests.get(url, headers=req_headers, params=params, timeout=30)
|
||||
resp.raise_for_status()
|
||||
result = resp.json()
|
||||
cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result}))
|
||||
return result
|
||||
except requests.exceptions.RequestException as e:
|
||||
print(f" reddit api error: {e}")
|
||||
except:
|
||||
return None
|
||||
|
||||
|
||||
def extract_external_links(text):
|
||||
"""extract links to other platforms from text"""
|
||||
links = {}
|
||||
def tavily_search(query, max_results=10):
    """query the tavily search API; returns a (possibly empty) result list."""
    if not TAVILY_API_KEY:
        # no key configured -> web search is disabled
        return []
    payload = {'api_key': TAVILY_API_KEY, 'query': query, 'max_results': max_results}
    try:
        resp = requests.post('https://api.tavily.com/search', json=payload, timeout=30)
        if resp.status_code == 200:
            return resp.json().get('results', [])
    except Exception as e:
        print(f" tavily error: {e}")
    return []
|
||||
|
||||
|
||||
def extract_links_from_text(text, username=None):
|
||||
found = {}
|
||||
if not text:
|
||||
return links
|
||||
return found
|
||||
text_lower = text.lower()
|
||||
username_lower = username.lower() if username else None
|
||||
|
||||
for platform, patterns in PLATFORM_PATTERNS.items():
|
||||
for pattern in patterns:
|
||||
matches = re.findall(pattern, text, re.IGNORECASE)
|
||||
if matches:
|
||||
if platform == 'mastodon' and isinstance(matches[0], tuple):
|
||||
# full fediverse handle
|
||||
links[platform] = f"@{matches[0][0]}@{matches[0][1]}"
|
||||
elif platform == 'matrix' and isinstance(matches[0], tuple):
|
||||
links[platform] = f"@{matches[0][0]}:{matches[0][1]}"
|
||||
elif platform == 'website':
|
||||
# skip reddit/imgur/etc
|
||||
for match in matches:
|
||||
if not any(x in match.lower() for x in ['reddit', 'imgur', 'redd.it', 'i.redd']):
|
||||
links[platform] = f"https://{match}"
|
||||
# email
|
||||
for email in re.findall(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', text):
|
||||
if any(x in email.lower() for x in ['noreply', 'example', '@reddit', 'info@', 'support@', 'contact@', 'admin@']):
|
||||
continue
|
||||
if username_lower and username_lower in email.lower():
|
||||
found['email'] = email
|
||||
break
|
||||
else:
|
||||
links[platform] = matches[0]
|
||||
if 'email' not in found:
|
||||
found['email'] = email
|
||||
|
||||
# github
|
||||
for gh in re.findall(r'github\.com/([a-zA-Z0-9_-]+)', text):
|
||||
if gh.lower() in ['topics', 'explore', 'trending', 'sponsors', 'orgs']:
|
||||
continue
|
||||
if username_lower and gh.lower() == username_lower:
|
||||
found['github'] = gh
|
||||
break
|
||||
|
||||
return links
|
||||
# mastodon
|
||||
masto = re.search(r'@([a-zA-Z0-9_]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', text)
|
||||
if masto:
|
||||
found['mastodon'] = f"@{masto.group(1)}@{masto.group(2)}"
|
||||
for inst in ['mastodon.social', 'fosstodon.org', 'hachyderm.io', 'tech.lgbt']:
|
||||
m = re.search(f'{inst}/@([a-zA-Z0-9_]+)', text)
|
||||
if m:
|
||||
found['mastodon'] = f"@{m.group(1)}@{inst}"
|
||||
break
|
||||
|
||||
# bluesky
|
||||
bsky = re.search(r'bsky\.app/profile/([a-zA-Z0-9_.-]+)', text)
|
||||
if bsky:
|
||||
found['bluesky'] = bsky.group(1)
|
||||
|
||||
# twitter
|
||||
tw = re.search(r'(?:twitter|x)\.com/([a-zA-Z0-9_]+)', text)
|
||||
if tw and tw.group(1).lower() not in ['home', 'explore', 'search']:
|
||||
found['twitter'] = tw.group(1)
|
||||
|
||||
# linkedin
|
||||
li = re.search(r'linkedin\.com/in/([a-zA-Z0-9_-]+)', text)
|
||||
if li:
|
||||
found['linkedin'] = f"https://linkedin.com/in/{li.group(1)}"
|
||||
|
||||
# twitch
|
||||
twitch = re.search(r'twitch\.tv/([a-zA-Z0-9_]+)', text)
|
||||
if twitch:
|
||||
found['twitch'] = f"https://twitch.tv/{twitch.group(1)}"
|
||||
|
||||
# itch.io
|
||||
itch = re.search(r'itch\.io/profile/([a-zA-Z0-9_-]+)', text)
|
||||
if itch:
|
||||
found['itch'] = f"https://itch.io/profile/{itch.group(1)}"
|
||||
|
||||
# website
|
||||
for url in re.findall(r'https?://([a-zA-Z0-9_-]+\.[a-zA-Z]{2,}[a-zA-Z0-9./_-]*)', text):
|
||||
skip = ['reddit', 'imgur', 'google', 'facebook', 'twitter', 'youtube', 'wikipedia', 'amazon']
|
||||
if not any(x in url.lower() for x in skip):
|
||||
if username_lower and username_lower in url.lower():
|
||||
found['website'] = f"https://{url}"
|
||||
break
|
||||
if 'website' not in found:
|
||||
found['website'] = f"https://{url}"
|
||||
|
||||
return found
|
||||
|
||||
|
||||
def cross_platform_discovery(username, full_text=''):
|
||||
"""
|
||||
search the ENTIRE internet using TAVILY.
|
||||
CRITICAL: always quote username to avoid fuzzy matching!
|
||||
"""
|
||||
found = {}
|
||||
all_content = full_text
|
||||
username_lower = username.lower()
|
||||
|
||||
print(f" 🔍 cross-platform search for {username}...")
|
||||
|
||||
# ALWAYS QUOTE THE USERNAME - critical for exact matching
|
||||
searches = [
|
||||
f'"{username}"', # just username, quoted
|
||||
f'"{username}" github', # github
|
||||
f'"{username}" developer programmer', # dev context
|
||||
f'"{username}" email contact', # contact
|
||||
f'"{username}" mastodon', # fediverse
|
||||
]
|
||||
|
||||
for query in searches:
|
||||
print(f" 🌐 tavily: {query}")
|
||||
results = tavily_search(query, max_results=5)
|
||||
|
||||
for result in results:
|
||||
url = result.get('url', '').lower()
|
||||
title = result.get('title', '')
|
||||
content = result.get('content', '')
|
||||
combined = f"{url} {title} {content}"
|
||||
|
||||
# validate username appears
|
||||
if username_lower not in combined.lower():
|
||||
continue
|
||||
|
||||
all_content += f" {combined}"
|
||||
|
||||
# extract from URL directly
|
||||
if f'github.com/{username_lower}' in url and not found.get('github'):
|
||||
found['github'] = username
|
||||
print(f" ✓ github: {username}")
|
||||
|
||||
if f'twitch.tv/{username_lower}' in url and not found.get('twitch'):
|
||||
found['twitch'] = f"https://twitch.tv/{username}"
|
||||
print(f" ✓ twitch")
|
||||
|
||||
if 'itch.io/profile/' in url and username_lower in url and not found.get('itch'):
|
||||
found['itch'] = url if url.startswith('http') else f"https://{url}"
|
||||
print(f" ✓ itch.io")
|
||||
|
||||
if 'linkedin.com/in/' in url and not found.get('linkedin'):
|
||||
li = re.search(r'linkedin\.com/in/([a-zA-Z0-9_-]+)', url)
|
||||
if li:
|
||||
found['linkedin'] = f"https://linkedin.com/in/{li.group(1)}"
|
||||
print(f" ✓ linkedin")
|
||||
|
||||
# extract from content
|
||||
extracted = extract_links_from_text(all_content, username)
|
||||
for k, v in extracted.items():
|
||||
if k not in found:
|
||||
found[k] = v
|
||||
print(f" ✓ {k}")
|
||||
|
||||
# good contact found? stop searching
|
||||
if found.get('email') or found.get('github') or found.get('mastodon') or found.get('twitch'):
|
||||
break
|
||||
|
||||
# === API CHECKS ===
|
||||
if not found.get('github'):
|
||||
headers = {'Authorization': f'token {GITHUB_TOKEN}'} if GITHUB_TOKEN else {}
|
||||
try:
|
||||
resp = requests.get(f'https://api.github.com/users/{username}', headers=headers, timeout=10)
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
found['github'] = username
|
||||
print(f" ✓ github API")
|
||||
if data.get('email') and 'email' not in found:
|
||||
found['email'] = data['email']
|
||||
if data.get('blog') and 'website' not in found:
|
||||
found['website'] = data['blog'] if data['blog'].startswith('http') else f"https://{data['blog']}"
|
||||
except:
|
||||
pass
|
||||
|
||||
if not found.get('mastodon'):
|
||||
for inst in ['mastodon.social', 'fosstodon.org', 'hachyderm.io', 'tech.lgbt']:
|
||||
try:
|
||||
resp = requests.get(f'https://{inst}/api/v1/accounts/lookup', params={'acct': username}, timeout=5)
|
||||
if resp.status_code == 200:
|
||||
found['mastodon'] = f"@{username}@{inst}"
|
||||
print(f" ✓ mastodon: {found['mastodon']}")
|
||||
break
|
||||
except:
|
||||
continue
|
||||
|
||||
if not found.get('bluesky'):
|
||||
try:
|
||||
resp = requests.get('https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile',
|
||||
params={'actor': f'{username}.bsky.social'}, timeout=10)
|
||||
if resp.status_code == 200:
|
||||
found['bluesky'] = resp.json().get('handle')
|
||||
print(f" ✓ bluesky")
|
||||
except:
|
||||
pass
|
||||
|
||||
return found
|
||||
|
||||
|
||||
def get_user_profile(username):
|
||||
"""get user profile including bio/description"""
|
||||
url = f'https://www.reddit.com/user/{username}/about.json'
|
||||
data = _api_get(url)
|
||||
|
||||
if not data or 'data' not in data:
|
||||
return None
|
||||
|
||||
profile = data['data']
|
||||
return {
|
||||
'username': username,
|
||||
'name': profile.get('name'),
|
||||
'bio': profile.get('subreddit', {}).get('public_description', ''),
|
||||
'title': profile.get('subreddit', {}).get('title', ''),
|
||||
'icon': profile.get('icon_img'),
|
||||
'created_utc': profile.get('created_utc'),
|
||||
'total_karma': profile.get('total_karma', 0),
|
||||
'link_karma': profile.get('link_karma', 0),
|
||||
'comment_karma': profile.get('comment_karma', 0),
|
||||
}
|
||||
|
||||
|
||||
def get_subreddit_users(subreddit, limit=100):
|
||||
"""get recent posters/commenters from a subreddit"""
|
||||
users = set()
|
||||
|
||||
# posts
|
||||
url = f'https://www.reddit.com/r/{subreddit}/new.json'
|
||||
for endpoint in ['new', 'comments']:
|
||||
url = f'https://www.reddit.com/r/{subreddit}/{endpoint}.json'
|
||||
data = _api_get(url, {'limit': limit})
|
||||
if data and 'data' in data:
|
||||
for post in data['data'].get('children', []):
|
||||
author = post['data'].get('author')
|
||||
for item in data['data'].get('children', []):
|
||||
author = item['data'].get('author')
|
||||
if author and author not in ['[deleted]', 'AutoModerator']:
|
||||
users.add(author)
|
||||
|
||||
# comments
|
||||
url = f'https://www.reddit.com/r/{subreddit}/comments.json'
|
||||
data = _api_get(url, {'limit': limit})
|
||||
if data and 'data' in data:
|
||||
for comment in data['data'].get('children', []):
|
||||
author = comment['data'].get('author')
|
||||
if author and author not in ['[deleted]', 'AutoModerator']:
|
||||
users.add(author)
|
||||
|
||||
return users
|
||||
|
||||
|
||||
def get_user_activity(username):
|
||||
"""get user's posts and comments"""
|
||||
activity = []
|
||||
|
||||
# posts
|
||||
url = f'https://www.reddit.com/user/{username}/submitted.json'
|
||||
for endpoint in ['submitted', 'comments']:
|
||||
url = f'https://www.reddit.com/user/{username}/{endpoint}.json'
|
||||
data = _api_get(url, {'limit': 100})
|
||||
if data and 'data' in data:
|
||||
for post in data['data'].get('children', []):
|
||||
for item in data['data'].get('children', []):
|
||||
activity.append({
|
||||
'type': 'post',
|
||||
'subreddit': post['data'].get('subreddit'),
|
||||
'title': post['data'].get('title', ''),
|
||||
'body': post['data'].get('selftext', ''),
|
||||
'score': post['data'].get('score', 0),
|
||||
'type': 'post' if endpoint == 'submitted' else 'comment',
|
||||
'subreddit': item['data'].get('subreddit'),
|
||||
'title': item['data'].get('title', ''),
|
||||
'body': item['data'].get('selftext', '') or item['data'].get('body', ''),
|
||||
'score': item['data'].get('score', 0),
|
||||
})
|
||||
|
||||
# comments
|
||||
url = f'https://www.reddit.com/user/{username}/comments.json'
|
||||
data = _api_get(url, {'limit': 100})
|
||||
if data and 'data' in data:
|
||||
for comment in data['data'].get('children', []):
|
||||
activity.append({
|
||||
'type': 'comment',
|
||||
'subreddit': comment['data'].get('subreddit'),
|
||||
'body': comment['data'].get('body', ''),
|
||||
'score': comment['data'].get('score', 0),
|
||||
})
|
||||
|
||||
return activity
|
||||
|
||||
|
||||
def analyze_reddit_user(username):
|
||||
"""
|
||||
analyze a reddit user for alignment and extract external platform links.
|
||||
|
||||
reddit is DISCOVERY ONLY - we find users here but contact them elsewhere.
|
||||
"""
|
||||
activity = get_user_activity(username)
|
||||
if not activity:
|
||||
return None
|
||||
|
||||
# get profile for bio
|
||||
profile = get_user_profile(username)
|
||||
|
||||
# count subreddit activity
|
||||
sub_activity = defaultdict(int)
|
||||
text_parts = []
|
||||
total_karma = 0
|
||||
|
|
@ -232,20 +314,16 @@ def analyze_reddit_user(username):
|
|||
full_text = ' '.join(text_parts)
|
||||
text_score, positive_signals, negative_signals = analyze_text(full_text)
|
||||
|
||||
# EXTRACT EXTERNAL LINKS - this is the key part
|
||||
# check profile bio first
|
||||
external_links = {}
|
||||
if profile:
|
||||
bio_text = f"{profile.get('bio', '')} {profile.get('title', '')}"
|
||||
external_links.update(extract_external_links(bio_text))
|
||||
external_links.update(extract_links_from_text(f"{profile.get('bio', '')} {profile.get('title', '')}", username))
|
||||
external_links.update(extract_links_from_text(full_text, username))
|
||||
|
||||
# also scan posts/comments for links (people often share their github etc)
|
||||
activity_links = extract_external_links(full_text)
|
||||
for platform, link in activity_links.items():
|
||||
if platform not in external_links:
|
||||
external_links[platform] = link
|
||||
# TAVILY search
|
||||
discovered = cross_platform_discovery(username, full_text)
|
||||
external_links.update(discovered)
|
||||
|
||||
# subreddit scoring
|
||||
# scoring
|
||||
sub_score = 0
|
||||
aligned_subs = []
|
||||
for sub, count in sub_activity.items():
|
||||
|
|
@ -254,13 +332,11 @@ def analyze_reddit_user(username):
|
|||
sub_score += weight * min(count, 5)
|
||||
aligned_subs.append(sub)
|
||||
|
||||
# multi-sub bonus
|
||||
if len(aligned_subs) >= 5:
|
||||
sub_score += 30
|
||||
elif len(aligned_subs) >= 3:
|
||||
sub_score += 15
|
||||
|
||||
# negative sub penalty
|
||||
for sub in sub_activity:
|
||||
if sub.lower() in [n.lower() for n in NEGATIVE_SUBREDDITS]:
|
||||
sub_score -= 50
|
||||
|
|
@ -268,77 +344,33 @@ def analyze_reddit_user(username):
|
|||
|
||||
total_score = text_score + sub_score
|
||||
|
||||
# bonus if they have external links (we can actually contact them)
|
||||
if external_links.get('github'):
|
||||
total_score += 10
|
||||
positive_signals.append('has github')
|
||||
positive_signals.append('github')
|
||||
if external_links.get('mastodon'):
|
||||
total_score += 10
|
||||
positive_signals.append('has mastodon')
|
||||
if external_links.get('website'):
|
||||
positive_signals.append('mastodon')
|
||||
if external_links.get('email'):
|
||||
total_score += 15
|
||||
positive_signals.append('email')
|
||||
if external_links.get('twitch'):
|
||||
total_score += 5
|
||||
positive_signals.append('has website')
|
||||
positive_signals.append('twitch')
|
||||
|
||||
# === LOST BUILDER DETECTION ===
|
||||
# reddit is HIGH SIGNAL for lost builders - stuck in learnprogramming,
|
||||
# imposter syndrome posts, "i wish i could" language, etc.
|
||||
# lost builder
|
||||
subreddits_list = list(sub_activity.keys())
|
||||
lost_signals, lost_weight = analyze_reddit_for_lost_signals(activity, subreddits_list)
|
||||
|
||||
# also check full text for lost patterns (already done partially in analyze_reddit_for_lost_signals)
|
||||
text_lost_signals, text_lost_weight = analyze_text_for_lost_signals(full_text)
|
||||
text_lost_signals, _ = analyze_text_for_lost_signals(full_text)
|
||||
for sig in text_lost_signals:
|
||||
if sig not in lost_signals:
|
||||
lost_signals.append(sig)
|
||||
lost_weight += text_lost_weight
|
||||
|
||||
lost_potential_score = lost_weight
|
||||
builder_activity = 20 if external_links.get('github') else 0
|
||||
user_type = classify_user(lost_weight, builder_activity, total_score)
|
||||
|
||||
# classify: builder, lost, both, or none
|
||||
# for reddit, builder_score is based on having external links + high karma
|
||||
builder_activity = 0
|
||||
if external_links.get('github'):
|
||||
builder_activity += 20
|
||||
if total_karma > 1000:
|
||||
builder_activity += 15
|
||||
elif total_karma > 500:
|
||||
builder_activity += 10
|
||||
confidence = min(0.95, 0.3 + (0.2 if len(activity) > 20 else 0) + (0.2 if len(aligned_subs) >= 2 else 0) + (0.1 if external_links else 0))
|
||||
|
||||
user_type = classify_user(lost_potential_score, builder_activity, total_score)
|
||||
|
||||
# confidence
|
||||
confidence = 0.3
|
||||
if len(activity) > 20:
|
||||
confidence += 0.2
|
||||
if len(aligned_subs) >= 2:
|
||||
confidence += 0.2
|
||||
if len(text_parts) > 10:
|
||||
confidence += 0.2
|
||||
# higher confidence if we have contact methods
|
||||
if external_links:
|
||||
confidence += 0.1
|
||||
confidence = min(confidence, 0.95)
|
||||
|
||||
reasons = []
|
||||
if aligned_subs:
|
||||
reasons.append(f"active in: {', '.join(aligned_subs[:5])}")
|
||||
if positive_signals:
|
||||
reasons.append(f"signals: {', '.join(positive_signals[:5])}")
|
||||
if negative_signals:
|
||||
reasons.append(f"WARNING: {', '.join(negative_signals)}")
|
||||
if external_links:
|
||||
reasons.append(f"external: {', '.join(external_links.keys())}")
|
||||
|
||||
# add lost reasons if applicable
|
||||
if user_type == 'lost' or user_type == 'both':
|
||||
lost_descriptions = get_signal_descriptions(lost_signals)
|
||||
if lost_descriptions:
|
||||
reasons.append(f"LOST SIGNALS: {', '.join(lost_descriptions[:3])}")
|
||||
|
||||
# determine if this is reddit-only (needs manual review)
|
||||
reddit_only = len(external_links) == 0
|
||||
if reddit_only:
|
||||
reasons.append("REDDIT-ONLY: needs manual review for outreach")
|
||||
reddit_only = not any([external_links.get(k) for k in ['github', 'mastodon', 'bluesky', 'email', 'matrix', 'linkedin', 'twitch', 'itch']])
|
||||
|
||||
return {
|
||||
'platform': 'reddit',
|
||||
|
|
@ -351,153 +383,46 @@ def analyze_reddit_user(username):
|
|||
'subreddits': aligned_subs,
|
||||
'activity_count': len(activity),
|
||||
'karma': total_karma,
|
||||
'reasons': reasons,
|
||||
'reasons': [f"contact: {', '.join(external_links.keys())}"] if external_links else [],
|
||||
'scraped_at': datetime.now().isoformat(),
|
||||
# external platform links for outreach
|
||||
'external_links': external_links,
|
||||
'reddit_only': reddit_only,
|
||||
'extra': {
|
||||
'github': external_links.get('github'),
|
||||
'mastodon': external_links.get('mastodon'),
|
||||
'twitter': external_links.get('twitter'),
|
||||
'bluesky': external_links.get('bluesky'),
|
||||
'website': external_links.get('website'),
|
||||
'matrix': external_links.get('matrix'),
|
||||
'reddit_karma': total_karma,
|
||||
'reddit_activity': len(activity),
|
||||
},
|
||||
# lost builder fields
|
||||
'lost_potential_score': lost_potential_score,
|
||||
'extra': external_links,
|
||||
'lost_potential_score': lost_weight,
|
||||
'lost_signals': lost_signals,
|
||||
'user_type': user_type,
|
||||
}
|
||||
|
||||
|
||||
def scrape_reddit(db, limit_per_sub=50):
|
||||
"""
|
||||
full reddit scrape - DISCOVERY ONLY
|
||||
|
||||
finds aligned users, extracts external links for outreach.
|
||||
reddit-only users go to manual queue.
|
||||
"""
|
||||
print("scoutd/reddit: starting scrape (discovery only, not outreach)...")
|
||||
|
||||
# find users in multiple aligned subs
|
||||
print("scoutd/reddit: scraping (TAVILY enabled)...")
|
||||
user_subs = defaultdict(set)
|
||||
|
||||
# aligned subs - active builders
|
||||
priority_subs = ['intentionalcommunity', 'cohousing', 'selfhosted',
|
||||
'homeassistant', 'solarpunk', 'cooperatives', 'privacy',
|
||||
'localllama', 'homelab', 'degoogle', 'pihole', 'unraid']
|
||||
|
||||
# lost builder subs - people who need encouragement
|
||||
# these folks might be stuck, but they have aligned interests
|
||||
lost_subs = ['learnprogramming', 'findapath', 'getdisciplined',
|
||||
'careerguidance', 'cscareerquestions', 'decidingtobebetter']
|
||||
|
||||
# scrape both - we want to find lost builders with aligned interests
|
||||
all_subs = priority_subs + lost_subs
|
||||
|
||||
for sub in all_subs:
|
||||
print(f" scraping r/{sub}...")
|
||||
for sub in ['intentionalcommunity', 'cohousing', 'selfhosted', 'homeassistant', 'solarpunk', 'cooperatives', 'privacy', 'localllama', 'homelab', 'learnprogramming']:
|
||||
users = get_subreddit_users(sub, limit=limit_per_sub)
|
||||
for user in users:
|
||||
user_subs[user].add(sub)
|
||||
print(f" found {len(users)} users")
|
||||
|
||||
# filter for multi-sub users
|
||||
multi_sub = {u: subs for u, subs in user_subs.items() if len(subs) >= 2}
|
||||
print(f" {len(multi_sub)} users in 2+ aligned subs")
|
||||
print(f" {len(multi_sub)} users in 2+ subs")
|
||||
|
||||
# analyze
|
||||
results = []
|
||||
reddit_only_count = 0
|
||||
external_link_count = 0
|
||||
builders_found = 0
|
||||
lost_found = 0
|
||||
|
||||
for username in multi_sub:
|
||||
try:
|
||||
result = analyze_reddit_user(username)
|
||||
if result and result['score'] > 0:
|
||||
results.append(result)
|
||||
db.save_human(result)
|
||||
|
||||
user_type = result.get('user_type', 'none')
|
||||
|
||||
# track lost builders - reddit is high signal for these
|
||||
if user_type == 'lost':
|
||||
lost_found += 1
|
||||
lost_score = result.get('lost_potential_score', 0)
|
||||
if lost_score >= 40:
|
||||
print(f" 💔 u/{username}: lost_score={lost_score}, values={result['score']} pts")
|
||||
# lost builders also go to manual queue if reddit-only
|
||||
if result.get('reddit_only'):
|
||||
_add_to_manual_queue(result)
|
||||
|
||||
elif user_type == 'builder':
|
||||
builders_found += 1
|
||||
|
||||
elif user_type == 'both':
|
||||
builders_found += 1
|
||||
lost_found += 1
|
||||
print(f" ⚡ u/{username}: recovering builder")
|
||||
|
||||
# track external links
|
||||
if result.get('reddit_only'):
|
||||
reddit_only_count += 1
|
||||
# add high-value users to manual queue for review
|
||||
if result['score'] >= 50 and user_type != 'lost': # lost already added above
|
||||
_add_to_manual_queue(result)
|
||||
print(f" 📋 u/{username}: {result['score']} pts (reddit-only → manual queue)")
|
||||
else:
|
||||
external_link_count += 1
|
||||
if result['score'] >= 50 and user_type == 'builder':
|
||||
links = list(result.get('external_links', {}).keys())
|
||||
print(f" ★ u/{username}: {result['score']} pts → {', '.join(links)}")
|
||||
|
||||
except Exception as e:
|
||||
print(f" error on {username}: {e}")
|
||||
print(f" error: {username}: {e}")
|
||||
|
||||
print(f"scoutd/reddit: found {len(results)} aligned humans")
|
||||
print(f" - {builders_found} active builders")
|
||||
print(f" - {lost_found} lost builders (need encouragement)")
|
||||
print(f" - {external_link_count} with external links (reachable)")
|
||||
print(f" - {reddit_only_count} reddit-only (manual queue)")
|
||||
print(f"scoutd/reddit: {len(results)} humans")
|
||||
return results
|
||||
|
||||
|
||||
def _add_to_manual_queue(result):
|
||||
"""add reddit-only user to manual queue for review"""
|
||||
from pathlib import Path
|
||||
import json
|
||||
|
||||
queue_file = Path(__file__).parent.parent / 'data' / 'manual_queue.json'
|
||||
queue_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
queue = []
|
||||
if queue_file.exists():
|
||||
try:
|
||||
queue = json.loads(queue_file.read_text())
|
||||
except:
|
||||
pass
|
||||
|
||||
# check if already in queue
|
||||
existing = [q for q in queue if q.get('username') == result['username'] and q.get('platform') == 'reddit']
|
||||
if existing:
|
||||
return
|
||||
|
||||
queue.append({
|
||||
'platform': 'reddit',
|
||||
'username': result['username'],
|
||||
'url': result['url'],
|
||||
'score': result['score'],
|
||||
'subreddits': result.get('subreddits', []),
|
||||
'signals': result.get('signals', []),
|
||||
'reasons': result.get('reasons', []),
|
||||
'note': 'reddit-only user - no external links found. DM manually if promising.',
|
||||
'queued_at': datetime.now().isoformat(),
|
||||
'status': 'pending',
|
||||
})
|
||||
|
||||
queue = json.loads(queue_file.read_text()) if queue_file.exists() else []
|
||||
if not any(q.get('username') == result['username'] for q in queue):
|
||||
queue.append({'platform': 'reddit', 'username': result['username'], 'url': result['url'], 'score': result['score'], 'queued_at': datetime.now().isoformat()})
|
||||
queue_file.write_text(json.dumps(queue, indent=2))
|
||||
|
|
|
|||
3
soul.txt
3
soul.txt
|
|
@ -31,9 +31,8 @@ there's a better way and we are going to build it together."
|
|||
|
||||
you can reach *person* at *preffered contact method*
|
||||
|
||||
- connectd daemon
|
||||
hope it goes well!
|
||||
|
||||
-connectd
|
||||
CONNECTD_ICONS (line 33-44):
|
||||
CONNECTD_ICONS = '''<div style="display:flex;gap:16px;flex-wrap:wrap">
|
||||
<a href="https://github.com/connectd-daemon" title="GitHub" style="color:#888"><svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"/></svg></a>
|
||||
|
|
|
|||
Loading…
Reference in a new issue