initial release: connectd add-on v1.1.0

This commit is contained in:
Your Name 2025-12-15 11:06:51 -06:00
commit 3c02ee85c2
45 changed files with 10301 additions and 0 deletions

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
*.pyc
__pycache__/
*.db
.DS_Store

16
README.md Normal file
View file

@ -0,0 +1,16 @@
# connectd add-ons for home assistant
## installation
1. go to **settings** → **add-ons** → **add-on store**
2. click the three dots in the top right → **repositories**
3. add: `https://github.com/sudoxnym/ha-addons`
4. find **connectd** in the store and install
## add-ons
### connectd
find isolated builders with aligned values. auto-discovers humans on github, mastodon, lemmy, discord, and more.
[![Open your Home Assistant instance and show the add add-on repository dialog with a specific repository URL pre-filled.](https://my.home-assistant.io/badges/supervisor_add_addon_repository.svg)](https://my.home-assistant.io/redirect/supervisor_add_addon_repository/?repository_url=https%3A%2F%2Fgithub.com%2Fsudoxnym%2Fha-addons)

28
connectd/Dockerfile Normal file
View file

@ -0,0 +1,28 @@
# base image is injected per-architecture by the HA add-on builder (see build.yaml)
ARG BUILD_FROM
FROM ${BUILD_FROM}
# install python deps from alpine packages (requests/bs4 come prebuilt)
RUN apk add --no-cache python3 py3-pip py3-requests py3-beautifulsoup4
# create app directory
WORKDIR /app
# copy requirements and install
COPY requirements.txt .
# --break-system-packages: alpine's python is PEP 668 "externally managed"
RUN pip3 install --no-cache-dir --break-system-packages -r requirements.txt
# copy app code
COPY api.py config.py daemon.py cli.py setup_user.py ./
COPY db/ db/
COPY scoutd/ scoutd/
COPY matchd/ matchd/
COPY introd/ introd/
# create data directory (db + cache live under the supervisor-mounted /data)
RUN mkdir -p /data/db /data/cache
# copy run script
COPY run.sh /
RUN chmod a+x /run.sh
CMD ["/run.sh"]

52
connectd/README.md Normal file
View file

@ -0,0 +1,52 @@
# connectd add-on for home assistant
find isolated builders with aligned values. auto-discovers humans on github, mastodon, lemmy, discord, and more.
## installation
1. add this repository to your home assistant add-on store
2. install the connectd add-on
3. configure your HOST_USER (github username) in the add-on settings
4. start the add-on
## configuration
### required
- **host_user**: your github username (connectd will auto-discover your profile)
### optional host info
- **host_name**: your display name
- **host_email**: your email
- **host_mastodon**: mastodon handle (@user@instance)
- **host_reddit**: reddit username
- **host_lemmy**: lemmy handle (@user@instance)
- **host_lobsters**: lobsters username
- **host_matrix**: matrix handle (@user:server)
- **host_discord**: discord user id
- **host_bluesky**: bluesky handle (handle.bsky.social)
- **host_location**: your location
- **host_interests**: comma-separated interests
- **host_looking_for**: what you're looking for
### api credentials
- **github_token**: for higher rate limits
- **groq_api_key**: for LLM-drafted intros
- **mastodon_token**: for DM delivery
- **discord_bot_token**: for discord discovery/delivery
## hacs integration
after starting the add-on, install the connectd integration via HACS:
1. add custom repository: `https://github.com/sudoxnym/connectd`
2. install connectd integration
3. add integration in HA settings
4. configure with host: `localhost`, port: `8099`
## sensors
- total humans, high score humans, active builders
- platform counts (github, mastodon, reddit, lemmy, discord, lobsters)
- priority matches, top humans
- countdown timers (next scout, match, intro)
- your personal score and profile

268
connectd/api.py Normal file
View file

@ -0,0 +1,268 @@
#!/usr/bin/env python3
"""
connectd/api.py - REST API for stats and control
exposes daemon stats for home assistant integration.
runs on port 8099 by default.
"""
import os
import json
import threading
from http.server import HTTPServer, BaseHTTPRequestHandler
from datetime import datetime
from db import Database
from db.users import get_priority_users, get_priority_user_matches, get_priority_user
API_PORT = int(os.environ.get('CONNECTD_API_PORT', 8099))
# module-level daemon state, shared between the daemon thread and API handlers
_daemon_state = {
    'running': False,
    'dry_run': False,
    'last_scout': None,
    'last_match': None,
    'last_intro': None,
    'last_lost': None,
    'intros_today': 0,
    'lost_intros_today': 0,
    'started_at': None,
}


def update_daemon_state(state_dict):
    """Merge state_dict into the shared daemon state (called by the daemon)."""
    _daemon_state.update(state_dict)


def get_daemon_state():
    """Return a shallow copy of the current daemon state."""
    return dict(_daemon_state)
class APIHandler(BaseHTTPRequestHandler):
    """REST API handler exposing daemon state and database stats as JSON.

    endpoints: /api/stats, /api/health, /api/state, /api/priority_matches,
    /api/top_humans, /api/user
    """

    def log_message(self, format, *args):
        """suppress default per-request stderr logging"""
        pass

    @staticmethod
    def _parse_json_field(value, default):
        """decode a JSON-encoded db column; non-string values pass through.

        empty strings decode to `default` (matches previous inline behavior).
        """
        if isinstance(value, str):
            return json.loads(value) if value else default
        return value

    def _send_json(self, data, status=200):
        """serialize `data` and send it as a JSON response"""
        self.send_response(status)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Access-Control-Allow-Origin', '*')
        self.end_headers()
        self.wfile.write(json.dumps(data).encode())

    def do_GET(self):
        """route GET requests by exact path"""
        routes = {
            '/api/stats': self._handle_stats,
            '/api/health': self._handle_health,
            '/api/state': self._handle_state,
            '/api/priority_matches': self._handle_priority_matches,
            '/api/top_humans': self._handle_top_humans,
            '/api/user': self._handle_user,
        }
        handler = routes.get(self.path)
        if handler is not None:
            handler()
        else:
            self._send_json({'error': 'not found'}, 404)

    def _handle_stats(self):
        """return database statistics"""
        try:
            db = Database()
            try:
                stats = db.stats()
            finally:
                # close even if stats() raises (previously leaked on error)
                db.close()
            self._send_json(stats)
        except Exception as e:
            self._send_json({'error': str(e)}, 500)

    def _handle_health(self):
        """return daemon health status (running flag + uptime)"""
        state = get_daemon_state()
        health = {
            'status': 'running' if state['running'] else 'stopped',
            'dry_run': state['dry_run'],
            'uptime_seconds': None,
        }
        if state['started_at']:
            uptime = datetime.now() - datetime.fromisoformat(state['started_at'])
            health['uptime_seconds'] = int(uptime.total_seconds())
        self._send_json(health)

    def _handle_state(self):
        """return full daemon state"""
        state = get_daemon_state()
        # convert datetimes to ISO strings for JSON serialization
        for key in ['last_scout', 'last_match', 'last_intro', 'last_lost', 'started_at']:
            if state[key] and isinstance(state[key], datetime):
                state[key] = state[key].isoformat()
        self._send_json(state)

    def _handle_priority_matches(self):
        """return priority matches for the HA sensor"""
        try:
            db = Database()
            try:
                users = get_priority_users(db.conn)
                if not users:
                    self._send_json({
                        'count': 0,
                        'new_count': 0,
                        'top_matches': [],
                    })
                    return
                # get matches for first priority user (the host)
                user = users[0]
                matches = get_priority_user_matches(db.conn, user['id'], limit=10)
            finally:
                db.close()
            new_count = sum(1 for m in matches if m.get('status') == 'new')
            top_matches = []
            for m in matches[:5]:
                overlap_reasons = self._parse_json_field(m.get('overlap_reasons', '[]'), [])
                top_matches.append({
                    'username': m.get('username'),
                    'platform': m.get('platform'),
                    'score': m.get('score', 0),
                    'overlap_score': m.get('overlap_score', 0),
                    'reasons': overlap_reasons[:3],
                    'url': m.get('url'),
                    'status': m.get('status', 'new'),
                })
            self._send_json({
                'count': len(matches),
                'new_count': new_count,
                'top_matches': top_matches,
            })
        except Exception as e:
            self._send_json({'error': str(e)}, 500)

    def _handle_top_humans(self):
        """return top scoring humans for the HA sensor"""
        try:
            db = Database()
            try:
                humans = db.get_all_humans(min_score=50, limit=5)
            finally:
                db.close()
            top_humans = []
            for h in humans:
                contact = self._parse_json_field(h.get('contact', '{}'), {})
                signals = self._parse_json_field(h.get('signals', '[]'), [])
                top_humans.append({
                    'username': h.get('username'),
                    'platform': h.get('platform'),
                    'score': h.get('score', 0),
                    'name': h.get('name'),
                    'signals': signals[:5],
                    # preferred delivery channel, in priority order
                    'contact_method': 'email' if contact.get('email') else
                                      'mastodon' if contact.get('mastodon') else
                                      'matrix' if contact.get('matrix') else 'manual',
                })
            self._send_json({
                'count': len(humans),
                'top_humans': top_humans,
            })
        except Exception as e:
            self._send_json({'error': str(e)}, 500)

    def _handle_user(self):
        """return priority user info for the HA sensor"""
        try:
            db = Database()
            try:
                users = get_priority_users(db.conn)
                if not users:
                    self._send_json({
                        'configured': False,
                        'score': 0,
                        'signals': [],
                        'match_count': 0,
                    })
                    return
                user = users[0]
                matches = get_priority_user_matches(db.conn, user['id'], limit=100)
            finally:
                db.close()
            signals = self._parse_json_field(user.get('signals', '[]'), [])
            interests = self._parse_json_field(user.get('interests', '[]'), [])
            self._send_json({
                'configured': True,
                'name': user.get('name'),
                'github': user.get('github'),
                'mastodon': user.get('mastodon'),
                'reddit': user.get('reddit'),
                'lobsters': user.get('lobsters'),
                'matrix': user.get('matrix'),
                'lemmy': user.get('lemmy'),
                'discord': user.get('discord'),
                'bluesky': user.get('bluesky'),
                'score': user.get('score', 0),
                'signals': signals[:10],
                'interests': interests,
                'location': user.get('location'),
                'bio': user.get('bio'),
                'match_count': len(matches),
                'new_match_count': sum(1 for m in matches if m.get('status') == 'new'),
            })
        except Exception as e:
            self._send_json({'error': str(e)}, 500)
def run_api_server():
    """Bind the HTTP API on all interfaces and serve requests forever (blocking)."""
    httpd = HTTPServer(('0.0.0.0', API_PORT), APIHandler)
    print(f"connectd api running on port {API_PORT}")
    httpd.serve_forever()
def start_api_thread():
    """Launch the API server on a background daemon thread; return the thread."""
    api_thread = threading.Thread(target=run_api_server, daemon=True)
    api_thread.start()
    return api_thread
if __name__ == '__main__':
    # standalone mode for testing: serve in the foreground (blocks forever)
    print(f"starting connectd api on port {API_PORT}...")
    run_api_server()

11
connectd/build.yaml Normal file
View file

@ -0,0 +1,11 @@
# per-architecture base images consumed as BUILD_FROM by the Dockerfile
build_from:
  amd64: ghcr.io/hassio-addons/base:15.0.8
  aarch64: ghcr.io/hassio-addons/base:15.0.8
  armv7: ghcr.io/hassio-addons/base:15.0.8
# OCI image metadata labels
labels:
  org.opencontainers.image.title: "connectd"
  org.opencontainers.image.description: "find isolated builders with aligned values"
  org.opencontainers.image.source: "https://github.com/sudoxnym/connectd"
  org.opencontainers.image.licenses: "MIT"
# default build arguments
args:
  BUILD_ARCH: amd64

878
connectd/cli.py Executable file
View file

@ -0,0 +1,878 @@
#!/usr/bin/env python3
"""
connectd - people discovery and matchmaking daemon
finds isolated builders and connects them
also finds LOST builders who need encouragement
usage:
connectd scout # run all scrapers
connectd scout --github # github only
connectd scout --reddit # reddit only
connectd scout --mastodon # mastodon only
connectd scout --lobsters # lobste.rs only
connectd scout --matrix # matrix only
connectd scout --lost # show lost builder stats after scout
connectd match # find all matches
connectd match --top 20 # show top 20 matches
connectd match --mine # show YOUR matches (priority user)
connectd match --lost # find matches for lost builders
connectd intro # generate intros for top matches
connectd intro --match 123 # generate intro for specific match
connectd intro --dry-run # preview intros without saving
connectd intro --lost # generate intros for lost builders
connectd review # interactive review queue
connectd send # send all approved intros
connectd send --export # export for manual sending
connectd daemon # run as continuous daemon
connectd daemon --oneshot # run once then exit
connectd daemon --dry-run # run but never send intros
connectd daemon --oneshot --dry-run # one cycle, preview only
connectd user # show your priority user profile
connectd user --setup # setup/update your profile
connectd user --matches # show matches found for you
connectd status # show database stats (including lost builders)
connectd lost # show lost builders ready for outreach
"""
import argparse
import sys
from pathlib import Path
# add parent to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from db import Database
from db.users import (init_users_table, add_priority_user, get_priority_users,
get_priority_user_matches, score_priority_user, auto_match_priority_user,
update_priority_user_profile)
from scoutd import scrape_github, scrape_reddit, scrape_mastodon, scrape_lobsters, scrape_matrix
from scoutd.deep import deep_scrape_github_user
from scoutd.lost import get_signal_descriptions
from introd.deliver import (deliver_intro, deliver_batch, get_delivery_stats,
review_manual_queue, determine_best_contact, load_manual_queue,
save_manual_queue)
from matchd import find_all_matches, generate_fingerprint
from matchd.rank import get_top_matches
from matchd.lost import find_matches_for_lost_builders, get_lost_match_summary
from introd import draft_intro
from introd.draft import draft_intros_for_match
from introd.lost_intro import draft_lost_intro, get_lost_intro_config
from introd.review import review_all_pending, get_pending_intros
from introd.send import send_all_approved, export_manual_intros
def cmd_scout(args, db):
    """run discovery scrapers.

    args: parsed CLI namespace (per-platform flags, --deep, --user, --lost)
    db: open Database instance

    With --user, deep-scrapes that single github user and returns.
    With no platform flag, runs every scraper. Lost-builder stats are
    always printed at the end (the --lost flag is kept for compatibility).
    """
    from scoutd.deep import deep_scrape_github_user, save_deep_profile
    print("=" * 60)
    print("connectd scout - discovering aligned humans")
    print("=" * 60)
    # deep scrape specific user
    if args.user:
        print(f"\ndeep scraping github user: {args.user}")
        profile = deep_scrape_github_user(args.user)
        if profile:
            save_deep_profile(db, profile)
            print(f"\n=== {profile['username']} ===")
            print(f"real name: {profile.get('real_name')}")
            print(f"location: {profile.get('location')}")
            print(f"company: {profile.get('company')}")
            print(f"email: {profile.get('email')}")
            print(f"twitter: {profile.get('twitter')}")
            print(f"mastodon: {profile.get('mastodon')}")
            print(f"orgs: {', '.join(profile.get('orgs', []))}")
            print(f"languages: {', '.join(list(profile.get('languages', {}).keys())[:5])}")
            print(f"topics: {', '.join(profile.get('topics', [])[:10])}")
            print(f"signals: {', '.join(profile.get('signals', []))}")
            print(f"score: {profile.get('score')}")
            if profile.get('linked_profiles'):
                print(f"linked profiles: {list(profile['linked_profiles'].keys())}")
        else:
            print("failed to scrape user")
        return
    # no platform flag at all => run every scraper
    run_all = not any([args.github, args.reddit, args.mastodon, args.lobsters, args.matrix, args.twitter, args.bluesky, args.lemmy, args.discord])
    if args.github or run_all:
        if args.deep:
            # deep scrape mode - slower but more thorough
            print("\nrunning DEEP github scrape (follows all links)...")
            from scoutd.github import get_repo_contributors
            from scoutd.signals import ECOSYSTEM_REPOS
            all_logins = set()
            for repo in ECOSYSTEM_REPOS[:5]:  # limit for deep mode
                contributors = get_repo_contributors(repo, per_page=20)
                for c in contributors:
                    login = c.get('login')
                    if login and not login.endswith('[bot]'):
                        all_logins.add(login)
                print(f" {repo}: {len(contributors)} contributors")
            print(f"\ndeep scraping {len(all_logins)} users...")
            for login in all_logins:
                try:
                    profile = deep_scrape_github_user(login)
                    if profile and profile.get('score', 0) > 0:
                        save_deep_profile(db, profile)
                        if profile['score'] >= 30:
                            print(f"{login}: {profile['score']} pts")
                            if profile.get('email'):
                                print(f" email: {profile['email']}")
                            if profile.get('mastodon'):
                                print(f" mastodon: {profile['mastodon']}")
                except Exception as e:
                    print(f" error on {login}: {e}")
        else:
            scrape_github(db)
    if args.reddit or run_all:
        scrape_reddit(db)
    if args.mastodon or run_all:
        scrape_mastodon(db)
    if args.lobsters or run_all:
        scrape_lobsters(db)
    if args.matrix or run_all:
        scrape_matrix(db)
    if args.twitter or run_all:
        from scoutd.twitter import scrape_twitter
        scrape_twitter(db)
    if args.bluesky or run_all:
        from scoutd.bluesky import scrape_bluesky
        scrape_bluesky(db)
    if args.lemmy or run_all:
        from scoutd.lemmy import scrape_lemmy
        scrape_lemmy(db)
    if args.discord or run_all:
        from scoutd.discord import scrape_discord
        scrape_discord(db)
    # show stats
    stats = db.stats()
    print("\n" + "=" * 60)
    print("SCOUT COMPLETE")
    print("=" * 60)
    print(f"total humans: {stats['total_humans']}")
    for platform, count in stats.get('by_platform', {}).items():
        print(f" {platform}: {count}")
    # lost builder stats — always shown now; the dead `if args.lost or True:`
    # guard has been removed (--lost flag kept for interface compatibility)
    print("\n--- lost builder stats ---")
    print(f"active builders: {stats.get('active_builders', 0)}")
    print(f"lost builders: {stats.get('lost_builders', 0)}")
    print(f"recovering builders: {stats.get('recovering_builders', 0)}")
    print(f"high lost score (40+): {stats.get('high_lost_score', 0)}")
    print(f"lost outreach sent: {stats.get('lost_outreach_sent', 0)}")
def cmd_match(args, db):
    """find and rank matches.

    modes (from args):
      --lost          pair lost builders with inspiring active builders
      --mine          show stored matches for the configured priority user(s)
      --top N (alone) display the existing top N matches without re-matching
      default         run full matching across all discovered humans
    """
    import json as json_mod
    print("=" * 60)
    print("connectd match - finding aligned pairs")
    print("=" * 60)
    # lost builder matching
    if args.lost:
        print("\n--- LOST BUILDER MATCHING ---")
        print("finding inspiring builders for lost souls...\n")
        matches, error = find_matches_for_lost_builders(db, limit=args.top or 20)
        if error:
            print(f"error: {error}")
            return
        if not matches:
            print("no lost builders ready for outreach")
            return
        print(f"found {len(matches)} lost builders with matching active builders\n")
        for i, match in enumerate(matches, 1):
            lost = match['lost_user']
            builder = match['inspiring_builder']
            lost_name = lost.get('name') or lost.get('username')
            builder_name = builder.get('name') or builder.get('username')
            print(f"{i}. {lost_name} ({lost.get('platform')}) → needs inspiration from")
            print(f" {builder_name} ({builder.get('platform')})")
            print(f" lost score: {lost.get('lost_potential_score', 0)} | values: {lost.get('score', 0)}")
            print(f" shared interests: {', '.join(match.get('shared_interests', []))}")
            print(f" builder has: {match.get('builder_repos', 0)} repos, {match.get('builder_stars', 0)} stars")
            print()
        return
    if args.mine:
        # show matches for priority user
        init_users_table(db.conn)
        users = get_priority_users(db.conn)
        if not users:
            print("no priority user configured. run: connectd user --setup")
            return
        for user in users:
            print(f"\n=== matches for {user['name']} ===\n")
            matches = get_priority_user_matches(db.conn, user['id'], limit=args.top or 20)
            if not matches:
                print("no matches yet - run: connectd scout && connectd match")
                continue
            for i, match in enumerate(matches, 1):
                print(f"{i}. {match['username']} ({match['platform']})")
                print(f" score: {match['overlap_score']:.0f}")
                print(f" url: {match['url']}")
                # overlap_reasons may be a JSON-encoded string from the db
                reasons = match.get('overlap_reasons', '[]')
                if isinstance(reasons, str):
                    reasons = json_mod.loads(reasons)
                if reasons:
                    print(f" why: {reasons[0]}")
                print()
        return
    # NOTE(review): `not args.mine` is always true here — the --mine branch
    # returned above; the extra check is redundant but harmless
    if args.top and not args.mine:
        # just show existing top matches
        matches = get_top_matches(db, limit=args.top)
    else:
        # run full matching
        matches = find_all_matches(db, min_score=args.min_score, min_overlap=args.min_overlap)
    print("\n" + "-" * 60)
    print(f"TOP {min(len(matches), args.top or 20)} MATCHES")
    print("-" * 60)
    for i, match in enumerate(matches[:args.top or 20], 1):
        human_a = match.get('human_a', {})
        human_b = match.get('human_b', {})
        print(f"\n{i}. {human_a.get('username')} <-> {human_b.get('username')}")
        print(f" platforms: {human_a.get('platform')} / {human_b.get('platform')}")
        print(f" overlap: {match.get('overlap_score', 0):.0f} pts")
        reasons = match.get('overlap_reasons', [])
        if isinstance(reasons, str):
            reasons = json_mod.loads(reasons)
        if reasons:
            print(f" why: {' | '.join(reasons[:3])}")
        if match.get('geographic_match'):
            print(f" location: compatible ✓")
def cmd_intro(args, db):
    """generate intro drafts.

    args flags: --dry-run (preview without saving), --lost (encouragement
    intros for lost builders), --match ID (single match), --limit N
    """
    import json as json_mod
    print("=" * 60)
    print("connectd intro - drafting introductions")
    print("=" * 60)
    if args.dry_run:
        print("*** DRY RUN MODE - previewing only ***\n")
    # lost builder intros - different tone entirely
    if args.lost:
        print("\n--- LOST BUILDER INTROS ---")
        print("drafting encouragement for lost souls...\n")
        matches, error = find_matches_for_lost_builders(db, limit=args.limit or 10)
        if error:
            print(f"error: {error}")
            return
        if not matches:
            print("no lost builders ready for outreach")
            return
        config = get_lost_intro_config()
        count = 0
        for match in matches:
            lost = match['lost_user']
            builder = match['inspiring_builder']
            lost_name = lost.get('name') or lost.get('username')
            builder_name = builder.get('name') or builder.get('username')
            # draft intro
            draft, error = draft_lost_intro(lost, builder, config)
            if error:
                print(f" error drafting intro for {lost_name}: {error}")
                continue
            if args.dry_run:
                print("=" * 60)
                print(f"TO: {lost_name} ({lost.get('platform')})")
                print(f"LOST SCORE: {lost.get('lost_potential_score', 0)}")
                print(f"INSPIRING: {builder_name} ({builder.get('url')})")
                print("-" * 60)
                print("MESSAGE:")
                print(draft)
                print("-" * 60)
                print("[DRY RUN - NOT SAVED]")
                print("=" * 60)
            else:
                # fixed: names ran together ("{lost_name}{builder_name}");
                # use the → separator as in the lost-match listing
                print(f" drafted intro for {lost_name} → {builder_name}")
            count += 1
        if args.dry_run:
            print(f"\npreviewed {count} lost builder intros (dry run)")
        else:
            print(f"\ndrafted {count} lost builder intros")
            print("these require manual review before sending")
        return
    if args.match:
        # specific match
        matches = [m for m in get_top_matches(db, limit=1000) if m.get('id') == args.match]
    else:
        # top matches
        matches = get_top_matches(db, limit=args.limit or 10)
    if not matches:
        print("no matches found")
        return
    print(f"generating intros for {len(matches)} matches...")
    count = 0
    for match in matches:
        intros = draft_intros_for_match(match)
        for intro in intros:
            recipient = intro['recipient_human']
            other = intro['other_human']
            if args.dry_run:
                # get contact info (may be a JSON-encoded string from the db)
                contact = recipient.get('contact', {})
                if isinstance(contact, str):
                    contact = json_mod.loads(contact)
                email = contact.get('email', 'no email')
                # get overlap reasons
                reasons = match.get('overlap_reasons', [])
                if isinstance(reasons, str):
                    reasons = json_mod.loads(reasons)
                reason_summary = ', '.join(reasons[:3]) if reasons else 'aligned values'
                # print preview
                print("\n" + "=" * 60)
                print(f"TO: {recipient.get('username')} ({recipient.get('platform')})")
                print(f"EMAIL: {email}")
                print(f"SUBJECT: you might want to meet {other.get('username')}")
                print(f"SCORE: {match.get('overlap_score', 0):.0f} ({reason_summary})")
                print("-" * 60)
                print("MESSAGE:")
                print(intro['draft'])
                print("-" * 60)
                print("[DRY RUN - NOT SENT]")
                print("=" * 60)
            else:
                print(f"\n {recipient.get('username')} ({intro['channel']})")
                # save to db
                db.save_intro(
                    match.get('id'),
                    recipient.get('id'),
                    intro['channel'],
                    intro['draft']
                )
            count += 1
    if args.dry_run:
        print(f"\npreviewed {count} intros (dry run - nothing saved)")
    else:
        print(f"\ngenerated {count} intro drafts")
        print("run 'connectd review' to approve before sending")
def cmd_review(args, db):
    """interactive review queue for pending intros (delegates to introd.review)"""
    review_all_pending(db)
def cmd_send(args, db):
    """send approved intros from the manual queue.

    --export FILE: dump pending (unapproved) intros to FILE as JSON and exit.
    Otherwise deliver every 'approved' queue item, marking each 'sent' or
    'failed' in the persisted queue.
    """
    import json as json_mod
    if args.export:
        # export manual queue to file for review
        queue = load_manual_queue()
        pending = [q for q in queue if q.get('status') == 'pending']
        with open(args.export, 'w') as f:
            # fixed: was `json.dump(...)` — `json` is never imported in this
            # module (only the local `json_mod` alias), so this raised NameError
            json_mod.dump(pending, f, indent=2)
        print(f"exported {len(pending)} pending intros to {args.export}")
        return
    # send all approved from manual queue
    queue = load_manual_queue()
    approved = [q for q in queue if q.get('status') == 'approved']
    if not approved:
        print("no approved intros to send")
        print("use 'connectd review' to approve intros first")
        return
    print(f"sending {len(approved)} approved intros...")
    for item in approved:
        match_data = item.get('match', {})
        intro_draft = item.get('draft', '')
        recipient = item.get('recipient', {})
        success, error, method = deliver_intro(
            {'human_b': recipient, **match_data},
            intro_draft,
            # idiomatic replacement for `args.dry_run if hasattr(...) else False`
            dry_run=getattr(args, 'dry_run', False)
        )
        status = 'ok' if success else f'failed: {error}'
        print(f" {recipient.get('username')}: {method} - {status}")
        # update queue status
        item['status'] = 'sent' if success else 'failed'
        item['error'] = error
    save_manual_queue(queue)
    # show stats
    stats = get_delivery_stats()
    print(f"\ndelivery stats: {stats['sent']} sent, {stats['failed']} failed")
def cmd_lost(args, db):
    """show lost builders ready for outreach.

    args: --min-score (lost-score threshold, default 40), --limit (default 50),
    --verbose (print the outreach philosophy footer)
    """
    import json as json_mod
    print("=" * 60)
    print("connectd lost - lost builders who need encouragement")
    print("=" * 60)
    # get lost builders
    lost_builders = db.get_lost_builders_for_outreach(
        min_lost_score=args.min_score or 40,
        min_values_score=20,
        limit=args.limit or 50
    )
    if not lost_builders:
        print("\nno lost builders ready for outreach")
        print("run 'connectd scout' to discover more")
        return
    print(f"\n{len(lost_builders)} lost builders ready for outreach:\n")
    for i, lost in enumerate(lost_builders, 1):
        name = lost.get('name') or lost.get('username')
        platform = lost.get('platform')
        lost_score = lost.get('lost_potential_score', 0)
        values_score = lost.get('score', 0)
        # parse lost signals (may be a JSON-encoded string from the db)
        lost_signals = lost.get('lost_signals', [])
        if isinstance(lost_signals, str):
            lost_signals = json_mod.loads(lost_signals) if lost_signals else []
        # get human-readable signal descriptions
        signal_descriptions = get_signal_descriptions(lost_signals)
        print(f"{i}. {name} ({platform})")
        print(f" lost score: {lost_score} | values score: {values_score}")
        print(f" url: {lost.get('url')}")
        if signal_descriptions:
            print(f" why lost: {', '.join(signal_descriptions[:3])}")
        print()
    if args.verbose:
        print("-" * 60)
        print("these people need encouragement, not networking.")
        print("the goal: show them someone like them made it.")
        print("-" * 60)
def cmd_status(args, db):
    """show database stats: priority users, humans per platform, matches,
    intros, and lost-builder counters."""
    import json as json_mod  # NOTE(review): unused here; kept for parity with sibling commands
    init_users_table(db.conn)
    stats = db.stats()
    print("=" * 60)
    print("connectd status")
    print("=" * 60)
    # priority users
    users = get_priority_users(db.conn)
    print(f"\npriority users: {len(users)}")
    for user in users:
        print(f" - {user['name']} ({user['email']})")
    print(f"\nhumans discovered: {stats['total_humans']}")
    print(f" high-score (50+): {stats['high_score_humans']}")
    print("\nby platform:")
    for platform, count in stats.get('by_platform', {}).items():
        print(f" {platform}: {count}")
    print(f"\nstranger matches: {stats['total_matches']}")
    print(f"intros created: {stats['total_intros']}")
    print(f"intros sent: {stats['sent_intros']}")
    # lost builder stats
    print("\n--- lost builder stats ---")
    print(f"active builders: {stats.get('active_builders', 0)}")
    print(f"lost builders: {stats.get('lost_builders', 0)}")
    print(f"recovering builders: {stats.get('recovering_builders', 0)}")
    print(f"high lost score (40+): {stats.get('high_lost_score', 0)}")
    print(f"lost outreach sent: {stats.get('lost_outreach_sent', 0)}")
    # priority user matches
    for user in users:
        matches = get_priority_user_matches(db.conn, user['id'])
        print(f"\nmatches for {user['name']}: {len(matches)}")
    # pending intros
    pending = get_pending_intros(db)
    print(f"\nintros pending review: {len(pending)}")
def cmd_daemon(args, db):
    """run as continuous daemon.

    --oneshot: run a single scout/match/intro cycle, then exit
    --dry-run: go through the motions but never send intros
    """
    from daemon import ConnectDaemon
    daemon = ConnectDaemon(dry_run=args.dry_run)
    if args.oneshot:
        print("running one cycle...")
        if args.dry_run:
            print("*** DRY RUN MODE - no intros will be sent ***")
        daemon.scout_cycle()
        daemon.match_priority_users()
        daemon.match_strangers()
        daemon.send_stranger_intros()
        print("done")
    else:
        # blocking: loops until killed
        daemon.run()
def cmd_user(args, db):
    """manage priority user profile.

    --setup: interactive prompt to create/update the profile
    --matches: list stored matches for each priority user
    (no flag): print the stored profile(s)
    """
    import json as json_mod
    init_users_table(db.conn)
    if args.setup:
        # interactive setup
        print("=" * 60)
        print("connectd priority user setup")
        print("=" * 60)
        print("\nlink your profiles so connectd finds matches for YOU\n")
        name = input("name: ").strip()
        email = input("email: ").strip()
        github = input("github username: ").strip() or None
        reddit = input("reddit username: ").strip() or None
        mastodon = input("mastodon (user@instance): ").strip() or None
        location = input("location (e.g. seattle): ").strip() or None
        print("\ninterests (comma separated):")
        interests_raw = input("> ").strip()
        interests = [i.strip() for i in interests_raw.split(',')] if interests_raw else []
        looking_for = input("looking for: ").strip() or None
        user_data = {
            'name': name, 'email': email, 'github': github,
            'reddit': reddit, 'mastodon': mastodon,
            'location': location, 'interests': interests,
            'looking_for': looking_for,
        }
        user_id = add_priority_user(db.conn, user_data)
        print(f"\n✓ added as priority user #{user_id}")
    elif args.matches:
        # show matches
        users = get_priority_users(db.conn)
        if not users:
            print("no priority user. run: connectd user --setup")
            return
        for user in users:
            print(f"\n=== matches for {user['name']} ===\n")
            matches = get_priority_user_matches(db.conn, user['id'], limit=20)
            if not matches:
                print("no matches yet")
                continue
            for i, match in enumerate(matches, 1):
                print(f"{i}. {match['username']} ({match['platform']})")
                print(f" {match['url']}")
                print(f" score: {match['overlap_score']:.0f}")
                print()
    else:
        # show profile
        users = get_priority_users(db.conn)
        if not users:
            print("no priority user configured")
            print("run: connectd user --setup")
            return
        for user in users:
            print("=" * 60)
            print(f"priority user #{user['id']}: {user['name']}")
            print("=" * 60)
            print(f"email: {user['email']}")
            if user['github']:
                print(f"github: {user['github']}")
            if user['reddit']:
                print(f"reddit: {user['reddit']}")
            if user['mastodon']:
                print(f"mastodon: {user['mastodon']}")
            if user['location']:
                print(f"location: {user['location']}")
            if user['interests']:
                # interests may be stored as a JSON-encoded string
                interests = json_mod.loads(user['interests']) if isinstance(user['interests'], str) else user['interests']
                print(f"interests: {', '.join(interests)}")
            if user['looking_for']:
                print(f"looking for: {user['looking_for']}")
def cmd_me(args, db):
    """auto-score and auto-match for the priority user, with optional groq intros.

    pipeline: [1] scrape github profile (unless --skip-scrape) →
    [2] score → [3] match → [4] display top matches, optionally drafting
    an LLM intro per match when --groq is set.
    """
    import json as json_mod
    init_users_table(db.conn)
    # get priority user
    users = get_priority_users(db.conn)
    if not users:
        print("no priority user configured")
        print("run: connectd user --setup")
        return
    user = users[0]  # first/main user
    print("=" * 60)
    print(f"connectd me - {user['name']}")
    print("=" * 60)
    # step 1: scrape github profile
    if user.get('github') and not args.skip_scrape:
        print(f"\n[1/4] scraping github profile: {user['github']}")
        profile = deep_scrape_github_user(user['github'], scrape_commits=False)
        if profile:
            print(f" repos: {len(profile.get('top_repos', []))}")
            print(f" languages: {', '.join(list(profile.get('languages', {}).keys())[:5])}")
        else:
            print(" failed to scrape (rate limited?)")
            profile = None
    else:
        print("\n[1/4] skipping github scrape (using saved profile)")
        # use saved profile if available (may be a JSON-encoded string)
        saved = user.get('scraped_profile')
        if saved:
            profile = json_mod.loads(saved) if isinstance(saved, str) else saved
            print(f" loaded saved profile: {len(profile.get('top_repos', []))} repos")
        else:
            profile = None
    # step 2: calculate score
    print(f"\n[2/4] calculating your score...")
    result = score_priority_user(db.conn, user['id'], profile)
    if result:
        print(f" score: {result['score']}")
        print(f" signals: {', '.join(sorted(result['signals'])[:10])}")
    # step 3: find matches
    print(f"\n[3/4] finding matches...")
    matches = auto_match_priority_user(db.conn, user['id'], min_overlap=args.min_overlap)
    print(f" found {len(matches)} matches")
    # step 4: show results (optionally with groq intros)
    print(f"\n[4/4] top matches:")
    print("-" * 60)
    limit = args.limit or 10
    for i, m in enumerate(matches[:limit], 1):
        human = m['human']
        shared = m['shared']
        print(f"\n{i}. {human.get('name') or human['username']} ({human['platform']})")
        print(f" {human.get('url', '')}")
        print(f" score: {human.get('score', 0):.0f} | overlap: {m['overlap_score']:.0f}")
        print(f" location: {human.get('location') or 'unknown'}")
        print(f" why: {', '.join(shared[:5])}")
        # groq intro draft (best-effort: any failure is reported, not fatal)
        if args.groq:
            try:
                from introd.groq_draft import draft_intro_with_llm
                match_data = {
                    'human_a': {'name': user['name'], 'username': user.get('github'),
                                'platform': 'github', 'signals': result.get('signals', []) if result else [],
                                'bio': user.get('bio'), 'location': user.get('location'),
                                'extra': profile or {}},
                    'human_b': human,
                    'overlap_score': m['overlap_score'],
                    'overlap_reasons': shared,
                }
                intro, err = draft_intro_with_llm(match_data, recipient='b')
                if intro:
                    print(f"\n --- groq draft ({intro.get('contact_method', 'manual')}) ---")
                    if intro.get('contact_info'):
                        print(f" deliver via: {intro['contact_info']}")
                    for line in intro['draft'].split('\n'):
                        print(f" {line}")
                    print(f" ------------------")
                elif err:
                    print(f" [groq error: {err}]")
            except Exception as e:
                print(f" [groq error: {e}]")
    # summary
    print("\n" + "=" * 60)
    print(f"your score: {result['score'] if result else 'unknown'}")
    print(f"matches found: {len(matches)}")
    if args.groq:
        print("groq intros: enabled")
    else:
        print("tip: add --groq to generate ai intro drafts")
def main():
    """parse the command line and dispatch to the matching cmd_* handler.

    the dispatch is table-driven (command name -> handler); every handler
    has the signature handler(args, db). parsers that take no extra
    options are registered without binding an unused local.
    """
    parser = argparse.ArgumentParser(
        description='connectd - people discovery and matchmaking daemon',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    subparsers = parser.add_subparsers(dest='command', help='commands')
    # scout command
    scout_parser = subparsers.add_parser('scout', help='discover aligned humans')
    scout_parser.add_argument('--github', action='store_true', help='github only')
    scout_parser.add_argument('--reddit', action='store_true', help='reddit only')
    scout_parser.add_argument('--mastodon', action='store_true', help='mastodon only')
    scout_parser.add_argument('--lobsters', action='store_true', help='lobste.rs only')
    scout_parser.add_argument('--matrix', action='store_true', help='matrix only')
    scout_parser.add_argument('--twitter', action='store_true', help='twitter/x via nitter')
    scout_parser.add_argument('--bluesky', action='store_true', help='bluesky/atproto')
    scout_parser.add_argument('--lemmy', action='store_true', help='lemmy (fediverse reddit)')
    scout_parser.add_argument('--discord', action='store_true', help='discord servers')
    scout_parser.add_argument('--deep', action='store_true', help='deep scrape - follow all links')
    scout_parser.add_argument('--user', type=str, help='deep scrape specific github user')
    scout_parser.add_argument('--lost', action='store_true', help='show lost builder stats')
    # match command
    match_parser = subparsers.add_parser('match', help='find and rank matches')
    match_parser.add_argument('--top', type=int, help='show top N matches')
    match_parser.add_argument('--mine', action='store_true', help='show YOUR matches')
    match_parser.add_argument('--lost', action='store_true', help='find matches for lost builders')
    match_parser.add_argument('--min-score', type=int, default=30, help='min human score')
    match_parser.add_argument('--min-overlap', type=int, default=20, help='min overlap score')
    # intro command
    intro_parser = subparsers.add_parser('intro', help='generate intro drafts')
    intro_parser.add_argument('--match', type=int, help='specific match id')
    intro_parser.add_argument('--limit', type=int, default=10, help='number of matches')
    intro_parser.add_argument('--dry-run', action='store_true', help='preview only, do not save')
    intro_parser.add_argument('--lost', action='store_true', help='generate intros for lost builders')
    # lost command - show lost builders ready for outreach
    lost_parser = subparsers.add_parser('lost', help='show lost builders who need encouragement')
    lost_parser.add_argument('--min-score', type=int, default=40, help='min lost score')
    lost_parser.add_argument('--limit', type=int, default=50, help='max results')
    lost_parser.add_argument('--verbose', '-v', action='store_true', help='show philosophy')
    # review command (no extra options)
    subparsers.add_parser('review', help='review intro queue')
    # send command
    send_parser = subparsers.add_parser('send', help='send approved intros')
    send_parser.add_argument('--export', type=str, help='export to file for manual sending')
    # status command (no extra options)
    subparsers.add_parser('status', help='show stats')
    # daemon command
    daemon_parser = subparsers.add_parser('daemon', help='run as continuous daemon')
    daemon_parser.add_argument('--oneshot', action='store_true', help='run once then exit')
    daemon_parser.add_argument('--dry-run', action='store_true', help='preview intros, do not send')
    # user command
    user_parser = subparsers.add_parser('user', help='manage priority user profile')
    user_parser.add_argument('--setup', action='store_true', help='setup/update profile')
    user_parser.add_argument('--matches', action='store_true', help='show your matches')
    # me command - auto score + match + optional groq intros
    me_parser = subparsers.add_parser('me', help='auto-score and match yourself')
    me_parser.add_argument('--groq', action='store_true', help='generate groq llama intro drafts')
    me_parser.add_argument('--skip-scrape', action='store_true', help='skip github scraping')
    me_parser.add_argument('--min-overlap', type=int, default=40, help='min overlap score')
    me_parser.add_argument('--limit', type=int, default=10, help='number of matches to show')
    args = parser.parse_args()
    if not args.command:
        parser.print_help()
        return
    # command name -> handler; args.command is always a registered name here
    handlers = {
        'scout': cmd_scout,
        'match': cmd_match,
        'intro': cmd_intro,
        'review': cmd_review,
        'send': cmd_send,
        'status': cmd_status,
        'daemon': cmd_daemon,
        'user': cmd_user,
        'me': cmd_me,
        'lost': cmd_lost,
    }
    # init database; always close it even if a handler raises
    db = Database()
    try:
        handlers[args.command](args, db)
    finally:
        db.close()
# script entry point: run the CLI only when executed directly, not on import
if __name__ == '__main__':
    main()

124
connectd/config.py Normal file
View file

@ -0,0 +1,124 @@
"""
connectd/config.py - central configuration
all configurable settings in one place.
"""
import os
from pathlib import Path
# base paths
BASE_DIR = Path(__file__).parent
DB_DIR = BASE_DIR / 'db'
DATA_DIR = BASE_DIR / 'data'
CACHE_DIR = DB_DIR / 'cache'
# ensure directories exist
DATA_DIR.mkdir(exist_ok=True)
CACHE_DIR.mkdir(exist_ok=True)
# === DAEMON CONFIG ===
# NOTE(review): daemon.py defines its own copies of these four values
# instead of importing them - confirm which copy is authoritative.
SCOUT_INTERVAL = 3600 * 4  # full scout every 4 hours
MATCH_INTERVAL = 3600  # check matches every hour
INTRO_INTERVAL = 3600 * 2  # send intros every 2 hours
MAX_INTROS_PER_DAY = 20  # rate limit builder-to-builder outreach
# === MATCHING CONFIG ===
MIN_OVERLAP_PRIORITY = 30  # min score for priority user matches
MIN_OVERLAP_STRANGERS = 50  # higher bar for stranger intros
MIN_HUMAN_SCORE = 25  # min values score to be considered
# === LOST BUILDER CONFIG ===
# these people need encouragement, not networking.
# the goal isn't to recruit them - it's to show them the door exists.
# read via get_lost_config(); mutated in place via update_lost_config().
LOST_CONFIG = {
    # detection thresholds
    'min_lost_score': 40,  # minimum lost_potential_score
    'min_values_score': 20,  # must have SOME values alignment
    # outreach settings
    'enabled': True,
    'max_per_day': 5,  # lower volume, higher care
    'require_review': False,  # fully autonomous
    'cooldown_days': 90,  # don't spam struggling people
    # matching settings
    'min_builder_score': 50,  # inspiring builders must be active
    'min_match_overlap': 10,  # must have SOME shared interests
    # LLM drafting
    'use_llm': True,
    'llm_temperature': 0.7,  # be genuine, not robotic
    # message guidelines (for LLM prompt)
    'tone': 'genuine, not salesy',
    'max_words': 150,  # they don't have energy for long messages
    'no_pressure': True,  # never pushy
    'sign_off': '- connectd',
}
# === API CREDENTIALS ===
# all credentials from environment variables - no defaults.
# empty string means "not configured"; callers must tolerate that.
GROQ_API_KEY = os.environ.get('GROQ_API_KEY', '')
GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
GROQ_MODEL = os.environ.get('GROQ_MODEL', 'llama-3.1-70b-versatile')
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '')
MASTODON_TOKEN = os.environ.get('MASTODON_TOKEN', '')
MASTODON_INSTANCE = os.environ.get('MASTODON_INSTANCE', '')
BLUESKY_HANDLE = os.environ.get('BLUESKY_HANDLE', '')
BLUESKY_APP_PASSWORD = os.environ.get('BLUESKY_APP_PASSWORD', '')
MATRIX_HOMESERVER = os.environ.get('MATRIX_HOMESERVER', '')
MATRIX_USER_ID = os.environ.get('MATRIX_USER_ID', '')
MATRIX_ACCESS_TOKEN = os.environ.get('MATRIX_ACCESS_TOKEN', '')
DISCORD_BOT_TOKEN = os.environ.get('DISCORD_BOT_TOKEN', '')
DISCORD_TARGET_SERVERS = os.environ.get('DISCORD_TARGET_SERVERS', '')
# lemmy (for authenticated access to private instance)
LEMMY_INSTANCE = os.environ.get('LEMMY_INSTANCE', '')
LEMMY_USERNAME = os.environ.get('LEMMY_USERNAME', '')
LEMMY_PASSWORD = os.environ.get('LEMMY_PASSWORD', '')
# email (for sending intros)
# NOTE(review): SMTP_PORT raises ValueError at import if the env var is
# set to a non-integer - presumably acceptable fail-fast; confirm.
SMTP_HOST = os.environ.get('SMTP_HOST', '')
SMTP_PORT = int(os.environ.get('SMTP_PORT', '465'))
SMTP_USER = os.environ.get('SMTP_USER', '')
SMTP_PASS = os.environ.get('SMTP_PASS', '')
# === HOST USER CONFIG ===
# the person running connectd - gets priority matching
HOST_USER = os.environ.get('HOST_USER', '')  # alias like sudoxnym
HOST_NAME = os.environ.get('HOST_NAME', '')
HOST_EMAIL = os.environ.get('HOST_EMAIL', '')
HOST_GITHUB = os.environ.get('HOST_GITHUB', '')
HOST_MASTODON = os.environ.get('HOST_MASTODON', '')  # user@instance
HOST_REDDIT = os.environ.get('HOST_REDDIT', '')
HOST_LEMMY = os.environ.get('HOST_LEMMY', '')  # user@instance
HOST_LOBSTERS = os.environ.get('HOST_LOBSTERS', '')
HOST_MATRIX = os.environ.get('HOST_MATRIX', '')  # @user:server
HOST_DISCORD = os.environ.get('HOST_DISCORD', '')  # user id
HOST_BLUESKY = os.environ.get('HOST_BLUESKY', '')  # handle.bsky.social
HOST_LOCATION = os.environ.get('HOST_LOCATION', '')
HOST_INTERESTS = os.environ.get('HOST_INTERESTS', '')  # comma separated
HOST_LOOKING_FOR = os.environ.get('HOST_LOOKING_FOR', '')
def get_lost_config():
    """return a defensive copy of the lost-builder settings."""
    return dict(LOST_CONFIG)
def update_lost_config(updates):
    """merge *updates* into the lost-builder settings; return a copy.

    mutates the module-level LOST_CONFIG dict in place, so no global
    statement is needed (we never rebind the name).
    """
    LOST_CONFIG.update(updates)
    return dict(LOST_CONFIG)

72
connectd/config.yaml Normal file
View file

@ -0,0 +1,72 @@
name: connectd
version: "1.1.0"
slug: connectd
description: "find isolated builders with aligned values. auto-discover humans on github, mastodon, lemmy, discord, and more."
url: "https://github.com/sudoxnym/connectd"
arch:
- amd64
- aarch64
- armv7
startup: application
boot: auto
ports:
8099/tcp: 8099
ports_description:
8099/tcp: "connectd API (for HACS integration)"
map:
- config:rw
options:
  host_user: ""
  host_name: ""
  host_email: ""
  host_github: ""
  host_mastodon: ""
  host_reddit: ""
  host_lemmy: ""
  host_lobsters: ""
  host_matrix: ""
  host_discord: ""
  host_bluesky: ""
  host_location: ""
  host_interests: ""
  host_looking_for: ""
  github_token: ""
  groq_api_key: ""
  mastodon_token: ""
  mastodon_instance: ""
  discord_bot_token: ""
  discord_target_servers: ""
  lemmy_instance: ""
  lemmy_username: ""
  lemmy_password: ""
  smtp_host: ""
  smtp_port: 465
  smtp_user: ""
  smtp_pass: ""
schema:
  host_user: str?
  host_name: str?
  host_email: email?
  host_github: str?
  host_mastodon: str?
  host_reddit: str?
  host_lemmy: str?
  host_lobsters: str?
  host_matrix: str?
  host_discord: str?
  host_bluesky: str?
  host_location: str?
  host_interests: str?
  host_looking_for: str?
  github_token: str?
  groq_api_key: str?
  mastodon_token: str?
  mastodon_instance: str?
  discord_bot_token: str?
  discord_target_servers: str?
  lemmy_instance: str?
  lemmy_username: str?
  lemmy_password: str?
  smtp_host: str?
  smtp_port: int?
  smtp_user: str?
  smtp_pass: str?
image: sudoxreboot/connectd-addon-{arch}

546
connectd/daemon.py Normal file
View file

@ -0,0 +1,546 @@
#!/usr/bin/env python3
"""
connectd daemon - continuous discovery and matchmaking
two modes of operation:
1. priority matching: find matches FOR hosts who run connectd
2. altruistic matching: connect strangers to each other
runs continuously, respects rate limits, sends intros automatically
"""
import time
import json
import signal
import sys
from datetime import datetime, timedelta
from pathlib import Path
from db import Database
from db.users import (init_users_table, get_priority_users, save_priority_match,
get_priority_user_matches, discover_host_user)
from scoutd import scrape_github, scrape_reddit, scrape_mastodon, scrape_lobsters, scrape_lemmy, scrape_discord
from config import HOST_USER
from scoutd.github import analyze_github_user, get_github_user
from scoutd.signals import analyze_text
from matchd.fingerprint import generate_fingerprint, fingerprint_similarity
from matchd.overlap import find_overlap
from matchd.lost import find_matches_for_lost_builders
from introd.draft import draft_intro, summarize_human, summarize_overlap
from introd.lost_intro import draft_lost_intro, get_lost_intro_config
from introd.send import send_email
from introd.deliver import deliver_intro, determine_best_contact
from config import get_lost_config
from api import start_api_thread, update_daemon_state
# daemon config
# NOTE(review): these duplicate the values in config.py (SCOUT_INTERVAL etc.);
# confirm which copy is authoritative before changing either.
SCOUT_INTERVAL = 3600 * 4  # full scout every 4 hours
MATCH_INTERVAL = 3600  # check matches every hour
INTRO_INTERVAL = 3600 * 2  # send intros every 2 hours
LOST_INTERVAL = 3600 * 6  # lost builder outreach every 6 hours (lower volume)
MAX_INTROS_PER_DAY = 20  # rate limit outreach
MIN_OVERLAP_PRIORITY = 30  # min score for priority user matches
MIN_OVERLAP_STRANGERS = 50  # higher bar for stranger intros
class ConnectDaemon:
    """continuous discovery and matchmaking daemon.

    four cycles run on independent timers inside run():
      - scout: discover humans on all platforms
      - match: priority-user matching + stranger-to-stranger matching
      - intro: email intros between matched strangers
      - lost:  low-volume encouragement outreach to lost builders
    state is mirrored to the HTTP api (port 8099) for the HA integration.
    """

    def __init__(self, dry_run=False):
        # dry_run: preview intros instead of sending them
        self.db = Database()
        init_users_table(self.db.conn)
        self.running = True
        self.dry_run = dry_run
        self.started_at = datetime.now()
        # last-run timestamps per cycle; None means "never ran"
        self.last_scout = None
        self.last_match = None
        self.last_intro = None
        self.last_lost = None
        # daily rate-limit counters, reset when the date rolls over
        self.intros_today = 0
        self.lost_intros_today = 0
        self.today = datetime.now().date()
        # handle shutdown gracefully
        signal.signal(signal.SIGINT, self._shutdown)
        signal.signal(signal.SIGTERM, self._shutdown)
        # auto-discover host user from env
        if HOST_USER:
            self.log(f"HOST_USER set: {HOST_USER}")
            discover_host_user(self.db.conn, HOST_USER)
        # update API state
        self._update_api_state()

    def _shutdown(self, signum, frame):
        """signal handler: flag the main loop to exit after its current sleep."""
        print("\nconnectd: shutting down...")
        self.running = False
        self._update_api_state()

    def _update_api_state(self):
        """update API state for HA integration"""
        now = datetime.now()

        # calculate countdowns - if no cycle has run, use started_at
        def secs_until(last, interval):
            base = last if last else self.started_at
            next_run = base + timedelta(seconds=interval)
            remaining = (next_run - now).total_seconds()
            return max(0, int(remaining))

        update_daemon_state({
            'running': self.running,
            'dry_run': self.dry_run,
            'last_scout': self.last_scout.isoformat() if self.last_scout else None,
            'last_match': self.last_match.isoformat() if self.last_match else None,
            'last_intro': self.last_intro.isoformat() if self.last_intro else None,
            'last_lost': self.last_lost.isoformat() if self.last_lost else None,
            'intros_today': self.intros_today,
            'lost_intros_today': self.lost_intros_today,
            'started_at': self.started_at.isoformat(),
            'countdown_scout': secs_until(self.last_scout, SCOUT_INTERVAL),
            'countdown_match': secs_until(self.last_match, MATCH_INTERVAL),
            'countdown_intro': secs_until(self.last_intro, INTRO_INTERVAL),
            'countdown_lost': secs_until(self.last_lost, LOST_INTERVAL),
        })

    def log(self, msg):
        """timestamped log"""
        print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {msg}")

    def reset_daily_limits(self):
        """reset daily intro count when the calendar date changes"""
        if datetime.now().date() != self.today:
            self.today = datetime.now().date()
            self.intros_today = 0
            self.lost_intros_today = 0
            self.log("reset daily intro limits")

    def scout_cycle(self):
        """run discovery on all platforms; each scraper fails independently"""
        self.log("starting scout cycle...")
        try:
            scrape_github(self.db, limit_per_source=30)
        except Exception as e:
            self.log(f"github scout error: {e}")
        try:
            scrape_reddit(self.db, limit_per_sub=30)
        except Exception as e:
            self.log(f"reddit scout error: {e}")
        try:
            scrape_mastodon(self.db, limit_per_instance=30)
        except Exception as e:
            self.log(f"mastodon scout error: {e}")
        try:
            scrape_lobsters(self.db)
        except Exception as e:
            self.log(f"lobsters scout error: {e}")
        try:
            scrape_lemmy(self.db, limit_per_community=30)
        except Exception as e:
            self.log(f"lemmy scout error: {e}")
        try:
            scrape_discord(self.db, limit_per_channel=50)
        except Exception as e:
            self.log(f"discord scout error: {e}")
        self.last_scout = datetime.now()
        stats = self.db.stats()
        self.log(f"scout complete: {stats['total_humans']} humans in db")

    def match_priority_users(self):
        """find matches for priority users (hosts)"""
        priority_users = get_priority_users(self.db.conn)
        if not priority_users:
            return
        self.log(f"matching for {len(priority_users)} priority users...")
        humans = self.db.get_all_humans(min_score=20, limit=500)
        for puser in priority_users:
            # build the priority user's signal list from their linked profiles
            puser_signals = []
            puser_text = []
            if puser.get('bio'):
                puser_text.append(puser['bio'])
            if puser.get('interests'):
                interests = json.loads(puser['interests']) if isinstance(puser['interests'], str) else puser['interests']
                puser_signals.extend(interests)
            if puser.get('looking_for'):
                puser_text.append(puser['looking_for'])
            # analyze their linked github if available
            if puser.get('github'):
                gh_user = analyze_github_user(puser['github'])
                if gh_user:
                    puser_signals.extend(gh_user.get('signals', []))
            # score free text (bio + looking_for) into additional signals.
            # (a local fingerprint dict was previously built here but never
            # used or persisted, so it has been removed.)
            if puser_text:
                _, text_signals, _ = analyze_text(' '.join(puser_text))
                puser_signals.extend(text_signals)
            # find matches
            matches_found = 0
            for human in humans:
                # skip if it's their own profile on another platform
                human_user = human.get('username', '').lower()
                if puser.get('github') and human_user == puser['github'].lower():
                    continue
                if puser.get('reddit') and human_user == puser['reddit'].lower():
                    continue
                if puser.get('mastodon') and human_user == puser['mastodon'].lower().split('@')[0]:
                    continue
                # calculate overlap: 10 points per shared signal
                human_signals = human.get('signals', [])
                if isinstance(human_signals, str):
                    human_signals = json.loads(human_signals)
                shared = set(puser_signals) & set(human_signals)
                overlap_score = len(shared) * 10
                # location bonus
                if puser.get('location') and human.get('location'):
                    if 'seattle' in human['location'].lower() or 'pnw' in human['location'].lower():
                        overlap_score += 20
                if overlap_score >= MIN_OVERLAP_PRIORITY:
                    overlap_data = {
                        'overlap_score': overlap_score,
                        'overlap_reasons': [f"shared: {', '.join(list(shared)[:5])}"] if shared else [],
                    }
                    save_priority_match(self.db.conn, puser['id'], human['id'], overlap_data)
                    matches_found += 1
            if matches_found:
                self.log(f"  found {matches_found} matches for {puser['name'] or puser['email']}")

    def match_strangers(self):
        """find matches between discovered humans (altruistic)"""
        self.log("matching strangers...")
        humans = self.db.get_all_humans(min_score=40, limit=200)
        if len(humans) < 2:
            # NOTE(review): returning here leaves last_match unset, so the
            # match cycle re-runs every loop until >= 2 humans exist - confirm
            # that retry behavior is intended.
            return
        # generate fingerprints
        fingerprints = {}
        for human in humans:
            fp = generate_fingerprint(human)
            fingerprints[human['id']] = fp
        # find pairs
        matches_found = 0
        from itertools import combinations
        for human_a, human_b in combinations(humans, 2):
            # skip same platform same user
            if human_a['platform'] == human_b['platform']:
                if human_a['username'] == human_b['username']:
                    continue
            fp_a = fingerprints.get(human_a['id'])
            fp_b = fingerprints.get(human_b['id'])
            overlap = find_overlap(human_a, human_b, fp_a, fp_b)
            if overlap['overlap_score'] >= MIN_OVERLAP_STRANGERS:
                # save match
                self.db.save_match(human_a['id'], human_b['id'], overlap)
                matches_found += 1
        if matches_found:
            self.log(f"found {matches_found} stranger matches")
        self.last_match = datetime.now()

    def send_stranger_intros(self):
        """send intros to connect strangers (or preview in dry-run mode)"""
        self.reset_daily_limits()
        if not self.dry_run and self.intros_today >= MAX_INTROS_PER_DAY:
            self.log("daily intro limit reached")
            return
        # get unsent matches, best overlap first
        c = self.db.conn.cursor()
        c.execute('''SELECT m.*,
                     ha.id as a_id, ha.username as a_user, ha.platform as a_platform,
                     ha.name as a_name, ha.url as a_url, ha.contact as a_contact,
                     ha.signals as a_signals, ha.extra as a_extra,
                     hb.id as b_id, hb.username as b_user, hb.platform as b_platform,
                     hb.name as b_name, hb.url as b_url, hb.contact as b_contact,
                     hb.signals as b_signals, hb.extra as b_extra
                     FROM matches m
                     JOIN humans ha ON m.human_a_id = ha.id
                     JOIN humans hb ON m.human_b_id = hb.id
                     WHERE m.status = 'pending'
                     ORDER BY m.overlap_score DESC
                     LIMIT 10''')
        matches = c.fetchall()
        if self.dry_run:
            self.log(f"DRY RUN: previewing {len(matches)} potential intros")
        for match in matches:
            if not self.dry_run and self.intros_today >= MAX_INTROS_PER_DAY:
                break
            match = dict(match)
            # build human dicts
            human_a = {
                'id': match['a_id'],
                'username': match['a_user'],
                'platform': match['a_platform'],
                'name': match['a_name'],
                'url': match['a_url'],
                'contact': match['a_contact'],
                'signals': match['a_signals'],
                'extra': match['a_extra'],
            }
            human_b = {
                'id': match['b_id'],
                'username': match['b_user'],
                'platform': match['b_platform'],
                'name': match['b_name'],
                'url': match['b_url'],
                'contact': match['b_contact'],
                'signals': match['b_signals'],
                'extra': match['b_extra'],
            }
            match_data = {
                'id': match['id'],
                'human_a': human_a,
                'human_b': human_b,
                'overlap_score': match['overlap_score'],
                'overlap_reasons': match['overlap_reasons'],
            }
            # try to send intro to whichever side has an email address
            for recipient, other in [(human_a, human_b), (human_b, human_a)]:
                contact = recipient.get('contact', {})
                if isinstance(contact, str):
                    contact = json.loads(contact)
                email = contact.get('email')
                if not email:
                    continue
                # draft intro
                intro = draft_intro(match_data, recipient='a' if recipient == human_a else 'b')
                # parse overlap reasons for display
                reasons = match['overlap_reasons']
                if isinstance(reasons, str):
                    reasons = json.loads(reasons)
                reason_summary = ', '.join(reasons[:3]) if reasons else 'aligned values'
                if self.dry_run:
                    # print preview
                    print("\n" + "=" * 60)
                    print(f"TO: {recipient['username']} ({recipient['platform']})")
                    print(f"EMAIL: {email}")
                    print(f"SUBJECT: you might want to meet {other['username']}")
                    print(f"SCORE: {match['overlap_score']:.0f} ({reason_summary})")
                    print("-" * 60)
                    print("MESSAGE:")
                    print(intro['draft'])
                    print("-" * 60)
                    print("[DRY RUN - NOT SENT]")
                    print("=" * 60)
                    break
                else:
                    # actually send
                    success, error = send_email(
                        email,
                        f"connectd: you might want to meet {other['username']}",
                        intro['draft']
                    )
                    if success:
                        self.log(f"sent intro to {recipient['username']} ({email})")
                        self.intros_today += 1
                        # mark match as intro_sent.
                        # (parameterized instead of SQL double quotes - SQLite
                        # only accepted "intro_sent" via a quoting misfeature.)
                        c.execute('UPDATE matches SET status = ? WHERE id = ?',
                                  ('intro_sent', match['id']))
                        self.db.conn.commit()
                        break
                    else:
                        self.log(f"failed to send to {email}: {error}")
        self.last_intro = datetime.now()

    def send_lost_builder_intros(self):
        """
        reach out to lost builders - different tone, lower volume.
        these people need encouragement, not networking.
        """
        self.reset_daily_limits()
        lost_config = get_lost_config()
        if not lost_config.get('enabled', True):
            return
        max_per_day = lost_config.get('max_per_day', 5)
        if not self.dry_run and self.lost_intros_today >= max_per_day:
            self.log("daily lost builder intro limit reached")
            return
        # find lost builders with matching active builders
        matches, error = find_matches_for_lost_builders(
            self.db,
            min_lost_score=lost_config.get('min_lost_score', 40),
            min_values_score=lost_config.get('min_values_score', 20),
            limit=max_per_day - self.lost_intros_today
        )
        if error:
            self.log(f"lost builder matching error: {error}")
            return
        if not matches:
            self.log("no lost builders ready for outreach")
            return
        if self.dry_run:
            self.log(f"DRY RUN: previewing {len(matches)} lost builder intros")
        for match in matches:
            if not self.dry_run and self.lost_intros_today >= max_per_day:
                break
            lost = match['lost_user']
            builder = match['inspiring_builder']
            lost_name = lost.get('name') or lost.get('username')
            builder_name = builder.get('name') or builder.get('username')
            # draft intro
            draft, draft_error = draft_lost_intro(lost, builder, lost_config)
            if draft_error:
                self.log(f"error drafting lost intro for {lost_name}: {draft_error}")
                continue
            # determine best contact method (activity-based)
            method, contact_info = determine_best_contact(lost)
            if self.dry_run:
                print("\n" + "=" * 60)
                print("LOST BUILDER OUTREACH")
                print("=" * 60)
                print(f"TO: {lost_name} ({lost.get('platform')})")
                print(f"DELIVERY: {method}{contact_info}")
                print(f"LOST SCORE: {lost.get('lost_potential_score', 0)}")
                print(f"VALUES SCORE: {lost.get('score', 0)}")
                print(f"INSPIRING BUILDER: {builder_name}")
                print(f"SHARED INTERESTS: {', '.join(match.get('shared_interests', []))}")
                print("-" * 60)
                print("MESSAGE:")
                print(draft)
                print("-" * 60)
                print("[DRY RUN - NOT SENT]")
                print("=" * 60)
            else:
                # build match data for unified delivery
                match_data = {
                    'human_a': builder,  # inspiring builder
                    'human_b': lost,  # lost builder (recipient)
                    'overlap_score': match.get('match_score', 0),
                    'overlap_reasons': match.get('shared_interests', []),
                }
                success, error, delivery_method = deliver_intro(match_data, draft)
                if success:
                    self.log(f"sent lost builder intro to {lost_name} via {delivery_method}")
                    self.lost_intros_today += 1
                    self.db.mark_lost_outreach(lost['id'])
                else:
                    self.log(f"failed to reach {lost_name} via {delivery_method}: {error}")
        self.last_lost = datetime.now()
        self.log(f"lost builder cycle complete: {self.lost_intros_today} sent today")

    def run(self):
        """main daemon loop"""
        self.log("connectd daemon starting...")
        # start API server
        start_api_thread()
        self.log("api server started on port 8099")
        if self.dry_run:
            self.log("*** DRY RUN MODE - no intros will be sent ***")
        self.log(f"scout interval: {SCOUT_INTERVAL}s")
        self.log(f"match interval: {MATCH_INTERVAL}s")
        self.log(f"intro interval: {INTRO_INTERVAL}s")
        self.log(f"lost interval: {LOST_INTERVAL}s")
        self.log(f"max intros/day: {MAX_INTROS_PER_DAY}")
        # initial scout
        self.scout_cycle()
        self._update_api_state()
        # elapsed-time checks use total_seconds(): timedelta.seconds is only
        # the sub-day remainder (0-86399), so a gap longer than 24h would
        # silently wrap and the cycle would never fire.
        while self.running:
            now = datetime.now()
            # scout cycle
            if not self.last_scout or (now - self.last_scout).total_seconds() >= SCOUT_INTERVAL:
                self.scout_cycle()
                self._update_api_state()
            # match cycle
            if not self.last_match or (now - self.last_match).total_seconds() >= MATCH_INTERVAL:
                self.match_priority_users()
                self.match_strangers()
                self._update_api_state()
            # intro cycle
            if not self.last_intro or (now - self.last_intro).total_seconds() >= INTRO_INTERVAL:
                self.send_stranger_intros()
                self._update_api_state()
            # lost builder cycle
            if not self.last_lost or (now - self.last_lost).total_seconds() >= LOST_INTERVAL:
                self.send_lost_builder_intros()
                self._update_api_state()
            # sleep between checks
            time.sleep(60)
        self.log("connectd daemon stopped")
        self.db.close()
def run_daemon(dry_run=False):
    """entry point: construct the daemon and run its main loop."""
    ConnectDaemon(dry_run=dry_run).run()
# script entry point; sys is already imported at module top, so the
# redundant local re-import was removed.
if __name__ == '__main__':
    dry_run = '--dry-run' in sys.argv
    run_daemon(dry_run=dry_run)

375
connectd/db/__init__.py Normal file
View file

@ -0,0 +1,375 @@
"""
connectd database layer
sqlite storage for humans, fingerprints, matches, intros
"""
import os
import sqlite3
import json
from datetime import datetime
from pathlib import Path
# use env var for DB path (docker) or default to local
# DB_PATH is the default sqlite file; Database(path=...) can still override it
DB_PATH = Path(os.environ.get('DB_PATH', Path(__file__).parent / 'connectd.db'))
class Database:
    def __init__(self, path=None):
        """open (or create) the sqlite database and ensure the schema exists.

        path: optional db file path; falls back to module-level DB_PATH.
        """
        self.path = path or DB_PATH
        self.conn = sqlite3.connect(self.path)
        # Row factory lets callers do dict(row) on query results
        self.conn.row_factory = sqlite3.Row
        self._init_tables()
    def _init_tables(self):
        """create all tables if missing and apply additive column migrations.

        idempotent: CREATE TABLE IF NOT EXISTS plus ALTER TABLE attempts that
        swallow the 'duplicate column' OperationalError on re-run.
        """
        c = self.conn.cursor()
        # humans table - all discovered people, unique per (platform, username).
        # signals/negative_signals/reasons/contact/extra hold JSON-encoded text.
        c.execute('''CREATE TABLE IF NOT EXISTS humans (
            id INTEGER PRIMARY KEY,
            platform TEXT NOT NULL,
            username TEXT NOT NULL,
            url TEXT,
            name TEXT,
            bio TEXT,
            location TEXT,
            score REAL DEFAULT 0,
            confidence REAL DEFAULT 0,
            signals TEXT,
            negative_signals TEXT,
            reasons TEXT,
            contact TEXT,
            extra TEXT,
            fingerprint_id INTEGER,
            scraped_at TEXT,
            updated_at TEXT,
            lost_potential_score REAL DEFAULT 0,
            lost_signals TEXT,
            user_type TEXT DEFAULT 'none',
            last_lost_outreach TEXT,
            UNIQUE(platform, username)
        )''')
        # migration: add new columns if they don't exist
        # (pre-existing databases created before the lost-builder feature)
        try:
            c.execute('ALTER TABLE humans ADD COLUMN lost_potential_score REAL DEFAULT 0')
        except sqlite3.OperationalError:
            pass  # column exists
        try:
            c.execute('ALTER TABLE humans ADD COLUMN lost_signals TEXT')
        except sqlite3.OperationalError:
            pass
        try:
            c.execute('ALTER TABLE humans ADD COLUMN user_type TEXT DEFAULT "none"')
        except sqlite3.OperationalError:
            pass
        try:
            c.execute('ALTER TABLE humans ADD COLUMN last_lost_outreach TEXT')
        except sqlite3.OperationalError:
            pass
        # fingerprints table - values profiles (one per human)
        c.execute('''CREATE TABLE IF NOT EXISTS fingerprints (
            id INTEGER PRIMARY KEY,
            human_id INTEGER,
            values_vector TEXT,
            skills TEXT,
            interests TEXT,
            location_pref TEXT,
            availability TEXT,
            generated_at TEXT,
            FOREIGN KEY(human_id) REFERENCES humans(id)
        )''')
        # matches table - paired humans; a pair is stored once (UNIQUE a,b)
        c.execute('''CREATE TABLE IF NOT EXISTS matches (
            id INTEGER PRIMARY KEY,
            human_a_id INTEGER,
            human_b_id INTEGER,
            overlap_score REAL,
            overlap_reasons TEXT,
            complementary_skills TEXT,
            geographic_match INTEGER,
            status TEXT DEFAULT 'pending',
            created_at TEXT,
            reviewed_at TEXT,
            FOREIGN KEY(human_a_id) REFERENCES humans(id),
            FOREIGN KEY(human_b_id) REFERENCES humans(id),
            UNIQUE(human_a_id, human_b_id)
        )''')
        # intros table - outreach attempts (draft -> approved -> sent -> response)
        c.execute('''CREATE TABLE IF NOT EXISTS intros (
            id INTEGER PRIMARY KEY,
            match_id INTEGER,
            recipient_human_id INTEGER,
            channel TEXT,
            draft TEXT,
            status TEXT DEFAULT 'draft',
            approved_by TEXT,
            approved_at TEXT,
            sent_at TEXT,
            response TEXT,
            response_at TEXT,
            FOREIGN KEY(match_id) REFERENCES matches(id),
            FOREIGN KEY(recipient_human_id) REFERENCES humans(id)
        )''')
        # cross-platform links - same person identified on two platforms
        c.execute('''CREATE TABLE IF NOT EXISTS cross_platform (
            id INTEGER PRIMARY KEY,
            human_a_id INTEGER,
            human_b_id INTEGER,
            confidence REAL,
            reason TEXT,
            FOREIGN KEY(human_a_id) REFERENCES humans(id),
            FOREIGN KEY(human_b_id) REFERENCES humans(id),
            UNIQUE(human_a_id, human_b_id)
        )''')
        self.conn.commit()
    def save_human(self, data):
        """save or update a human record.

        data: dict with the column fields plus arbitrary extra keys; the
        non-column keys are serialized into the 'extra' JSON blob.
        returns the rowid of the inserted/replaced row.

        NOTE(review): INSERT OR REPLACE deletes and re-inserts on a
        (platform, username) conflict, which assigns a NEW id - any
        matches/fingerprints rows pointing at the old id go stale. confirm
        whether an upsert (ON CONFLICT DO UPDATE) was intended.
        """
        c = self.conn.cursor()
        # fields to exclude from extra json
        # NOTE(review): 'scraped_at', 'extra' and 'updated_at' are not in this
        # list, so if present in data they are duplicated inside the extra
        # blob - presumably harmless; verify consumers of extra.
        exclude_fields = ['platform', 'username', 'url', 'name', 'bio',
                          'location', 'score', 'confidence', 'signals',
                          'negative_signals', 'reasons', 'contact',
                          'lost_potential_score', 'lost_signals', 'user_type']
        c.execute('''INSERT OR REPLACE INTO humans
                     (platform, username, url, name, bio, location, score, confidence,
                     signals, negative_signals, reasons, contact, extra, scraped_at, updated_at,
                     lost_potential_score, lost_signals, user_type)
                     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
                  (data.get('platform'),
                   data.get('username'),
                   data.get('url'),
                   data.get('name'),
                   data.get('bio'),
                   data.get('location'),
                   data.get('score', 0),
                   data.get('confidence', 0),
                   json.dumps(data.get('signals', [])),
                   json.dumps(data.get('negative_signals', [])),
                   json.dumps(data.get('reasons', [])),
                   json.dumps(data.get('contact', {})),
                   json.dumps({k: v for k, v in data.items() if k not in exclude_fields}),
                   data.get('scraped_at', datetime.now().isoformat()),
                   datetime.now().isoformat(),
                   data.get('lost_potential_score', 0),
                   json.dumps(data.get('lost_signals', [])),
                   data.get('user_type', 'none')))
        self.conn.commit()
        return c.lastrowid
def get_human(self, platform, username):
"""get a human by platform and username"""
c = self.conn.cursor()
c.execute('SELECT * FROM humans WHERE platform = ? AND username = ?',
(platform, username))
row = c.fetchone()
return dict(row) if row else None
def get_human_by_id(self, human_id):
"""get a human by id"""
c = self.conn.cursor()
c.execute('SELECT * FROM humans WHERE id = ?', (human_id,))
row = c.fetchone()
return dict(row) if row else None
def get_all_humans(self, min_score=0, limit=1000):
"""get all humans above score threshold"""
c = self.conn.cursor()
c.execute('''SELECT * FROM humans
WHERE score >= ?
ORDER BY score DESC, confidence DESC
LIMIT ?''', (min_score, limit))
return [dict(row) for row in c.fetchall()]
def get_humans_by_platform(self, platform, min_score=0, limit=500):
    """humans on one platform with score >= min_score, best first."""
    cur = self.conn.execute('''SELECT * FROM humans
                 WHERE platform = ? AND score >= ?
                 ORDER BY score DESC
                 LIMIT ?''', (platform, min_score, limit))
    return [dict(r) for r in cur.fetchall()]
def get_lost_builders(self, min_lost_score=40, min_values_score=20, limit=100):
    """get lost builders who need encouragement.

    fix: the original WHERE clause read
        user_type = 'lost' OR user_type = 'both' AND lost_potential_score >= ? ...
    and in SQL, AND binds tighter than OR, so every 'lost' row matched
    regardless of either score threshold. the OR is now parenthesized
    (matching get_lost_builders_for_outreach) so both thresholds apply.
    """
    c = self.conn.cursor()
    c.execute('''SELECT * FROM humans
                 WHERE (user_type = 'lost' OR user_type = 'both')
                 AND lost_potential_score >= ?
                 AND score >= ?
                 ORDER BY lost_potential_score DESC, score DESC
                 LIMIT ?''', (min_lost_score, min_values_score, limit))
    return [dict(row) for row in c.fetchall()]
def get_lost_builders_for_outreach(self, min_lost_score=40, min_values_score=20,
                                   cooldown_days=90, limit=50):
    """lost builders eligible for outreach: never contacted, or past cooldown."""
    query = '''SELECT * FROM humans
                 WHERE (user_type = 'lost' OR user_type = 'both')
                 AND lost_potential_score >= ?
                 AND score >= ?
                 AND (last_lost_outreach IS NULL
                      OR datetime(last_lost_outreach) < datetime('now', '-' || ? || ' days'))
                 ORDER BY lost_potential_score DESC, score DESC
                 LIMIT ?'''
    cur = self.conn.execute(query, (min_lost_score, min_values_score, cooldown_days, limit))
    return [dict(r) for r in cur.fetchall()]
def get_active_builders(self, min_score=50, limit=100):
    """active builders (user_type 'builder') above min_score, best first."""
    cur = self.conn.execute('''SELECT * FROM humans
                 WHERE user_type = 'builder'
                 AND score >= ?
                 ORDER BY score DESC, confidence DESC
                 LIMIT ?''', (min_score, limit))
    return [dict(r) for r in cur.fetchall()]
def mark_lost_outreach(self, human_id):
    """record the moment we reached out to a lost builder."""
    stamp = datetime.now().isoformat()
    self.conn.execute('''UPDATE humans SET last_lost_outreach = ? WHERE id = ?''',
                      (stamp, human_id))
    self.conn.commit()
def save_fingerprint(self, human_id, fingerprint_data):
    """store (or replace) a human's fingerprint and link it back; returns its id."""
    cur = self.conn.cursor()
    payload = (human_id,
               json.dumps(fingerprint_data.get('values_vector', {})),
               json.dumps(fingerprint_data.get('skills', [])),
               json.dumps(fingerprint_data.get('interests', [])),
               fingerprint_data.get('location_pref'),
               fingerprint_data.get('availability'),
               datetime.now().isoformat())
    cur.execute('''INSERT OR REPLACE INTO fingerprints
                 (human_id, values_vector, skills, interests, location_pref, availability, generated_at)
                 VALUES (?, ?, ?, ?, ?, ?, ?)''', payload)
    fid = cur.lastrowid
    # point the human at its freshly written fingerprint
    cur.execute('UPDATE humans SET fingerprint_id = ? WHERE id = ?', (fid, human_id))
    self.conn.commit()
    return fid
def get_fingerprint(self, human_id):
    """fetch the fingerprint row for a human; None when none exists."""
    row = self.conn.execute(
        'SELECT * FROM fingerprints WHERE human_id = ?', (human_id,)).fetchone()
    return None if row is None else dict(row)
def save_match(self, human_a_id, human_b_id, match_data):
    """persist a pending match between two humans; returns the match id."""
    geo = 1 if match_data.get('geographic_match') else 0
    cur = self.conn.cursor()
    cur.execute('''INSERT OR REPLACE INTO matches
                 (human_a_id, human_b_id, overlap_score, overlap_reasons,
                 complementary_skills, geographic_match, status, created_at)
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)''',
                (human_a_id, human_b_id,
                 match_data.get('overlap_score', 0),
                 json.dumps(match_data.get('overlap_reasons', [])),
                 json.dumps(match_data.get('complementary_skills', [])),
                 geo,
                 'pending',
                 datetime.now().isoformat()))
    self.conn.commit()
    return cur.lastrowid
def get_matches(self, status=None, limit=100):
    """matches ordered by overlap score; optionally filtered to one status."""
    # a falsy status (None or '') means "no filter", mirroring the api module
    if status:
        cur = self.conn.execute('''SELECT * FROM matches WHERE status = ?
                     ORDER BY overlap_score DESC LIMIT ?''', (status, limit))
    else:
        cur = self.conn.execute('''SELECT * FROM matches
                     ORDER BY overlap_score DESC LIMIT ?''', (limit,))
    return [dict(r) for r in cur.fetchall()]
def save_intro(self, match_id, recipient_id, channel, draft):
    """store a new intro draft (status 'draft'); returns its id."""
    cur = self.conn.cursor()
    cur.execute('''INSERT INTO intros
                 (match_id, recipient_human_id, channel, draft, status)
                 VALUES (?, ?, ?, ?, 'draft')''',
                (match_id, recipient_id, channel, draft))
    self.conn.commit()
    return cur.lastrowid
def get_pending_intros(self, limit=50):
    """newest intro drafts still awaiting human approval."""
    cur = self.conn.execute('''SELECT * FROM intros WHERE status = 'draft'
                 ORDER BY id DESC LIMIT ?''', (limit,))
    return [dict(r) for r in cur.fetchall()]
def approve_intro(self, intro_id, approved_by='human'):
    """flip an intro to 'approved' and record who approved it and when."""
    stamp = datetime.now().isoformat()
    self.conn.execute('''UPDATE intros SET status = 'approved',
                 approved_by = ?, approved_at = ? WHERE id = ?''',
                      (approved_by, stamp, intro_id))
    self.conn.commit()
def mark_intro_sent(self, intro_id):
    """record that an intro actually went out."""
    self.conn.execute('''UPDATE intros SET status = 'sent', sent_at = ? WHERE id = ?''',
                      (datetime.now().isoformat(), intro_id))
    self.conn.commit()
def stats(self):
    """gather database statistics as a plain dict.

    fix: the sent-intros query used WHERE status = "sent"; double quotes
    are identifier syntax in SQL and only work here via sqlite's legacy
    double-quoted-string fallback. use a single-quoted literal like the
    rest of the file.
    """
    c = self.conn.cursor()

    def count(query):
        # helper: run a single COUNT(*) query and return the number
        c.execute(query)
        return c.fetchone()[0]

    stats = {}
    stats['total_humans'] = count('SELECT COUNT(*) FROM humans')
    c.execute('SELECT platform, COUNT(*) FROM humans GROUP BY platform')
    stats['by_platform'] = {row[0]: row[1] for row in c.fetchall()}
    stats['high_score_humans'] = count('SELECT COUNT(*) FROM humans WHERE score >= 50')
    stats['total_matches'] = count('SELECT COUNT(*) FROM matches')
    stats['total_intros'] = count('SELECT COUNT(*) FROM intros')
    stats['sent_intros'] = count("SELECT COUNT(*) FROM intros WHERE status = 'sent'")
    # lost builder stats
    stats['active_builders'] = count("SELECT COUNT(*) FROM humans WHERE user_type = 'builder'")
    stats['lost_builders'] = count("SELECT COUNT(*) FROM humans WHERE user_type = 'lost'")
    stats['recovering_builders'] = count("SELECT COUNT(*) FROM humans WHERE user_type = 'both'")
    stats['high_lost_score'] = count('SELECT COUNT(*) FROM humans WHERE lost_potential_score >= 40')
    stats['lost_outreach_sent'] = count('SELECT COUNT(*) FROM humans WHERE last_lost_outreach IS NOT NULL')
    return stats
def close(self):
    """close the underlying sqlite connection; the object is unusable afterwards."""
    self.conn.close()

510
connectd/db/users.py Normal file
View file

@ -0,0 +1,510 @@
"""
priority users - people who host connectd get direct matching
"""
import sqlite3
import json
from datetime import datetime
from pathlib import Path
# default sqlite database location, next to this module
DB_PATH = Path(__file__).parent / 'connectd.db'
# map user-friendly interests to signal terms
# keys are labels users type; values are the internal signal names used by
# scoring/matching. an empty list (e.g. 'esports-venue') accepts the label
# but contributes no signals -- presumably intentional; TODO confirm
INTEREST_TO_SIGNALS = {
    'self-hosting': ['selfhosted', 'home_automation'],
    'home-assistant': ['home_automation'],
    'intentional-community': ['community', 'cooperative'],
    'cooperatives': ['cooperative', 'community'],
    'solarpunk': ['solarpunk'],
    'privacy': ['privacy', 'local_first'],
    'local-first': ['local_first', 'privacy'],
    'queer-friendly': ['queer'],
    'anti-capitalism': ['cooperative', 'decentralized', 'community'],
    'esports-venue': [],
    'foss': ['foss'],
    'decentralized': ['decentralized'],
    'federated': ['federated_chat'],
    'mesh': ['mesh'],
}
def init_users_table(conn):
    """create the priority-user tables if they do not exist.

    also migrates older databases by adding columns introduced after the
    initial release.

    fix: the migration loop used a bare `except:`, which would also hide
    real failures (locked db, syntax errors, KeyboardInterrupt). sqlite
    reports the expected "duplicate column name" case as OperationalError,
    so catch only that.
    """
    c = conn.cursor()
    c.execute('''CREATE TABLE IF NOT EXISTS priority_users (
        id INTEGER PRIMARY KEY,
        name TEXT,
        email TEXT UNIQUE,
        github TEXT,
        reddit TEXT,
        mastodon TEXT,
        lobsters TEXT,
        matrix TEXT,
        lemmy TEXT,
        discord TEXT,
        bluesky TEXT,
        location TEXT,
        bio TEXT,
        interests TEXT,
        looking_for TEXT,
        created_at TEXT,
        active INTEGER DEFAULT 1,
        score REAL DEFAULT 0,
        signals TEXT,
        scraped_profile TEXT,
        last_scored_at TEXT
    )''')
    # add missing columns to an existing (older) table; duplicate-column
    # errors mean the column is already there and are the expected no-op
    for col in ['lemmy', 'discord', 'bluesky']:
        try:
            c.execute(f'ALTER TABLE priority_users ADD COLUMN {col} TEXT')
        except sqlite3.OperationalError:
            pass  # column already exists
    # matches specifically for priority users
    c.execute('''CREATE TABLE IF NOT EXISTS priority_matches (
        id INTEGER PRIMARY KEY,
        priority_user_id INTEGER,
        matched_human_id INTEGER,
        overlap_score REAL,
        overlap_reasons TEXT,
        status TEXT DEFAULT 'new',
        notified_at TEXT,
        viewed_at TEXT,
        FOREIGN KEY(priority_user_id) REFERENCES priority_users(id),
        FOREIGN KEY(matched_human_id) REFERENCES humans(id)
    )''')
    conn.commit()
def add_priority_user(conn, user_data):
    """insert (or replace, keyed on unique email) a priority user."""
    simple_fields = ('name', 'email', 'github', 'reddit', 'mastodon', 'lobsters',
                     'matrix', 'lemmy', 'discord', 'bluesky', 'location', 'bio')
    values = [user_data.get(f) for f in simple_fields]
    values.append(json.dumps(user_data.get('interests', [])))
    values.append(user_data.get('looking_for'))
    values.append(datetime.now().isoformat())
    cur = conn.cursor()
    cur.execute('''INSERT OR REPLACE INTO priority_users
                 (name, email, github, reddit, mastodon, lobsters, matrix, lemmy, discord, bluesky,
                 location, bio, interests, looking_for, created_at)
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''', values)
    conn.commit()
    return cur.lastrowid
def get_priority_users(conn):
    """all priority users still marked active."""
    rows = conn.execute('SELECT * FROM priority_users WHERE active = 1').fetchall()
    return [dict(r) for r in rows]
def get_priority_user(conn, user_id):
    """fetch one priority user by id as a dict; None when missing."""
    row = conn.execute('SELECT * FROM priority_users WHERE id = ?', (user_id,)).fetchone()
    return None if row is None else dict(row)
def save_priority_match(conn, priority_user_id, human_id, overlap_data):
    """record a match for a priority user; duplicate inserts are ignored."""
    params = (priority_user_id, human_id,
              overlap_data.get('overlap_score', 0),
              json.dumps(overlap_data.get('overlap_reasons', [])))
    cur = conn.cursor()
    cur.execute('''INSERT OR IGNORE INTO priority_matches
                 (priority_user_id, matched_human_id, overlap_score, overlap_reasons, status)
                 VALUES (?, ?, ?, ?, 'new')''', params)
    conn.commit()
    return cur.lastrowid
def get_priority_user_matches(conn, priority_user_id, status=None, limit=50):
    """matches for a priority user joined with the matched human's row.

    note: `pm.*, h.*` produces duplicate column names (e.g. `id`); dict()
    keeps the last one, which is the human's column.
    """
    # a falsy status means "all statuses"
    if status:
        cur = conn.execute('''SELECT pm.*, h.* FROM priority_matches pm
                     JOIN humans h ON pm.matched_human_id = h.id
                     WHERE pm.priority_user_id = ? AND pm.status = ?
                     ORDER BY pm.overlap_score DESC
                     LIMIT ?''', (priority_user_id, status, limit))
    else:
        cur = conn.execute('''SELECT pm.*, h.* FROM priority_matches pm
                     JOIN humans h ON pm.matched_human_id = h.id
                     WHERE pm.priority_user_id = ?
                     ORDER BY pm.overlap_score DESC
                     LIMIT ?''', (priority_user_id, limit))
    return [dict(r) for r in cur.fetchall()]
def mark_match_viewed(conn, match_id):
    """flag a priority match as seen and stamp the time."""
    now = datetime.now().isoformat()
    conn.execute('''UPDATE priority_matches SET status = 'viewed', viewed_at = ?
                 WHERE id = ?''', (now, match_id))
    conn.commit()
def expand_interests_to_signals(interests):
    """translate user-facing interest labels into internal signal terms."""
    out = set()
    for raw in interests:
        key = raw.lower().strip()
        # known labels expand to their mapped signals; unknown labels pass
        # through unchanged as a signal of their own
        out.update(INTEREST_TO_SIGNALS.get(key, [key]))
    # baseline signals every priority user is assumed to share
    out.update(['foss', 'decentralized', 'federated_chat', 'containers', 'unix', 'selfhosted'])
    return list(out)
def score_priority_user(conn, user_id, scraped_profile=None):
    """
    calculate a score for a priority user based on:
    - their stated interests
    - their scraped github profile (if available)
    - their repos and activity

    persists score/signals/profile back onto the row and returns
    {'score': int, 'signals': list[str]}, or None when the id is unknown.
    interest bonuses are cumulative (one interest string can hit several
    keyword rules).
    """
    c = conn.cursor()
    c.execute('SELECT * FROM priority_users WHERE id = ?', (user_id,))
    row = c.fetchone()
    if not row:
        return None
    user = dict(row)
    score = 0
    signals = set()
    # 1. score from stated interests
    interests = user.get('interests')
    # interests column holds a json-encoded list; decode defensively
    if isinstance(interests, str):
        interests = json.loads(interests) if interests else []
    for interest in interests:
        interest_lower = interest.lower()
        # high-value interests (substring matches, so e.g. 'solarpunk art' counts)
        if 'solarpunk' in interest_lower:
            score += 30
            signals.add('solarpunk')
        if 'queer' in interest_lower:
            score += 30
            signals.add('queer')
        if 'cooperative' in interest_lower or 'intentional' in interest_lower:
            score += 20
            signals.add('cooperative')
        if 'privacy' in interest_lower:
            score += 10
            signals.add('privacy')
        if 'self-host' in interest_lower or 'selfhost' in interest_lower:
            score += 15
            signals.add('selfhosted')
        if 'home-assistant' in interest_lower:
            score += 15
            signals.add('home_automation')
        if 'foss' in interest_lower or 'open source' in interest_lower:
            score += 10
            signals.add('foss')
    # 2. score from scraped profile
    if scraped_profile:
        # repos: more public repos -> higher activity score (tiered)
        repos = scraped_profile.get('top_repos', [])
        if len(repos) >= 20:
            score += 20
        elif len(repos) >= 10:
            score += 10
        elif len(repos) >= 5:
            score += 5
        # languages
        languages = scraped_profile.get('languages', {})
        if 'Python' in languages or 'Rust' in languages:
            score += 5
            signals.add('modern_lang')
        # topics from repos (each aligned topic adds 10)
        topics = scraped_profile.get('topics', [])
        for topic in topics:
            if topic in ['self-hosted', 'home-assistant', 'privacy', 'foss']:
                score += 10
                signals.add(topic.replace('-', '_'))
        # followers (tiered reach bonus)
        followers = scraped_profile.get('followers', 0)
        if followers >= 100:
            score += 15
        elif followers >= 50:
            score += 10
        elif followers >= 10:
            score += 5
    # 3. add expanded signals (stated interests mapped to signal terms)
    expanded = expand_interests_to_signals(interests)
    signals.update(expanded)
    # update user row with the freshly computed values
    c.execute('''UPDATE priority_users
                 SET score = ?, signals = ?, scraped_profile = ?, last_scored_at = ?
                 WHERE id = ?''',
              (score, json.dumps(list(signals)), json.dumps(scraped_profile) if scraped_profile else None,
               datetime.now().isoformat(), user_id))
    conn.commit()
    return {'score': score, 'signals': list(signals)}
def auto_match_priority_user(conn, user_id, min_overlap=40):
    """
    automatically find and save matches for a priority user
    uses relationship filtering to skip already-connected people

    clears the user's previous priority_matches, rescans all humans with
    score >= 25, and persists the top 50 by overlap score. returns the
    full match list (not just the saved 50), each entry
    {'human', 'overlap_score', 'shared'}.
    """
    from scoutd.deep import check_already_connected
    c = conn.cursor()
    # get user
    c.execute('SELECT * FROM priority_users WHERE id = ?', (user_id,))
    row = c.fetchone()
    if not row:
        return []
    user = dict(row)
    # get user signals (signals column is json text)
    user_signals = set()
    if user.get('signals'):
        signals = json.loads(user['signals']) if isinstance(user['signals'], str) else user['signals']
        user_signals.update(signals)
    # also expand interests
    if user.get('interests'):
        interests = json.loads(user['interests']) if isinstance(user['interests'], str) else user['interests']
        user_signals.update(expand_interests_to_signals(interests))
    # clear old matches before recomputing
    c.execute('DELETE FROM priority_matches WHERE priority_user_id = ?', (user_id,))
    conn.commit()
    # get all humans above the candidate floor
    c.execute('SELECT * FROM humans WHERE score >= 25')
    columns = [d[0] for d in c.description]
    matches = []
    for row in c.fetchall():
        human = dict(zip(columns, row))
        # skip own profiles (github/reddit usernames matching the host)
        username = (human.get('username') or '').lower()
        if user.get('github') and username == user['github'].lower():
            continue
        if user.get('reddit') and username == user.get('reddit', '').lower():
            continue
        # check if already connected (only the github identity is checked here)
        user_human = {'username': user.get('github'), 'platform': 'github', 'extra': {}}
        connected, reason = check_already_connected(user_human, human)
        if connected:
            continue
        # get human signals (json text in db)
        human_signals = human.get('signals', [])
        if isinstance(human_signals, str):
            human_signals = json.loads(human_signals) if human_signals else []
        # calculate overlap: 10 points per shared signal
        shared = user_signals & set(human_signals)
        overlap_score = len(shared) * 10
        # high-value bonuses (stack on top of the shared-signal points)
        if 'queer' in human_signals:
            overlap_score += 40
            shared.add('queer (rare!)')
        if 'solarpunk' in human_signals:
            overlap_score += 30
            shared.add('solarpunk (rare!)')
        if 'cooperative' in human_signals:
            overlap_score += 20
            shared.add('cooperative (values)')
        # location bonus -- NOTE(review): the region list is hardcoded to the
        # pacific northwest; other hosts get no location bonus. confirm intended
        location = (human.get('location') or '').lower()
        user_location = (user.get('location') or '').lower()
        if user_location and location:
            if any(x in location for x in ['seattle', 'portland', 'pnw', 'washington', 'oregon']):
                if 'seattle' in user_location or 'pnw' in user_location:
                    overlap_score += 25
                    shared.add('PNW location!')
        if overlap_score >= min_overlap:
            matches.append({
                'human': human,
                'overlap_score': overlap_score,
                'shared': list(shared),
            })
    # sort and save top matches
    matches.sort(key=lambda x: x['overlap_score'], reverse=True)
    for m in matches[:50]:  # save top 50
        save_priority_match(conn, user_id, m['human']['id'], {
            'overlap_score': m['overlap_score'],
            'overlap_reasons': m['shared'],
        })
    return matches
def update_priority_user_profile(conn, user_id, profile_data):
    """apply non-empty fields from profile_data onto an existing priority user.

    only whitelisted column names are interpolated into the sql, so the
    dynamic SET clause cannot be injected through user data.
    """
    simple_fields = ['name', 'email', 'github', 'reddit', 'mastodon', 'lobsters',
                     'matrix', 'lemmy', 'discord', 'bluesky', 'location', 'bio', 'looking_for']
    assignments = []
    params = []
    for field in simple_fields:
        value = profile_data.get(field)
        if value:  # skip missing and falsy values, as before
            assignments.append(f'{field} = ?')
            params.append(value)
    if 'interests' in profile_data:
        assignments.append('interests = ?')
        params.append(json.dumps(profile_data['interests']))
    if assignments:
        params.append(user_id)
        conn.cursor().execute(
            f'''UPDATE priority_users SET {', '.join(assignments)} WHERE id = ?''', params)
        conn.commit()
    return True
def discover_host_user(conn, alias):
    """
    auto-discover a host user by their alias (username).
    scrapes github and discovers all connected social handles.
    also merges in HOST_ env vars from config for manual overrides.
    returns the priority user id

    side effects: creates or updates the priority_users row, re-scores it,
    and prints progress to stdout.
    """
    from scoutd.github import analyze_github_user
    from config import (HOST_NAME, HOST_EMAIL, HOST_GITHUB, HOST_MASTODON,
                        HOST_REDDIT, HOST_LEMMY, HOST_LOBSTERS, HOST_MATRIX,
                        HOST_DISCORD, HOST_BLUESKY, HOST_LOCATION, HOST_INTERESTS, HOST_LOOKING_FOR)
    print(f"connectd: discovering host user '{alias}'...")
    # scrape github for full profile
    profile = analyze_github_user(alias)
    if not profile:
        print(f" could not find github user '{alias}'")
        # still create from env vars if no github found (minimal skeleton profile)
        profile = {'name': HOST_NAME or alias, 'bio': '', 'location': HOST_LOCATION,
                   'contact': {}, 'extra': {'handles': {}}, 'topics': [], 'signals': []}
    print(f" found: {profile.get('name')} ({alias})")
    print(f" score: {profile.get('score', 0)}, signals: {len(profile.get('signals', []))}")
    # extract contact info discovered during scraping
    contact = profile.get('contact', {})
    handles = profile.get('extra', {}).get('handles', {})
    # merge in HOST_ env vars (override discovered values)
    if HOST_MASTODON:
        handles['mastodon'] = HOST_MASTODON
    if HOST_REDDIT:
        handles['reddit'] = HOST_REDDIT
    if HOST_LEMMY:
        handles['lemmy'] = HOST_LEMMY
    if HOST_LOBSTERS:
        handles['lobsters'] = HOST_LOBSTERS
    if HOST_MATRIX:
        handles['matrix'] = HOST_MATRIX
    if HOST_DISCORD:
        handles['discord'] = HOST_DISCORD
    if HOST_BLUESKY:
        handles['bluesky'] = HOST_BLUESKY
    # check if user already exists (keyed on github alias)
    c = conn.cursor()
    c.execute('SELECT id FROM priority_users WHERE github = ?', (alias,))
    existing = c.fetchone()
    # parse HOST_INTERESTS if provided (comma-separated, overrides scraped topics)
    interests = profile.get('topics', [])
    if HOST_INTERESTS:
        interests = [i.strip() for i in HOST_INTERESTS.split(',') if i.strip()]
    # env vars win over discovered values throughout
    user_data = {
        'name': HOST_NAME or profile.get('name') or alias,
        'email': HOST_EMAIL or contact.get('email'),
        'github': HOST_GITHUB or alias,
        'reddit': handles.get('reddit'),
        'mastodon': handles.get('mastodon') or contact.get('mastodon'),
        'lobsters': handles.get('lobsters'),
        'matrix': handles.get('matrix') or contact.get('matrix'),
        'lemmy': handles.get('lemmy') or contact.get('lemmy'),
        'discord': handles.get('discord'),
        'bluesky': handles.get('bluesky') or contact.get('bluesky'),
        'location': HOST_LOCATION or profile.get('location'),
        'bio': profile.get('bio'),
        'interests': interests,
        'looking_for': HOST_LOOKING_FOR,
    }
    if existing:
        # update existing user in place
        user_id = existing['id']
        update_priority_user_profile(conn, user_id, user_data)
        print(f" updated existing priority user (id={user_id})")
    else:
        # create new user
        user_id = add_priority_user(conn, user_data)
        print(f" created new priority user (id={user_id})")
    # score the user from the scraped github data
    scraped_profile = {
        'top_repos': profile.get('extra', {}).get('top_repos', []),
        'languages': profile.get('languages', {}),
        'topics': profile.get('topics', []),
        'followers': profile.get('extra', {}).get('followers', 0),
    }
    score_result = score_priority_user(conn, user_id, scraped_profile)
    print(f" scored: {score_result.get('score')}, {len(score_result.get('signals', []))} signals")
    # print discovered handles
    print(f" discovered handles:")
    for platform, handle in handles.items():
        print(f" {platform}: {handle}")
    return user_id
def get_host_user(conn):
    """return the first active priority user (the host), or None."""
    active = get_priority_users(conn)
    if not active:
        return None
    return active[0]

BIN
connectd/icon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

View file

@ -0,0 +1,10 @@
"""
introd - outreach module
drafts intros, queues for human review, sends via appropriate channel
"""
from .draft import draft_intro
from .review import get_pending_intros, approve_intro, reject_intro
from .send import send_intro
__all__ = ['draft_intro', 'get_pending_intros', 'approve_intro', 'reject_intro', 'send_intro']

509
connectd/introd/deliver.py Normal file
View file

@ -0,0 +1,509 @@
"""
introd/deliver.py - intro delivery via multiple channels
supports:
- email (smtp)
- mastodon dm (if they allow dms)
- bluesky dm (via AT Protocol)
- matrix dm (creates DM room and sends message)
- github issue (opens intro as issue on their most active repo)
- manual queue (for review before sending)
contact method is determined by ACTIVITY-BASED SELECTION:
- picks the platform where the user is MOST ACTIVE
- verified handles (from rel="me" links) get a bonus
NOTE: reddit is NOT a delivery method - it's discovery only.
reddit-discovered users are contacted via their external links.
"""
import os
import json
import smtplib
import requests
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime
from pathlib import Path
# config from env - no hardcoded credentials
# smtp (email delivery); port 465 matches the SMTP_SSL usage below
SMTP_HOST = os.environ.get('SMTP_HOST', '')
SMTP_PORT = int(os.environ.get('SMTP_PORT', 465))
SMTP_USER = os.environ.get('SMTP_USER', '')
SMTP_PASS = os.environ.get('SMTP_PASS', '')
FROM_EMAIL = os.environ.get('FROM_EMAIL', '')
# per-platform credentials; an empty value disables that delivery channel
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '')
MASTODON_TOKEN = os.environ.get('MASTODON_TOKEN', '')
MASTODON_INSTANCE = os.environ.get('MASTODON_INSTANCE', '')
BLUESKY_HANDLE = os.environ.get('BLUESKY_HANDLE', '')
BLUESKY_APP_PASSWORD = os.environ.get('BLUESKY_APP_PASSWORD', '')
MATRIX_HOMESERVER = os.environ.get('MATRIX_HOMESERVER', '')
MATRIX_USER_ID = os.environ.get('MATRIX_USER_ID', '')
MATRIX_ACCESS_TOKEN = os.environ.get('MATRIX_ACCESS_TOKEN', '')
# delivery log: json files under <package>/data/ tracking sent/failed/queued intros
DELIVERY_LOG = Path(__file__).parent.parent / 'data' / 'delivery_log.json'
MANUAL_QUEUE = Path(__file__).parent.parent / 'data' / 'manual_queue.json'
def load_delivery_log():
    """read delivery history from disk; empty structure on first run."""
    if not DELIVERY_LOG.exists():
        return {'sent': [], 'failed': [], 'queued': []}
    return json.loads(DELIVERY_LOG.read_text())
def save_delivery_log(log):
    """save delivery history, creating the data directory on first use"""
    DELIVERY_LOG.parent.mkdir(parents=True, exist_ok=True)
    DELIVERY_LOG.write_text(json.dumps(log, indent=2))
def load_manual_queue():
    """read the manual review queue from disk; empty list on first run."""
    if not MANUAL_QUEUE.exists():
        return []
    return json.loads(MANUAL_QUEUE.read_text())
def save_manual_queue(queue):
    """save manual review queue, creating the data directory on first use"""
    MANUAL_QUEUE.parent.mkdir(parents=True, exist_ok=True)
    MANUAL_QUEUE.write_text(json.dumps(queue, indent=2))
def already_contacted(recipient_id):
    """True when an intro has already been sent to this recipient id."""
    history = load_delivery_log()
    return any(entry.get('recipient_id') == recipient_id
               for entry in history.get('sent', []))
def send_email(to_email, subject, body, dry_run=False):
    """send an intro as a plain-text + html email over smtp/ssl.

    returns (success, error); error is None on success.

    fix: the html alternative previously interpolated the raw body into
    markup, so '<', '>' or '&' in an intro draft would corrupt the html
    part (or inject markup). the body is now escaped before the newline ->
    <br> conversion.
    """
    if dry_run:
        print(f" [dry run] would email {to_email}")
        print(f" subject: {subject}")
        print(f" body preview: {body[:100]}...")
        return True, "dry run"
    try:
        import html  # stdlib; local import keeps module namespace unchanged
        msg = MIMEMultipart('alternative')
        msg['Subject'] = subject
        msg['From'] = FROM_EMAIL
        msg['To'] = to_email
        # plain text part
        text_part = MIMEText(body, 'plain')
        msg.attach(text_part)
        # html version (simple): escape first, then turn newlines into <br>
        html_body = html.escape(body).replace('\n', '<br>')
        html_part = MIMEText(f"<html><body><p>{html_body}</p></body></html>", 'html')
        msg.attach(html_part)
        with smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) as server:
            server.login(SMTP_USER, SMTP_PASS)
            # NOTE(review): envelope sender is SMTP_USER while the From header
            # is FROM_EMAIL -- presumably intentional for smtp auth; confirm
            server.sendmail(SMTP_USER, to_email, msg.as_string())
        return True, None
    except Exception as e:
        return False, str(e)
def create_github_issue(owner, repo, title, body, dry_run=False):
    """open an intro as a github issue; returns (success, url_or_error)."""
    if not GITHUB_TOKEN:
        return False, "GITHUB_TOKEN not set"
    if dry_run:
        print(f" [dry run] would create issue on {owner}/{repo}")
        print(f" title: {title}")
        return True, "dry run"
    try:
        resp = requests.post(
            f"https://api.github.com/repos/{owner}/{repo}/issues",
            headers={
                'Authorization': f'token {GITHUB_TOKEN}',
                'Accept': 'application/vnd.github.v3+json',
            },
            json={
                'title': title,
                'body': body,
                'labels': ['introduction', 'community'],
            },
            timeout=30,
        )
        # 201 Created is the only success response for issue creation
        if resp.status_code != 201:
            return False, f"github api error: {resp.status_code} - {resp.text}"
        return True, resp.json().get('html_url')
    except Exception as e:
        return False, str(e)
def send_mastodon_dm(recipient_acct, message, dry_run=False):
    """deliver an intro as a direct-visibility mastodon status."""
    if not MASTODON_TOKEN:
        return False, "MASTODON_TOKEN not set"
    if dry_run:
        print(f" [dry run] would DM {recipient_acct}")
        print(f" message preview: {message[:100]}...")
        return True, "dry run"
    try:
        # a mastodon "dm" is a status with direct visibility that mentions
        # the recipient
        resp = requests.post(
            f"https://{MASTODON_INSTANCE}/api/v1/statuses",
            headers={
                'Authorization': f'Bearer {MASTODON_TOKEN}',
                'Content-Type': 'application/json',
            },
            json={
                'status': f"@{recipient_acct} {message}",
                'visibility': 'direct',
            },
            timeout=30,
        )
        if resp.status_code in (200, 201):
            return True, resp.json().get('url')
        return False, f"mastodon api error: {resp.status_code} - {resp.text}"
    except Exception as e:
        return False, str(e)
def send_bluesky_dm(recipient_handle, message, dry_run=False):
    """send bluesky direct message via AT Protocol.

    flow: create a session, resolve the recipient handle to a DID, fetch
    the 1:1 conversation, then post the message into it. returns
    (success, detail); detail is an error string on failure.
    """
    if not BLUESKY_APP_PASSWORD:
        return False, "BLUESKY_APP_PASSWORD not set"
    if dry_run:
        print(f" [dry run] would DM {recipient_handle} on bluesky")
        print(f" message preview: {message[:100]}...")
        return True, "dry run"
    try:
        # authenticate with bluesky
        auth_url = "https://bsky.social/xrpc/com.atproto.server.createSession"
        auth_resp = requests.post(
            auth_url,
            json={
                'identifier': BLUESKY_HANDLE,
                'password': BLUESKY_APP_PASSWORD,
            },
            timeout=30,
        )
        if auth_resp.status_code != 200:
            return False, f"bluesky auth failed: {auth_resp.status_code}"
        auth_data = auth_resp.json()
        access_token = auth_data.get('accessJwt')
        did = auth_data.get('did')  # NOTE(review): own DID, currently unused
        # resolve recipient DID from their handle
        resolve_url = f"https://bsky.social/xrpc/com.atproto.identity.resolveHandle"
        resolve_resp = requests.get(
            resolve_url,
            params={'handle': recipient_handle.lstrip('@')},
            timeout=30,
        )
        if resolve_resp.status_code != 200:
            return False, f"couldn't resolve handle {recipient_handle}"
        recipient_did = resolve_resp.json().get('did')
        # create chat/DM (using convo namespace)
        # first get or create conversation
        convo_url = "https://bsky.social/xrpc/chat.bsky.convo.getConvoForMembers"
        convo_resp = requests.get(
            convo_url,
            headers={'Authorization': f'Bearer {access_token}'},
            params={'members': [recipient_did]},
            timeout=30,
        )
        if convo_resp.status_code != 200:
            # try creating conversation
            return False, f"couldn't get/create conversation: {convo_resp.status_code}"
        convo_id = convo_resp.json().get('convo', {}).get('id')
        # send message into the conversation
        msg_url = "https://bsky.social/xrpc/chat.bsky.convo.sendMessage"
        msg_resp = requests.post(
            msg_url,
            headers={
                'Authorization': f'Bearer {access_token}',
                'Content-Type': 'application/json',
            },
            json={
                'convoId': convo_id,
                'message': {'text': message},
            },
            timeout=30,
        )
        if msg_resp.status_code in [200, 201]:
            return True, f"sent to {recipient_handle}"
        else:
            return False, f"bluesky dm failed: {msg_resp.status_code} - {msg_resp.text}"
    except Exception as e:
        return False, str(e)
def send_matrix_dm(recipient_mxid, message, dry_run=False):
    """send matrix direct message.

    creates a fresh private DM room inviting the recipient, then posts the
    message into it. returns (success, detail); detail is an error string
    on failure. NOTE(review): a new room is created on every call -- the
    "check if we already have a DM room" step mentioned below is not
    implemented, so repeated sends to the same person create new rooms.
    """
    if not MATRIX_ACCESS_TOKEN:
        return False, "MATRIX_ACCESS_TOKEN not set"
    if dry_run:
        print(f" [dry run] would DM {recipient_mxid} on matrix")
        print(f" message preview: {message[:100]}...")
        return True, "dry run"
    try:
        # create or get direct room with recipient
        # first, check if we already have a DM room
        headers = {'Authorization': f'Bearer {MATRIX_ACCESS_TOKEN}'}
        # create a new DM room
        create_room_resp = requests.post(
            f'{MATRIX_HOMESERVER}/_matrix/client/v3/createRoom',
            headers=headers,
            json={
                'is_direct': True,
                'invite': [recipient_mxid],
                'preset': 'trusted_private_chat',
            },
            timeout=30,
        )
        if create_room_resp.status_code not in [200, 201]:
            return False, f"matrix room creation failed: {create_room_resp.status_code} - {create_room_resp.text}"
        room_id = create_room_resp.json().get('room_id')
        # send message to room; the PUT endpoint requires a client-chosen
        # transaction id, derived here from the current time in ms
        import time
        txn_id = str(int(time.time() * 1000))
        msg_resp = requests.put(
            f'{MATRIX_HOMESERVER}/_matrix/client/v3/rooms/{room_id}/send/m.room.message/{txn_id}',
            headers=headers,
            json={
                'msgtype': 'm.text',
                'body': message,
            },
            timeout=30,
        )
        if msg_resp.status_code in [200, 201]:
            return True, f"sent to {recipient_mxid} in {room_id}"
        else:
            return False, f"matrix send failed: {msg_resp.status_code} - {msg_resp.text}"
    except Exception as e:
        return False, str(e)
def add_to_manual_queue(intro_data):
    """append an intro to the manual review queue with pending status."""
    entry = dict(intro_data)
    entry['queued_at'] = datetime.now().isoformat()
    entry['status'] = 'pending'
    queue = load_manual_queue()
    queue.append(entry)
    save_manual_queue(queue)
    return True
def determine_best_contact(human):
    """pick the delivery channel where this human is most active."""
    from introd.groq_draft import determine_contact_method as activity_based_contact
    method, info = activity_based_contact(human)
    # github_issue comes back as an "owner/repo" string; split it into the
    # dict shape the delivery layer expects
    if method == 'github_issue' and isinstance(info, str) and '/' in info:
        owner, repo = info.split('/', 1)
        return method, {'owner': owner, 'repo': repo}
    return method, info
def deliver_intro(match_data, intro_draft, dry_run=False):
    """
    deliver an intro via the best available method
    match_data: {human_a, human_b, overlap_score, overlap_reasons}
    intro_draft: the text to send (from groq)

    returns (success, error, method). every attempt is appended to the
    delivery log (sent or failed); a recipient is never contacted twice.
    """
    recipient = match_data.get('human_b', {})
    # recipient identity key, e.g. "github:alice"
    recipient_id = f"{recipient.get('platform')}:{recipient.get('username')}"
    # check if already contacted
    if already_contacted(recipient_id):
        return False, "already contacted", None
    # determine contact method (activity-based selection)
    method, contact_info = determine_best_contact(recipient)
    log = load_delivery_log()
    result = {
        'recipient_id': recipient_id,
        'recipient_name': recipient.get('name') or recipient.get('username'),
        'method': method,
        'contact_info': contact_info,
        'overlap_score': match_data.get('overlap_score'),
        'timestamp': datetime.now().isoformat(),
    }
    success = False
    error = None
    # dispatch on the chosen channel; each sender returns (success, error)
    if method == 'email':
        subject = f"someone you might want to know - connectd"
        success, error = send_email(contact_info, subject, intro_draft, dry_run)
    elif method == 'mastodon':
        success, error = send_mastodon_dm(contact_info, intro_draft, dry_run)
    elif method == 'bluesky':
        success, error = send_bluesky_dm(contact_info, intro_draft, dry_run)
    elif method == 'matrix':
        success, error = send_matrix_dm(contact_info, intro_draft, dry_run)
    elif method == 'discord':
        from scoutd.discord import send_discord_dm
        success, error = send_discord_dm(contact_info, intro_draft, dry_run)
    elif method == 'lemmy':
        from scoutd.lemmy import send_lemmy_dm
        success, error = send_lemmy_dm(contact_info, intro_draft, dry_run)
    elif method == 'github_issue':
        owner = contact_info.get('owner')
        repo = contact_info.get('repo')
        title = "community introduction from connectd"
        # format for github
        github_body = f"""hey {recipient.get('name') or recipient.get('username')},
{intro_draft}
---
*this is an automated introduction from [connectd](https://github.com/connectd-daemon), a daemon that finds isolated builders with aligned values and connects them. if this feels spammy, i apologize - you can close this issue and we won't reach out again.*
"""
        success, error = create_github_issue(owner, repo, title, github_body, dry_run)
    elif method == 'manual':
        # no automatic channel available: add to review queue instead
        add_to_manual_queue({
            'match': match_data,
            'draft': intro_draft,
            'recipient': recipient,
        })
        success = True
        error = "added to manual queue"
    # log result (success and failure lists feed get_delivery_stats)
    result['success'] = success
    result['error'] = error
    if success:
        log['sent'].append(result)
    else:
        log['failed'].append(result)
    save_delivery_log(log)
    return success, error, method
def deliver_batch(matches_with_intros, dry_run=False):
    """
    deliver intros for a batch of matches

    matches_with_intros: list of {match_data, intro_draft} (older entries may
    use the {match, draft} key names instead)
    returns: list of per-recipient result dicts
    """
    results = []
    for entry in matches_with_intros:
        match_data = entry.get('match_data') or entry.get('match')
        intro_draft = entry.get('intro_draft') or entry.get('draft')
        if not (match_data and intro_draft):
            continue
        ok, err, channel = deliver_intro(match_data, intro_draft, dry_run)
        username = match_data.get('human_b', {}).get('username')
        results.append({
            'recipient': username,
            'method': channel,
            'success': ok,
            'error': err,
        })
        print(f"  {username}: {channel} - {'ok' if ok else err}")
    return results
def get_delivery_stats():
    """get delivery statistics

    returns counts of sent/failed/queued deliveries, the number of pending
    manual-review items, and a per-method breakdown of successful sends.
    """
    log = load_delivery_log()
    queue = load_manual_queue()
    sent = log.get('sent', [])
    # tally successful sends per method in a single pass instead of
    # re-scanning the sent list once per method; seed the original four keys
    # so callers always find them, but also count newer channels (bluesky,
    # matrix, discord, lemmy) that were previously dropped from the breakdown
    by_method = {'email': 0, 'mastodon': 0, 'github_issue': 0, 'manual': 0}
    for entry in sent:
        method = entry.get('method')
        if method:
            by_method[method] = by_method.get(method, 0) + 1
    return {
        'sent': len(sent),
        'failed': len(log.get('failed', [])),
        'queued': len(log.get('queued', [])),
        'manual_pending': len([q for q in queue if q.get('status') == 'pending']),
        'by_method': by_method,
    }
def review_manual_queue():
    """review and process manual queue

    prints every pending manual-outreach item and returns the pending list
    (returns None when the queue is empty).
    """
    pending = [item for item in load_manual_queue() if item.get('status') == 'pending']
    if not pending:
        print("no items in manual queue")
        return
    print(f"\n{len(pending)} items pending review:\n")
    for idx, item in enumerate(pending, 1):
        who = item.get('recipient', {})
        match_info = item.get('match', {})
        print(f"[{idx}] {who.get('name') or who.get('username')}")
        print(f"    platform: {who.get('platform')}")
        print(f"    url: {who.get('url')}")
        print(f"    overlap: {match_info.get('overlap_score')}")
        print(f"    draft preview: {item.get('draft', '')[:80]}...")
        print()
    return pending

210
connectd/introd/draft.py Normal file
View file

@ -0,0 +1,210 @@
"""
introd/draft.py - AI writes intro messages referencing both parties' work
"""
import json
# intro template - transparent about being AI, neutral third party
# placeholders (filled via str.format in draft_intro): recipient_name,
# recipient_summary, other_name, other_summary, overlap_summary, other_url
INTRO_TEMPLATE = """hi {recipient_name},
i'm an AI that connects isolated builders working on similar things.
you're building: {recipient_summary}
{other_name} is building: {other_summary}
overlap: {overlap_summary}
thought you might benefit from knowing each other.
their work: {other_url}
no pitch. just connection. ignore if not useful.
- connectd
"""
# shorter version for platforms with character limits
# (used for mastodon/reddit channels; same placeholders minus the outro)
SHORT_TEMPLATE = """hi {recipient_name} - i'm an AI connecting aligned builders.
you: {recipient_summary}
{other_name}: {other_summary}
overlap: {overlap_summary}
their work: {other_url}
no pitch, just connection.
"""
def summarize_human(human_data):
    """generate a brief summary of what someone is building/interested in

    human_data: dict with platform, signals (list or JSON string), and extra
    (dict or JSON string) as produced by the scouts.
    returns: a short ' | '-joined summary string. never raises on missing or
    empty fields; falls back to "builder on <platform>".
    """
    parts = []
    # platform context
    platform = human_data.get('platform', '')
    # signals/interests - may be stored as a JSON string in the db; an empty
    # string would crash json.loads, so guard it (matches groq_draft's style)
    signals = human_data.get('signals', [])
    if isinstance(signals, str):
        signals = json.loads(signals) if signals else []
    # extra data - same JSON-string guard
    extra = human_data.get('extra', {})
    if isinstance(extra, str):
        extra = json.loads(extra) if extra else {}
    # build summary based on available data
    topics = extra.get('topics', [])
    languages = list(extra.get('languages', {}).keys())[:3]
    repo_count = extra.get('repo_count', 0)
    subreddits = extra.get('subreddits', [])
    if platform == 'github':
        if topics:
            parts.append(f"working on {', '.join(topics[:3])}")
        if languages:
            parts.append(f"using {', '.join(languages)}")
        if repo_count > 10:
            parts.append(f"({repo_count} repos)")
    elif platform == 'reddit':
        if subreddits:
            parts.append(f"active in r/{', r/'.join(subreddits[:3])}")
    elif platform == 'mastodon':
        instance = extra.get('instance', '')
        if instance:
            parts.append(f"on {instance}")
    elif platform == 'lobsters':
        karma = extra.get('karma', 0)
        if karma > 50:
            parts.append(f"active on lobste.rs ({karma} karma)")
    # add key signals (the values-aligned subset worth surfacing)
    key_signals = [s for s in signals if s in ['selfhosted', 'privacy', 'cooperative',
                                              'solarpunk', 'intentional_community',
                                              'home_automation', 'foss']]
    if key_signals:
        parts.append(f"interested in {', '.join(key_signals[:3])}")
    if not parts:
        parts.append(f"builder on {platform}")
    return ' | '.join(parts)
def summarize_overlap(overlap_data):
    """generate overlap summary

    overlap_data: match dict with overlap_reasons (list or JSON string) and
    optionally shared_signals.
    returns: a short human-readable overlap string with a generic fallback.
    """
    reasons = overlap_data.get('overlap_reasons', [])
    if isinstance(reasons, str):
        # may be stored as a JSON string; an empty string means no reasons
        # (json.loads('') would raise)
        reasons = json.loads(reasons) if reasons else []
    if reasons:
        return ' | '.join(reasons[:3])
    # fallback
    shared = overlap_data.get('shared_signals', [])
    if shared:
        return f"shared interests: {', '.join(shared[:3])}"
    return "aligned values and interests"
def draft_intro(match_data, recipient='a'):
    """
    draft an intro message for a match

    match_data: dict with human_a, human_b, overlap info
    recipient: 'a' or 'b' - who receives this intro
    returns: dict with draft text, channel, channel_address, and metadata
    """
    if recipient == 'a':
        recipient_human = match_data['human_a']
        other_human = match_data['human_b']
    else:
        recipient_human = match_data['human_b']
        other_human = match_data['human_a']
    # get names
    recipient_name = recipient_human.get('name') or recipient_human.get('username', 'friend')
    other_name = other_human.get('name') or other_human.get('username', 'someone')
    # generate summaries
    recipient_summary = summarize_human(recipient_human)
    other_summary = summarize_human(other_human)
    overlap_summary = summarize_overlap(match_data)
    # other's url
    other_url = other_human.get('url', '')
    # determine best channel; contact may be a JSON string from the db and an
    # empty string would crash json.loads, so guard it
    contact = recipient_human.get('contact', {})
    if isinstance(contact, str):
        contact = json.loads(contact) if contact else {}
    channel = None
    channel_address = None
    # prefer email if available
    if contact.get('email'):
        channel = 'email'
        channel_address = contact['email']
    # github issue/discussion
    elif recipient_human.get('platform') == 'github':
        channel = 'github'
        channel_address = recipient_human.get('url')
    # mastodon DM
    elif recipient_human.get('platform') == 'mastodon':
        channel = 'mastodon'
        channel_address = recipient_human.get('username')
    # reddit message
    elif recipient_human.get('platform') == 'reddit':
        channel = 'reddit'
        channel_address = recipient_human.get('username')
    else:
        channel = 'manual'
        channel_address = recipient_human.get('url')
    # choose template based on channel (short form for char-limited platforms)
    if channel in ['mastodon', 'reddit']:
        template = SHORT_TEMPLATE
    else:
        template = INTRO_TEMPLATE
    # render draft
    draft = template.format(
        recipient_name=recipient_name.split()[0] if recipient_name else 'friend',  # first name only
        recipient_summary=recipient_summary,
        other_name=other_name.split()[0] if other_name else 'someone',
        other_summary=other_summary,
        overlap_summary=overlap_summary,
        other_url=other_url,
    )
    return {
        'recipient_human': recipient_human,
        'other_human': other_human,
        'channel': channel,
        'channel_address': channel_address,
        'draft': draft,
        'overlap_score': match_data.get('overlap_score', 0),
        'match_id': match_data.get('id'),
    }
def draft_intros_for_match(match_data):
    """
    draft intros for both parties in a match

    returns: [intro_for_a, intro_for_b] in that order
    """
    return [draft_intro(match_data, recipient=side) for side in ('a', 'b')]

View file

@ -0,0 +1,437 @@
"""
introd/groq_draft.py - groq llama 4 maverick for smart intro drafting
uses groq api to generate personalized, natural intro messages
that don't sound like ai-generated slop
"""
import os
import json
import requests
from datetime import datetime
# groq api configuration, overridable via environment variables
GROQ_API_KEY = os.environ.get('GROQ_API_KEY', '')
GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
# NOTE(review): the module docstring mentions "llama 4 maverick" but the
# default model here is llama-3.1-70b-versatile - confirm which is intended
MODEL = os.environ.get('GROQ_MODEL', 'llama-3.1-70b-versatile')
def determine_contact_method(human):
    """
    determine best contact method based on WHERE THEY'RE MOST ACTIVE

    don't use fixed hierarchy - analyze activity per platform:
    - count posts/commits/activity
    - weight by recency (last 30 days matters more)
    - contact them where they already are
    - fall back to email only if no social activity

    human: discovered-person dict (platform, username, extra, contact, ...)
    returns: (method, info) where method is one of 'github_issue', 'mastodon',
             'bluesky', 'twitter', 'lobsters', 'matrix', 'lemmy', 'email', or
             'manual'; info is the channel-specific handle/address (None for
             'manual').
    """
    from datetime import datetime, timedelta
    # extra/contact may be JSON strings when loaded from the db
    extra = human.get('extra', {})
    if isinstance(extra, str):
        extra = json.loads(extra) if extra else {}
    # handle nested extra.extra from old save format
    if 'extra' in extra and isinstance(extra['extra'], dict):
        extra = {**extra, **extra['extra']}
    contact = human.get('contact', {})
    if isinstance(contact, str):
        contact = json.loads(contact) if contact else {}
    # collect activity scores per platform
    activity_scores = {}
    now = datetime.now()
    thirty_days_ago = now - timedelta(days=30)
    ninety_days_ago = now - timedelta(days=90)
    # github activity
    github_username = human.get('username') if human.get('platform') == 'github' else extra.get('github')
    if github_username:
        github_score = 0
        top_repos = extra.get('top_repos', [])
        for repo in top_repos:
            # recent commits weight more
            pushed_at = repo.get('pushed_at', '')
            if pushed_at:
                try:
                    push_date = datetime.fromisoformat(pushed_at.replace('Z', '+00:00')).replace(tzinfo=None)
                    if push_date > thirty_days_ago:
                        github_score += 10  # very recent
                    elif push_date > ninety_days_ago:
                        github_score += 5  # somewhat recent
                    else:
                        github_score += 1  # old but exists
                except (ValueError, TypeError):
                    # unparsable timestamp still proves the repo exists;
                    # narrow except so ctrl-c etc. are not swallowed
                    github_score += 1
            # stars indicate engagement
            github_score += min(repo.get('stars', 0) // 10, 5)
        # commit activity from deep scrape
        commit_count = extra.get('commit_count', 0)
        github_score += min(commit_count // 10, 20)
        if github_score > 0:
            # point at the top repo when one is named, otherwise just the
            # user; .get avoids a KeyError on repo records without 'name'
            top_repo_name = top_repos[0].get('name') if top_repos else None
            activity_scores['github_issue'] = {
                'score': github_score,
                'info': f"{github_username}/{top_repo_name}" if top_repo_name else github_username
            }
    # mastodon activity
    mastodon_handle = extra.get('mastodon') or contact.get('mastodon')
    if mastodon_handle:
        mastodon_score = 0
        statuses_count = extra.get('mastodon_statuses', 0) or human.get('statuses_count', 0)
        # high post count = active user
        if statuses_count > 1000:
            mastodon_score += 30
        elif statuses_count > 500:
            mastodon_score += 20
        elif statuses_count > 100:
            mastodon_score += 10
        elif statuses_count > 0:
            mastodon_score += 5
        # platform bonus for fediverse (values-aligned)
        mastodon_score += 10
        # bonus if handle was discovered via rel="me" or similar verification
        # (having a handle linked from their website = they want to be contacted there)
        handles = extra.get('handles', {})
        if handles.get('mastodon') == mastodon_handle:
            mastodon_score += 15  # verified handle bonus
        if mastodon_score > 0:
            activity_scores['mastodon'] = {'score': mastodon_score, 'info': mastodon_handle}
    # bluesky activity
    bluesky_handle = extra.get('bluesky') or contact.get('bluesky')
    if bluesky_handle:
        bluesky_score = 0
        posts_count = extra.get('bluesky_posts', 0) or human.get('posts_count', 0)
        if posts_count > 500:
            bluesky_score += 25
        elif posts_count > 100:
            bluesky_score += 15
        elif posts_count > 0:
            bluesky_score += 5
        # newer platform, slightly lower weight
        bluesky_score += 5
        if bluesky_score > 0:
            activity_scores['bluesky'] = {'score': bluesky_score, 'info': bluesky_handle}
    # twitter activity
    twitter_handle = extra.get('twitter') or contact.get('twitter')
    if twitter_handle:
        twitter_score = 0
        tweets_count = extra.get('twitter_tweets', 0)
        if tweets_count > 1000:
            twitter_score += 20
        elif tweets_count > 100:
            twitter_score += 10
        elif tweets_count > 0:
            twitter_score += 5
        # if we found them via twitter hashtags, they're active there
        if human.get('platform') == 'twitter':
            twitter_score += 15
        if twitter_score > 0:
            activity_scores['twitter'] = {'score': twitter_score, 'info': twitter_handle}
    # NOTE: reddit is DISCOVERY ONLY, not a contact method
    # we find users on reddit but reach out via their external links (github, mastodon, etc.)
    # reddit-only users go to manual_queue for review
    # lobsters activity
    lobsters_username = extra.get('lobsters') or contact.get('lobsters')
    if lobsters_username or human.get('platform') == 'lobsters':
        lobsters_score = 0
        lobsters_username = lobsters_username or human.get('username')
        karma = extra.get('lobsters_karma', 0) or human.get('karma', 0)
        # lobsters is invite-only, high signal
        lobsters_score += 15
        if karma > 100:
            lobsters_score += 15
        elif karma > 50:
            lobsters_score += 10
        elif karma > 0:
            lobsters_score += 5
        if lobsters_score > 0:
            activity_scores['lobsters'] = {'score': lobsters_score, 'info': lobsters_username}
    # matrix activity
    matrix_id = extra.get('matrix') or contact.get('matrix')
    if matrix_id:
        matrix_score = 0
        # matrix users are typically privacy-conscious and technical
        matrix_score += 15  # platform bonus for decentralized chat
        # bonus if handle was discovered via rel="me" verification
        handles = extra.get('handles', {})
        if handles.get('matrix') == matrix_id:
            matrix_score += 10  # verified handle bonus
        if matrix_score > 0:
            activity_scores['matrix'] = {'score': matrix_score, 'info': matrix_id}
    # lemmy activity (fediverse)
    lemmy_username = human.get('username') if human.get('platform') == 'lemmy' else extra.get('lemmy')
    if lemmy_username:
        lemmy_score = 0
        # lemmy is fediverse - high values alignment
        lemmy_score += 20  # fediverse platform bonus
        post_count = extra.get('post_count', 0)
        comment_count = extra.get('comment_count', 0)
        if post_count > 100:
            lemmy_score += 15
        elif post_count > 50:
            lemmy_score += 10
        elif post_count > 10:
            lemmy_score += 5
        if comment_count > 500:
            lemmy_score += 10
        elif comment_count > 100:
            lemmy_score += 5
        if lemmy_score > 0:
            activity_scores['lemmy'] = {'score': lemmy_score, 'info': lemmy_username}
    # pick highest activity platform
    if activity_scores:
        best_platform = max(activity_scores.items(), key=lambda x: x[1]['score'])
        return best_platform[0], best_platform[1]['info']
    # fall back to email ONLY if no social activity detected
    email = extra.get('email') or contact.get('email')
    # also check emails list
    if not email:
        emails = extra.get('emails') or contact.get('emails') or []
        for e in emails:
            if e and '@' in e and 'noreply' not in e.lower():
                email = e
                break
    if email and '@' in email and 'noreply' not in email.lower():
        return 'email', email
    # last resort: manual
    return 'manual', None
def draft_intro_with_llm(match_data, recipient='a', dry_run=False):
    """
    use groq llama 4 maverick to draft a personalized intro

    match_data should contain:
    - human_a: the first person
    - human_b: the second person
    - overlap_score: numeric score
    - overlap_reasons: list of why they match
    recipient: 'a' or 'b' - who we're writing to

    returns: (result_dict, None) on success or (None, error_message) on
    failure; result_dict carries the draft text plus the contact method
    chosen for the recipient.

    NOTE(review): dry_run is accepted but unused here - the groq api call
    is made regardless; confirm whether a dry run should skip the request.
    """
    if not GROQ_API_KEY:
        return None, "GROQ_API_KEY not set"
    # determine recipient and other person
    if recipient == 'a':
        to_person = match_data.get('human_a', {})
        other_person = match_data.get('human_b', {})
    else:
        to_person = match_data.get('human_b', {})
        other_person = match_data.get('human_a', {})
    # build context
    to_name = to_person.get('name') or to_person.get('username', 'friend')
    other_name = other_person.get('name') or other_person.get('username', 'someone')
    # signals/overlap_reasons may be JSON strings when loaded from the db;
    # empty strings are treated as empty lists
    to_signals = to_person.get('signals', [])
    if isinstance(to_signals, str):
        to_signals = json.loads(to_signals) if to_signals else []
    other_signals = other_person.get('signals', [])
    if isinstance(other_signals, str):
        other_signals = json.loads(other_signals) if other_signals else []
    overlap_reasons = match_data.get('overlap_reasons', [])
    if isinstance(overlap_reasons, str):
        overlap_reasons = json.loads(overlap_reasons) if overlap_reasons else []
    # parse extra data
    to_extra = to_person.get('extra', {})
    other_extra = other_person.get('extra', {})
    if isinstance(to_extra, str):
        to_extra = json.loads(to_extra) if to_extra else {}
    if isinstance(other_extra, str):
        other_extra = json.loads(other_extra) if other_extra else {}
    # build profile summaries (plain text injected into the user prompt)
    to_profile = f"""
name: {to_name}
platform: {to_person.get('platform', 'unknown')}
bio: {to_person.get('bio') or 'no bio'}
location: {to_person.get('location') or 'unknown'}
signals: {', '.join(to_signals[:8])}
repos: {len(to_extra.get('top_repos', []))} public repos
languages: {', '.join(to_extra.get('languages', {}).keys())}
"""
    other_profile = f"""
name: {other_name}
platform: {other_person.get('platform', 'unknown')}
bio: {other_person.get('bio') or 'no bio'}
location: {other_person.get('location') or 'unknown'}
signals: {', '.join(other_signals[:8])}
repos: {len(other_extra.get('top_repos', []))} public repos
languages: {', '.join(other_extra.get('languages', {}).keys())}
url: {other_person.get('url', '')}
"""
    # build prompt - system prompt sets tone rules, user prompt carries data
    system_prompt = """you are connectd, an ai that connects isolated builders who share values but don't know each other yet.
your job is to write a short, genuine intro message to one person about another person they might want to know.
rules:
- be brief (3-5 sentences max)
- be genuine, not salesy or fake
- focus on WHY they might want to connect, not just WHAT they have in common
- don't be cringe or use buzzwords
- lowercase preferred (casual tone)
- no emojis unless the person's profile suggests they'd like them
- mention specific things from their profiles, not generic "you both like open source"
- end with a simple invitation, not a hard sell
- sign off as "- connectd" (lowercase)
bad examples:
- "I noticed you're both passionate about..." (too formal)
- "You two would be PERFECT for each other!" (too salesy)
- "As a fellow privacy enthusiast..." (cringe)
good examples:
- "hey, saw you're building X. there's someone else working on similar stuff in Y who might be interesting to know."
- "you might want to check out Z's work on federated systems - similar approach to what you're doing with A."
"""
    user_prompt = f"""write an intro message to {to_name} about {other_name}.
RECIPIENT ({to_name}):
{to_profile}
INTRODUCING ({other_name}):
{other_profile}
WHY THEY MATCH (overlap score {match_data.get('overlap_score', 0)}):
{', '.join(overlap_reasons[:5])}
write a short intro message. remember: lowercase, genuine, not salesy."""
    try:
        response = requests.post(
            GROQ_API_URL,
            headers={
                'Authorization': f'Bearer {GROQ_API_KEY}',
                'Content-Type': 'application/json',
            },
            json={
                'model': MODEL,
                'messages': [
                    {'role': 'system', 'content': system_prompt},
                    {'role': 'user', 'content': user_prompt},
                ],
                'temperature': 0.7,
                'max_tokens': 300,
            },
            timeout=30,
        )
        if response.status_code != 200:
            return None, f"groq api error: {response.status_code} - {response.text}"
        data = response.json()
        draft = data['choices'][0]['message']['content'].strip()
        # determine contact method for recipient
        contact_method, contact_info = determine_contact_method(to_person)
        return {
            'draft': draft,
            'model': MODEL,
            'to': to_name,
            'about': other_name,
            'overlap_score': match_data.get('overlap_score', 0),
            'contact_method': contact_method,
            'contact_info': contact_info,
            'generated_at': datetime.now().isoformat(),
        }, None
    except Exception as e:
        # network errors and malformed responses are reported, not raised
        return None, f"groq error: {str(e)}"
def draft_intro_batch(matches, dry_run=False):
    """
    draft intros for multiple matches

    drafts both directions (to a about b, and to b about a) for each match.
    returns: list of {match, intro_to_a, intro_to_b, errors} dicts
    """
    batch = []
    for match in matches:
        to_a, err_a = draft_intro_with_llm(match, recipient='a', dry_run=dry_run)
        to_b, err_b = draft_intro_with_llm(match, recipient='b', dry_run=dry_run)
        batch.append({
            'match': match,
            'intro_to_a': to_a,
            'intro_to_b': to_b,
            'errors': [err_a, err_b],
        })
    return batch
def test_groq_connection():
    """test that groq api is working; returns (ok, message)"""
    if not GROQ_API_KEY:
        return False, "GROQ_API_KEY not set"
    payload = {
        'model': MODEL,
        'messages': [{'role': 'user', 'content': 'say "ok" and nothing else'}],
        'max_tokens': 10,
    }
    auth_headers = {
        'Authorization': f'Bearer {GROQ_API_KEY}',
        'Content-Type': 'application/json',
    }
    try:
        response = requests.post(
            GROQ_API_URL,
            headers=auth_headers,
            json=payload,
            timeout=10,
        )
    except Exception as e:
        return False, f"groq connection error: {str(e)}"
    if response.status_code == 200:
        return True, "groq api working"
    return False, f"groq api error: {response.status_code}"

View file

@ -0,0 +1,250 @@
"""
introd/lost_intro.py - intro drafting for lost builders
different tone than builder-to-builder intros.
these people need encouragement, not networking.
the goal isn't to recruit them. it's to show them the door exists.
they take it or they don't. but they'll know someone saw them.
"""
import os
import json
import requests
from datetime import datetime
# groq api configuration, overridable via environment variables
GROQ_API_KEY = os.environ.get('GROQ_API_KEY', '')
GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
MODEL = os.environ.get('GROQ_MODEL', 'llama-3.1-70b-versatile')
# static fallback template used when no groq key is set or use_llm is off;
# placeholders: name, interests, builder_name, builder_url, builder_description
LOST_INTRO_TEMPLATE = """hey {name},
i'm connectd. i'm a daemon that finds people who might need a nudge.
i noticed you're interested in {interests}. you ask good questions. you clearly get it.
but maybe you haven't built anything yet. or you started and stopped. or you don't think you can.
that's okay. most people don't.
but some people do. here's one: {builder_name} ({builder_url})
{builder_description}
they started where you are. look at what they built.
you're not behind. you're just not started yet.
no pressure. just wanted you to know someone noticed.
- connectd"""
# system prompt for the LLM path (draft_with_llm) - sets tone and guard rails
SYSTEM_PROMPT = """you are connectd, a daemon that finds isolated builders with aligned values and connects them.
right now you're reaching out to someone who has POTENTIAL but hasn't found it yet. maybe they gave up, maybe they're stuck, maybe they don't believe they can do it.
your job is to:
1. acknowledge where they are without being condescending
2. point them to an active builder who could inspire them
3. be genuine, not salesy or motivational-speaker-y
4. keep it short - these people are tired, don't overwhelm them
5. use lowercase, be human, no corporate bullshit
6. make it clear there's no pressure, no follow-up spam
you're not recruiting. you're not selling. you're just showing them a door.
the template structure:
- acknowledge them (you noticed something about them)
- normalize where they are (most people don't build things)
- show them someone who did (the builder)
- brief encouragement (you're not behind, just not started)
- sign off with no pressure
do NOT:
- be preachy or lecture them
- use motivational cliches ("you got this!", "believe in yourself!")
- make promises about outcomes
- be too long - they don't have energy for long messages
- make them feel bad about where they are"""
def draft_lost_intro(lost_user, inspiring_builder, config=None):
    """
    draft an intro for a lost builder, pairing them with an inspiring active builder.

    lost_user: the person who needs a nudge
    inspiring_builder: an active builder with similar interests who could inspire them
    config: optional dict; config['use_llm'] (default True) selects the groq
            path when an api key is configured

    returns: (draft_text, error) - error is None on success
    """
    config = config or {}
    # gather info about lost user
    lost_name = lost_user.get('name') or lost_user.get('username', 'there')
    lost_interests = extract_interests(lost_user)
    # gather info about inspiring builder
    builder_name = inspiring_builder.get('name') or inspiring_builder.get('username')
    builder_url = inspiring_builder.get('url') or f"https://github.com/{inspiring_builder.get('username')}"
    builder_description = create_builder_description(inspiring_builder)
    # use LLM to personalize when a key exists and config allows it
    if GROQ_API_KEY and config.get('use_llm', True):
        return draft_with_llm(lost_user, inspiring_builder, lost_interests, builder_description)
    # fallback to static template
    return LOST_INTRO_TEMPLATE.format(
        name=lost_name,
        interests=', '.join(lost_interests[:3]) if lost_interests else 'building things',
        builder_name=builder_name,
        builder_url=builder_url,
        builder_description=builder_description,
    ), None
def extract_interests(user):
    """extract interests from user profile

    pulls from extra.topics/aligned_topics, subreddit activity, and bio
    keywords; returns at most 5 interests with a generic fallback when
    nothing is found.
    """
    interests = []
    # from topics/tags (extra may be a JSON string from the db)
    extra = user.get('extra', {})
    if isinstance(extra, str):
        try:
            extra = json.loads(extra)
        except ValueError:
            # malformed JSON - treat as no extra data; narrowed from a bare
            # except so unrelated errors are not swallowed
            extra = {}
    topics = extra.get('topics', []) or extra.get('aligned_topics', [])
    interests.extend(topics[:5])
    # from subreddits, skipping the generic "stuck/aspiring" communities
    subreddits = user.get('subreddits', [])
    for sub in subreddits[:3]:
        if sub.lower() not in ['learnprogramming', 'findapath', 'getdisciplined']:
            interests.append(sub)
    # from bio keywords
    bio = user.get('bio') or ''
    bio_lower = bio.lower()
    interest_keywords = [
        'rust', 'python', 'javascript', 'go', 'linux', 'self-hosting', 'homelab',
        'privacy', 'security', 'open source', 'foss', 'decentralized', 'ai', 'ml',
        'web dev', 'backend', 'frontend', 'devops', 'data', 'automation',
    ]
    for kw in interest_keywords:
        if kw in bio_lower and kw not in interests:
            interests.append(kw)
    return interests[:5] if interests else ['technology', 'building things']
def create_builder_description(builder):
    """create a brief description of what the builder has done

    builds a sentence from top repos, topics, and values signals; falls
    back to a generic line when nothing is known.
    """
    extra = builder.get('extra', {})
    if isinstance(extra, str):
        try:
            extra = json.loads(extra)
        except ValueError:
            # malformed JSON - treat as no extra data; narrowed from a bare
            # except so unrelated errors are not swallowed
            extra = {}
    parts = []
    # what they build
    repos = extra.get('top_repos', [])[:3]
    if repos:
        repo_names = [r.get('name') for r in repos if r.get('name')]
        if repo_names:
            parts.append(f"they've built things like {', '.join(repo_names[:2])}")
    # their focus
    topics = extra.get('aligned_topics', []) or extra.get('topics', [])
    if topics:
        parts.append(f"they work on {', '.join(topics[:3])}")
    # their vibe (substring match over the stringified signals list)
    signals = builder.get('signals', [])
    if 'self-hosted' in str(signals).lower():
        parts.append("they're into self-hosting and owning their own infrastructure")
    if 'privacy' in str(signals).lower():
        parts.append("they care about privacy")
    if 'community' in str(signals).lower():
        parts.append("they're community-focused")
    if parts:
        return '. '.join(parts) + '.'
    else:
        return "they're building cool stuff in the open."
def draft_with_llm(lost_user, inspiring_builder, interests, builder_description):
    """use the groq LLM to draft a personalized lost-builder intro

    returns: (draft_text, None) on success or (None, error_message)
    """
    lost_name = lost_user.get('name') or lost_user.get('username', 'there')
    lost_signals = lost_user.get('lost_signals', [])
    lost_bio = lost_user.get('bio', '')
    builder_name = inspiring_builder.get('name') or inspiring_builder.get('username')
    builder_url = inspiring_builder.get('url') or f"https://github.com/{inspiring_builder.get('username')}"
    user_prompt = f"""draft an intro for this lost builder:
LOST USER:
- name: {lost_name}
- interests: {', '.join(interests)}
- signals detected: {', '.join(lost_signals[:5]) if lost_signals else 'general stuck/aspiring patterns'}
- bio: {lost_bio[:200] if lost_bio else 'none'}
INSPIRING BUILDER TO SHOW THEM:
- name: {builder_name}
- url: {builder_url}
- what they do: {builder_description}
write a short, genuine message. no fluff. no motivational cliches. just human.
keep it under 150 words.
use lowercase.
end with "- connectd"
"""
    payload = {
        'model': MODEL,
        'messages': [
            {'role': 'system', 'content': SYSTEM_PROMPT},
            {'role': 'user', 'content': user_prompt},
        ],
        'temperature': 0.7,
        'max_tokens': 500,
    }
    auth_headers = {
        'Authorization': f'Bearer {GROQ_API_KEY}',
        'Content-Type': 'application/json',
    }
    try:
        resp = requests.post(GROQ_API_URL, headers=auth_headers, json=payload, timeout=30)
        if resp.status_code != 200:
            return None, f"llm error: {resp.status_code}"
        content = resp.json()['choices'][0]['message']['content']
        return content.strip(), None
    except Exception as e:
        return None, str(e)
def get_lost_intro_config():
    """get configuration for lost builder outreach"""
    return dict(
        enabled=True,
        max_per_day=5,        # lower volume, higher care
        require_review=True,  # always manual approval
        cooldown_days=90,     # don't spam struggling people
        min_lost_score=40,
        min_values_score=20,
        use_llm=True,
    )

126
connectd/introd/review.py Normal file
View file

@ -0,0 +1,126 @@
"""
introd/review.py - human approval queue before sending
"""
import json
from datetime import datetime
def get_pending_intros(db, limit=50):
    """
    get all intros pending human review

    db: database wrapper exposing get_pending_intros / get_human_by_id
    returns: list of intro dicts with the recipient record attached
    """
    pending = []
    for row in db.get_pending_intros(limit=limit):
        human_id = row.get('recipient_human_id')
        pending.append({
            'id': row['id'],
            'match_id': row.get('match_id'),
            # attach the full recipient record when we know who it is
            'recipient': db.get_human_by_id(human_id) if human_id else None,
            'channel': row.get('channel'),
            'draft': row.get('draft'),
            'status': row.get('status'),
        })
    return pending
def approve_intro(db, intro_id, approved_by='human'):
    """
    approve an intro for sending

    intro_id: database id of the intro
    approved_by: who approved it (recorded for the audit trail)
    """
    db.approve_intro(intro_id, approved_by)
    message = "introd: approved intro {} by {}".format(intro_id, approved_by)
    print(message)
def reject_intro(db, intro_id, reason=None):
    """
    reject an intro (won't be sent)

    records the rejection (with the optional reason) in the approved_by
    column as an audit note, and the rejection time in approved_at.
    """
    note = f"rejected: {reason}" if reason else "rejected"
    cursor = db.conn.cursor()
    cursor.execute('''UPDATE intros SET status = 'rejected',
                 approved_at = ?, approved_by = ? WHERE id = ?''',
                   (datetime.now().isoformat(), note, intro_id))
    db.conn.commit()
    print(f"introd: rejected intro {intro_id}")
def review_intro_interactive(db, intro):
    """
    interactive review of a single intro

    prints the draft plus recipient context, then loops on stdin until the
    reviewer picks an action.
    returns: 'approve', 'reject', 'edit', or 'skip'
    """
    banner = "=" * 60
    print("\n" + banner)
    print("INTRO FOR REVIEW")
    print(banner)
    recipient = intro.get('recipient', {})
    print(f"\nRecipient: {recipient.get('name') or recipient.get('username')}")
    print(f"Platform: {recipient.get('platform')}")
    print(f"Channel: {intro.get('channel')}")
    print(f"\n--- DRAFT ---")
    print(intro.get('draft'))
    print("--- END ---\n")
    while True:
        choice = input("[a]pprove / [r]eject / [s]kip / [e]dit? ").strip().lower()
        if choice in ('a', 'approve'):
            approve_intro(db, intro['id'])
            return 'approve'
        if choice in ('r', 'reject'):
            reason = input("reason (optional): ").strip()
            reject_intro(db, intro['id'], reason)
            return 'reject'
        if choice in ('s', 'skip'):
            return 'skip'
        if choice in ('e', 'edit'):
            # editing is not wired up yet; keep prompting
            print("editing not yet implemented - approve or reject")
        else:
            print("invalid choice")
def review_all_pending(db):
    """
    interactive review of all pending intros

    walks every pending intro through review_intro_interactive, asking after
    each one whether to continue, then prints a summary.
    """
    intros = get_pending_intros(db)
    if not intros:
        print("no pending intros to review")
        return
    print(f"\n{len(intros)} intros pending review\n")
    counts = {'approve': 0, 'reject': 0, 'skip': 0}
    for intro in intros:
        outcome = review_intro_interactive(db, intro)
        # anything that is not an explicit approve/reject counts as skipped
        if outcome not in ('approve', 'reject'):
            outcome = 'skip'
        counts[outcome] += 1
        cont = input("\ncontinue reviewing? [y/n] ").strip().lower()
        if cont != 'y':
            break
    print(f"\nreview complete: {counts['approve']} approved, {counts['reject']} rejected, {counts['skip']} skipped")

216
connectd/introd/send.py Normal file
View file

@ -0,0 +1,216 @@
"""
introd/send.py - actually deliver intros via appropriate channel
"""
import smtplib
import requests
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime
import os
# email config (from env)
SMTP_HOST = os.environ.get('SMTP_HOST', '')
SMTP_PORT = int(os.environ.get('SMTP_PORT', '465'))  # 465 = implicit-SSL SMTP, matches SMTP_SSL below
SMTP_USER = os.environ.get('SMTP_USER', '')
SMTP_PASS = os.environ.get('SMTP_PASS', '')
FROM_EMAIL = os.environ.get('FROM_EMAIL', '')
def send_email(to_email, subject, body):
    """send email via SMTP

    builds a plain-text MIME message and delivers it over an implicit-SSL
    SMTP connection using the module-level SMTP_* settings.
    returns: (success, error) tuple
    """
    message = MIMEMultipart()
    message['From'] = FROM_EMAIL
    message['To'] = to_email
    message['Subject'] = subject
    message.attach(MIMEText(body, 'plain'))
    try:
        with smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) as smtp:
            smtp.login(SMTP_USER, SMTP_PASS)
            smtp.send_message(message)
    except Exception as exc:
        return False, str(exc)
    return True, None
def send_github_issue(repo_url, title, body):
    """
    create a github issue (requires GITHUB_TOKEN)
    note: only works if you have write access to the repo
    typically won't work for random users - fallback to manual

    currently always declines to post; returns (False, reason).
    """
    # https://github.com/owner/repo -> owner/repo
    segments = repo_url.rstrip('/').split('/')
    if len(segments) < 2:
        return False, "invalid github url"
    owner, repo = segments[-2], segments[-1]
    if not os.environ.get('GITHUB_TOKEN'):
        return False, "no github token"
    # deliberately not automated: opening issues on strangers' repos
    # is invasive, so surface the info for manual outreach instead
    return False, "github issues not automated - use manual outreach"
def send_mastodon_dm(instance, username, message):
    """stub: mastodon DMs need an oauth-authorized account, which is not
    wired up yet - always defers to the manual outreach queue."""
    return False, "mastodon DMs not automated - use manual outreach"
def send_reddit_message(username, subject, body):
    """stub: reddit messaging needs oauth credentials, which are not
    wired up yet - always defers to the manual outreach queue."""
    return False, "reddit messages not automated - use manual outreach"
def send_intro(db, intro_id):
    """
    send an approved intro

    looks up the intro row, verifies it is in 'approved' status, resolves
    the recipient, then dispatches via the intro's channel (email is the
    only channel that actually sends today; the others are stubs). on any
    failure the intro is flipped to 'manual_needed' so it shows up in the
    manual-export queue.

    returns: (success, error_message)
    """
    # get intro from db
    c = db.conn.cursor()
    c.execute('SELECT * FROM intros WHERE id = ?', (intro_id,))
    row = c.fetchone()
    if not row:
        return False, "intro not found"
    intro = dict(row)
    # only 'approved' intros may be dispatched
    if intro['status'] != 'approved':
        return False, f"intro not approved (status: {intro['status']})"
    channel = intro.get('channel')
    draft = intro.get('draft')
    # get recipient info
    recipient = db.get_human_by_id(intro['recipient_human_id'])
    if not recipient:
        return False, "recipient not found"
    success = False
    error = None
    if channel == 'email':
        # get email from contact (stored either as dict or JSON string)
        import json
        contact = recipient.get('contact', {})
        if isinstance(contact, str):
            contact = json.loads(contact)
        email = contact.get('email')
        if email:
            success, error = send_email(
                email,
                "connection: aligned builder intro",
                draft
            )
        else:
            error = "no email address"
    elif channel == 'github':
        # send_github_issue currently always returns False -> manual queue
        success, error = send_github_issue(
            recipient.get('url'),
            "connection: aligned builder intro",
            draft
        )
    elif channel == 'mastodon':
        # stub: not automated yet -> manual queue
        success, error = send_mastodon_dm(
            recipient.get('instance'),
            recipient.get('username'),
            draft
        )
    elif channel == 'reddit':
        # stub: not automated yet -> manual queue
        success, error = send_reddit_message(
            recipient.get('username'),
            "connection: aligned builder intro",
            draft
        )
    else:
        error = f"unknown channel: {channel}"
    # update status
    if success:
        db.mark_intro_sent(intro_id)
        print(f"introd: sent intro {intro_id} via {channel}")
    else:
        # mark as needs manual sending
        # NOTE(review): this overwrites approved_at with the failure time -
        # confirm that is intentional rather than a copy/paste slip
        c.execute('''UPDATE intros SET status = 'manual_needed',
            approved_at = ? WHERE id = ?''',
            (datetime.now().isoformat(), intro_id))
        db.conn.commit()
        print(f"introd: intro {intro_id} needs manual send ({error})")
    return success, error
def send_all_approved(db):
    """
    send all approved intros

    dispatches every intro currently in 'approved' status via send_intro
    and prints a sent/failed summary.
    """
    cursor = db.conn.cursor()
    cursor.execute('SELECT id FROM intros WHERE status = "approved"')
    approved_rows = cursor.fetchall()
    if not approved_rows:
        print("no approved intros to send")
        return
    print(f"sending {len(approved_rows)} approved intros...")
    outcomes = [send_intro(db, record['id'])[0] for record in approved_rows]
    sent = sum(1 for ok in outcomes if ok)
    failed = len(outcomes) - sent
    print(f"sent: {sent}, failed/manual: {failed}")
def export_manual_intros(db, output_file='manual_intros.txt'):
    """
    export intros that need manual sending to a text file

    writes one banner-delimited section per intro (recipient, platform,
    url, channel, then the draft text) for approved or manual_needed rows.
    """
    cursor = db.conn.cursor()
    cursor.execute('''SELECT i.*, h.username, h.platform, h.url
                 FROM intros i
                 JOIN humans h ON i.recipient_human_id = h.id
                 WHERE i.status IN ('approved', 'manual_needed')''')
    pending = cursor.fetchall()
    if not pending:
        print("no intros to export")
        return
    sections = []
    for entry in pending:
        sections.append(
            "=" * 60 + "\n"
            + f"TO: {entry['username']} ({entry['platform']})\n"
            + f"URL: {entry['url']}\n"
            + f"CHANNEL: {entry['channel']}\n"
            + "-" * 60 + "\n"
            + entry['draft'] + "\n"
            + "\n"
        )
    with open(output_file, 'w') as out:
        out.write("".join(sections))
    print(f"exported {len(pending)} intros to {output_file}")

BIN
connectd/logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

View file

@ -0,0 +1,10 @@
"""
matchd - pairing module
generates fingerprints, finds overlaps, ranks matches
"""
from .fingerprint import generate_fingerprint
from .overlap import find_overlap
from .rank import rank_matches, find_all_matches
__all__ = ['generate_fingerprint', 'find_overlap', 'rank_matches', 'find_all_matches']

View file

@ -0,0 +1,210 @@
"""
matchd/fingerprint.py - generate values profiles for humans
"""
import json
from collections import defaultdict
# values dimensions we track
VALUES_DIMENSIONS = [
    'privacy',           # surveillance concern, degoogle, self-hosted
    'decentralization',  # p2p, fediverse, local-first
    'cooperation',       # coops, mutual aid, community
    'queer_friendly',    # lgbtq+, pronouns
    'environmental',     # solarpunk, degrowth, sustainability
    'anticapitalist',    # post-capitalism, worker ownership
    'builder',           # creates vs consumes
    'pnw_oriented',      # pacific northwest connection
]
# skill categories
SKILL_CATEGORIES = [
    'backend',    # python, go, rust, databases
    'frontend',   # js, react, css
    'devops',     # docker, k8s, linux admin
    'hardware',   # electronics, embedded, iot
    'design',     # ui/ux, graphics
    'community',  # organizing, facilitation
    'writing',    # documentation, content
]
# signal to dimension mapping
SIGNAL_TO_DIMENSION = {
    'privacy': 'privacy',
    'selfhosted': 'privacy',
    'degoogle': 'privacy',
    'decentralized': 'decentralization',
    'local_first': 'decentralization',
    'p2p': 'decentralization',
    'federated_chat': 'decentralization',
    'foss': 'decentralization',
    'cooperative': 'cooperation',
    'community': 'cooperation',
    'mutual_aid': 'cooperation',
    'intentional_community': 'cooperation',
    'queer': 'queer_friendly',
    'pronouns': 'queer_friendly',
    'blm': 'queer_friendly',
    'acab': 'queer_friendly',
    'solarpunk': 'environmental',
    'anticapitalist': 'anticapitalist',
    'pnw': 'pnw_oriented',
    'pnw_state': 'pnw_oriented',
    'remote': 'pnw_oriented',
    'home_automation': 'builder',
    'modern_lang': 'builder',
    'unix': 'builder',
    'containers': 'builder',
}
# language to skill mapping
LANGUAGE_TO_SKILL = {
    'python': 'backend',
    'go': 'backend',
    'rust': 'backend',
    'java': 'backend',
    'ruby': 'backend',
    'php': 'backend',
    'javascript': 'frontend',
    'typescript': 'frontend',
    'html': 'frontend',
    'css': 'frontend',
    'vue': 'frontend',
    'shell': 'devops',
    'dockerfile': 'devops',
    'nix': 'devops',
    'hcl': 'devops',
    'c': 'hardware',
    'c++': 'hardware',
    'arduino': 'hardware',
    'verilog': 'hardware',
}
def generate_fingerprint(human_data):
    """
    generate a values fingerprint for a human

    input: human dict from database (has signals, languages, etc)
    output: fingerprint dict with values_vector, skills, interests,
            location_pref and availability
    """
    # stored columns may be raw JSON strings - decode them first
    raw_signals = human_data.get('signals', [])
    signals = json.loads(raw_signals) if isinstance(raw_signals, str) else raw_signals
    raw_extra = human_data.get('extra', {})
    extra = json.loads(raw_extra) if isinstance(raw_extra, str) else raw_extra
    languages = extra.get('languages', {})
    topics = extra.get('topics', [])

    # tally signal hits per values dimension
    hits = defaultdict(float)
    for sig in signals:
        dim = SIGNAL_TO_DIMENSION.get(sig)
        if dim:
            hits[dim] += 1.0
    # scale to 0-1 by the strongest dimension, then zero-fill the rest
    peak = max(hits.values()) if hits else 1
    values_vector = {dim: min(count / peak, 1.0) for dim, count in hits.items()}
    for dim in VALUES_DIMENSIONS:
        values_vector.setdefault(dim, 0.0)

    # skill weights from the repo language distribution, normalized so the
    # dominant skill is 1.0
    skills = defaultdict(float)
    repo_total = sum(languages.values()) if languages else 1
    for lang, count in languages.items():
        category = LANGUAGE_TO_SKILL.get(lang.lower())
        if category:
            skills[category] += count / repo_total
    if skills:
        top = max(skills.values())
        skills = {cat: min(weight / top, 1.0) for cat, weight in skills.items()}

    # interests from topics and signals (deduplicated)
    interests = list(set(topics + signals))

    # location preference: explicit signals win over free-text location
    location_pref = None
    if 'pnw' in signals or 'pnw_state' in signals:
        location_pref = 'pnw'
    elif 'remote' in signals:
        location_pref = 'remote'
    elif human_data.get('location'):
        place = human_data['location'].lower()
        if any(x in place for x in ['seattle', 'portland', 'washington', 'oregon', 'pnw', 'cascadia']):
            location_pref = 'pnw'

    # availability (based on hireable flag if present)
    availability = 'open' if extra.get('hireable') else None

    return {
        'human_id': human_data.get('id'),
        'values_vector': dict(values_vector),
        'skills': dict(skills),
        'interests': interests,
        'location_pref': location_pref,
        'availability': availability,
    }
def fingerprint_similarity(fp_a, fp_b):
    """
    similarity of two fingerprints on a 0-1 scale.

    blends cosine similarity of the values vectors (50%), jaccard overlap
    of interests (30%), and a coarse location-compatibility score (20%).
    """
    vec_a = fp_a.get('values_vector', {})
    vec_b = fp_b.get('values_vector', {})
    dims = set(vec_a) | set(vec_b)
    if not dims:
        return 0.0
    # cosine similarity over the union of dimensions
    dot = sum(vec_a.get(d, 0) * vec_b.get(d, 0) for d in dims)
    norm_a = sum(x * x for x in vec_a.values()) ** 0.5
    norm_b = sum(x * x for x in vec_b.values()) ** 0.5
    values_sim = dot / (norm_a * norm_b) if norm_a and norm_b else 0.0

    # jaccard overlap of interests
    set_a = set(fp_a.get('interests', []))
    set_b = set(fp_b.get('interests', []))
    interest_sim = len(set_a & set_b) / len(set_a | set_b) if (set_a or set_b) else 0.0

    # coarse location compatibility
    pref_a = fp_a.get('location_pref')
    pref_b = fp_b.get('location_pref')
    if pref_a is not None and pref_a == pref_b:
        loc_sim = 1.0
    elif 'remote' in (pref_a, pref_b):
        loc_sim = 0.5
    elif 'pnw' in (pref_a, pref_b):
        loc_sim = 0.3
    else:
        loc_sim = 0.0

    # weighted combination
    return (values_sim * 0.5) + (interest_sim * 0.3) + (loc_sim * 0.2)

199
connectd/matchd/lost.py Normal file
View file

@ -0,0 +1,199 @@
"""
matchd/lost.py - lost builder matching
lost builders don't get matched to each other (both need energy).
they get matched to ACTIVE builders who can inspire them.
the goal: show them someone like them who made it.
"""
import json
from .overlap import find_overlap, is_same_person
def find_inspiring_builder(lost_user, active_builders, db=None):
    """
    find an active builder who could inspire a lost builder.
    criteria:
    - shared interests (they need to relate to this person)
    - active builder has shipped real work (proof it's possible)
    - similar background signals if possible
    - NOT the same person across platforms

    returns (candidate_dict, None) on success, or (None, reason) when no
    builder clears the minimum-overlap threshold. the `db` parameter is
    accepted but currently unused.
    """
    if not active_builders:
        return None, "no active builders available"
    # parse lost user data (signals/extra may be stored as JSON strings)
    lost_signals = lost_user.get('signals', [])
    if isinstance(lost_signals, str):
        lost_signals = json.loads(lost_signals) if lost_signals else []
    lost_extra = lost_user.get('extra', {})
    if isinstance(lost_extra, str):
        lost_extra = json.loads(lost_extra) if lost_extra else {}
    # lost user interests
    lost_interests = set()
    lost_interests.update(lost_signals)
    lost_interests.update(lost_extra.get('topics', []))
    lost_interests.update(lost_extra.get('aligned_topics', []))
    # also include subreddits if from reddit (shows interests)
    subreddits = lost_user.get('subreddits', [])
    if isinstance(subreddits, str):
        subreddits = json.loads(subreddits) if subreddits else []
    lost_interests.update(subreddits)
    # score each active builder
    candidates = []
    for builder in active_builders:
        # skip if same person (cross-platform)
        if is_same_person(lost_user, builder):
            continue
        # get builder signals (same JSON-or-parsed handling as above)
        builder_signals = builder.get('signals', [])
        if isinstance(builder_signals, str):
            builder_signals = json.loads(builder_signals) if builder_signals else []
        builder_extra = builder.get('extra', {})
        if isinstance(builder_extra, str):
            builder_extra = json.loads(builder_extra) if builder_extra else {}
        # builder interests
        builder_interests = set()
        builder_interests.update(builder_signals)
        builder_interests.update(builder_extra.get('topics', []))
        builder_interests.update(builder_extra.get('aligned_topics', []))
        # calculate match score: 10 points per shared interest
        shared_interests = lost_interests & builder_interests
        match_score = len(shared_interests) * 10
        # bonus for high-value shared signals (+15 each)
        high_value_signals = ['privacy', 'selfhosted', 'home_automation', 'foss',
                              'solarpunk', 'cooperative', 'decentralized', 'queer']
        for signal in shared_interests:
            if signal in high_value_signals:
                match_score += 15
        # bonus if builder has shipped real work (proof it's possible)
        repos = builder_extra.get('top_repos', [])
        if len(repos) >= 5:
            match_score += 20  # they've built things
        elif len(repos) >= 2:
            match_score += 10
        # bonus for high stars (visible success)
        total_stars = sum(r.get('stars', 0) for r in repos) if repos else 0
        if total_stars >= 100:
            match_score += 15
        elif total_stars >= 20:
            match_score += 5
        # bonus for similar location (relatable); only pnw is recognized
        lost_loc = (lost_user.get('location') or '').lower()
        builder_loc = (builder.get('location') or '').lower()
        if lost_loc and builder_loc:
            pnw_keywords = ['seattle', 'portland', 'washington', 'oregon', 'pnw']
            if any(k in lost_loc for k in pnw_keywords) and any(k in builder_loc for k in pnw_keywords):
                match_score += 10
        # minimum threshold - need SOMETHING in common
        if match_score < 10:
            continue
        candidates.append({
            'builder': builder,
            'match_score': match_score,
            'shared_interests': list(shared_interests)[:5],
            'repos_count': len(repos),
            'total_stars': total_stars,
        })
    if not candidates:
        return None, "no matching active builders found"
    # sort by match score, return best
    candidates.sort(key=lambda x: x['match_score'], reverse=True)
    best = candidates[0]
    return best, None
def find_matches_for_lost_builders(db, min_lost_score=40, min_values_score=20, limit=10):
    """
    find inspiring builder matches for all lost builders ready for outreach.

    returns (matches, error): matches is a list of dicts pairing each lost
    builder with their best inspiring builder; error is None on success.
    """
    lost = db.get_lost_builders_for_outreach(
        min_lost_score=min_lost_score,
        min_values_score=min_values_score,
        limit=limit
    )
    if not lost:
        return [], "no lost builders ready for outreach"
    actives = db.get_active_builders(min_score=50, limit=200)
    if not actives:
        return [], "no active builders available"
    pairings = []
    for lost_user in lost:
        best, _err = find_inspiring_builder(lost_user, actives, db)
        if not best:
            continue
        pairings.append({
            'lost_user': lost_user,
            'inspiring_builder': best['builder'],
            'match_score': best['match_score'],
            'shared_interests': best['shared_interests'],
            'builder_repos': best['repos_count'],
            'builder_stars': best['total_stars'],
        })
    return pairings, None
def get_lost_match_summary(match_data):
    """
    get a human-readable summary of a lost builder match.

    expects the dict shape produced by find_matches_for_lost_builders
    ('lost_user', 'inspiring_builder', 'match_score', 'shared_interests',
    'builder_repos', 'builder_stars'); missing keys fall back to defaults.
    returns the summary with surrounding whitespace stripped.
    """
    lost = match_data['lost_user']
    builder = match_data['inspiring_builder']
    lost_name = lost.get('name') or lost.get('username', 'someone')
    builder_name = builder.get('name') or builder.get('username', 'a builder')
    # (removed dead code: a 'lost_signals' field used to be parsed here but
    # was never used, and no caller ever supplies that key)
    shared = match_data.get('shared_interests', [])
    summary = f"""
lost builder: {lost_name} ({lost.get('platform')})
lost score: {lost.get('lost_potential_score', 0)}
values score: {lost.get('score', 0)}
url: {lost.get('url')}
inspiring builder: {builder_name} ({builder.get('platform')})
score: {builder.get('score', 0)}
repos: {match_data.get('builder_repos', 0)}
stars: {match_data.get('builder_stars', 0)}
url: {builder.get('url')}
match score: {match_data.get('match_score', 0)}
shared interests: {', '.join(shared) if shared else 'values alignment'}
this lost builder needs to see that someone like them made it.
"""
    return summary.strip()

150
connectd/matchd/overlap.py Normal file
View file

@ -0,0 +1,150 @@
"""
matchd/overlap.py - find pairs with alignment
"""
import json
from .fingerprint import fingerprint_similarity
def find_overlap(human_a, human_b, fp_a=None, fp_b=None):
    """
    analyze overlap between two humans.

    returns a dict with the combined overlap score, shared values/topics,
    complementary language skills, geographic compatibility, and (when
    both fingerprints are supplied) the fingerprint similarity.
    """
    def decode(value):
        # db columns may hold JSON strings instead of parsed objects
        return json.loads(value) if isinstance(value, str) else value

    signals_a = decode(human_a.get('signals', []))
    signals_b = decode(human_b.get('signals', []))
    extra_a = decode(human_a.get('extra', {}))
    extra_b = decode(human_b.get('extra', {}))

    shared_signals = list(set(signals_a) & set(signals_b))
    shared_topics = list(set(extra_a.get('topics', [])) & set(extra_b.get('topics', [])))

    # symmetric difference: languages each could teach the other
    langs_a = set(extra_a.get('languages', {}).keys())
    langs_b = set(extra_b.get('languages', {}).keys())
    complementary_langs = list(langs_a ^ langs_b)

    # geographic compatibility from free-text location + signals
    loc_a = (human_a.get('location') or '').lower()
    loc_b = (human_b.get('location') or '').lower()
    pnw_keywords = ['seattle', 'portland', 'washington', 'oregon', 'pnw', 'cascadia', 'pacific northwest']
    remote_keywords = ['remote', 'anywhere', 'distributed']
    a_pnw = any(k in loc_a for k in pnw_keywords) or 'pnw' in signals_a
    b_pnw = any(k in loc_b for k in pnw_keywords) or 'pnw' in signals_b
    a_remote = any(k in loc_a for k in remote_keywords) or 'remote' in signals_a
    b_remote = any(k in loc_b for k in remote_keywords) or 'remote' in signals_b

    geographic_match, geo_reason = False, None
    if a_pnw and b_pnw:
        geographic_match, geo_reason = True, 'both in pnw'
    elif (a_pnw or b_pnw) and (a_remote or b_remote):
        geographic_match, geo_reason = True, 'pnw + remote compatible'
    elif a_remote and b_remote:
        geographic_match, geo_reason = True, 'both remote-friendly'

    # weighted base score: values > interests > skills, plus a geo bonus
    base_score = len(shared_signals) * 10
    base_score += len(shared_topics) * 5
    if complementary_langs:
        base_score += min(len(complementary_langs), 5) * 3
    if geographic_match:
        base_score += 20

    fp_score = fingerprint_similarity(fp_a, fp_b) * 50 if fp_a and fp_b else 0
    total_score = base_score + fp_score

    overlap_reasons = []
    if shared_signals:
        overlap_reasons.append(f"shared values: {', '.join(shared_signals[:5])}")
    if shared_topics:
        overlap_reasons.append(f"shared interests: {', '.join(shared_topics[:5])}")
    if geo_reason:
        overlap_reasons.append(geo_reason)
    if complementary_langs:
        overlap_reasons.append(f"complementary skills: {', '.join(complementary_langs[:5])}")

    return {
        'overlap_score': total_score,
        'shared_signals': shared_signals,
        'shared_topics': shared_topics,
        'complementary_skills': complementary_langs,
        'geographic_match': geographic_match,
        'geo_reason': geo_reason,
        'overlap_reasons': overlap_reasons,
        'fingerprint_similarity': fp_score / 50 if fp_a and fp_b else None,
    }
def is_same_person(human_a, human_b):
    """
    check if two records might be the same person (cross-platform)

    heuristics: matching username stems (text before any @instance suffix),
    cross-referenced github handles, or identical contact emails.
    same-platform records are assumed to be distinct people.
    """
    # same platform = definitely different records
    if human_a['platform'] == human_b['platform']:
        return False
    # check username similarity on the stem before any @instance suffix.
    # `or ''` also guards against an explicit None username, which the old
    # .get('username', '') form would crash on.
    user_a = (human_a.get('username') or '').lower().split('@')[0]
    user_b = (human_b.get('username') or '').lower().split('@')[0]
    # bug fix: require non-empty stems - previously two records with
    # missing/empty usernames compared '' == '' and were always flagged
    # as the same person
    if user_a and user_a == user_b:
        return True
    contact_a = human_a.get('contact', {})
    contact_b = human_b.get('contact', {})
    if isinstance(contact_a, str):
        contact_a = json.loads(contact_a)
    if isinstance(contact_b, str):
        contact_b = json.loads(contact_b)
    # github cross-reference (only when the handle is actually present)
    gh_a = contact_a.get('github')
    gh_b = contact_b.get('github')
    if gh_a and gh_a == gh_b:
        return True
    if (gh_a and gh_a == user_b) or (gh_b and gh_b == user_a):
        return True
    # email cross-reference
    if contact_a.get('email') and contact_a.get('email') == contact_b.get('email'):
        return True
    return False

137
connectd/matchd/rank.py Normal file
View file

@ -0,0 +1,137 @@
"""
matchd/rank.py - score and rank match quality
"""
from itertools import combinations
from .fingerprint import generate_fingerprint
from .overlap import find_overlap, is_same_person
from scoutd.deep import check_already_connected
def rank_matches(matches):
    """
    rank a list of matches by quality.

    annotates each match dict in place with 'quality_score' (the overlap
    score scaled by geo / fingerprint / skill multipliers) and returns a
    new list sorted best-first.
    """
    for entry in matches:
        score = entry.get('overlap_score', 0)
        multipliers = []
        # geographic proximity makes a real-world connection likelier
        if entry.get('geographic_match'):
            multipliers.append(1.2)
        # strong values-fingerprint alignment
        similarity = entry.get('fingerprint_similarity')
        if similarity and similarity > 0.7:
            multipliers.append(1.3)
        # complementary skills mean they can help each other
        if len(entry.get('complementary_skills', [])) >= 3:
            multipliers.append(1.1)
        for factor in multipliers:
            score *= factor
        entry['quality_score'] = score
    return sorted(matches, key=lambda e: e['quality_score'], reverse=True)
def find_all_matches(db, min_score=30, min_overlap=20):
    """
    find all potential matches from database.

    generates and persists a fingerprint per human, then scores every
    unordered pair, skipping likely-duplicate identities and people who
    already know each other. pairs at or above min_overlap are saved and
    the full set is returned ranked best-first.
    """
    print("matchd: finding all potential matches...")
    humans = db.get_all_humans(min_score=min_score)
    print(f"  {len(humans)} humans to match")
    # generate fingerprints once up front, persisting each
    fingerprints = {}
    for person in humans:
        fingerprint = generate_fingerprint(person)
        fingerprints[person['id']] = fingerprint
        db.save_fingerprint(person['id'], fingerprint)
    print(f"  generated {len(fingerprints)} fingerprints")
    matches = []
    checked = 0
    skipped_same = 0
    skipped_connected = 0
    for left, right in combinations(humans, 2):
        checked += 1
        # skip if likely same person (cross-platform duplicate)
        if is_same_person(left, right):
            skipped_same += 1
            continue
        # skip if already connected (same org, company, co-contributors)
        connected, _reason = check_already_connected(left, right)
        if connected:
            skipped_connected += 1
            continue
        overlap = find_overlap(left, right,
                               fingerprints.get(left['id']),
                               fingerprints.get(right['id']))
        if overlap['overlap_score'] >= min_overlap:
            matches.append({'human_a': left, 'human_b': right, **overlap})
            db.save_match(left['id'], right['id'], overlap)
        if checked % 1000 == 0:
            print(f"  checked {checked} pairs, {len(matches)} matches so far...")
    print(f"  checked {checked} pairs")
    print(f"  skipped {skipped_same} (same person), {skipped_connected} (already connected)")
    print(f"  found {len(matches)} potential matches")
    return rank_matches(matches)
def get_top_matches(db, limit=50):
    """
    get top matches from database, resolving both humans for each row.
    rows whose humans can no longer be found are silently dropped.
    """
    results = []
    for record in db.get_matches(limit=limit):
        first = db.get_human_by_id(record['human_a_id'])
        second = db.get_human_by_id(record['human_b_id'])
        if not (first and second):
            continue
        results.append({
            'id': record['id'],
            'human_a': first,
            'human_b': second,
            'overlap_score': record['overlap_score'],
            'overlap_reasons': record['overlap_reasons'],
            'geographic_match': record['geographic_match'],
            'status': record['status'],
        })
    return results

3
connectd/repository.yaml Normal file
View file

@ -0,0 +1,3 @@
name: connectd add-ons
url: https://github.com/sudoxnym/connectd
maintainer: sudoxnym

View file

@ -0,0 +1,2 @@
requests>=2.28.0
beautifulsoup4>=4.12.0

45
connectd/run.sh Normal file
View file

@ -0,0 +1,45 @@
#!/usr/bin/with-contenv bashio
# shellcheck shell=bash
# add-on entrypoint: copies the home assistant add-on options into
# environment variables, points the daemon at persistent /data storage,
# then execs the python daemon (so it receives signals directly).
# read options from add-on config
# -- host identity: who the daemon is finding connections FOR --
export HOST_USER=$(bashio::config 'host_user')
export HOST_NAME=$(bashio::config 'host_name')
export HOST_EMAIL=$(bashio::config 'host_email')
export HOST_MASTODON=$(bashio::config 'host_mastodon')
export HOST_REDDIT=$(bashio::config 'host_reddit')
export HOST_LEMMY=$(bashio::config 'host_lemmy')
export HOST_LOBSTERS=$(bashio::config 'host_lobsters')
export HOST_MATRIX=$(bashio::config 'host_matrix')
export HOST_DISCORD=$(bashio::config 'host_discord')
export HOST_BLUESKY=$(bashio::config 'host_bluesky')
export HOST_LOCATION=$(bashio::config 'host_location')
export HOST_INTERESTS=$(bashio::config 'host_interests')
export HOST_LOOKING_FOR=$(bashio::config 'host_looking_for')
# -- API credentials consumed by the scoutd/introd modules --
export GITHUB_TOKEN=$(bashio::config 'github_token')
export GROQ_API_KEY=$(bashio::config 'groq_api_key')
export MASTODON_TOKEN=$(bashio::config 'mastodon_token')
export MASTODON_INSTANCE=$(bashio::config 'mastodon_instance')
export DISCORD_BOT_TOKEN=$(bashio::config 'discord_bot_token')
export DISCORD_TARGET_SERVERS=$(bashio::config 'discord_target_servers')
export LEMMY_INSTANCE=$(bashio::config 'lemmy_instance')
export LEMMY_USERNAME=$(bashio::config 'lemmy_username')
export LEMMY_PASSWORD=$(bashio::config 'lemmy_password')
# -- outbound email, read by introd/send.py --
export SMTP_HOST=$(bashio::config 'smtp_host')
export SMTP_PORT=$(bashio::config 'smtp_port')
export SMTP_USER=$(bashio::config 'smtp_user')
export SMTP_PASS=$(bashio::config 'smtp_pass')
# set data paths (/data persists across add-on restarts)
export DB_PATH=/data/db/connectd.db
export CACHE_DIR=/data/cache
bashio::log.info "starting connectd daemon..."
bashio::log.info "HOST_USER: ${HOST_USER}"
cd /app
exec python3 daemon.py

View file

@ -0,0 +1,29 @@
"""
scoutd - discovery module
finds humans across platforms
"""
from .github import scrape_github, get_github_user
from .reddit import scrape_reddit
from .mastodon import scrape_mastodon
from .lobsters import scrape_lobsters
from .matrix import scrape_matrix
from .twitter import scrape_twitter
from .bluesky import scrape_bluesky
from .lemmy import scrape_lemmy
from .discord import scrape_discord, send_discord_dm
from .deep import (
deep_scrape_github_user, check_already_connected, save_deep_profile,
determine_contact_method, get_cached_orgs, cache_orgs,
get_emails_from_commit_history, scrape_website_for_emails,
)
__all__ = [
'scrape_github', 'scrape_reddit', 'scrape_mastodon', 'scrape_lobsters',
'scrape_matrix', 'scrape_twitter', 'scrape_bluesky', 'scrape_lemmy',
'scrape_discord', 'send_discord_dm',
'get_github_user', 'deep_scrape_github_user',
'check_already_connected', 'save_deep_profile', 'determine_contact_method',
'get_cached_orgs', 'cache_orgs', 'get_emails_from_commit_history',
'scrape_website_for_emails',
]

216
connectd/scoutd/bluesky.py Normal file
View file

@ -0,0 +1,216 @@
"""
scoutd/bluesky.py - bluesky/atproto discovery
bluesky has an open API via AT Protocol - no auth needed for public data
many twitter refugees landed here, good source for aligned builders
"""
import requests
import json
import time
from datetime import datetime
from pathlib import Path
from .signals import analyze_text
# shared request headers for all bluesky calls
HEADERS = {'User-Agent': 'connectd/1.0', 'Accept': 'application/json'}
# on-disk response cache, kept under the package's db/cache tree
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'bluesky'
# public bluesky API (appview host; no auth needed for public data)
BSKY_API = 'https://public.api.bsky.app'
# hashtags to search
ALIGNED_HASHTAGS = [
    'selfhosted', 'homelab', 'homeassistant', 'foss', 'opensource',
    'privacy', 'solarpunk', 'cooperative', 'mutualaid', 'localfirst',
    'indieweb', 'smallweb', 'permacomputing', 'techworkers', 'coops',
]
def _api_get(endpoint, params=None):
    """rate-limited API request with on-disk caching (1h TTL)

    cache filenames are derived with sha1 so they are stable across
    interpreter restarts - the previous builtin hash() of the key is
    salted per-process (PYTHONHASHSEED), which silently defeated the
    cache on every restart. returns parsed JSON, or None on error.
    """
    import hashlib  # local import: only needed for cache-key digests
    url = f"{BSKY_API}{endpoint}"
    cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}"
    digest = hashlib.sha1(cache_key.encode('utf-8')).hexdigest()[:16]
    cache_file = CACHE_DIR / f"{digest}.json"
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    if cache_file.exists():
        try:
            data = json.loads(cache_file.read_text())
            if time.time() - data.get('_cached_at', 0) < 3600:
                return data.get('_data')
        except Exception:
            pass  # corrupt/unreadable cache entry: fall through and refetch
    time.sleep(0.5)  # rate limit
    try:
        resp = requests.get(url, headers=HEADERS, params=params, timeout=30)
        resp.raise_for_status()
        result = resp.json()
        cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result}))
        return result
    except requests.exceptions.RequestException as e:
        print(f"  bluesky api error: {e}")
        return None
def search_posts(query, limit=50):
    """search public posts matching query (API caps at 100 per request)"""
    payload = _api_get('/xrpc/app.bsky.feed.searchPosts', {
        'q': query,
        'limit': min(limit, 100),
    })
    return payload.get('posts', []) if payload else []
def get_profile(handle):
    """fetch a public actor profile by handle (e.g., user.bsky.social);
    returns None when the lookup fails"""
    return _api_get('/xrpc/app.bsky.actor.getProfile', {'actor': handle})
def get_author_feed(handle, limit=30):
    """fetch a user's recent feed items; [] when the lookup fails"""
    payload = _api_get('/xrpc/app.bsky.feed.getAuthorFeed', {
        'actor': handle,
        'limit': limit,
    })
    return payload.get('feed', []) if payload else []
def analyze_bluesky_user(handle):
    """analyze a bluesky user for alignment

    scores bio + display name + recent post text via analyze_text, adds
    platform/activity bonuses, and returns the standard human-record dict
    (or None when the profile cannot be fetched).
    """
    profile = get_profile(handle)
    if not profile:
        return None
    # collect text: bio first, then display name, then recent posts
    description = profile.get('description', '')
    display_name = profile.get('displayName', '')
    corpus = [chunk for chunk in (description, display_name) if chunk]
    for item in get_author_feed(handle, limit=20):
        body = item.get('post', {}).get('record', {}).get('text', '')
        if body:
            corpus.append(body)
    text_score, positive_signals, negative_signals = analyze_text(' '.join(corpus))
    # bluesky bonus (decentralized, values-aligned platform choice)
    total_score = text_score + 10
    # activity bonus
    followers = profile.get('followersCount', 0)
    posts_count = profile.get('postsCount', 0)
    if posts_count >= 100:
        total_score += 5
    if followers >= 100:
        total_score += 5
    # confidence: base for bluesky (better signal than twitter), bumped by
    # how much text we saw, signal density, and posting activity
    confidence = 0.35
    if len(corpus) > 5:
        confidence += 0.2
    if len(positive_signals) >= 3:
        confidence += 0.2
    if posts_count >= 50:
        confidence += 0.1
    confidence = min(confidence, 0.85)
    reasons = ['on bluesky (atproto)']
    if positive_signals:
        reasons.append(f"signals: {', '.join(positive_signals[:5])}")
    if negative_signals:
        reasons.append(f"WARNING: {', '.join(negative_signals)}")
    return {
        'platform': 'bluesky',
        'username': handle,
        'url': f"https://bsky.app/profile/{handle}",
        'name': display_name or handle,
        'bio': description,
        'score': total_score,
        'confidence': confidence,
        'signals': positive_signals,
        'negative_signals': negative_signals,
        'followers': followers,
        'posts_count': posts_count,
        'reasons': reasons,
        'contact': {
            'bluesky': handle,
        },
        'scraped_at': datetime.now().isoformat(),
    }
def scrape_bluesky(db, limit_per_hashtag=30):
    """full bluesky scrape

    searches each aligned hashtag, collects post authors, keeps users who
    appear under 2+ DISTINCT hashtags, analyzes up to 100 of them, and
    persists every scoring result via db.save_human. returns the results.
    """
    print("scoutd/bluesky: starting scrape...")
    all_users = {}
    for hashtag in ALIGNED_HASHTAGS:
        print(f"  #{hashtag}...")
        # search for hashtag
        posts = search_posts(f"#{hashtag}", limit=limit_per_hashtag)
        for post in posts:
            author = post.get('author', {})
            handle = author.get('handle')
            if not handle:
                continue
            entry = all_users.setdefault(handle, {
                'handle': handle,
                'display_name': author.get('displayName'),
                'hashtags': [],
            })
            # bug fix: a user with several posts under ONE hashtag used to
            # get that hashtag appended once per post, wrongly passing the
            # "2+ aligned hashtags" filter below - record each tag once
            if hashtag not in entry['hashtags']:
                entry['hashtags'].append(hashtag)
        print(f"  found {len(posts)} posts")
    # prioritize users seen in multiple distinct hashtags
    multi_hashtag = {h: d for h, d in all_users.items() if len(d.get('hashtags', [])) >= 2}
    print(f"  {len(multi_hashtag)} users in 2+ aligned hashtags")
    # analyze (cap at 100 to bound API traffic)
    results = []
    for handle in list(multi_hashtag.keys())[:100]:
        try:
            result = analyze_bluesky_user(handle)
            if result and result['score'] > 0:
                results.append(result)
                db.save_human(result)
                if result['score'] >= 30:
                    print(f"  ★ @{handle}: {result['score']} pts")
        except Exception as e:
            print(f"  error on {handle}: {e}")
    print(f"scoutd/bluesky: found {len(results)} aligned humans")
    return results

966
connectd/scoutd/deep.py Normal file
View file

@ -0,0 +1,966 @@
"""
scoutd/deep.py - deep profile discovery
when we find someone, follow ALL their links to build complete picture
github profile -> mastodon link -> scrape mastodon
-> website -> scrape for more links
-> twitter handle -> note it
-> email -> store it
email discovery sources:
- github profile (if public)
- git commit history
- personal website/blog contact page
- README "contact me" sections
- mastodon/twitter bio
fallback contact methods if no email:
- github_issue: open issue on their repo
- mastodon: DM if allowed
- manual: pending contact queue for review
also filters out people who clearly already know each other
(same org, co-contributors to same repos)
"""
import re
import json
import requests
import time
import subprocess
import tempfile
import shutil
from datetime import datetime
from urllib.parse import urlparse
from pathlib import Path
from .signals import analyze_text
from .github import get_github_user, get_user_repos, _api_get as github_api
from .mastodon import analyze_mastodon_user, _api_get as mastodon_api
from .handles import discover_all_handles, extract_handles_from_text, scrape_website_for_handles
# local cache for org memberships
ORG_CACHE_FILE = Path(__file__).parent.parent / 'data' / 'org_cache.json'
# in-memory mirror of the cache file; None = not loaded yet (see load_org_cache)
_org_cache = None
# patterns to find social links in text
MASTODON_PATTERN = r'@([a-zA-Z0-9_]+)@([a-zA-Z0-9.-]+\.[a-z]{2,})'
TWITTER_PATTERN = r'(?:twitter\.com/|x\.com/)([a-zA-Z0-9_]+)'
GITHUB_PATTERN = r'github\.com/([a-zA-Z0-9_-]+)'
MATRIX_PATTERN = r'@([a-zA-Z0-9_]+):([a-zA-Z0-9.-]+)'
EMAIL_PATTERN = r'\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b'
# known mastodon instances for validation (MASTODON_PATTERN also matches
# matrix-like @user@domain strings, so extract_links_from_text cross-checks
# against this list)
KNOWN_INSTANCES = [
    'mastodon.social', 'fosstodon.org', 'tech.lgbt', 'social.coop',
    'hackers.town', 'hachyderm.io', 'infosec.exchange', 'chaos.social',
    'mas.to', 'mstdn.social', 'mastodon.online', 'universeodon.com',
    'mathstodon.xyz', 'ruby.social', 'functional.cafe', 'types.pl',
]
# contact page patterns for website scraping
CONTACT_PAGE_PATHS = [
    '/contact', '/contact/', '/contact.html',
    '/about', '/about/', '/about.html',
    '/connect', '/reach-out', '/hire', '/hire-me',
]
# patterns to find emails in contact sections
# (first pattern: plain address after a "contact:" style label - 1 group;
#  second pattern: "user [at] domain [dot] tld" obfuscation - 3 groups)
CONTACT_SECTION_PATTERNS = [
    r'(?:contact|email|reach|mail)[:\s]+([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
    r'([a-zA-Z0-9._%+-]+)\s*(?:\[at\]|\(at\)|@)\s*([a-zA-Z0-9.-]+)\s*(?:\[dot\]|\(dot\)|\.)\s*([a-zA-Z]{2,})',
]
def load_org_cache():
    """load org membership cache from disk (lazy, memoized in _org_cache).

    returns:
        dict of shape {'users': {username: [orgs]}, 'updated': {username: iso-ts}};
        falls back to an empty cache on any read/parse failure rather than raising.
    """
    global _org_cache
    if _org_cache is not None:
        return _org_cache
    try:
        ORG_CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
        if ORG_CACHE_FILE.exists():
            with open(ORG_CACHE_FILE) as f:
                _org_cache = json.load(f)
        else:
            _org_cache = {'users': {}, 'updated': {}}
    except (OSError, ValueError):
        # was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit;
        # ValueError covers json.JSONDecodeError for a corrupt cache file
        _org_cache = {'users': {}, 'updated': {}}
    return _org_cache
def save_org_cache():
    """save org membership cache to disk (best-effort; never raises)."""
    global _org_cache
    if _org_cache is None:
        # nothing was ever loaded/modified
        return
    try:
        ORG_CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
        with open(ORG_CACHE_FILE, 'w') as f:
            json.dump(_org_cache, f, indent=2)
    except (OSError, TypeError):
        # was a bare `except:`; TypeError covers non-serializable cache entries,
        # OSError covers disk/permission problems - stay best-effort either way
        pass
def get_cached_orgs(username):
    """get orgs from cache if available and fresh (< 7 days old)

    returns the cached org list, or None when the user is unknown
    or the cached entry is stale.
    """
    cache = load_org_cache()
    if username not in cache['users']:
        return None
    stamp = cache['updated'].get(username)
    if not stamp:
        # no timestamp recorded -> treat as stale
        return None
    age = datetime.now() - datetime.fromisoformat(stamp)
    return cache['users'][username] if age.days < 7 else None
def cache_orgs(username, orgs):
    """cache org membership for a user and persist immediately"""
    cache = load_org_cache()
    now_iso = datetime.now().isoformat()
    cache['users'][username] = orgs
    cache['updated'][username] = now_iso
    save_org_cache()
def get_emails_from_commit_history(repo_url, limit=50):
    """
    clone a repo (shallow) and extract unique author emails from git log.

    args:
        repo_url: clone URL (https) or local path
        limit: max commits to inspect
    returns:
        list of lowercased author emails with bot/noreply addresses filtered
        out; empty list on clone/log failure (including git not installed)
    """
    bot_markers = (
        'noreply', 'no-reply', 'dependabot', 'github-actions',
        'renovate', 'greenkeeper', 'snyk-bot', 'users.noreply.github',
    )
    emails = set()
    try:
        # temp dir is removed automatically, even on failure
        with tempfile.TemporaryDirectory() as tmpdir:
            # shallow clone with limited depth keeps this fast on big repos
            result = subprocess.run(
                ['git', 'clone', '--depth', '50', '--single-branch', repo_url, tmpdir],
                capture_output=True,
                text=True,
                timeout=30
            )
            if result.returncode != 0:
                return []
            # %ae = author email, one per line
            result = subprocess.run(
                ['git', 'log', f'--max-count={limit}', '--format=%ae'],
                cwd=tmpdir,
                capture_output=True,
                text=True,
                timeout=10
            )
            if result.returncode == 0:
                for email in result.stdout.strip().split('\n'):
                    email = email.strip().lower()
                    # filter out bot/noreply emails
                    if email and not any(marker in email for marker in bot_markers):
                        emails.add(email)
    except Exception:
        # was `except (subprocess.TimeoutExpired, Exception)` - the tuple was
        # redundant since Exception already covers TimeoutExpired; this also
        # catches FileNotFoundError when git is not installed
        pass
    return list(emails)
def scrape_website_for_emails(url, timeout=10):
    """
    scrape a personal website for email addresses.

    checks the main page plus the common contact/about pages listed in
    CONTACT_PAGE_PATHS. returns a list of lowercased emails
    (noreply/example addresses excluded); empty list when the url doesn't
    look like a personal site.
    """
    emails = set()
    if not is_personal_website(url):
        return []
    headers = {'User-Agent': 'connectd/1.0 (looking for contact info)'}
    # normalize url
    if not url.startswith('http'):
        url = 'https://' + url
    base_url = url.rstrip('/')
    # pages to check
    pages_to_check = [base_url] + [base_url + path for path in CONTACT_PAGE_PATHS]
    for page_url in pages_to_check:
        try:
            resp = requests.get(page_url, timeout=timeout, headers=headers)
            if resp.status_code == 200:
                text = resp.text
                # standard email pattern
                for match in re.finditer(EMAIL_PATTERN, text):
                    email = match.group(0).lower()
                    if not any(x in email for x in ['noreply', 'no-reply', 'example.com', 'users.noreply']):
                        emails.add(email)
                # obfuscated email patterns like "user [at] domain [dot] com"
                for pattern in CONTACT_SECTION_PATTERNS:
                    for match in re.finditer(pattern, text, re.IGNORECASE):
                        if len(match.groups()) == 3:
                            email = f"{match.group(1)}@{match.group(2)}.{match.group(3)}".lower()
                            emails.add(email)
                        elif len(match.groups()) == 1:
                            emails.add(match.group(1).lower())
                # mailto: links
                for match in re.finditer(r'mailto:([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', text):
                    emails.add(match.group(1).lower())
        except Exception:
            # was a bare `except:` which also swallowed KeyboardInterrupt;
            # a bad page should only skip that page, not abort the scrape
            continue
    return list(emails)
def extract_emails_from_readme(text):
    """
    extract emails from README text, looking for contact sections.
    handles both plain addresses and "[at]/[dot]" obfuscation.
    """
    if not text:
        return []
    found = set()
    junk_markers = ('noreply', 'no-reply', 'example.com')
    # pull out contact-ish sections, then scan just those for addresses
    section_patterns = (
        r'(?:##?\s*)?(?:contact|reach|email|get in touch|connect)[^\n]*\n([^\n#]+)',
        r'(?:email|contact|reach me)[:\s]+([^\n]+)',
    )
    for section_pattern in section_patterns:
        for section_match in re.finditer(section_pattern, text, re.IGNORECASE):
            section_text = section_match.group(1)
            for email_match in re.finditer(EMAIL_PATTERN, section_text):
                candidate = email_match.group(0).lower()
                if not any(marker in candidate for marker in junk_markers):
                    found.add(candidate)
    # "user [at] domain [dot] com" style obfuscation, scanned over the whole text
    obfuscated = r'([a-zA-Z0-9._%+-]+)\s*(?:\[at\]|\(at\))\s*([a-zA-Z0-9.-]+)\s*(?:\[dot\]|\(dot\))\s*([a-zA-Z]{2,})'
    for m in re.finditer(obfuscated, text, re.IGNORECASE):
        found.add(f"{m.group(1)}@{m.group(2)}.{m.group(3)}".lower())
    return list(found)
def get_mastodon_dm_allowed(handle):
    """check if a mastodon user allows DMs"""
    profile = get_mastodon_profile(handle)
    if not profile:
        return False
    # locked accounts require follow approval - treat DMs as closed
    if profile.get('locked'):
        return False
    bio = (profile.get('note') or profile.get('summary') or '').lower()
    # NOTE(review): both the explicit "dms open" markers and the unlocked
    # default return True, so the bio scan does not currently change the
    # result - kept to preserve the original's intent/shape
    open_markers = ('dms open', 'dm me', 'message me', 'dms welcome')
    if any(marker in bio for marker in open_markers):
        return True
    # default: assume open if not locked
    return True
def determine_contact_method(profile):
    """
    determine the best way to contact someone.

    returns (method, details) where method is one of:
    - 'email': direct email contact
    - 'github_issue': open issue on their repo
    - 'mastodon': DM on mastodon
    - 'manual': needs manual review
    """
    # 1. a single known-good email wins outright
    direct = profile.get('email')
    if direct:
        return 'email', {'email': direct}
    # 2. otherwise pick from the harvested email list,
    #    preferring anything that isn't a github/noreply/work address
    candidates = profile.get('emails') or []
    if candidates:
        work_markers = ['github', 'noreply', '@company', '@corp']
        personal = [e for e in candidates
                    if not any(marker in e.lower() for marker in work_markers)]
        chosen = personal[0] if personal else candidates[0]
        return 'email', {'email': chosen}
    # 3. mastodon DM, first handle that accepts them
    masto = profile.get('mastodon')
    if masto:
        handles = masto if isinstance(masto, list) else [masto]
        for handle in handles:
            if get_mastodon_dm_allowed(handle):
                return 'mastodon', {'handle': handle}
    # 4. github issue on their best repo (needs issues-worthy popularity)
    repos = profile.get('top_repos')
    if repos:
        for repo in sorted(repos, key=lambda r: r.get('stars', 0), reverse=True):
            if repo.get('stars', 0) < 10:
                continue
            repo_name = repo.get('name')
            if repo_name:
                return 'github_issue', {
                    'repo': f"{profile['username']}/{repo_name}",
                    'stars': repo.get('stars'),
                }
    # 5. nothing usable - queue for a human to look at
    return 'manual', {
        'reason': 'no email, mastodon, or suitable repo found',
        'available': {
            'twitter': profile.get('twitter'),
            'websites': profile.get('websites'),
            'matrix': profile.get('matrix'),
        }
    }
def extract_links_from_text(text):
    """extract social links from bio/readme text

    returns a dict with keys mastodon/twitter/github/matrix/email/websites,
    each a deduped list; empty dict for empty input.
    """
    if not text:
        return {}
    found = {key: [] for key in ('mastodon', 'twitter', 'github', 'matrix', 'email', 'websites')}
    # mastodon handles - only accept known instances or fediverse-looking domains
    fedi_hints = ['mastodon', 'social', 'fedi', '.town', '.cafe']
    for m in re.finditer(MASTODON_PATTERN, text):
        user, instance = m.groups()
        inst = instance.lower()
        if inst in KNOWN_INSTANCES or any(hint in inst for hint in fedi_hints):
            found['mastodon'].append(f"{user}@{instance}")
    # twitter
    for m in re.finditer(TWITTER_PATTERN, text, re.IGNORECASE):
        found['twitter'].append(m.group(1))
    # github (for cross-referencing)
    for m in re.finditer(GITHUB_PATTERN, text, re.IGNORECASE):
        found['github'].append(m.group(1))
    # matrix
    for m in re.finditer(MATRIX_PATTERN, text):
        user, server = m.groups()
        found['matrix'].append(f"@{user}:{server}")
    # email, minus obvious non-personal addresses
    junk_markers = ['noreply', 'no-reply', 'example.com', 'users.noreply']
    for m in re.finditer(EMAIL_PATTERN, text):
        addr = m.group(0)
        if not any(marker in addr.lower() for marker in junk_markers):
            found['email'].append(addr)
    # websites (http/https links that aren't social platforms)
    platform_domains = ['github.com', 'twitter.com', 'mastodon', 'linkedin.com', 't.co']
    for m in re.finditer(r'https?://([a-zA-Z0-9.-]+\.[a-z]{2,})[/\w.-]*', text):
        domain = m.group(1).lower()
        if not any(marker in domain for marker in platform_domains):
            found['websites'].append(m.group(0))
    # dedupe every bucket
    return {key: list(set(values)) for key, values in found.items()}
def is_personal_website(url):
    """check if URL looks like a personal website vs corporate site"""
    domain = urlparse(url).netloc.lower()
    # big platforms and corporate sites are never "personal"
    corporate = (
        'github.com', 'gitlab.com', 'bitbucket.org',
        'twitter.com', 'x.com', 'linkedin.com', 'facebook.com',
        'youtube.com', 'medium.com', 'dev.to', 'hashnode.com',
        'wedo.com', 'google.com', 'microsoft.com', 'apple.com',
        'amazon.com', 'stackoverflow.com', 'reddit.com',
    )
    for marker in corporate:
        if marker in domain:
            return False
    # hacker-ish TLDs are a strong personal signal
    if domain.endswith(('.io', '.dev', '.me', '.co', '.xyz', '.page', '.codes', '.software')):
        return True
    # a short two-label domain like name.com also reads as personal
    labels = domain.replace('www.', '').split('.')
    return len(labels) == 2 and len(labels[0]) < 20
def scrape_website_for_links(url, timeout=10):
    """scrape a personal website for more social links.

    returns the same dict shape as extract_links_from_text; empty dict for
    non-personal sites or on any fetch/parse error.
    """
    if not is_personal_website(url):
        return {}
    try:
        resp = requests.get(url, timeout=timeout, headers={'User-Agent': 'connectd/1.0'})
        resp.raise_for_status()
        return extract_links_from_text(resp.text)
    except Exception:
        # was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit
        return {}
def get_mastodon_profile(handle):
    """
    fetch mastodon profile from handle like user@instance.

    tries webfinger first (resolves the activitypub profile document),
    then falls back to the instance's accounts/lookup API.
    returns the profile dict, or None on bad handle / any failure.
    """
    if '@' not in handle:
        return None
    parts = handle.split('@')
    if len(parts) == 2:
        user, instance = parts
    elif len(parts) == 3 and parts[0] == '':
        # @user@instance format (leading @ produces an empty first element)
        user, instance = parts[1], parts[2]
    else:
        return None
    # try to look up via webfinger
    try:
        webfinger_url = f"https://{instance}/.well-known/webfinger"
        resp = requests.get(
            webfinger_url,
            params={'resource': f'acct:{user}@{instance}'},
            timeout=10,
            headers={'Accept': 'application/json'}
        )
        if resp.status_code == 200:
            data = resp.json()
            # find the profile link
            for link in data.get('links', []):
                if link.get('type') == 'application/activity+json':
                    profile_url = link.get('href')
                    # fetch the profile
                    profile_resp = requests.get(
                        profile_url,
                        timeout=10,
                        headers={'Accept': 'application/activity+json'}
                    )
                    if profile_resp.status_code == 200:
                        return profile_resp.json()
    except Exception:
        # was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit
        pass
    # fallback: try direct API
    try:
        search_url = f"https://{instance}/api/v1/accounts/lookup"
        resp = requests.get(search_url, params={'acct': user}, timeout=10)
        if resp.status_code == 200:
            return resp.json()
    except Exception:
        pass
    return None
def deep_scrape_github_user(login, scrape_commits=True):
    """
    deep scrape a github user - follow all links, build complete profile
    email discovery sources:
    1. github profile (if public)
    2. git commit history (if scrape_commits=True)
    3. personal website/blog contact pages
    4. README "contact me" sections
    5. mastodon bio

    returns a profile dict (see keys below) or None when the user can't
    be fetched. makes many network calls; intended for batch scouting.
    """
    print(f" deep scraping {login}...")
    user = get_github_user(login)
    if not user:
        # user not found or api error - nothing to build on
        return None
    repos = get_user_repos(login, per_page=50)
    # collect all text to search for links
    all_text = []
    readme_text = None
    if user.get('bio'):
        all_text.append(user['bio'])
    if user.get('blog'):
        all_text.append(user['blog'])
    if user.get('company'):
        all_text.append(user['company'])
    # check readme of profile repo (username/username)
    for branch in ['main', 'master']:
        readme_url = f"https://raw.githubusercontent.com/{login}/{login}/{branch}/README.md"
        try:
            resp = requests.get(readme_url, timeout=10)
            if resp.status_code == 200:
                readme_text = resp.text
                all_text.append(readme_text)
                break
        except:
            pass
    # extract links from all collected text
    combined_text = '\n'.join(all_text)
    found_links = extract_links_from_text(combined_text)
    # ensure all keys exist (extract_links_from_text returns {} for empty text)
    for key in ['email', 'twitter', 'github', 'matrix', 'mastodon', 'websites']:
        if key not in found_links:
            found_links[key] = []
    # add explicit github fields
    if user.get('email'):
        found_links['email'].append(user['email'])
    if user.get('twitter_username'):
        found_links['twitter'].append(user['twitter_username'])
    if user.get('blog'):
        found_links['websites'].append(user['blog'])
    # EMAIL DISCOVERY: extract emails from README contact sections
    if readme_text:
        readme_emails = extract_emails_from_readme(readme_text)
        found_links['email'].extend(readme_emails)
        if readme_emails:
            print(f" found {len(readme_emails)} email(s) in README")
    # dedupe
    for key in found_links:
        found_links[key] = list(set(found_links[key]))
    # now follow the links to gather more data
    profile = {
        'source': 'github',
        'username': login,
        'url': f"https://github.com/{login}",
        'real_name': user.get('name'),
        'bio': user.get('bio'),
        'location': user.get('location'),
        'company': user.get('company'),
        'hireable': user.get('hireable'),
        'created_at': user.get('created_at'),
        'public_repos': user.get('public_repos'),
        'followers': user.get('followers'),
        # contact points
        'email': found_links['email'][0] if found_links['email'] else user.get('email'),
        'emails': list(found_links['email']),
        'twitter': found_links['twitter'][0] if found_links['twitter'] else user.get('twitter_username'),
        'mastodon': found_links['mastodon'],
        'matrix': found_links['matrix'],
        'websites': found_links['websites'],
        # cross-platform profiles we find
        'linked_profiles': {},
        # repos and languages
        'top_repos': [],
        'languages': {},
        'topics': [],
        'orgs': [],
        # contact method (will be determined at end)
        'contact_method': None,
        'contact_details': None,
    }
    # analyze repos (first 30, skipping forks)
    top_starred_repo = None
    for repo in repos[:30]:
        if not repo.get('fork'):
            repo_info = {
                'name': repo.get('name'),
                'description': repo.get('description'),
                'stars': repo.get('stargazers_count'),
                'language': repo.get('language'),
                'topics': repo.get('topics', []),
                'html_url': repo.get('html_url'),
                'pushed_at': repo.get('pushed_at'),  # for activity-based contact selection
            }
            profile['top_repos'].append(repo_info)
            # track top starred for commit email scraping
            if not top_starred_repo or repo.get('stargazers_count', 0) > top_starred_repo.get('stars', 0):
                top_starred_repo = repo_info
            if repo.get('language'):
                lang = repo['language']
                profile['languages'][lang] = profile['languages'].get(lang, 0) + 1
            profile['topics'].extend(repo.get('topics', []))
    profile['topics'] = list(set(profile['topics']))
    # get orgs - check cache first to avoid the extra api call
    cached_orgs = get_cached_orgs(login)
    if cached_orgs is not None:
        print(f" using cached orgs: {cached_orgs}")
        profile['orgs'] = cached_orgs
    else:
        orgs_url = f"https://api.github.com/users/{login}/orgs"
        orgs_data = github_api(orgs_url) or []
        profile['orgs'] = [o.get('login') for o in orgs_data]
        # cache for future use
        cache_orgs(login, profile['orgs'])
        if profile['orgs']:
            print(f" fetched & cached orgs: {profile['orgs']}")
    # EMAIL DISCOVERY: scrape commit history from top repo
    # (only when we have found no email anywhere else - cloning is expensive)
    if scrape_commits and top_starred_repo and not profile['emails']:
        repo_url = f"https://github.com/{login}/{top_starred_repo['name']}.git"
        print(f" checking commit history in {top_starred_repo['name']}...")
        commit_emails = get_emails_from_commit_history(repo_url)
        if commit_emails:
            print(f" found {len(commit_emails)} email(s) in commits")
            profile['emails'].extend(commit_emails)
    # follow mastodon links
    for masto_handle in found_links['mastodon'][:2]:  # limit to 2
        print(f" following mastodon: {masto_handle}")
        masto_profile = get_mastodon_profile(masto_handle)
        if masto_profile:
            profile['linked_profiles']['mastodon'] = {
                'handle': masto_handle,
                'display_name': masto_profile.get('display_name') or masto_profile.get('name'),
                'bio': masto_profile.get('note') or masto_profile.get('summary'),
                'followers': masto_profile.get('followers_count'),
                'url': masto_profile.get('url'),
                'locked': masto_profile.get('locked', False),
            }
            # extract more links from mastodon bio
            masto_bio = masto_profile.get('note') or masto_profile.get('summary') or ''
            masto_links = extract_links_from_text(masto_bio)
            profile['emails'].extend(masto_links.get('email', []))
            profile['websites'].extend(masto_links.get('websites', []))
    # EMAIL DISCOVERY: scrape personal website for contact info
    for website in found_links['websites'][:2]:  # check up to 2 sites
        print(f" following website: {website}")
        # basic link extraction
        site_links = scrape_website_for_links(website)
        if site_links.get('mastodon') and not profile['mastodon']:
            profile['mastodon'] = site_links['mastodon']
        # enhanced email discovery - check contact pages
        website_emails = scrape_website_for_emails(website)
        if website_emails:
            print(f" found {len(website_emails)} email(s) on website")
            profile['emails'].extend(website_emails)
    # dedupe emails and pick best one
    profile['emails'] = list(set(profile['emails']))
    # rank emails by preference (higher score = more likely a real personal inbox)
    def email_score(email):
        email_lower = email.lower()
        score = 0
        # prefer personal domains
        if any(x in email_lower for x in ['@gmail', '@proton', '@hey.com', '@fastmail']):
            score += 10
        # deprioritize github emails
        if 'github' in email_lower:
            score -= 20
        # deprioritize noreply
        if 'noreply' in email_lower:
            score -= 50
        # prefer emails matching username
        if login.lower() in email_lower:
            score += 5
        return score
    if profile['emails']:
        profile['emails'].sort(key=email_score, reverse=True)
        profile['email'] = profile['emails'][0]
    # COMPREHENSIVE HANDLE DISCOVERY
    # find ALL social handles from website, README, rel="me" links, etc.
    discovered_handles, discovered_emails = discover_all_handles(user)
    # merge discovered handles into profile
    profile['handles'] = discovered_handles
    # update individual fields from discovered handles
    # (only fill fields the earlier link extraction left empty)
    if discovered_handles.get('mastodon') and not profile.get('mastodon'):
        profile['mastodon'] = discovered_handles['mastodon']
    if discovered_handles.get('twitter') and not profile.get('twitter'):
        profile['twitter'] = discovered_handles['twitter']
    if discovered_handles.get('bluesky'):
        profile['bluesky'] = discovered_handles['bluesky']
    if discovered_handles.get('matrix') and not profile.get('matrix'):
        profile['matrix'] = discovered_handles['matrix']
    if discovered_handles.get('linkedin'):
        profile['linkedin'] = discovered_handles['linkedin']
    if discovered_handles.get('youtube'):
        profile['youtube'] = discovered_handles['youtube']
    if discovered_handles.get('discord'):
        profile['discord'] = discovered_handles['discord']
    if discovered_handles.get('telegram'):
        profile['telegram'] = discovered_handles['telegram']
    # merge discovered emails
    for email in discovered_emails:
        if email not in profile['emails']:
            profile['emails'].append(email)
    print(f" handles found: {list(discovered_handles.keys())}")
    # determine best contact method
    contact_method, contact_details = determine_contact_method(profile)
    profile['contact_method'] = contact_method
    profile['contact_details'] = contact_details
    print(f" contact method: {contact_method}")
    # analyze all text for signals
    all_profile_text = ' '.join([
        profile.get('bio') or '',
        profile.get('company') or '',
        profile.get('location') or '',
        ' '.join(profile.get('topics', [])),
    ])
    for linked in profile.get('linked_profiles', {}).values():
        if linked.get('bio'):
            all_profile_text += ' ' + linked['bio']
    text_score, signals, negative = analyze_text(all_profile_text)
    profile['signals'] = signals
    profile['negative_signals'] = negative
    profile['score'] = text_score
    # add builder score (prolific non-fork activity)
    if len(repos) > 20:
        profile['score'] += 15
    elif len(repos) > 10:
        profile['score'] += 10
    # add topic alignment
    from .signals import TARGET_TOPICS
    aligned_topics = set(profile['topics']) & set(TARGET_TOPICS)
    profile['score'] += len(aligned_topics) * 10
    profile['aligned_topics'] = list(aligned_topics)
    profile['scraped_at'] = datetime.now().isoformat()
    return profile
def check_mutual_github_follows(user_a, user_b):
    """check if user_a follows user_b on github.

    NOTE: despite the name this checks ONE direction only; callers invoke it
    twice (a->b and b->a) for mutuality. uses the unauthenticated
    following endpoint where 204 means "follows". network errors are
    treated as "not following".
    """
    url = f"https://api.github.com/users/{user_a}/following/{user_b}"
    try:
        resp = requests.get(url, timeout=10, headers={'Accept': 'application/vnd.github.v3+json'})
        if resp.status_code == 204:  # 204 = follows
            return True
    except Exception:
        # was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit
        pass
    return False
def check_shared_repo_contributions(user_a, user_b):
    """
    check if two users have contributed to the same repos
    returns (bool, list of shared repos)

    stub: real contributor data would require querying
    GET /repos/{owner}/{repo}/contributors for each top repo; for now the
    org/top_repos overlap checks in check_already_connected cover this case.
    """
    shared_repos = []
    return bool(shared_repos), shared_repos
def check_github_interactions(user_a, user_b):
    """
    check if users have had public interactions
    (comments on each other's issues/PRs)

    stub - the search-API queries this needs are expensive, so it is only
    worth wiring up for high-score matches:
      GET /search/issues?q=author:{user_a}+commenter:{user_b}
      GET /search/issues?q=author:{user_b}+commenter:{user_a}
    """
    return False
def check_already_connected(human_a, human_b, deep_check=False):
    """
    check if two humans are likely already connected
    (same org, co-contributors, mutual follows, interactions)
    connectd's job is connecting ISOLATED builders, not re-introducing coworkers

    returns (connected: bool, reason: str | None)
    """
    def _extra(human):
        # extra may be stored as a json string in the db
        raw = human.get('extra', {})
        if isinstance(raw, str):
            return json.loads(raw) if raw else {}
        return raw

    extra_a = _extra(human_a)
    extra_b = _extra(human_b)

    def _orgs(human, extra):
        # stored orgs, topped up with fresher data from the org cache
        found = set(extra.get('orgs', []))
        if human.get('platform') == 'github':
            cached = get_cached_orgs(human.get('username', ''))
            if cached:
                found.update(cached)
        return found

    # 1. same github org
    shared_orgs = _orgs(human_a, extra_a) & _orgs(human_b, extra_b)
    if shared_orgs:
        return True, f"same org: {', '.join(list(shared_orgs)[:3])}"

    # 2. same company (substring match catches "@acme" vs "acme inc")
    company_a = (extra_a.get('company') or '').lower().strip('@').strip()
    company_b = (extra_b.get('company') or '').lower().strip('@').strip()
    if company_a and company_b and len(company_a) > 2:
        if company_a == company_b or company_a in company_b or company_b in company_a:
            return True, f"same company: {company_a or company_b}"

    # 3. co-contributors to the same significant repos (from stored top_repos)
    def _big_repos(extra):
        return {r.get('name', '').lower()
                for r in extra.get('top_repos', [])
                if r.get('stars', 0) > 50}

    shared_repos = _big_repos(extra_a) & _big_repos(extra_b)
    if len(shared_repos) >= 2:
        return True, f"co-contributors: {', '.join(list(shared_repos)[:3])}"

    # 4. expensive api checks - only when explicitly requested
    if deep_check:
        user_a = human_a.get('username', '')
        user_b = human_b.get('username', '')
        if human_a.get('platform') == 'github' and human_b.get('platform') == 'github':
            if check_mutual_github_follows(user_a, user_b):
                return True, "mutual github follows"
            if check_mutual_github_follows(user_b, user_a):
                return True, "mutual github follows"

    return False, None
def save_deep_profile(db, profile):
    """save a deep-scraped profile to the database.

    converts the deep_scrape_github_user output into the standard human
    record. IMPORTANT: the extra field carries ALL data needed for
    activity-based contact selection later. returns the saved record.
    """
    linked = profile.get('linked_profiles')
    contact = {
        'email': profile.get('email'),
        'emails': profile.get('emails', []),
        'twitter': profile.get('twitter'),
        'mastodon': profile.get('mastodon'),
        'matrix': profile.get('matrix'),
        'websites': profile.get('websites'),
        'contact_method': profile.get('contact_method'),
        'contact_details': profile.get('contact_details'),
    }
    extra = {
        # identity
        'real_name': profile.get('real_name'),
        'company': profile.get('company'),
        'hireable': profile.get('hireable'),
        'orgs': profile.get('orgs'),
        # github activity (for activity-based contact)
        'top_repos': profile.get('top_repos'),
        'languages': profile.get('languages'),
        'topics': profile.get('topics'),
        'aligned_topics': profile.get('aligned_topics'),
        'followers': profile.get('followers'),
        'public_repos': profile.get('public_repos'),
        'commit_count': len(profile.get('emails', [])),  # rough proxy
        # cross-platform links (for activity-based contact)
        'email': profile.get('email'),
        'emails': profile.get('emails', []),
        'twitter': profile.get('twitter'),
        'mastodon': profile.get('mastodon'),
        'matrix': profile.get('matrix'),
        'bluesky': profile.get('bluesky'),
        'reddit': profile.get('reddit'),
        'lobsters': profile.get('lobsters'),
        'linkedin': profile.get('linkedin'),
        'youtube': profile.get('youtube'),
        'discord': profile.get('discord'),
        'telegram': profile.get('telegram'),
        'linked_profiles': linked,
        # ALL discovered handles (comprehensive)
        'handles': profile.get('handles', {}),
        # activity counts (populated by platform scrapers)
        'mastodon_statuses': profile.get('mastodon_statuses', 0),
        'twitter_tweets': profile.get('twitter_tweets', 0),
        'reddit_activity': profile.get('reddit_activity', 0),
        'reddit_karma': profile.get('reddit_karma', 0),
        'lobsters_karma': profile.get('lobsters_karma', 0),
        'bluesky_posts': profile.get('bluesky_posts', 0),
    }
    human_data = {
        'platform': profile['source'],
        'username': profile['username'],
        'url': profile['url'],
        'name': profile.get('real_name'),
        'bio': profile.get('bio'),
        'location': profile.get('location'),
        'score': profile.get('score', 0),
        # a confirmed second-platform presence raises confidence
        'confidence': 0.8 if linked else 0.5,
        'signals': profile.get('signals', []),
        'negative_signals': profile.get('negative_signals', []),
        'reasons': [],
        'contact': contact,
        'extra': extra,
        'scraped_at': profile.get('scraped_at'),
    }
    # build the human-readable reasons summary
    reasons = human_data['reasons']
    if profile.get('signals'):
        reasons.append(f"signals: {', '.join(profile['signals'][:5])}")
    if profile.get('aligned_topics'):
        reasons.append(f"topics: {', '.join(profile['aligned_topics'][:5])}")
    if linked:
        reasons.append(f"also on: {', '.join(list(linked.keys()))}")
    if profile.get('location'):
        reasons.append(f"location: {profile['location']}")
    if profile.get('contact_method'):
        reasons.append(f"contact: {profile['contact_method']}")
    db.save_human(human_data)
    return human_data

323
connectd/scoutd/discord.py Normal file
View file

@ -0,0 +1,323 @@
"""
scoutd/discord.py - discord discovery
discord requires a bot token to read messages.
target servers: programming help, career transition, indie hackers, etc.
SETUP:
1. create discord app at discord.com/developers
2. add bot, get token
3. join target servers with bot
4. set DISCORD_BOT_TOKEN env var
"""
import requests
import json
import time
import os
from datetime import datetime
from pathlib import Path
from .signals import analyze_text
from .lost import (
analyze_social_for_lost_signals,
classify_user,
)
# empty token means scraping is disabled (see get_headers / scrape_discord)
DISCORD_BOT_TOKEN = os.environ.get('DISCORD_BOT_TOKEN', '')
DISCORD_API = 'https://discord.com/api/v10'
# default server IDs - values-aligned communities
# bot must be invited to these servers to scout them
# invite links for reference (use numeric IDs below):
# - self-hosted: discord.gg/self-hosted
# - foss-dev: discord.gg/foss-developers-group
# - grapheneos: discord.gg/grapheneos
# - queer-coded: discord.me/queer-coded
# - homelab: discord.gg/homelab
# - esphome: discord.gg/n9sdw7pnsn
# - home-assistant: discord.gg/home-assistant
# - linuxserver: discord.gg/linuxserver
# - proxmox-scripts: discord.gg/jsYVk5JBxq
DEFAULT_SERVERS = [
    # self-hosted / foss / privacy
    '693469700109369394',  # self-hosted (selfhosted.show)
    '920089648842293248',  # foss developers group
    '1176414688112820234',  # grapheneos
    # queer tech
    '925804557001437184',  # queer coded
    # home automation / homelab
    # note: these are large servers, bot needs to be invited
    # '330944238910963714',  # home assistant (150k+ members)
    # '429907082951524364',  # esphome (35k members)
    # '478094546522079232',  # homelab (35k members)
    # '354974912613449730',  # linuxserver.io (41k members)
]
# merge env var servers with defaults (comma-separated numeric IDs);
# note: set() union means the resulting order is unspecified
_env_servers = os.environ.get('DISCORD_TARGET_SERVERS', '').split(',')
_env_servers = [s.strip() for s in _env_servers if s.strip()]
TARGET_SERVERS = list(set(DEFAULT_SERVERS + _env_servers))
# channels to focus on (keywords in channel name)
TARGET_CHANNEL_KEYWORDS = [
    'help', 'career', 'jobs', 'learning', 'beginner',
    'general', 'introductions', 'showcase', 'projects',
]
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'discord'
# created eagerly at import time so scrapers can write without checking
CACHE_DIR.mkdir(parents=True, exist_ok=True)
def get_headers():
    """get discord api headers, or None when no bot token is configured"""
    token = DISCORD_BOT_TOKEN
    if not token:
        return None
    return {
        'Authorization': f'Bot {token}',
        'Content-Type': 'application/json',
    }
def get_guild_channels(guild_id):
    """get channels in a guild; [] on missing token or any api failure"""
    headers = get_headers()
    if not headers:
        return []
    url = f'{DISCORD_API}/guilds/{guild_id}/channels'
    try:
        resp = requests.get(url, headers=headers, timeout=30)
        # non-200 usually means the bot isn't in the guild
        return resp.json() if resp.status_code == 200 else []
    except Exception:
        return []
def get_channel_messages(channel_id, limit=100):
    """get recent messages from a channel; [] on missing token or failure"""
    headers = get_headers()
    if not headers:
        return []
    url = f'{DISCORD_API}/channels/{channel_id}/messages'
    try:
        resp = requests.get(url, headers=headers, params={'limit': limit}, timeout=30)
        return resp.json() if resp.status_code == 200 else []
    except Exception:
        return []
def get_user_info(user_id):
    """get discord user info; None on missing token or any api failure"""
    headers = get_headers()
    if not headers:
        return None
    url = f'{DISCORD_API}/users/{user_id}'
    try:
        resp = requests.get(url, headers=headers, timeout=30)
        return resp.json() if resp.status_code == 200 else None
    except Exception:
        return None
def analyze_discord_user(user_data, messages=None):
    """analyze a discord user for values alignment and lost signals.

    scores up to 20 of the supplied messages and runs the lost-builder
    classifier over the same text; returns a standard human record dict.
    """
    username = user_data.get('username', '')
    display_name = user_data.get('global_name') or username
    user_id = user_data.get('id')
    # score message content, skipping trivially short messages
    texts = []
    signal_hits = []
    total_score = 0
    for msg in (messages or [])[:20]:
        content = msg.get('content', '')
        if not content or len(content) < 20:
            continue
        texts.append(content)
        score, signals, _ = analyze_text(content)
        signal_hits.extend(signals)
        total_score += score
    unique_signals = list(set(signal_hits))
    message_count = len(messages) if messages else 0
    # lost builder detection works off the same message texts
    lost_signals, lost_weight = analyze_social_for_lost_signals(
        {'bio': '', 'message_count': message_count},
        [{'text': t} for t in texts],
    )
    user_type = classify_user(lost_weight, 50, total_score)
    return {
        'platform': 'discord',
        'username': username,
        'url': f"https://discord.com/users/{user_id}",
        'name': display_name,
        'bio': '',
        'location': None,
        'score': total_score,
        # confidence grows with distinct signals, capped at 0.8
        'confidence': min(0.8, 0.2 + len(unique_signals) * 0.1),
        'signals': unique_signals,
        'negative_signals': [],
        'reasons': [],
        'contact': {'discord': f"{username}#{user_data.get('discriminator', '0')}"},
        'extra': {
            'user_id': user_id,
            'message_count': message_count,
        },
        'lost_potential_score': lost_weight,
        'lost_signals': lost_signals,
        'user_type': user_type,
    }
def scrape_discord(db, limit_per_channel=50):
    """scrape discord servers for aligned builders

    walks every guild in TARGET_SERVERS, pulls recent messages from text
    channels whose names match TARGET_CHANNEL_KEYWORDS, scores each non-bot
    author once per run, and persists promising profiles via db.save_human().

    :param db: storage object exposing save_human(profile_dict)
    :param limit_per_channel: max messages fetched per channel
    :returns: number of humans saved
    """
    if not DISCORD_BOT_TOKEN:
        print("discord: DISCORD_BOT_TOKEN not set, skipping")
        return 0
    if not TARGET_SERVERS or TARGET_SERVERS == ['']:
        print("discord: DISCORD_TARGET_SERVERS not set, skipping")
        return 0
    print("scouting discord...")
    found = 0
    lost_found = 0
    seen_users = set()  # discord user ids already analyzed this run
    for guild_id in TARGET_SERVERS:
        if not guild_id:
            continue
        guild_id = guild_id.strip()
        channels = get_guild_channels(guild_id)
        if not channels:
            print(f" guild {guild_id}: no access or no channels")
            continue
        # filter to relevant channels
        target_channels = []
        for ch in channels:
            if ch.get('type') != 0: # text channels only
                continue
            name = ch.get('name', '').lower()
            if any(kw in name for kw in TARGET_CHANNEL_KEYWORDS):
                target_channels.append(ch)
        print(f" guild {guild_id}: {len(target_channels)} relevant channels")
        for channel in target_channels[:5]: # limit channels per server
            messages = get_channel_messages(channel['id'], limit=limit_per_channel)
            if not messages:
                continue
            # group messages by user
            user_messages = {}
            for msg in messages:
                author = msg.get('author', {})
                if author.get('bot'):
                    continue
                user_id = author.get('id')
                if not user_id or user_id in seen_users:
                    continue
                if user_id not in user_messages:
                    user_messages[user_id] = {'user': author, 'messages': []}
                user_messages[user_id]['messages'].append(msg)
            # analyze each user
            for user_id, data in user_messages.items():
                if user_id in seen_users:
                    continue
                seen_users.add(user_id)
                result = analyze_discord_user(data['user'], data['messages'])
                if not result:
                    continue
                # save when either the values score or the lost-builder score clears its threshold
                if result['score'] >= 20 or result.get('lost_potential_score', 0) >= 30:
                    db.save_human(result)
                    found += 1
                    if result.get('user_type') in ['lost', 'both']:
                        lost_found += 1
            time.sleep(1) # rate limit between channels
        time.sleep(2) # between guilds
    print(f"discord: found {found} humans ({lost_found} lost builders)")
    return found
def send_discord_dm(user_id, message, dry_run=False):
    """open (or reuse) a DM channel with a discord user and deliver a message

    returns (ok, detail); detail carries the error reason on failure.
    """
    if not DISCORD_BOT_TOKEN:
        return False, "DISCORD_BOT_TOKEN not set"
    if dry_run:
        print(f" [dry run] would DM discord user {user_id}")
        return True, "dry run"
    headers = get_headers()
    try:
        # step 1: ask discord for a DM channel with this recipient
        channel_resp = requests.post(
            f'{DISCORD_API}/users/@me/channels',
            headers=headers,
            json={'recipient_id': user_id},
            timeout=30
        )
        if channel_resp.status_code not in [200, 201]:
            return False, f"couldn't create DM channel: {channel_resp.status_code}"
        dm_channel_id = channel_resp.json().get('id')
        # step 2: post the message into that channel
        send_resp = requests.post(
            f'{DISCORD_API}/channels/{dm_channel_id}/messages',
            headers=headers,
            json={'content': message},
            timeout=30
        )
        if send_resp.status_code not in [200, 201]:
            return False, f"send failed: {send_resp.status_code}"
        return True, f"sent to {user_id}"
    except Exception as e:
        return False, str(e)

330
connectd/scoutd/github.py Normal file
View file

@ -0,0 +1,330 @@
"""
scoutd/github.py - github discovery
scrapes repos, bios, commit patterns to find aligned builders
also detects lost builders - people with potential who haven't started yet
"""
import requests
import json
import time
import os
from datetime import datetime
from pathlib import Path
from collections import defaultdict
from .signals import analyze_text, TARGET_TOPICS, ECOSYSTEM_REPOS
from .lost import (
analyze_github_for_lost_signals,
analyze_text_for_lost_signals,
classify_user,
get_signal_descriptions,
)
from .handles import discover_all_handles
# rate limit: 60/hr unauthenticated, 5000/hr with token
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '')
# default github REST v3 headers; the token is attached only when configured
HEADERS = {'Accept': 'application/vnd.github.v3+json'}
if GITHUB_TOKEN:
    HEADERS['Authorization'] = f'token {GITHUB_TOKEN}'
# on-disk response cache used by _api_get (created lazily on first request)
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'github'
def _api_get(url, params=None):
    """rate-limited GET against the github api with a 1-hour file cache

    :param url: full api URL
    :param params: optional query params (part of the cache key)
    :returns: parsed json, or None on any request error
    """
    import hashlib  # local import; keeps the module's import surface unchanged
    cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}"
    # use a content hash for the cache filename: builtin hash() is salted per
    # interpreter process (PYTHONHASHSEED), so the old scheme produced a new
    # filename every run and the cache never survived a restart
    digest = hashlib.sha256(cache_key.encode('utf-8')).hexdigest()[:16]
    cache_file = CACHE_DIR / f"{digest}.json"
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    # check cache (1 hour expiry)
    if cache_file.exists():
        try:
            data = json.loads(cache_file.read_text())
            if time.time() - data.get('_cached_at', 0) < 3600:
                return data.get('_data')
        except (OSError, ValueError):
            # unreadable or corrupt cache entry: fall through to a fresh fetch
            pass
    # rate limit: authenticated clients get 5000 req/hr, anonymous only 60
    time.sleep(0.5 if GITHUB_TOKEN else 2)
    try:
        resp = requests.get(url, headers=HEADERS, params=params, timeout=30)
        resp.raise_for_status()
        result = resp.json()
        # cache
        cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result}))
        return result
    except requests.exceptions.RequestException as e:
        print(f" github api error: {e}")
        return None
def search_repos_by_topic(topic, per_page=100):
    """search repos carrying a given topic tag, most-starred first"""
    payload = _api_get(
        'https://api.github.com/search/repositories',
        {'q': f'topic:{topic}', 'sort': 'stars', 'order': 'desc', 'per_page': per_page},
    )
    if not payload:
        return []
    return payload.get('items', [])
def get_repo_contributors(repo_full_name, per_page=100):
    """list top contributors for a repo; empty list on api failure"""
    contributors = _api_get(
        f'https://api.github.com/repos/{repo_full_name}/contributors',
        {'per_page': per_page},
    )
    return contributors if contributors else []
def get_github_user(login):
    """fetch the full github profile for a login (None on failure)"""
    return _api_get(f'https://api.github.com/users/{login}')
def get_user_repos(login, per_page=100):
    """fetch a user's repos, most recently pushed first; empty list on failure"""
    repos = _api_get(
        f'https://api.github.com/users/{login}/repos',
        {'per_page': per_page, 'sort': 'pushed'},
    )
    return repos or []
def analyze_github_user(login):
    """
    analyze a github user for values alignment

    combines four sub-scores (bio/repo-text signals, aligned topics, repo
    count, hireable flag), runs lost-builder detection, and performs deep
    handle discovery across linked sites.

    :param login: github username
    :returns: profile dict with score, confidence, signals, contact info,
              lost-builder fields; None when the user can't be fetched
    """
    user = get_github_user(login)
    if not user:
        return None
    repos = get_user_repos(login)
    # collect text corpus
    text_parts = []
    if user.get('bio'):
        text_parts.append(user['bio'])
    if user.get('company'):
        text_parts.append(user['company'])
    if user.get('location'):
        text_parts.append(user['location'])
    # analyze repos
    all_topics = []
    languages = defaultdict(int)
    total_stars = 0
    for repo in repos:
        if repo.get('description'):
            text_parts.append(repo['description'])
        if repo.get('topics'):
            all_topics.extend(repo['topics'])
        if repo.get('language'):
            languages[repo['language']] += 1
        total_stars += repo.get('stargazers_count', 0)
    full_text = ' '.join(text_parts)
    # analyze signals
    text_score, positive_signals, negative_signals = analyze_text(full_text)
    # topic alignment
    aligned_topics = set(all_topics) & set(TARGET_TOPICS)
    topic_score = len(aligned_topics) * 10
    # builder score (repos indicate building, not just talking)
    builder_score = 0
    if len(repos) > 20:
        builder_score = 15
    elif len(repos) > 10:
        builder_score = 10
    elif len(repos) > 5:
        builder_score = 5
    # hireable bonus
    hireable_score = 5 if user.get('hireable') else 0
    # total score
    total_score = text_score + topic_score + builder_score + hireable_score
    # === LOST BUILDER DETECTION ===
    # build profile dict for lost analysis
    profile_for_lost = {
        'bio': user.get('bio'),
        'repos': repos,
        'public_repos': user.get('public_repos', len(repos)),
        'followers': user.get('followers', 0),
        'following': user.get('following', 0),
        'extra': {
            'top_repos': repos[:10],
        },
    }
    # analyze for lost signals
    lost_signals, lost_weight = analyze_github_for_lost_signals(profile_for_lost)
    # also check text for lost language patterns
    text_lost_signals, text_lost_weight = analyze_text_for_lost_signals(full_text)
    for sig in text_lost_signals:
        if sig not in lost_signals:
            lost_signals.append(sig)
    # text weight is added once on top of the profile-based weight
    lost_weight += text_lost_weight
    lost_potential_score = lost_weight
    # classify: builder, lost, both, or none
    user_type = classify_user(lost_potential_score, builder_score, total_score)
    # confidence based on data richness
    confidence = 0.3
    if user.get('bio'):
        confidence += 0.15
    if len(repos) > 5:
        confidence += 0.15
    if len(text_parts) > 5:
        confidence += 0.15
    if user.get('email') or user.get('blog') or user.get('twitter_username'):
        confidence += 0.15
    if total_stars > 100:
        confidence += 0.1
    confidence = min(confidence, 1.0)
    # build reasons
    reasons = []
    if positive_signals:
        reasons.append(f"signals: {', '.join(positive_signals[:5])}")
    if aligned_topics:
        # NOTE(review): aligned_topics is a set, so the sampled five are in
        # arbitrary order across runs
        reasons.append(f"topics: {', '.join(list(aligned_topics)[:5])}")
    if builder_score > 0:
        reasons.append(f"builder ({len(repos)} repos)")
    if negative_signals:
        reasons.append(f"WARNING: {', '.join(negative_signals)}")
    # add lost reasons if applicable
    if user_type == 'lost' or user_type == 'both':
        lost_descriptions = get_signal_descriptions(lost_signals)
        if lost_descriptions:
            reasons.append(f"LOST SIGNALS: {', '.join(lost_descriptions[:3])}")
    # === DEEP HANDLE DISCOVERY ===
    # follow blog links, scrape websites, find ALL social handles
    handles, discovered_emails = discover_all_handles(user)
    # merge discovered emails with github email
    all_emails = discovered_emails or []
    if user.get('email'):
        all_emails.append(user['email'])
    all_emails = list(set(e for e in all_emails if e and 'noreply' not in e.lower()))
    return {
        'platform': 'github',
        'username': login,
        'url': f"https://github.com/{login}",
        'name': user.get('name'),
        'bio': user.get('bio'),
        'location': user.get('location'),
        'score': total_score,
        'confidence': confidence,
        'signals': positive_signals,
        'negative_signals': negative_signals,
        'topics': list(aligned_topics),
        'languages': dict(languages),
        'repo_count': len(repos),
        'total_stars': total_stars,
        'reasons': reasons,
        'contact': {
            # NOTE(review): all_emails comes from a set, so which address is
            # "first" is arbitrary across runs
            'email': all_emails[0] if all_emails else None,
            'emails': all_emails,
            'blog': user.get('blog'),
            'twitter': user.get('twitter_username') or handles.get('twitter'),
            'mastodon': handles.get('mastodon'),
            'bluesky': handles.get('bluesky'),
            'matrix': handles.get('matrix'),
            'lemmy': handles.get('lemmy'),
        },
        'extra': {
            'topics': list(aligned_topics),
            'languages': dict(languages),
            'repo_count': len(repos),
            'total_stars': total_stars,
            'hireable': user.get('hireable', False),
            'handles': handles, # all discovered handles
        },
        'hireable': user.get('hireable', False),
        'scraped_at': datetime.now().isoformat(),
        # lost builder fields
        'lost_potential_score': lost_potential_score,
        'lost_signals': lost_signals,
        'user_type': user_type, # 'builder', 'lost', 'both', 'none'
    }
def scrape_github(db, limit_per_source=50):
    """
    full github scrape

    collects candidate logins from ecosystem repo contributors and from the
    owners of repos tagged with target topics, then analyzes each candidate
    and saves any with a positive score via db.save_human().

    :param db: storage object exposing save_human(profile_dict)
    :param limit_per_source: contributors fetched per ecosystem repo
    :returns: list of analyzed users
    """
    print("scoutd/github: starting scrape...")
    all_logins = set()
    # 1. ecosystem repo contributors
    print(" scraping ecosystem repo contributors...")
    for repo in ECOSYSTEM_REPOS:
        contributors = get_repo_contributors(repo, per_page=limit_per_source)
        for c in contributors:
            login = c.get('login')
            if login and not login.endswith('[bot]'):
                all_logins.add(login)
        print(f" {repo}: {len(contributors)} contributors")
    # 2. topic repos
    print(" scraping topic repos...")
    for topic in TARGET_TOPICS[:10]:
        repos = search_repos_by_topic(topic, per_page=30)
        for repo in repos:
            owner = repo.get('owner', {}).get('login')
            if owner and not owner.endswith('[bot]'):
                all_logins.add(owner)
        print(f" #{topic}: {len(repos)} repos")
    print(f" found {len(all_logins)} unique users to analyze")
    # analyze each
    results = []
    builders_found = 0
    lost_found = 0
    for i, login in enumerate(all_logins):
        if i % 20 == 0:
            print(f" analyzing... {i}/{len(all_logins)}")
        try:
            result = analyze_github_user(login)
            if result and result['score'] > 0:
                results.append(result)
                db.save_human(result)
                user_type = result.get('user_type', 'none')
                if user_type == 'builder':
                    builders_found += 1
                    if result['score'] >= 50:
                        print(f"{login}: {result['score']} pts, {result['confidence']:.0%} conf")
                elif user_type == 'lost':
                    lost_found += 1
                    lost_score = result.get('lost_potential_score', 0)
                    if lost_score >= 40:
                        print(f" 💔 {login}: lost_score={lost_score}, values={result['score']} pts")
                elif user_type == 'both':
                    builders_found += 1
                    lost_found += 1
                    print(f"{login}: recovering builder (lost={result.get('lost_potential_score', 0)}, active={result['score']})")
        except Exception as e:
            # keep the sweep alive: one bad profile shouldn't kill the run
            print(f" error on {login}: {e}")
    print(f"scoutd/github: found {len(results)} aligned humans")
    print(f" - {builders_found} active builders")
    print(f" - {lost_found} lost builders (need encouragement)")
    return results

507
connectd/scoutd/handles.py Normal file
View file

@ -0,0 +1,507 @@
"""
scoutd/handles.py - comprehensive social handle discovery
finds ALL social handles from:
- github bio/profile
- personal websites (rel="me", footers, contact pages, json-ld)
- README files
- linktree/bio.link/carrd pages
- any linked pages
stores structured handle data for activity-based contact selection
"""
import re
import json
import requests
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
# generic browser-ish UA so personal sites don't block the scraper outright
HEADERS = {'User-Agent': 'Mozilla/5.0 (compatible; connectd/1.0)'}
# platform URL patterns -> (platform, handle_extractor)
# NOTE(review): patterns are tried in dict insertion order by
# extract_handle_from_url, and the generic fediverse patterns (any-host /@user
# or /u/user) can match non-fediverse URLs such as medium.com/@user before the
# more specific platform entries are reached - verify platform labels downstream
PLATFORM_PATTERNS = {
    # fediverse
    'mastodon': [
        (r'https?://([^/]+)/@([^/?#]+)', lambda m: f"@{m.group(2)}@{m.group(1)}"),
        (r'https?://([^/]+)/users/([^/?#]+)', lambda m: f"@{m.group(2)}@{m.group(1)}"),
        (r'https?://mastodon\.social/@([^/?#]+)', lambda m: f"@{m.group(1)}@mastodon.social"),
    ],
    'pixelfed': [
        (r'https?://pixelfed\.social/@([^/?#]+)', lambda m: f"@{m.group(1)}@pixelfed.social"),
        (r'https?://([^/]*pixelfed[^/]*)/@([^/?#]+)', lambda m: f"@{m.group(2)}@{m.group(1)}"),
    ],
    'lemmy': [
        (r'https?://([^/]+)/u/([^/?#]+)', lambda m: f"@{m.group(2)}@{m.group(1)}"),
        (r'https?://lemmy\.([^/]+)/u/([^/?#]+)', lambda m: f"@{m.group(2)}@lemmy.{m.group(1)}"),
    ],
    # mainstream
    'twitter': [
        (r'https?://(?:www\.)?(?:twitter|x)\.com/([^/?#]+)', lambda m: f"@{m.group(1)}"),
    ],
    'bluesky': [
        (r'https?://bsky\.app/profile/([^/?#]+)', lambda m: m.group(1)),
        (r'https?://([^.]+)\.bsky\.social', lambda m: f"{m.group(1)}.bsky.social"),
    ],
    'threads': [
        (r'https?://(?:www\.)?threads\.net/@([^/?#]+)', lambda m: f"@{m.group(1)}"),
    ],
    'instagram': [
        (r'https?://(?:www\.)?instagram\.com/([^/?#]+)', lambda m: f"@{m.group(1)}"),
    ],
    'facebook': [
        (r'https?://(?:www\.)?facebook\.com/([^/?#]+)', lambda m: m.group(1)),
    ],
    'linkedin': [
        (r'https?://(?:www\.)?linkedin\.com/in/([^/?#]+)', lambda m: m.group(1)),
        (r'https?://(?:www\.)?linkedin\.com/company/([^/?#]+)', lambda m: f"company/{m.group(1)}"),
    ],
    # dev platforms
    'github': [
        (r'https?://(?:www\.)?github\.com/([^/?#]+)', lambda m: m.group(1)),
    ],
    'gitlab': [
        (r'https?://(?:www\.)?gitlab\.com/([^/?#]+)', lambda m: m.group(1)),
    ],
    'codeberg': [
        (r'https?://codeberg\.org/([^/?#]+)', lambda m: m.group(1)),
    ],
    'sourcehut': [
        (r'https?://sr\.ht/~([^/?#]+)', lambda m: f"~{m.group(1)}"),
        (r'https?://git\.sr\.ht/~([^/?#]+)', lambda m: f"~{m.group(1)}"),
    ],
    # chat
    'matrix': [
        (r'https?://matrix\.to/#/(@[^:]+:[^/?#]+)', lambda m: m.group(1)),
    ],
    'discord': [
        (r'https?://discord\.gg/([^/?#]+)', lambda m: f"invite/{m.group(1)}"),
        (r'https?://discord\.com/invite/([^/?#]+)', lambda m: f"invite/{m.group(1)}"),
    ],
    'telegram': [
        (r'https?://t\.me/([^/?#]+)', lambda m: f"@{m.group(1)}"),
    ],
    # content
    'youtube': [
        (r'https?://(?:www\.)?youtube\.com/@([^/?#]+)', lambda m: f"@{m.group(1)}"),
        (r'https?://(?:www\.)?youtube\.com/c(?:hannel)?/([^/?#]+)', lambda m: m.group(1)),
    ],
    'twitch': [
        (r'https?://(?:www\.)?twitch\.tv/([^/?#]+)', lambda m: m.group(1)),
    ],
    'substack': [
        (r'https?://([^.]+)\.substack\.com', lambda m: m.group(1)),
    ],
    'medium': [
        (r'https?://(?:www\.)?medium\.com/@([^/?#]+)', lambda m: f"@{m.group(1)}"),
        (r'https?://([^.]+)\.medium\.com', lambda m: m.group(1)),
    ],
    'devto': [
        (r'https?://dev\.to/([^/?#]+)', lambda m: m.group(1)),
    ],
    # funding
    'kofi': [
        (r'https?://ko-fi\.com/([^/?#]+)', lambda m: m.group(1)),
    ],
    'patreon': [
        (r'https?://(?:www\.)?patreon\.com/([^/?#]+)', lambda m: m.group(1)),
    ],
    'liberapay': [
        (r'https?://liberapay\.com/([^/?#]+)', lambda m: m.group(1)),
    ],
    'github_sponsors': [
        (r'https?://github\.com/sponsors/([^/?#]+)', lambda m: m.group(1)),
    ],
    # link aggregators (we'll parse these specially)
    'linktree': [
        (r'https?://linktr\.ee/([^/?#]+)', lambda m: m.group(1)),
    ],
    'biolink': [
        (r'https?://bio\.link/([^/?#]+)', lambda m: m.group(1)),
    ],
    'carrd': [
        (r'https?://([^.]+)\.carrd\.co', lambda m: m.group(1)),
    ],
}
# fediverse handle pattern: @user@instance
FEDIVERSE_HANDLE_PATTERN = re.compile(r'@([\w.-]+)@([\w.-]+\.[\w]+)')
# email pattern
EMAIL_PATTERN = re.compile(r'\b([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b')
# known fediverse instances (for context-free handle detection)
KNOWN_FEDIVERSE_INSTANCES = [
    'mastodon.social', 'mastodon.online', 'mstdn.social', 'mas.to',
    'tech.lgbt', 'fosstodon.org', 'hackers.town', 'social.coop',
    'kolektiva.social', 'solarpunk.moe', 'wandering.shop',
    'elekk.xyz', 'cybre.space', 'octodon.social', 'chaos.social',
    'infosec.exchange', 'ruby.social', 'phpc.social', 'toot.cafe',
    'mstdn.io', 'pixelfed.social', 'lemmy.ml', 'lemmy.world',
    'kbin.social', 'pleroma.site', 'akkoma.dev',
]
def extract_handle_from_url(url):
    """map a URL onto a known platform

    returns (platform, handle) for the first matching PLATFORM_PATTERNS
    entry, or (None, None) when nothing matches.
    """
    for platform_name, pattern_list in PLATFORM_PATTERNS.items():
        for regex, to_handle in pattern_list:
            hit = re.match(regex, url, re.I)
            if hit:
                return platform_name, to_handle(hit)
    return None, None
def extract_fediverse_handles(text):
    """find @user@instance.tld patterns in text"""
    return [
        f"@{user}@{instance}"
        for user, instance in FEDIVERSE_HANDLE_PATTERN.findall(text)
    ]
def extract_emails(text):
    """collect email addresses from text, dropping obvious non-personal ones"""
    blocked = ('noreply', 'no-reply', 'donotreply', 'example.com')
    return [
        address
        for address in EMAIL_PATTERN.findall(text)
        if not any(token in address.lower() for token in blocked)
    ]
def scrape_page(url, timeout=15):
    """GET a page and return (BeautifulSoup, raw_html)

    returns (None, None) on any network, http, or parse failure.
    """
    try:
        resp = requests.get(url, headers=HEADERS, timeout=timeout, allow_redirects=True)
        resp.raise_for_status()
        html = resp.text
        return BeautifulSoup(html, 'html.parser'), html
    except Exception:
        return None, None
def extract_rel_me_links(soup):
    """collect href targets of rel="me" anchors (identity verification links)"""
    if not soup:
        return []
    anchors = soup.find_all('a', rel=lambda rel: rel and 'me' in rel)
    return [a.get('href') for a in anchors if a.get('href')]
def extract_social_links_from_page(soup, base_url=None):
    """pull every recognizable social-platform link out of a parsed page

    returns a list of {'platform', 'handle', 'url'} dicts.
    """
    found = []
    if not soup:
        return found
    for anchor in soup.find_all('a', href=True):
        target = anchor['href']
        # resolve relative links against the page they were scraped from
        if base_url and not target.startswith('http'):
            target = urljoin(base_url, target)
        platform, handle = extract_handle_from_url(target)
        if platform:
            found.append({'platform': platform, 'handle': handle, 'url': target})
    return found
def extract_json_ld(soup):
    """extract social-profile handles from JSON-LD structured data

    reads the `sameAs` URLs out of every ld+json script on the page.
    handles both a single top-level object and the equally common
    list-of-objects form (the old code silently dropped lists).

    :returns: {platform: handle}
    """
    data = {}
    if not soup:
        return data
    for script in soup.find_all('script', type='application/ld+json'):
        if not script.string:
            # empty or externally-loaded script tag: nothing to parse
            continue
        try:
            ld = json.loads(script.string)
        except ValueError:
            continue
        nodes = ld if isinstance(ld, list) else [ld]
        for node in nodes:
            if not isinstance(node, dict):
                continue
            # look for sameAs links (social profiles)
            same_as = node.get('sameAs', [])
            if isinstance(same_as, str):
                same_as = [same_as]
            for url in same_as:
                platform, handle = extract_handle_from_url(url)
                if platform:
                    data[platform] = handle
    return data
def scrape_linktree(url):
    """scrape a linktree/bio.link/carrd page for all links"""
    collected = {}
    soup, raw = scrape_page(url)
    if not soup:
        return collected
    # aggregator pages render plain anchors, so the generic extractor works;
    # skip links that just point at another aggregator
    aggregators = ('linktree', 'biolink', 'carrd')
    for link in extract_social_links_from_page(soup, url):
        if link['platform'] not in aggregators:
            collected[link['platform']] = link['handle']
    # fediverse handles sometimes appear only as text, not anchors
    if raw:
        fedi_handles = extract_fediverse_handles(raw)
        if fedi_handles:
            collected['mastodon'] = fedi_handles[0]
    return collected
def scrape_website_for_handles(url, follow_links=True):
    """
    comprehensive website scrape for social handles
    checks:
    - rel="me" links
    - social links in page
    - json-ld structured data
    - /about and /contact pages
    - fediverse handles in text
    - emails

    :param url: site to scrape
    :param follow_links: also probe /about, /contact, /links, /social
    :returns: (handles_dict, emails_list); both empty if the page is unreachable
    """
    handles = {}
    emails = []
    soup, raw = scrape_page(url)
    if not soup:
        return handles, emails
    # 1. rel="me" links (most authoritative)
    rel_me = extract_rel_me_links(soup)
    for link in rel_me:
        platform, handle = extract_handle_from_url(link)
        if platform and platform not in handles:
            handles[platform] = handle
    # 2. all social links on page
    social_links = extract_social_links_from_page(soup, url)
    for link in social_links:
        if link['platform'] not in handles:
            handles[link['platform']] = link['handle']
    # 3. json-ld structured data
    json_ld = extract_json_ld(soup)
    for platform, handle in json_ld.items():
        if platform not in handles:
            handles[platform] = handle
    # 4. fediverse handles in text
    if raw:
        fedi = extract_fediverse_handles(raw)
        if fedi and 'mastodon' not in handles:
            handles['mastodon'] = fedi[0]
        # emails
        emails = extract_emails(raw)
    # 5. follow links to /about, /contact
    if follow_links:
        parsed = urlparse(url)
        base = f"{parsed.scheme}://{parsed.netloc}"
        for path in ['/about', '/contact', '/links', '/social']:
            try:
                sub_soup, sub_raw = scrape_page(base + path)
                if sub_soup:
                    sub_links = extract_social_links_from_page(sub_soup, base)
                    for link in sub_links:
                        if link['platform'] not in handles:
                            handles[link['platform']] = link['handle']
                if sub_raw:
                    fedi = extract_fediverse_handles(sub_raw)
                    if fedi and 'mastodon' not in handles:
                        handles['mastodon'] = fedi[0]
                    emails.extend(extract_emails(sub_raw))
            except:
                # best-effort probing: a broken sub-page must not kill the scrape
                pass
    # 6. check for linktree etc in links and follow them
    for platform in ['linktree', 'biolink', 'carrd']:
        if platform in handles:
            # this is actually a link aggregator, scrape it
            link_url = None
            for link in social_links:
                if link['platform'] == platform:
                    link_url = link['url']
                    break
            if link_url:
                aggregator_handles = scrape_linktree(link_url)
                for p, h in aggregator_handles.items():
                    if p not in handles:
                        handles[p] = h
            del handles[platform] # remove the aggregator itself
    return handles, list(set(emails))
def extract_handles_from_text(text):
    """extract handles from plain text (bio, README, etc)"""
    found = {}
    if not text:
        return found
    # fediverse handles
    fedi = extract_fediverse_handles(text)
    if fedi:
        found['mastodon'] = fedi[0]
    # URLs embedded in the text, with trailing punctuation stripped
    for url_match in re.finditer(r'https?://[^\s<>"\']+', text):
        candidate = url_match.group(0).rstrip('.,;:!?)')
        platform, handle = extract_handle_from_url(candidate)
        if platform and platform not in found:
            found[platform] = handle
    # twitter-style @mentions, only when the text mentions twitter/x at all
    lowered = text.lower()
    if 'twitter' in lowered or 'x.com' in lowered:
        for mention in re.finditer(r'(?:^|[^\w])@(\w{1,15})(?:[^\w]|$)', text):
            if 'twitter' not in found:
                found['twitter'] = f"@{mention.group(1)}"
    # matrix ids (@user:homeserver)
    for mx in re.finditer(r'@([\w.-]+):([\w.-]+)', text):
        if 'matrix' not in found:
            found['matrix'] = f"@{mx.group(1)}:{mx.group(2)}"
    return found
def scrape_github_readme(username):
    """scrape a user's profile README (the username/username repo)

    tries the `main` branch first, then `master` (the old code duplicated
    the whole fetch block per branch and swallowed all errors bare).

    :returns: (handles_dict, emails_list); both empty when no README exists
    """
    for branch in ('main', 'master'):
        url = f"https://raw.githubusercontent.com/{username}/{username}/{branch}/README.md"
        try:
            resp = requests.get(url, headers=HEADERS, timeout=10)
        except requests.exceptions.RequestException:
            # network failure on this branch: still worth trying the next one
            continue
        if resp.status_code == 200:
            text = resp.text
            return extract_handles_from_text(text), extract_emails(text)
    return {}, []
def discover_all_handles(github_profile):
    """
    comprehensive handle discovery from a github profile dict
    github_profile should contain:
    - username
    - bio
    - blog (website URL)
    - twitter_username
    - etc.

    :returns: (handles_dict, emails_list) aggregated from the bio, the
              linked website, the profile README, and the profile email
    """
    handles = {}
    emails = []
    username = github_profile.get('login') or github_profile.get('username')
    print(f" discovering handles for {username}...")
    # 1. github bio
    bio = github_profile.get('bio', '')
    if bio:
        bio_handles = extract_handles_from_text(bio)
        handles.update(bio_handles)
        emails.extend(extract_emails(bio))
    # 2. twitter from github profile
    twitter = github_profile.get('twitter_username')
    if twitter and 'twitter' not in handles:
        handles['twitter'] = f"@{twitter}"
    # 3. website from github profile
    website = github_profile.get('blog')
    if website:
        # github stores the blog field verbatim; normalize missing scheme
        if not website.startswith('http'):
            website = f"https://{website}"
        print(f" scraping website: {website}")
        site_handles, site_emails = scrape_website_for_handles(website)
        for p, h in site_handles.items():
            if p not in handles:
                handles[p] = h
        emails.extend(site_emails)
    # 4. profile README
    if username:
        print(f" checking profile README...")
        readme_handles, readme_emails = scrape_github_readme(username)
        for p, h in readme_handles.items():
            if p not in handles:
                handles[p] = h
        emails.extend(readme_emails)
    # 5. email from github profile
    github_email = github_profile.get('email')
    if github_email:
        emails.append(github_email)
    # dedupe emails
    emails = list(set(e for e in emails if e and '@' in e and 'noreply' not in e.lower()))
    print(f" found {len(handles)} handles, {len(emails)} emails")
    return handles, emails
def merge_handles(existing, new):
    """merge new handles into existing, preferring more specific handles

    mutates and returns `existing`; a longer handle wins because e.g.
    "@user@instance" is more specific than a bare "@user".
    """
    for platform, handle in new.items():
        if platform in existing:
            if len(handle) > len(existing[platform]):
                existing[platform] = handle
        else:
            existing[platform] = handle
    return existing

322
connectd/scoutd/lemmy.py Normal file
View file

@ -0,0 +1,322 @@
"""
scoutd/lemmy.py - lemmy (fediverse reddit) discovery
lemmy is federated so we hit multiple instances.
great for finding lost builders in communities like:
- /c/programming, /c/technology, /c/linux
- /c/antiwork, /c/workreform (lost builders!)
- /c/selfhosted, /c/privacy, /c/opensource
supports authenticated access for private instances and DM delivery.
"""
import requests
import json
import time
import os
from datetime import datetime
from pathlib import Path
from .signals import analyze_text
from .lost import (
analyze_social_for_lost_signals,
analyze_text_for_lost_signals,
classify_user,
)
# auth config from environment
LEMMY_INSTANCE = os.environ.get('LEMMY_INSTANCE', '')
LEMMY_USERNAME = os.environ.get('LEMMY_USERNAME', '')
LEMMY_PASSWORD = os.environ.get('LEMMY_PASSWORD', '')
# auth token cache (process-wide jwt set by get_auth_token)
_auth_token = None
# popular lemmy instances
LEMMY_INSTANCES = [
    'lemmy.ml',
    'lemmy.world',
    'programming.dev',
    'lemm.ee',
    'sh.itjust.works',
]
# communities to scout (format: community@instance or just community for local)
TARGET_COMMUNITIES = [
    # builder communities
    'programming',
    'selfhosted',
    'linux',
    'opensource',
    'privacy',
    'technology',
    'webdev',
    'rust',
    'python',
    'golang',
    # lost builder communities (people struggling, stuck, seeking)
    'antiwork',
    'workreform',
    'careerguidance',
    'cscareerquestions',
    'learnprogramming',
    'findapath',
]
# scrape cache location; created eagerly at import time (side effect)
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'lemmy'
CACHE_DIR.mkdir(parents=True, exist_ok=True)
def get_auth_token(instance=None):
    """login to a lemmy instance and return its jwt, cached per process

    NOTE(review): the cache is instance-agnostic - once a token exists it is
    returned even when a different `instance` is requested later; confirm
    callers only ever use the configured LEMMY_INSTANCE.
    """
    global _auth_token
    if _auth_token:
        return _auth_token
    target = instance or LEMMY_INSTANCE
    if not all([target, LEMMY_USERNAME, LEMMY_PASSWORD]):
        return None
    try:
        resp = requests.post(
            f"https://{target}/api/v3/user/login",
            json={
                'username_or_email': LEMMY_USERNAME,
                'password': LEMMY_PASSWORD,
            },
            timeout=30,
        )
        if resp.status_code != 200:
            return None
        _auth_token = resp.json().get('jwt')
        return _auth_token
    except Exception as e:
        print(f"lemmy auth error: {e}")
        return None
def send_lemmy_dm(recipient_username, message, dry_run=False):
    """send a private message via lemmy

    resolves the recipient's numeric id through the configured instance,
    then posts a private_message with the cached auth token.

    :param recipient_username: "user" (local) or "user@instance" (federated)
    :param message: message body
    :param dry_run: when True, only print what would be sent
    :returns: (ok, error_string_or_None)
    """
    if not LEMMY_INSTANCE:
        return False, "LEMMY_INSTANCE not configured"
    if dry_run:
        print(f"[dry run] would send lemmy DM to {recipient_username}")
        return True, None
    token = get_auth_token()
    if not token:
        return False, "failed to authenticate with lemmy"
    try:
        # parse recipient - could be username@instance or just username
        if '@' in recipient_username:
            username, instance = recipient_username.split('@', 1)
        else:
            username = recipient_username
            instance = LEMMY_INSTANCE
        # get recipient user id
        user_url = f"https://{LEMMY_INSTANCE}/api/v3/user"
        resp = requests.get(user_url, params={'username': f"{username}@{instance}"}, timeout=30)
        if resp.status_code != 200:
            # try without instance suffix for local users
            resp = requests.get(user_url, params={'username': username}, timeout=30)
        if resp.status_code != 200:
            return False, f"could not find user {recipient_username}"
        recipient_id = resp.json().get('person_view', {}).get('person', {}).get('id')
        if not recipient_id:
            return False, "could not get recipient id"
        # send DM
        dm_url = f"https://{LEMMY_INSTANCE}/api/v3/private_message"
        resp = requests.post(dm_url,
            headers={'Authorization': f'Bearer {token}'},
            json={
                'content': message,
                'recipient_id': recipient_id,
            },
            timeout=30
        )
        if resp.status_code == 200:
            return True, None
        else:
            return False, f"lemmy DM error: {resp.status_code} - {resp.text}"
    except Exception as e:
        return False, f"lemmy DM error: {str(e)}"
def get_community_posts(instance, community, limit=50, sort='New'):
    """fetch posts from a lemmy community; empty list on any failure"""
    try:
        resp = requests.get(
            f"https://{instance}/api/v3/post/list",
            params={
                'community_name': community,
                'sort': sort,
                'limit': limit,
            },
            timeout=30,
        )
        if resp.status_code == 200:
            return resp.json().get('posts', [])
    except Exception:
        pass
    return []
def get_user_profile(instance, username):
    """fetch a lemmy user profile; None on any failure"""
    try:
        resp = requests.get(
            f"https://{instance}/api/v3/user",
            params={'username': username},
            timeout=30,
        )
        if resp.status_code == 200:
            return resp.json()
    except Exception:
        pass
    return None
def analyze_lemmy_user(instance, username, posts=None):
    """analyze a lemmy user for values alignment and lost signals

    scores the user's bio plus up to 10 of the supplied posts and runs
    lost-builder detection over the same corpus.

    :param instance: lemmy instance hostname
    :param username: local username on that instance
    :param posts: optional list of post_view dicts already fetched
    :returns: connectd human-profile dict, or None if the profile fetch fails
    """
    profile = get_user_profile(instance, username)
    if not profile:
        return None
    person = profile.get('person_view', {}).get('person', {})
    counts = profile.get('person_view', {}).get('counts', {})
    bio = person.get('bio', '') or ''
    display_name = person.get('display_name') or person.get('name', username)
    # analyze bio
    bio_score, bio_signals, bio_reasons = analyze_text(bio)
    # analyze posts if provided
    post_signals = []
    post_text = []
    if posts:
        for post in posts[:10]:
            post_data = post.get('post', {})
            title = post_data.get('name', '')
            body = post_data.get('body', '')
            post_text.append(f"{title} {body}")
            _, signals, _ = analyze_text(f"{title} {body}")
            post_signals.extend(signals)
    all_signals = list(set(bio_signals + post_signals))
    # each post signal adds a flat 5 points on top of the bio score
    total_score = bio_score + len(post_signals) * 5
    # lost builder detection
    profile_for_lost = {
        'bio': bio,
        'post_count': counts.get('post_count', 0),
        'comment_count': counts.get('comment_count', 0),
    }
    posts_for_lost = [{'text': t} for t in post_text]
    lost_signals, lost_weight = analyze_social_for_lost_signals(profile_for_lost, posts_for_lost)
    lost_potential_score = lost_weight
    user_type = classify_user(lost_potential_score, 50, total_score)
    return {
        'platform': 'lemmy',
        'username': f"{username}@{instance}",
        'url': f"https://{instance}/u/{username}",
        'name': display_name,
        'bio': bio,
        'location': None,
        'score': total_score,
        'confidence': min(0.9, 0.3 + len(all_signals) * 0.1),
        'signals': all_signals,
        'negative_signals': [],
        'reasons': bio_reasons,
        'contact': {},
        'extra': {
            'instance': instance,
            'post_count': counts.get('post_count', 0),
            'comment_count': counts.get('comment_count', 0),
        },
        'lost_potential_score': lost_potential_score,
        'lost_signals': lost_signals,
        'user_type': user_type,
    }
def scrape_lemmy(db, limit_per_community=30):
    """scrape lemmy instances for aligned builders

    walks LEMMY_INSTANCES (the user's configured LEMMY_INSTANCE first),
    pulls recent posts from each TARGET_COMMUNITIES community, groups
    posts by author, analyzes each author once, and saves anyone whose
    alignment score >= 20 or lost_potential_score >= 30.

    args:
        db: database handle exposing save_human(dict)
        limit_per_community: posts fetched per community per instance

    returns:
        number of humans saved
    """
    print("scouting lemmy...")
    found = 0
    lost_found = 0
    seen_users = set()
    # build instance list - user's instance first if configured
    instances = list(LEMMY_INSTANCES)
    if LEMMY_INSTANCE and LEMMY_INSTANCE not in instances:
        instances.insert(0, LEMMY_INSTANCE)
    for instance in instances:
        print(f" instance: {instance}")
        for community in TARGET_COMMUNITIES:
            posts = get_community_posts(instance, community, limit=limit_per_community)
            if not posts:
                continue
            print(f" /c/{community}: {len(posts)} posts")
            # group posts by user
            user_posts = {}
            for post in posts:
                creator = post.get('creator', {})
                username = creator.get('name')
                if not username:
                    continue
                user_key = f"{username}@{instance}"
                if user_key in seen_users:
                    continue
                user_posts.setdefault(user_key, []).append(post)
            # analyze each user
            # (bug fix: loop variable renamed from `posts`, which shadowed
            # the community post list fetched above)
            for user_key, author_posts in user_posts.items():
                username = user_key.split('@')[0]
                if user_key in seen_users:
                    continue
                seen_users.add(user_key)
                result = analyze_lemmy_user(instance, username, author_posts)
                if not result:
                    continue
                if result['score'] >= 20 or result.get('lost_potential_score', 0) >= 30:
                    db.save_human(result)
                    found += 1
                    if result.get('user_type') in ['lost', 'both']:
                        lost_found += 1
                        print(f" {result['username']}: {result['score']:.0f} (lost: {result['lost_potential_score']:.0f})")
                    elif result['score'] >= 40:
                        print(f" {result['username']}: {result['score']:.0f}")
                time.sleep(0.5)  # rate limit
            time.sleep(1)  # between communities
        time.sleep(2)  # between instances
    print(f"lemmy: found {found} humans ({lost_found} lost builders)")
    return found

169
connectd/scoutd/lobsters.py Normal file
View file

@ -0,0 +1,169 @@
"""
scoutd/lobsters.py - lobste.rs discovery
high-signal invite-only tech community
"""
import requests
import json
import time
from datetime import datetime
from pathlib import Path
from .signals import analyze_text
# identify ourselves and request json from the lobste.rs endpoints
HEADERS = {'User-Agent': 'connectd/1.0', 'Accept': 'application/json'}
# on-disk response cache used by _api_get (1 hour ttl)
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'lobsters'
# story tags considered values-aligned; their submitters become candidates
ALIGNED_TAGS = ['privacy', 'security', 'distributed', 'rust', 'linux', 'culture', 'practices']
def _api_get(url, params=None):
    """cached, rate-limited GET returning parsed json (None on error)

    responses are cached on disk for one hour. the cache filename is
    derived from a stable md5 digest of url+params; the previous
    implementation used the builtin hash(), whose value for strings is
    randomized per process (PYTHONHASHSEED), so the cache never survived
    a restart.
    """
    import hashlib  # local import: keeps the stable-key fix self-contained
    cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}"
    digest = hashlib.md5(cache_key.encode('utf-8')).hexdigest()[:16]
    cache_file = CACHE_DIR / f"{digest}.json"
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    if cache_file.exists():
        try:
            data = json.loads(cache_file.read_text())
            if time.time() - data.get('_cached_at', 0) < 3600:
                return data.get('_data')
        except Exception:  # corrupt cache entry: fall through and refetch
            pass
    time.sleep(2)
    try:
        resp = requests.get(url, headers=HEADERS, params=params, timeout=30)
        resp.raise_for_status()
        result = resp.json()
        cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result}))
        return result
    except requests.exceptions.RequestException as e:
        print(f" lobsters api error: {e}")
        return None
def get_stories_by_tag(tag):
    """fetch the recent story list for a lobste.rs tag (empty list on failure)"""
    stories = _api_get(f'https://lobste.rs/t/{tag}.json')
    return stories if stories else []
def get_newest_stories():
    """fetch the lobste.rs newest-stories feed (empty list on failure)"""
    feed = _api_get('https://lobste.rs/newest.json')
    return feed if feed else []
def get_user(username):
    """fetch a lobste.rs user profile dict (None on failure)"""
    profile_url = f'https://lobste.rs/u/{username}.json'
    return _api_get(profile_url)
def analyze_lobsters_user(username):
    """score a lobste.rs account for values alignment.

    combines bio text signals with an invite-only base bonus, karma
    tiers, and presence of github/homepage links. returns a human dict
    or None when the profile can't be fetched.
    """
    user = get_user(username)
    if not user:
        return None
    about_parts = []
    if user.get('about'):
        about_parts.append(user['about'])
    text_score, positive_signals, negative_signals = analyze_text(' '.join(about_parts))
    karma = user.get('karma', 0)
    # invite-only community: every account starts with a base bonus of 15
    score = text_score + 15
    # karma tiers
    if karma > 100:
        score += 10
    elif karma > 50:
        score += 5
    # linked github / homepage each add a little
    if user.get('github_username'):
        score += 5
    if user.get('homepage'):
        score += 5
    # confidence starts higher than on open platforms (invite-only)
    confidence = 0.4
    if about_parts:
        confidence += 0.2
    if karma > 50:
        confidence += 0.2
    confidence = min(confidence, 0.9)
    reasons = ['on lobste.rs (invite-only)']
    if karma > 50:
        reasons.append(f"active ({karma} karma)")
    if positive_signals:
        reasons.append(f"signals: {', '.join(positive_signals[:5])}")
    if negative_signals:
        reasons.append(f"WARNING: {', '.join(negative_signals)}")
    return {
        'platform': 'lobsters',
        'username': username,
        'url': f"https://lobste.rs/u/{username}",
        'score': score,
        'confidence': confidence,
        'signals': positive_signals,
        'negative_signals': negative_signals,
        'karma': karma,
        'reasons': reasons,
        'contact': {
            'github': user.get('github_username'),
            'twitter': user.get('twitter_username'),
            'homepage': user.get('homepage'),
        },
        'scraped_at': datetime.now().isoformat(),
    }
def scrape_lobsters(db):
    """discover aligned humans on lobste.rs and persist them via db.save_human.

    candidate usernames come from submitters on aligned tag feeds plus
    the newest-stories feed; each candidate is analyzed once and saved
    when their score is positive. returns the list of saved human dicts.
    """
    print("scoutd/lobsters: starting scrape...")
    candidates = set()
    # submitters of stories under aligned tags
    for tag in ALIGNED_TAGS:
        print(f" tag: {tag}...")
        for story in get_stories_by_tag(tag):
            name = story.get('submitter_user', {}).get('username')
            if name:
                candidates.add(name)
    # plus submitters from the newest feed
    print(" newest stories...")
    for story in get_newest_stories():
        name = story.get('submitter_user', {}).get('username')
        if name:
            candidates.add(name)
    print(f" {len(candidates)} unique users to analyze")
    results = []
    for username in candidates:
        try:
            result = analyze_lobsters_user(username)
            if result and result['score'] > 0:
                results.append(result)
                db.save_human(result)
                if result['score'] >= 30:
                    print(f"{username}: {result['score']} pts")
        except Exception as e:
            print(f" error on {username}: {e}")
    print(f"scoutd/lobsters: found {len(results)} aligned humans")
    return results

491
connectd/scoutd/lost.py Normal file
View file

@ -0,0 +1,491 @@
"""
scoutd/lost.py - lost builder detection
finds people with potential who haven't found it yet, gave up, or are too beaten down to try.
these aren't failures. they're seeds that never got water.
detection signals:
- github: forked but never modified, starred many but built nothing, learning repos abandoned
- reddit/forums: "i wish i could...", stuck asking beginner questions for years, helping others but never sharing
- social: retoots builders but never posts own work, imposter syndrome language, isolation signals
- profiles: bio says what they WANT to be, "aspiring" for 2+ years, empty portfolios
the goal isn't to recruit them. it's to show them the door exists.
"""
import re
from datetime import datetime, timedelta
from collections import defaultdict
# signal definitions with weights
# each entry: weight (points added to lost_potential_score), category
# ('github' | 'language' | 'behavior' | 'community' | 'profile'), a
# human-readable description, and — for language/profile signals — regex
# patterns matched against lowercased text by analyze_text_for_lost_signals
# (each signal counts at most once per text, regardless of pattern hits).
LOST_SIGNALS = {
    # github signals (detected structurally in analyze_github_for_lost_signals,
    # not via patterns)
    'forked_never_modified': {
        'weight': 15,
        'category': 'github',
        'description': 'forked repos but never pushed changes',
    },
    'starred_many_built_nothing': {
        'weight': 20,
        'category': 'github',
        'description': 'starred 50+ repos but has 0-2 own repos',
    },
    'account_no_repos': {
        'weight': 10,
        'category': 'github',
        'description': 'account exists but no public repos',
    },
    'inactivity_bursts': {
        'weight': 15,
        'category': 'github',
        'description': 'long gaps then brief activity bursts',
    },
    'only_issues_comments': {
        'weight': 12,
        'category': 'github',
        'description': 'only activity is issues/comments on others work',
    },
    'abandoned_learning_repos': {
        'weight': 18,
        'category': 'github',
        'description': 'learning/tutorial repos that were never finished',
    },
    'readme_only_repos': {
        'weight': 10,
        'category': 'github',
        'description': 'repos with just README, no actual code',
    },
    # language signals (from posts/comments/bio)
    'wish_i_could': {
        'weight': 12,
        'category': 'language',
        'description': '"i wish i could..." language',
        'patterns': [
            r'i wish i could',
            r'i wish i knew how',
            r'wish i had the (time|energy|motivation|skills?)',
        ],
    },
    'someday_want': {
        'weight': 10,
        'category': 'language',
        'description': '"someday i want to..." language',
        'patterns': [
            r'someday i (want|hope|plan) to',
            r'one day i\'ll',
            r'eventually i\'ll',
            r'when i have time i\'ll',
        ],
    },
    'stuck_beginner': {
        'weight': 20,
        'category': 'language',
        'description': 'asking beginner questions for years',
        'patterns': [
            r'still (trying|learning|struggling) (to|with)',
            r'can\'t seem to (get|understand|figure)',
            r'been trying for (months|years)',
        ],
    },
    'self_deprecating': {
        'weight': 15,
        'category': 'language',
        'description': 'self-deprecating about abilities',
        'patterns': [
            r'i\'m (not smart|too dumb|not good) enough',
            r'i (suck|am terrible) at',
            r'i\'ll never be able to',
            r'people like me (can\'t|don\'t)',
            r'i\'m just not (a|the) (type|kind)',
        ],
    },
    'no_energy': {
        'weight': 18,
        'category': 'language',
        'description': '"how do people have energy" posts',
        'patterns': [
            r'how do (people|you|they) have (the )?(energy|time|motivation)',
            r'where do (people|you|they) find (the )?(energy|motivation)',
            r'i\'m (always|constantly) (tired|exhausted|drained)',
            r'no (energy|motivation) (left|anymore)',
        ],
    },
    'imposter_syndrome': {
        'weight': 15,
        'category': 'language',
        'description': 'imposter syndrome language',
        'patterns': [
            r'imposter syndrome',
            r'feel like (a |an )?(fraud|fake|imposter)',
            r'don\'t (belong|deserve)',
            r'everyone else (seems|is) (so much )?(better|smarter)',
            r'they\'ll (find out|realize) i\'m',
        ],
    },
    'should_really': {
        'weight': 8,
        'category': 'language',
        'description': '"i should really..." posts',
        'patterns': [
            r'i (should|need to) really',
            r'i keep (meaning|wanting) to',
            r'i\'ve been (meaning|wanting) to',
        ],
    },
    'isolation_signals': {
        'weight': 20,
        'category': 'language',
        'description': 'isolation/loneliness language',
        'patterns': [
            r'no one (understands|gets it|to talk to)',
            r'(feel|feeling) (so )?(alone|isolated|lonely)',
            r'don\'t have anyone (to|who)',
            r'wish i (had|knew) (someone|people)',
        ],
    },
    # behavior signal: detected from help/share ratios, not patterns
    'enthusiasm_for_others': {
        'weight': 10,
        'category': 'behavior',
        'description': 'celebrates others but dismissive of self',
    },
    # subreddit/community signals
    'stuck_communities': {
        'weight': 15,
        'category': 'community',
        'description': 'active in stuck/struggling communities',
        'subreddits': [
            'learnprogramming',
            'findapath',
            'getdisciplined',
            'getmotivated',
            'decidingtobebetter',
            'selfimprovement',
            'adhd',
            'depression',
            'anxiety',
        ],
    },
    # profile signals
    'aspirational_bio': {
        'weight': 12,
        'category': 'profile',
        'description': 'bio says what they WANT to be',
        'patterns': [
            r'aspiring',
            r'future',
            r'want(ing)? to (be|become)',
            r'learning to',
            r'trying to (become|be|learn)',
            r'hoping to',
        ],
    },
    'empty_portfolio': {
        'weight': 15,
        'category': 'profile',
        'description': 'links to empty portfolio sites',
    },
    'long_aspiring': {
        'weight': 20,
        'category': 'profile',
        'description': '"aspiring" in bio for 2+ years',
    },
}
# subreddits that indicate someone might be stuck
# values are per-subreddit weights summed by analyze_reddit_for_lost_signals,
# which flags 'stuck_communities' once the sum reaches 20 (capped at 30)
STUCK_SUBREDDITS = {
    'learnprogramming': 8,
    'findapath': 15,
    'getdisciplined': 12,
    'getmotivated': 10,
    'decidingtobebetter': 12,
    'selfimprovement': 8,
    'adhd': 10,
    'depression': 15,
    'anxiety': 12,
    'socialanxiety': 12,
    'neet': 20,
    'lostgeneration': 15,
    'antiwork': 5,  # could be aligned OR stuck
    'careerguidance': 8,
    'cscareerquestions': 5,
}
def analyze_text_for_lost_signals(text):
    """match text against the LOST_SIGNALS regex patterns.

    returns (signal_names, total_weight). each signal counts at most
    once no matter how many of its patterns match; signals without a
    'patterns' key are skipped (they're detected structurally elsewhere).
    """
    if not text:
        return [], 0
    lowered = text.lower()
    hits = []
    weight = 0
    for name, spec in LOST_SIGNALS.items():
        patterns = spec.get('patterns')
        if not patterns:
            continue
        if any(re.search(pattern, lowered) for pattern in patterns):
            hits.append(name)
            weight += spec['weight']
    return hits, weight
def analyze_github_for_lost_signals(profile):
    """analyze a github profile dict for lost-builder signals.

    looks at ownership-vs-consumption patterns (forks, learning repos,
    readme-only repos), the follow ratio, and bio text.

    args:
        profile: github profile dict with optional keys 'repos' /
            'top_repos', 'public_repos', 'following', 'bio'

    returns:
        (signal_names, total_weight)
    """
    # fix: removed unused locals (`extra`, `forked_modified`, `followers`)
    signals_found = []
    total_weight = 0
    if not profile:
        return signals_found, total_weight
    repos = profile.get('repos', []) or profile.get('top_repos', [])
    public_repos = profile.get('public_repos', len(repos))
    following = profile.get('following', 0)
    # starred many but built nothing
    # (we'd need to fetch starred count separately, approximate with following ratio)
    if public_repos <= 2 and following > 50:
        signals_found.append('starred_many_built_nothing')
        total_weight += LOST_SIGNALS['starred_many_built_nothing']['weight']
    # account but no repos
    if public_repos == 0:
        signals_found.append('account_no_repos')
        total_weight += LOST_SIGNALS['account_no_repos']['weight']
    # check repos for signals
    forked_count = 0
    learning_repos = 0
    readme_only = 0
    learning_keywords = ['learning', 'tutorial', 'course', 'practice', 'exercise',
                         'bootcamp', 'udemy', 'freecodecamp', 'odin', 'codecademy']
    for repo in repos:
        name = (repo.get('name') or '').lower()
        description = (repo.get('description') or '').lower()
        language = repo.get('language')
        is_fork = repo.get('fork', False)
        # forked but never modified
        # (if pushed_at is close to created_at, never modified —
        # simplified: just count forks for now)
        if is_fork:
            forked_count += 1
        # learning/tutorial repos
        if any(kw in name or kw in description for kw in learning_keywords):
            learning_repos += 1
        # readme only (no language detected usually means no code)
        if not language and not is_fork:
            readme_only += 1
    if forked_count >= 5 and public_repos - forked_count <= 2:
        signals_found.append('forked_never_modified')
        total_weight += LOST_SIGNALS['forked_never_modified']['weight']
    if learning_repos >= 3:
        signals_found.append('abandoned_learning_repos')
        total_weight += LOST_SIGNALS['abandoned_learning_repos']['weight']
    if readme_only >= 2:
        signals_found.append('readme_only_repos')
        total_weight += LOST_SIGNALS['readme_only_repos']['weight']
    # check bio for lost language patterns
    bio = profile.get('bio') or ''
    bio_signals, bio_weight = analyze_text_for_lost_signals(bio)
    signals_found.extend(bio_signals)
    total_weight += bio_weight
    # aspirational bio check (dedup: the text pass above may already have found it)
    bio_lower = bio.lower()
    if any(re.search(p, bio_lower) for p in LOST_SIGNALS['aspirational_bio']['patterns']):
        if 'aspirational_bio' not in signals_found:
            signals_found.append('aspirational_bio')
            total_weight += LOST_SIGNALS['aspirational_bio']['weight']
    return signals_found, total_weight
def analyze_reddit_for_lost_signals(activity, subreddits):
    """score reddit activity for lost-builder signals.

    combines stuck-subreddit membership weight, language patterns in
    post/comment text, and a helps-others-but-never-shares heuristic.
    returns (signal_names, total_weight).
    """
    signals = []
    weight = 0
    # stuck-community membership, weighted per subreddit
    stuck_weight = sum(STUCK_SUBREDDITS.get(sub.lower(), 0) for sub in subreddits)
    if stuck_weight >= 20:
        signals.append('stuck_communities')
        weight += min(stuck_weight, 30)  # cap at 30
    # language patterns across all titles and bodies
    chunks = []
    for item in activity:
        if item.get('title'):
            chunks.append(item['title'])
        if item.get('body'):
            chunks.append(item['body'])
    text_signals, text_weight = analyze_text_for_lost_signals(' '.join(chunks))
    signals.extend(text_signals)
    weight += text_weight
    # helping others without ever sharing own work
    help_phrases = ('try this', 'you could', 'have you tried', 'i recommend')
    share_phrases = ('i built', 'i made', 'my project', 'check out my', 'i created')
    helping = 0
    sharing = 0
    for item in activity:
        body = (item.get('body') or '').lower()
        title = (item.get('title') or '').lower()
        if any(p in body for p in help_phrases):
            helping += 1
        if any(p in body + title for p in share_phrases):
            sharing += 1
    if helping >= 5 and sharing == 0:
        signals.append('enthusiasm_for_others')
        weight += LOST_SIGNALS['enthusiasm_for_others']['weight']
    return signals, weight
def analyze_social_for_lost_signals(profile, posts):
    """score a social (mastodon-style) profile for lost-builder signals.

    checks bio text, each post's text, and the boosts-others-but-never-
    shares-own-work pattern. signal names are deduplicated and each
    weight counted once. returns (signal_names, total_weight).
    """
    found = []
    weight = 0
    # bio (mastodon uses 'note', other callers pass 'bio')
    bio_text = profile.get('bio') or profile.get('note') or ''
    bio_hits, bio_weight = analyze_text_for_lost_signals(bio_text)
    found.extend(bio_hits)
    weight += bio_weight
    boosts = 0
    originals = 0
    own_work = 0
    share_phrases = ('i built', 'i made', 'my project', 'working on', 'just shipped')
    for post in posts:
        body = (post.get('content') or '').lower()
        if post.get('reblog') is not None or post.get('repost'):
            boosts += 1
        else:
            originals += 1
        # sharing own work?
        if any(phrase in body for phrase in share_phrases):
            own_work += 1
        # language patterns, deduped against everything found so far
        hits, _ = analyze_text_for_lost_signals(body)
        for sig in hits:
            if sig not in found:
                found.append(sig)
                weight += LOST_SIGNALS[sig]['weight']
    # boosts builders but never posts own work
    if boosts >= 10 and own_work == 0:
        found.append('enthusiasm_for_others')
        weight += LOST_SIGNALS['enthusiasm_for_others']['weight']
    return found, weight
def calculate_lost_potential_score(signals_found):
    """sum the weights of all recognized signals (unknown names ignored)"""
    return sum(
        LOST_SIGNALS[sig]['weight']
        for sig in signals_found
        if sig in LOST_SIGNALS
    )
def classify_user(lost_score, builder_score, values_score):
    """classify a user from their score mix.

    returns one of:
        'builder' - actively shipping (high builder score, low lost score)
        'lost'    - aligned values but stuck (priority outreach)
        'both'    - mixed signals, possibly a recovering builder
        'none'    - neither profile fits
    """
    if builder_score >= 50 and lost_score < 30:
        return 'builder'
    if lost_score >= 40 and values_score >= 20:
        return 'lost'
    if min(lost_score, builder_score) >= 30:
        return 'both'
    return 'none'
def get_signal_descriptions(signals_found):
    """map detected signal names to their human-readable descriptions"""
    return [
        LOST_SIGNALS[sig]['description']
        for sig in signals_found
        if sig in LOST_SIGNALS
    ]
def should_outreach_lost(user_data, config=None):
    """decide whether a lost builder qualifies for outreach.

    checks, in order: minimum lost_potential_score, minimum values
    alignment score, then the per-user cooldown window. even when every
    check passes, outreach still requires a human review.

    args:
        user_data: human dict with 'lost_potential_score', 'score', and
            optionally 'last_lost_outreach' (isoformat timestamp)
        config: optional overrides: min_lost_score (default 40),
            min_values_score (default 20), cooldown_days (default 90)

    returns:
        (allowed: bool, reason: str)
    """
    config = config or {}
    lost_score = user_data.get('lost_potential_score', 0)
    values_score = user_data.get('score', 0)  # regular alignment score
    # minimum thresholds
    min_lost = config.get('min_lost_score', 40)
    min_values = config.get('min_values_score', 20)
    if lost_score < min_lost:
        return False, 'lost_score too low'
    if values_score < min_values:
        return False, 'values_score too low'
    # check cooldown
    last_outreach = user_data.get('last_lost_outreach')
    if last_outreach:
        cooldown_days = config.get('cooldown_days', 90)
        last_dt = datetime.fromisoformat(last_outreach)
        if datetime.now() - last_dt < timedelta(days=cooldown_days):
            # bug fix: message previously hard-coded "90 days" even when
            # cooldown_days was overridden via config
            return False, f'cooldown active ({cooldown_days} days)'
    # always require manual review for lost outreach
    return True, 'requires_review'

290
connectd/scoutd/mastodon.py Normal file
View file

@ -0,0 +1,290 @@
"""
scoutd/mastodon.py - fediverse discovery
scrapes high-signal instances: tech.lgbt, social.coop, fosstodon, hackers.town
also detects lost builders - social isolation, imposter syndrome, struggling folks
"""
import requests
import json
import time
import re
from datetime import datetime
from pathlib import Path
from .signals import analyze_text, ALIGNED_INSTANCES
from .lost import (
    LOST_SIGNALS,
    analyze_social_for_lost_signals,
    analyze_text_for_lost_signals,
    classify_user,
    get_signal_descriptions,
)
# identify ourselves and request json from mastodon endpoints
HEADERS = {'User-Agent': 'connectd/1.0', 'Accept': 'application/json'}
# on-disk response cache used by _api_get (1 hour ttl)
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'mastodon'
# hashtags whose public timelines we sample for candidate accounts
TARGET_HASHTAGS = [
    'selfhosted', 'homelab', 'homeassistant', 'foss', 'opensource',
    'privacy', 'solarpunk', 'cooperative', 'cohousing', 'mutualaid',
    'intentionalcommunity', 'degoogle', 'fediverse', 'indieweb',
]
def _api_get(url, params=None):
    """cached, rate-limited GET returning parsed json (None on error)

    responses are cached on disk for one hour. the cache filename is
    derived from a stable md5 digest of url+params; the previous
    implementation used the builtin hash(), whose value for strings is
    randomized per process (PYTHONHASHSEED), so the cache never survived
    a restart.
    """
    import hashlib  # local import: keeps the stable-key fix self-contained
    cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}"
    digest = hashlib.md5(cache_key.encode('utf-8')).hexdigest()[:16]
    cache_file = CACHE_DIR / f"{digest}.json"
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    if cache_file.exists():
        try:
            data = json.loads(cache_file.read_text())
            if time.time() - data.get('_cached_at', 0) < 3600:
                return data.get('_data')
        except Exception:  # corrupt cache entry: fall through and refetch
            pass
    time.sleep(1)
    try:
        resp = requests.get(url, headers=HEADERS, params=params, timeout=30)
        resp.raise_for_status()
        result = resp.json()
        cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result}))
        return result
    except requests.exceptions.RequestException as e:
        print(f" mastodon api error: {e}")
        return None
def strip_html(text):
    """replace html tags with single spaces; empty string for falsy input"""
    if not text:
        return ''
    return re.sub(r'<[^>]+>', ' ', text)
def get_instance_directory(instance, limit=40):
    """fetch local accounts from an instance's public profile directory"""
    directory_url = f'https://{instance}/api/v1/directory'
    accounts = _api_get(directory_url, {'limit': limit, 'local': 'true'})
    return accounts if accounts else []
def get_hashtag_timeline(instance, hashtag, limit=40):
    """fetch recent public posts for a hashtag on one instance"""
    timeline_url = f'https://{instance}/api/v1/timelines/tag/{hashtag}'
    posts = _api_get(timeline_url, {'limit': limit})
    return posts if posts else []
def get_user_statuses(instance, user_id, limit=30):
    """fetch a user's recent original posts (reblogs excluded)"""
    statuses_url = f'https://{instance}/api/v1/accounts/{user_id}/statuses'
    statuses = _api_get(statuses_url, {'limit': limit, 'exclude_reblogs': 'true'})
    return statuses if statuses else []
def analyze_mastodon_user(account, instance):
    """analyze a mastodon account for alignment and lost-builder signals.

    pulls bio, display name, profile fields and recent posts, scores the
    combined text, then runs lost-builder detection over the same data.

    args:
        account: mastodon account dict (api/v1 shape)
        instance: hostname the account was discovered on

    returns:
        human dict ready for db.save_human
    """
    acct = account.get('acct', '')
    if '@' not in acct:
        acct = f"{acct}@{instance}"
    # collect text
    text_parts = []
    bio = strip_html(account.get('note', ''))
    if bio:
        text_parts.append(bio)
    display_name = account.get('display_name', '')
    if display_name:
        text_parts.append(display_name)
    # profile fields
    for field in account.get('fields', []):
        if field.get('name'):
            text_parts.append(field['name'])
        if field.get('value'):
            text_parts.append(strip_html(field['value']))
    # fetch recent posts ONCE and reuse below
    # (fix: previously fetched twice — here and again for lost analysis)
    user_id = account.get('id')
    statuses = get_user_statuses(instance, user_id) if user_id else []
    for status in statuses:
        content = strip_html(status.get('content', ''))
        if content:
            text_parts.append(content)
    full_text = ' '.join(text_parts)
    text_score, positive_signals, negative_signals = analyze_text(full_text)
    # instance bonus
    instance_bonus = ALIGNED_INSTANCES.get(instance, 0)
    total_score = text_score + instance_bonus
    # pronouns bonus
    if re.search(r'\b(they/them|she/her|he/him|xe/xem)\b', full_text, re.I):
        total_score += 10
        positive_signals.append('pronouns')
    # activity level
    statuses_count = account.get('statuses_count', 0)
    followers = account.get('followers_count', 0)
    if statuses_count > 100:
        total_score += 5
    # === LOST BUILDER DETECTION ===
    profile_for_lost = {
        'bio': bio,
        'note': account.get('note'),
    }
    # NOTE(review): statuses were requested with exclude_reblogs, so the
    # boost-heavy heuristic inside analyze_social_for_lost_signals rarely
    # fires here — confirm whether reblogs should be included
    posts_for_lost = [
        {'content': strip_html(s.get('content', '')), 'reblog': s.get('reblog')}
        for s in statuses
    ]
    lost_signals, lost_weight = analyze_social_for_lost_signals(profile_for_lost, posts_for_lost)
    # merge signals found in the combined text
    # (fix: only add the weight of signals NOT already counted; the old code
    # deduped the names but added the full text weight, double-counting)
    text_lost_signals, _ = analyze_text_for_lost_signals(full_text)
    for sig in text_lost_signals:
        if sig not in lost_signals:
            lost_signals.append(sig)
            lost_weight += LOST_SIGNALS[sig]['weight']
    lost_potential_score = lost_weight
    # classify: builder, lost, both, or none
    # for mastodon, we use statuses_count as a proxy for builder activity
    builder_activity = 10 if statuses_count > 100 else 5 if statuses_count > 50 else 0
    user_type = classify_user(lost_potential_score, builder_activity, total_score)
    # confidence
    confidence = 0.3
    if len(text_parts) > 5:
        confidence += 0.2
    if statuses_count > 50:
        confidence += 0.2
    if len(positive_signals) > 3:
        confidence += 0.2
    confidence = min(confidence, 0.9)
    reasons = []
    if instance in ALIGNED_INSTANCES:
        reasons.append(f"on {instance}")
    if positive_signals:
        reasons.append(f"signals: {', '.join(positive_signals[:5])}")
    if negative_signals:
        reasons.append(f"WARNING: {', '.join(negative_signals)}")
    # add lost reasons if applicable
    if user_type == 'lost' or user_type == 'both':
        lost_descriptions = get_signal_descriptions(lost_signals)
        if lost_descriptions:
            reasons.append(f"LOST SIGNALS: {', '.join(lost_descriptions[:3])}")
    return {
        'platform': 'mastodon',
        'username': acct,
        'url': account.get('url'),
        'name': display_name,
        'bio': bio,
        'instance': instance,
        'score': total_score,
        'confidence': confidence,
        'signals': positive_signals,
        'negative_signals': negative_signals,
        'statuses_count': statuses_count,
        'followers': followers,
        'reasons': reasons,
        'scraped_at': datetime.now().isoformat(),
        # lost builder fields
        'lost_potential_score': lost_potential_score,
        'lost_signals': lost_signals,
        'user_type': user_type,
    }
def scrape_mastodon(db, limit_per_instance=40):
    """full mastodon scrape

    collects candidate accounts from aligned-instance directories and
    hashtag timelines, dedupes them, analyzes each, and saves anyone
    with a positive score via db.save_human. returns the saved dicts.
    """
    print("scoutd/mastodon: starting scrape...")
    all_accounts = []
    # 1. instance directories
    print(" scraping instance directories...")
    for instance in ALIGNED_INSTANCES:
        accounts = get_instance_directory(instance, limit=limit_per_instance)
        for acct in accounts:
            # remember which instance each account came from
            acct['_instance'] = instance
            all_accounts.append(acct)
        print(f" {instance}: {len(accounts)} users")
    # 2. hashtag timelines
    print(" scraping hashtags...")
    seen = set()
    for tag in TARGET_HASHTAGS[:8]:
        for instance in ['fosstodon.org', 'tech.lgbt', 'social.coop']:
            posts = get_hashtag_timeline(instance, tag, limit=20)
            for post in posts:
                account = post.get('account', {})
                acct = account.get('acct', '')
                if '@' not in acct:
                    acct = f"{acct}@{instance}"
                if acct not in seen:
                    seen.add(acct)
                    account['_instance'] = instance
                    all_accounts.append(account)
    # dedupe (first occurrence wins)
    unique = {}
    for acct in all_accounts:
        key = acct.get('acct', acct.get('id', ''))
        if key not in unique:
            unique[key] = acct
    print(f" {len(unique)} unique accounts to analyze")
    # analyze
    results = []
    builders_found = 0
    lost_found = 0
    for acct_data in unique.values():
        instance = acct_data.get('_instance', 'mastodon.social')
        try:
            result = analyze_mastodon_user(acct_data, instance)
            if result and result['score'] > 0:
                results.append(result)
                db.save_human(result)
                user_type = result.get('user_type', 'none')
                if user_type == 'builder':
                    builders_found += 1
                    if result['score'] >= 40:
                        print(f" ★ @{result['username']}: {result['score']} pts")
                elif user_type == 'lost':
                    lost_found += 1
                    lost_score = result.get('lost_potential_score', 0)
                    if lost_score >= 40:
                        print(f" 💔 @{result['username']}: lost_score={lost_score}, values={result['score']} pts")
                elif user_type == 'both':
                    # counts toward both tallies: a recovering builder
                    builders_found += 1
                    lost_found += 1
                    print(f" ⚡ @{result['username']}: recovering builder")
        except Exception as e:
            print(f" error: {e}")
    print(f"scoutd/mastodon: found {len(results)} aligned humans")
    print(f" - {builders_found} active builders")
    print(f" - {lost_found} lost builders (need encouragement)")
    return results

196
connectd/scoutd/matrix.py Normal file
View file

@ -0,0 +1,196 @@
"""
scoutd/matrix.py - matrix room membership discovery
finds users in multiple aligned public rooms
"""
import requests
import json
import time
from datetime import datetime
from pathlib import Path
from collections import defaultdict
from .signals import analyze_text
# identify ourselves and request json from matrix endpoints
HEADERS = {'User-Agent': 'connectd/1.0', 'Accept': 'application/json'}
# on-disk response cache used by _api_get (1 hour ttl)
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'matrix'
# public matrix rooms to check membership
# (membership endpoints usually require auth; see get_room_members)
ALIGNED_ROOMS = [
    '#homeassistant:matrix.org',
    '#esphome:matrix.org',
    '#selfhosted:matrix.org',
    '#privacy:matrix.org',
    '#solarpunk:matrix.org',
    '#cooperative:matrix.org',
    '#foss:matrix.org',
    '#linux:matrix.org',
]
# homeservers to query
HOMESERVERS = [
    'matrix.org',
    'matrix.envs.net',
    'tchncs.de',
]
def _api_get(url, params=None):
    """cached, rate-limited GET returning parsed json (None on error)

    responses are cached on disk for one hour. the cache filename is
    derived from a stable md5 digest of url+params; the previous
    implementation used the builtin hash(), whose value for strings is
    randomized per process (PYTHONHASHSEED), so the cache never survived
    a restart.
    """
    import hashlib  # local import: keeps the stable-key fix self-contained
    cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}"
    digest = hashlib.md5(cache_key.encode('utf-8')).hexdigest()[:16]
    cache_file = CACHE_DIR / f"{digest}.json"
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    if cache_file.exists():
        try:
            data = json.loads(cache_file.read_text())
            if time.time() - data.get('_cached_at', 0) < 3600:
                return data.get('_data')
        except Exception:  # corrupt cache entry: fall through and refetch
            pass
    time.sleep(1)
    try:
        resp = requests.get(url, headers=HEADERS, params=params, timeout=30)
        resp.raise_for_status()
        result = resp.json()
        cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result}))
        return result
    except requests.exceptions.RequestException:
        # matrix apis often fail, don't spam errors
        return None
def get_room_members(homeserver, room_alias):
    """
    get members of a public room
    note: most matrix servers don't expose this publicly
    this is a best-effort scrape

    resolves the alias to a room id, then asks /members; an empty list
    is the common outcome since that endpoint usually requires auth.

    returns:
        list of {'user_id', 'display_name'} dicts for joined members
    """
    # resolve room alias to id first
    try:
        alias_url = f'https://{homeserver}/_matrix/client/r0/directory/room/{room_alias}'
        alias_data = _api_get(alias_url)
        if not alias_data or 'room_id' not in alias_data:
            return []
        room_id = alias_data['room_id']
        # try to get members (usually requires auth)
        members_url = f'https://{homeserver}/_matrix/client/r0/rooms/{room_id}/members'
        members_data = _api_get(members_url)
        if members_data and 'chunk' in members_data:
            members = []
            for event in members_data['chunk']:
                if event.get('type') == 'm.room.member' and event.get('content', {}).get('membership') == 'join':
                    user_id = event.get('state_key')
                    display_name = event.get('content', {}).get('displayname')
                    if user_id:
                        members.append({'user_id': user_id, 'display_name': display_name})
            return members
    except Exception:
        # bug fix: was a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt; best-effort semantics kept
        pass
    return []
def get_public_rooms(homeserver, limit=100):
    """fetch a homeserver's public rooms directory (empty list on failure)"""
    directory_url = f'https://{homeserver}/_matrix/client/r0/publicRooms'
    data = _api_get(directory_url, {'limit': limit})
    if not data:
        return []
    return data.get('chunk', [])
def analyze_matrix_user(user_id, rooms_joined, display_name=None):
    """score a matrix user from aligned-room membership overlap.

    10 points per aligned room plus a multi-room bonus, with a small
    text score from the display name when available.
    """
    n_rooms = len(rooms_joined)
    room_score = n_rooms * 10
    # multi-room bonus
    if n_rooms >= 4:
        room_score += 20
    elif n_rooms >= 2:
        room_score += 10
    # display name text analysis, when we have one
    text_score = 0
    signals = []
    if display_name:
        text_score, signals, _ = analyze_text(display_name)
    confidence = 0.3
    if n_rooms >= 3:
        confidence += 0.3
    if display_name:
        confidence += 0.1
    confidence = min(confidence, 0.8)
    reasons = [f"in {n_rooms} aligned rooms: {', '.join(rooms_joined[:3])}"]
    if signals:
        reasons.append(f"signals: {', '.join(signals[:3])}")
    return {
        'platform': 'matrix',
        'username': user_id,
        'url': f"https://matrix.to/#/{user_id}",
        'name': display_name,
        'score': room_score + text_score,
        'confidence': confidence,
        'signals': signals,
        'rooms': rooms_joined,
        'reasons': reasons,
        'scraped_at': datetime.now().isoformat(),
    }
def scrape_matrix(db):
    """
    matrix scrape - limited due to auth requirements
    best effort on public room data

    scans public-room directories for aligned rooms (log only), then
    tries member listings for a few ALIGNED_ROOMS; users present in 2+
    aligned rooms are scored and saved via db.save_human.
    """
    print("scoutd/matrix: starting scrape (limited - most apis require auth)...")
    user_rooms = defaultdict(list)
    # try to get public room directories
    for homeserver in HOMESERVERS:
        print(f" checking {homeserver} public rooms...")
        rooms = get_public_rooms(homeserver, limit=50)
        for room in rooms:
            room_alias = room.get('canonical_alias', '')
            # check if it matches any aligned room patterns
            # NOTE(review): matches are only printed, never collected — confirm intended
            aligned_keywords = ['homeassistant', 'selfhosted', 'privacy', 'linux', 'foss', 'cooperative']
            if any(kw in room_alias.lower() or kw in room.get('name', '').lower() for kw in aligned_keywords):
                print(f" found aligned room: {room_alias or room.get('name')}")
    # try to get members from aligned rooms (usually fails without auth)
    for room_alias in ALIGNED_ROOMS[:3]:  # limit attempts
        for homeserver in HOMESERVERS[:1]:  # just try matrix.org
            members = get_room_members(homeserver, room_alias)
            if members:
                print(f" {room_alias}: {len(members)} members")
                for member in members:
                    user_rooms[member['user_id']].append(room_alias)
    # filter for multi-room users
    multi_room = {u: rooms for u, rooms in user_rooms.items() if len(rooms) >= 2}
    print(f" {len(multi_room)} users in 2+ aligned rooms")
    # analyze
    results = []
    for user_id, rooms in multi_room.items():
        try:
            result = analyze_matrix_user(user_id, rooms)
            if result and result['score'] > 0:
                results.append(result)
                db.save_human(result)
        except Exception as e:
            print(f" error: {e}")
    print(f"scoutd/matrix: found {len(results)} aligned humans (limited by auth)")
    return results

503
connectd/scoutd/reddit.py Normal file
View file

@ -0,0 +1,503 @@
"""
scoutd/reddit.py - reddit discovery (DISCOVERY ONLY, NOT OUTREACH)
reddit is a SIGNAL SOURCE, not a contact channel.
flow:
1. scrape reddit for users active in target subs
2. extract their reddit profile
3. look for links TO other platforms (github, mastodon, website, etc.)
4. add to scout database with reddit as signal source
5. reach out via their OTHER platforms, never reddit
if reddit user has no external links:
- add to manual_queue with note "reddit-only, needs manual review"
also detects lost builders - stuck in learnprogramming for years, imposter syndrome, etc.
"""
import requests
import json
import time
import re
from datetime import datetime
from pathlib import Path
from collections import defaultdict
from .signals import analyze_text, ALIGNED_SUBREDDITS, NEGATIVE_SUBREDDITS
from .lost import (
analyze_reddit_for_lost_signals,
analyze_text_for_lost_signals,
classify_user,
get_signal_descriptions,
STUCK_SUBREDDITS,
)
# descriptive user agent per reddit API etiquette
HEADERS = {'User-Agent': 'connectd:v1.0 (community discovery)'}
# on-disk cache for API responses (same layout as the other scrapers)
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'reddit'
# patterns for extracting external platform links
# each platform maps to a list of regexes tried in order; the first pattern
# that matches wins (see extract_external_links)
PLATFORM_PATTERNS = {
    'github': [
        r'github\.com/([a-zA-Z0-9_-]+)',
        r'gh:\s*@?([a-zA-Z0-9_-]+)',
    ],
    'mastodon': [
        # generic fediverse handle @user@instance (two capture groups)
        r'@([a-zA-Z0-9_]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
        r'mastodon\.social/@([a-zA-Z0-9_]+)',
        r'fosstodon\.org/@([a-zA-Z0-9_]+)',
        r'hachyderm\.io/@([a-zA-Z0-9_]+)',
        r'tech\.lgbt/@([a-zA-Z0-9_]+)',
    ],
    'twitter': [
        r'twitter\.com/([a-zA-Z0-9_]+)',
        r'x\.com/([a-zA-Z0-9_]+)',
        r'(?:^|\s)@([a-zA-Z0-9_]{1,15})(?:\s|$)',  # bare @handle
    ],
    'bluesky': [
        r'bsky\.app/profile/([a-zA-Z0-9_.-]+)',
        r'([a-zA-Z0-9_-]+)\.bsky\.social',
    ],
    'website': [
        # any http(s) URL; reddit/imgur hosts are filtered out by the caller
        r'https?://([a-zA-Z0-9_-]+\.[a-zA-Z]{2,}[a-zA-Z0-9./_-]*)',
    ],
    'matrix': [
        # @user:homeserver (two capture groups)
        r'@([a-zA-Z0-9_-]+):([a-zA-Z0-9.-]+)',
    ],
}
def _api_get(url, params=None):
    """rate-limited, disk-cached GET against the reddit JSON api.

    responses are cached for one hour. returns the parsed json payload,
    or None on request failure.
    """
    import hashlib  # stdlib; used for stable cache filenames (see below)
    cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}"
    # BUGFIX: str hash() is randomized per process (PYTHONHASHSEED), which made
    # cache filenames unstable across runs so the cache never hit after a
    # restart - use a stable content digest instead.
    digest = hashlib.sha1(cache_key.encode('utf-8')).hexdigest()[:16]
    cache_file = CACHE_DIR / f"{digest}.json"
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    if cache_file.exists():
        try:
            data = json.loads(cache_file.read_text())
            if time.time() - data.get('_cached_at', 0) < 3600:
                return data.get('_data')
        except (OSError, ValueError):
            # unreadable/corrupt cache entry - fall through and refetch
            pass
    time.sleep(2)  # reddit rate limit
    try:
        resp = requests.get(url, headers=HEADERS, params=params, timeout=30)
        resp.raise_for_status()
        result = resp.json()
        cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result}))
        return result
    except requests.exceptions.RequestException as e:
        print(f"  reddit api error: {e}")
        return None
def extract_external_links(text):
    """pull links/handles for other platforms out of free text.

    returns a dict mapping platform name -> handle or url. for each
    platform, only the first matching pattern (and first usable match)
    is kept.
    """
    found = {}
    if not text:
        return found
    for platform, patterns in PLATFORM_PATTERNS.items():
        for pattern in patterns:
            hits = re.findall(pattern, text, re.IGNORECASE)
            if not hits:
                continue
            first = hits[0]
            if platform == 'mastodon' and isinstance(first, tuple):
                # full fediverse handle: @user@instance
                found[platform] = f"@{first[0]}@{first[1]}"
            elif platform == 'matrix' and isinstance(first, tuple):
                found[platform] = f"@{first[0]}:{first[1]}"
            elif platform == 'website':
                # skip links back to reddit/imgur - we want *external* sites
                blocked = ('reddit', 'imgur', 'redd.it', 'i.redd')
                for hit in hits:
                    if not any(b in hit.lower() for b in blocked):
                        found[platform] = f"https://{hit}"
                        break
            else:
                found[platform] = first
            break  # first matching pattern wins for this platform
    return found
def get_user_profile(username):
    """fetch a reddit user's about page (bio, karma, etc); None on failure."""
    data = _api_get(f'https://www.reddit.com/user/{username}/about.json')
    if not data or 'data' not in data:
        return None
    raw = data['data']
    # bio/title live under the user's profile "subreddit" object
    subreddit_info = raw.get('subreddit', {})
    return {
        'username': username,
        'name': raw.get('name'),
        'bio': subreddit_info.get('public_description', ''),
        'title': subreddit_info.get('title', ''),
        'icon': raw.get('icon_img'),
        'created_utc': raw.get('created_utc'),
        'total_karma': raw.get('total_karma', 0),
        'link_karma': raw.get('link_karma', 0),
        'comment_karma': raw.get('comment_karma', 0),
    }
def get_subreddit_users(subreddit, limit=100):
    """collect usernames of recent posters and commenters in a subreddit."""
    skip = ('[deleted]', 'AutoModerator')
    users = set()
    # pull both the newest posts and the newest comments
    for endpoint in ('new', 'comments'):
        data = _api_get(f'https://www.reddit.com/r/{subreddit}/{endpoint}.json',
                        {'limit': limit})
        if not data or 'data' not in data:
            continue
        for child in data['data'].get('children', []):
            author = child['data'].get('author')
            if author and author not in skip:
                users.add(author)
    return users
def get_user_activity(username):
    """fetch a user's recent posts and comments (up to 100 each) as flat dicts."""
    activity = []
    # submitted posts
    data = _api_get(f'https://www.reddit.com/user/{username}/submitted.json',
                    {'limit': 100})
    if data and 'data' in data:
        for child in data['data'].get('children', []):
            item = child['data']
            activity.append({
                'type': 'post',
                'subreddit': item.get('subreddit'),
                'title': item.get('title', ''),
                'body': item.get('selftext', ''),
                'score': item.get('score', 0),
            })
    # comments
    data = _api_get(f'https://www.reddit.com/user/{username}/comments.json',
                    {'limit': 100})
    if data and 'data' in data:
        for child in data['data'].get('children', []):
            item = child['data']
            activity.append({
                'type': 'comment',
                'subreddit': item.get('subreddit'),
                'body': item.get('body', ''),
                'score': item.get('score', 0),
            })
    return activity
def analyze_reddit_user(username):
    """
    analyze a reddit user for alignment and extract external platform links.

    reddit is DISCOVERY ONLY - we find users here but contact them elsewhere.

    returns a result dict (platform/score/signals/external_links/...) or
    None when the user has no visible activity.
    """
    activity = get_user_activity(username)
    if not activity:
        return None
    # get profile for bio
    profile = get_user_profile(username)
    # count subreddit activity and gather all text for signal analysis
    sub_activity = defaultdict(int)
    text_parts = []
    total_karma = 0
    for item in activity:
        # subreddit may be missing/None for some items - guard before lower()
        sub = (item.get('subreddit') or '').lower()
        if sub:
            sub_activity[sub] += 1
        if item.get('title'):
            text_parts.append(item['title'])
        if item.get('body'):
            text_parts.append(item['body'])
        total_karma += item.get('score', 0)
    full_text = ' '.join(text_parts)
    text_score, positive_signals, negative_signals = analyze_text(full_text)
    # EXTRACT EXTERNAL LINKS - this is the key part
    # check profile bio first; bio links take precedence over activity links
    external_links = {}
    if profile:
        bio_text = f"{profile.get('bio', '')} {profile.get('title', '')}"
        external_links.update(extract_external_links(bio_text))
    # also scan posts/comments for links (people often share their github etc)
    activity_links = extract_external_links(full_text)
    for platform, link in activity_links.items():
        external_links.setdefault(platform, link)
    # subreddit scoring.
    # BUGFIX: sub_activity keys are lowercased above, but ALIGNED_SUBREDDITS
    # contains mixed-case keys (e.g. 'PrivacyGuides', 'Seattle'), so those
    # subs never matched - compare case-insensitively.
    aligned_weights = {name.lower(): weight for name, weight in ALIGNED_SUBREDDITS.items()}
    negative_subs = {name.lower() for name in NEGATIVE_SUBREDDITS}
    sub_score = 0
    aligned_subs = []
    for sub, count in sub_activity.items():
        weight = aligned_weights.get(sub, 0)
        if weight > 0:
            sub_score += weight * min(count, 5)  # cap per-sub contribution
            aligned_subs.append(sub)
    # multi-sub bonus
    if len(aligned_subs) >= 5:
        sub_score += 30
    elif len(aligned_subs) >= 3:
        sub_score += 15
    # negative sub penalty (set membership instead of rebuilding a list per sub)
    for sub in sub_activity:
        if sub in negative_subs:
            sub_score -= 50
            negative_signals.append(f"r/{sub}")
    total_score = text_score + sub_score
    # bonus if they have external links (we can actually contact them)
    if external_links.get('github'):
        total_score += 10
        positive_signals.append('has github')
    if external_links.get('mastodon'):
        total_score += 10
        positive_signals.append('has mastodon')
    if external_links.get('website'):
        total_score += 5
        positive_signals.append('has website')
    # === LOST BUILDER DETECTION ===
    # reddit is HIGH SIGNAL for lost builders - stuck in learnprogramming,
    # imposter syndrome posts, "i wish i could" language, etc.
    subreddits_list = list(sub_activity.keys())
    lost_signals, lost_weight = analyze_reddit_for_lost_signals(activity, subreddits_list)
    # also check full text for lost patterns (already done partially in
    # analyze_reddit_for_lost_signals); weights intentionally accumulate
    text_lost_signals, text_lost_weight = analyze_text_for_lost_signals(full_text)
    for sig in text_lost_signals:
        if sig not in lost_signals:
            lost_signals.append(sig)
    lost_weight += text_lost_weight
    lost_potential_score = lost_weight
    # classify: builder, lost, both, or none
    # for reddit, builder_score is based on having external links + high karma
    builder_activity = 0
    if external_links.get('github'):
        builder_activity += 20
    if total_karma > 1000:
        builder_activity += 15
    elif total_karma > 500:
        builder_activity += 10
    user_type = classify_user(lost_potential_score, builder_activity, total_score)
    # confidence grows with evidence volume and contactability, capped at 0.95
    confidence = 0.3
    if len(activity) > 20:
        confidence += 0.2
    if len(aligned_subs) >= 2:
        confidence += 0.2
    if len(text_parts) > 10:
        confidence += 0.2
    # higher confidence if we have contact methods
    if external_links:
        confidence += 0.1
    confidence = min(confidence, 0.95)
    reasons = []
    if aligned_subs:
        reasons.append(f"active in: {', '.join(aligned_subs[:5])}")
    if positive_signals:
        reasons.append(f"signals: {', '.join(positive_signals[:5])}")
    if negative_signals:
        reasons.append(f"WARNING: {', '.join(negative_signals)}")
    if external_links:
        reasons.append(f"external: {', '.join(external_links.keys())}")
    # add lost reasons if applicable
    if user_type in ('lost', 'both'):
        lost_descriptions = get_signal_descriptions(lost_signals)
        if lost_descriptions:
            reasons.append(f"LOST SIGNALS: {', '.join(lost_descriptions[:3])}")
    # reddit-only users (no external links) need manual review for outreach
    reddit_only = len(external_links) == 0
    if reddit_only:
        reasons.append("REDDIT-ONLY: needs manual review for outreach")
    return {
        'platform': 'reddit',
        'username': username,
        'url': f"https://reddit.com/u/{username}",
        'score': total_score,
        'confidence': confidence,
        'signals': positive_signals,
        'negative_signals': negative_signals,
        'subreddits': aligned_subs,
        'activity_count': len(activity),
        'karma': total_karma,
        'reasons': reasons,
        'scraped_at': datetime.now().isoformat(),
        # external platform links for outreach
        'external_links': external_links,
        'reddit_only': reddit_only,
        'extra': {
            'github': external_links.get('github'),
            'mastodon': external_links.get('mastodon'),
            'twitter': external_links.get('twitter'),
            'bluesky': external_links.get('bluesky'),
            'website': external_links.get('website'),
            'matrix': external_links.get('matrix'),
            'reddit_karma': total_karma,
            'reddit_activity': len(activity),
        },
        # lost builder fields
        'lost_potential_score': lost_potential_score,
        'lost_signals': lost_signals,
        'user_type': user_type,
    }
def scrape_reddit(db, limit_per_sub=50):
    """
    full reddit scrape - DISCOVERY ONLY
    finds aligned users, extracts external links for outreach.
    reddit-only users go to manual queue.

    args:
        db: database handle exposing save_human(dict)
        limit_per_sub: max listing size requested per subreddit endpoint
    returns:
        list of result dicts for users that scored > 0
    """
    print("scoutd/reddit: starting scrape (discovery only, not outreach)...")
    # find users in multiple aligned subs: username -> set of subs seen in
    user_subs = defaultdict(set)
    # aligned subs - active builders
    priority_subs = ['intentionalcommunity', 'cohousing', 'selfhosted',
                     'homeassistant', 'solarpunk', 'cooperatives', 'privacy',
                     'localllama', 'homelab', 'degoogle', 'pihole', 'unraid']
    # lost builder subs - people who need encouragement
    # these folks might be stuck, but they have aligned interests
    lost_subs = ['learnprogramming', 'findapath', 'getdisciplined',
                 'careerguidance', 'cscareerquestions', 'decidingtobebetter']
    # scrape both - we want to find lost builders with aligned interests
    all_subs = priority_subs + lost_subs
    for sub in all_subs:
        print(f"  scraping r/{sub}...")
        users = get_subreddit_users(sub, limit=limit_per_sub)
        for user in users:
            user_subs[user].add(sub)
        print(f"    found {len(users)} users")
    # filter for multi-sub users (2+ target subs = stronger signal)
    multi_sub = {u: subs for u, subs in user_subs.items() if len(subs) >= 2}
    print(f"  {len(multi_sub)} users in 2+ aligned subs")
    # analyze each candidate; counters feed the summary printed at the end
    results = []
    reddit_only_count = 0
    external_link_count = 0
    builders_found = 0
    lost_found = 0
    for username in multi_sub:
        try:
            result = analyze_reddit_user(username)
            if result and result['score'] > 0:
                results.append(result)
                db.save_human(result)
                user_type = result.get('user_type', 'none')
                # track lost builders - reddit is high signal for these
                if user_type == 'lost':
                    lost_found += 1
                    lost_score = result.get('lost_potential_score', 0)
                    if lost_score >= 40:
                        print(f"  💔 u/{username}: lost_score={lost_score}, values={result['score']} pts")
                    # lost builders also go to manual queue if reddit-only
                    if result.get('reddit_only'):
                        _add_to_manual_queue(result)
                elif user_type == 'builder':
                    builders_found += 1
                elif user_type == 'both':
                    builders_found += 1
                    lost_found += 1
                    print(f"  ⚡ u/{username}: recovering builder")
                # track external links (reachable vs reddit-only)
                if result.get('reddit_only'):
                    reddit_only_count += 1
                    # add high-value users to manual queue for review
                    if result['score'] >= 50 and user_type != 'lost':  # lost already added above
                        _add_to_manual_queue(result)
                        print(f"  📋 u/{username}: {result['score']} pts (reddit-only → manual queue)")
                else:
                    external_link_count += 1
                    if result['score'] >= 50 and user_type == 'builder':
                        links = list(result.get('external_links', {}).keys())
                        print(f"  ★ u/{username}: {result['score']} pts → {', '.join(links)}")
        except Exception as e:
            print(f"  error on {username}: {e}")
    print(f"scoutd/reddit: found {len(results)} aligned humans")
    print(f"  - {builders_found} active builders")
    print(f"  - {lost_found} lost builders (need encouragement)")
    print(f"  - {external_link_count} with external links (reachable)")
    print(f"  - {reddit_only_count} reddit-only (manual queue)")
    return results
def _add_to_manual_queue(result):
    """add a reddit-only user to the manual review queue (idempotent).

    the queue is a json list on disk; entries are deduped by
    (platform, username). json and Path are already imported at module
    level, so the old shadowing local imports were removed.
    """
    queue_file = Path(__file__).parent.parent / 'data' / 'manual_queue.json'
    queue_file.parent.mkdir(parents=True, exist_ok=True)
    queue = []
    if queue_file.exists():
        try:
            queue = json.loads(queue_file.read_text())
        except (OSError, ValueError):
            # unreadable/corrupt queue file: start fresh rather than crash
            queue = []
    # skip if this user is already queued
    for entry in queue:
        if entry.get('username') == result['username'] and entry.get('platform') == 'reddit':
            return
    queue.append({
        'platform': 'reddit',
        'username': result['username'],
        'url': result['url'],
        'score': result['score'],
        'subreddits': result.get('subreddits', []),
        'signals': result.get('signals', []),
        'reasons': result.get('reasons', []),
        'note': 'reddit-only user - no external links found. DM manually if promising.',
        'queued_at': datetime.now().isoformat(),
        'status': 'pending',
    })
    queue_file.write_text(json.dumps(queue, indent=2))

158
connectd/scoutd/signals.py Normal file
View file

@ -0,0 +1,158 @@
"""
shared signal patterns for all scrapers
"""
import re
# positive signals - what we're looking for
# each entry: (regex, signal name, score weight)
POSITIVE_PATTERNS = [
    # values
    (r'\b(solarpunk|cyberpunk)\b', 'solarpunk', 10),
    (r'\b(anarchis[tm]|mutual.?aid)\b', 'mutual_aid', 10),
    (r'\b(cooperative|collective|worker.?owned?|coop|co.?op)\b', 'cooperative', 15),
    (r'\b(community|commons)\b', 'community', 5),
    (r'\b(intentional.?community|cohousing|commune)\b', 'intentional_community', 20),
    # queer-friendly
    (r'\b(queer|lgbtq?|trans|nonbinary|enby|genderqueer)\b', 'queer', 15),
    (r'\b(they/them|she/her|he/him|xe/xem|any.?pronouns)\b', 'pronouns', 10),
    (r'\bblm\b', 'blm', 5),
    (r'\b(acab|1312)\b', 'acab', 5),
    # tech values
    (r'\b(privacy|surveillance|anti.?surveillance)\b', 'privacy', 10),
    (r'\b(self.?host(?:ed|ing)?|homelab|home.?server)\b', 'selfhosted', 15),
    (r'\b(local.?first|offline.?first)\b', 'local_first', 15),
    (r'\b(decentralized?|federation|federated|fediverse)\b', 'decentralized', 10),
    (r'\b(foss|libre|open.?source|copyleft)\b', 'foss', 10),
    (r'\b(home.?assistant|home.?automation)\b', 'home_automation', 10),
    (r'\b(mesh|p2p|peer.?to.?peer)\b', 'p2p', 10),
    (r'\b(matrix|xmpp|irc)\b', 'federated_chat', 5),
    (r'\b(degoogle|de.?google)\b', 'degoogle', 10),
    # location/availability
    (r'\b(seattle|portland|pnw|cascadia|pacific.?northwest)\b', 'pnw', 20),
    (r'\b(washington|oregon)\b', 'pnw_state', 10),
    (r'\b(remote|anywhere|relocate|looking.?to.?move)\b', 'remote', 10),
    # anti-capitalism
    (r'\b(anti.?capitalis[tm]|post.?capitalis[tm]|degrowth)\b', 'anticapitalist', 10),
    # neurodivergent (often overlaps with our values)
    (r'\b(neurodivergent|adhd|autistic|autism)\b', 'neurodivergent', 5),
    # technical skills (bonus for builders)
    (r'\b(rust|go|python|typescript)\b', 'modern_lang', 3),
    (r'\b(linux|bsd|nixos)\b', 'unix', 3),
    (r'\b(kubernetes|docker|podman)\b', 'containers', 3),
]
# negative signals - red flags (weights are negative)
NEGATIVE_PATTERNS = [
    (r'\b(qanon|maga|trump|wwg1wga)\b', 'maga', -50),
    (r'\b(covid.?hoax|plandemic|5g.?conspiracy)\b', 'conspiracy', -50),
    (r'\b(nwo|illuminati|deep.?state)\b', 'conspiracy', -30),
    (r'\b(anti.?vax|antivax)\b', 'antivax', -30),
    (r'\b(sovereign.?citizen)\b', 'sovcit', -40),
    (r'\b(crypto.?bro|web3|nft|blockchain|bitcoin|ethereum)\b', 'crypto', -15),
    (r'\b(conservative|republican)\b', 'conservative', -20),
    (r'\b(free.?speech.?absolutist)\b', 'freeze_peach', -20),
]
# target topics for repo discovery
TARGET_TOPICS = [
    'local-first', 'self-hosted', 'privacy', 'mesh-network',
    'cooperative', 'solarpunk', 'decentralized', 'p2p',
    'fediverse', 'activitypub', 'matrix-org', 'homeassistant',
    'esphome', 'open-source-hardware', 'right-to-repair',
    'mutual-aid', 'commons', 'degoogle', 'privacy-tools',
]
# ecosystem repos - high signal contributors
ECOSYSTEM_REPOS = [
    'home-assistant/core',
    'esphome/esphome',
    'matrix-org/synapse',
    'LemmyNet/lemmy',
    'mastodon/mastodon',
    'owncast/owncast',
    'nextcloud/server',
    'immich-app/immich',
    'jellyfin/jellyfin',
    'navidrome/navidrome',
    'paperless-ngx/paperless-ngx',
    'actualbudget/actual',
    'firefly-iii/firefly-iii',
    'logseq/logseq',
    'AppFlowy-IO/AppFlowy',
    'siyuan-note/siyuan',
    'anytype/anytype-ts',
    'calcom/cal.com',
    'plausible/analytics',
    'umami-software/umami',
]
# aligned subreddits: name -> score weight.
# BUGFIX: consumers (e.g. scoutd/reddit.py) lowercase subreddit names before
# looking them up here, so mixed-case keys ('PrivacyGuides', 'Seattle', ...)
# could never match - all keys are now lowercase.
ALIGNED_SUBREDDITS = {
    'intentionalcommunity': 25,
    'cohousing': 25,
    'cooperatives': 20,
    'solarpunk': 20,
    'selfhosted': 15,
    'homeassistant': 15,
    'homelab': 10,
    'privacy': 15,
    'privacyguides': 15,
    'degoogle': 15,
    'anticonsumption': 10,
    'frugal': 5,
    'simpleliving': 5,
    'seattle': 10,
    'portland': 10,
    'cascadia': 15,
    'linux': 5,
    'opensource': 10,
    'foss': 10,
}
# negative subreddits (consumers compare case-insensitively)
NEGATIVE_SUBREDDITS = [
    'conspiracy', 'conservative', 'walkaway', 'louderwithcrowder',
    'JordanPeterson', 'TimPool', 'NoNewNormal', 'LockdownSkepticism',
]
# high-signal mastodon instances: hostname -> score weight
ALIGNED_INSTANCES = {
    'tech.lgbt': 20,
    'social.coop': 25,
    'fosstodon.org': 10,
    'hackers.town': 15,
    'hachyderm.io': 10,
    'infosec.exchange': 5,
}
def analyze_text(text):
    """
    score free text against the shared positive/negative signal patterns.

    returns (score, positive_signal_names, negative_signal_names); the
    name lists are deduplicated (order not guaranteed), while the score
    accumulates the weight of every matching pattern.
    """
    if not text:
        return 0, [], []
    text = text.lower()
    score = 0
    found = set()
    flagged = set()
    for pattern, name, points in POSITIVE_PATTERNS:
        if re.search(pattern, text, re.IGNORECASE):
            score += points
            found.add(name)
    for pattern, name, points in NEGATIVE_PATTERNS:
        if re.search(pattern, text, re.IGNORECASE):
            score += points  # weights are already negative
            flagged.add(name)
    return score, list(found), list(flagged)

255
connectd/scoutd/twitter.py Normal file
View file

@ -0,0 +1,255 @@
"""
scoutd/twitter.py - twitter/x discovery via nitter instances
scrapes nitter (twitter frontend) to find users posting about aligned topics
without needing twitter API access
nitter instances rotate to avoid rate limits
"""
import requests
import json
import time
import re
from datetime import datetime
from pathlib import Path
from bs4 import BeautifulSoup
from .signals import analyze_text
# browser-like UA - nitter instances often reject obvious bot agents
HEADERS = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0'}
# on-disk cache for scraped pages (same layout as the other scrapers)
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'twitter'
# nitter instances (rotate through these)
# NOTE(review): public nitter instances come and go - verify these still resolve
NITTER_INSTANCES = [
    'nitter.privacydev.net',
    'nitter.poast.org',
    'nitter.woodland.cafe',
    'nitter.esmailelbob.xyz',
]
# hashtags to search
ALIGNED_HASHTAGS = [
    'selfhosted', 'homelab', 'homeassistant', 'foss', 'opensource',
    'privacy', 'solarpunk', 'cooperative', 'mutualaid', 'localfirst',
    'indieweb', 'smallweb', 'permacomputing', 'degrowth', 'techworkers',
]
# index into NITTER_INSTANCES; advanced by rotate_instance() on failures
_current_instance_idx = 0
def get_nitter_instance():
    """return the nitter instance currently selected by the rotation index."""
    global _current_instance_idx
    idx = _current_instance_idx % len(NITTER_INSTANCES)
    return NITTER_INSTANCES[idx]
def rotate_instance():
    """advance the rotation so the next call uses a different nitter instance."""
    global _current_instance_idx
    _current_instance_idx = _current_instance_idx + 1
def _scrape_page(url, retries=3):
    """scrape a nitter page, rotating instances on rate limits/errors.

    `url` contains an `{instance}` placeholder. successful responses are
    cached on disk for an hour. returns html text, or None on failure.
    """
    import hashlib  # stdlib; used for stable cache filenames (see below)
    for attempt in range(retries):
        instance = get_nitter_instance()
        full_url = url.replace('{instance}', instance)
        # BUGFIX: str hash() is randomized per process (PYTHONHASHSEED), which
        # made cache filenames unstable across runs so the cache never hit
        # after a restart - use a stable content digest instead.
        digest = hashlib.sha1(full_url.encode('utf-8')).hexdigest()[:16]
        cache_file = CACHE_DIR / f"{digest}.json"
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        if cache_file.exists():
            try:
                data = json.loads(cache_file.read_text())
                if time.time() - data.get('_cached_at', 0) < 3600:
                    return data.get('_html')
            except (OSError, ValueError):
                # unreadable/corrupt cache entry - fall through and refetch
                pass
        time.sleep(2)  # rate limit
        try:
            resp = requests.get(full_url, headers=HEADERS, timeout=30)
            if resp.status_code == 200:
                cache_file.write_text(json.dumps({
                    '_cached_at': time.time(),
                    '_html': resp.text
                }))
                return resp.text
            elif resp.status_code in (429, 503):
                print(f"  nitter {instance} rate limited, rotating...")
                rotate_instance()
            else:
                print(f"  nitter error: {resp.status_code}")
                return None
        except Exception as e:
            # network-level failure: try the next instance
            print(f"  nitter {instance} error: {e}")
            rotate_instance()
    return None
def search_hashtag(hashtag):
    """search nitter for recent tweets carrying `hashtag`; list of dicts."""
    html = _scrape_page(f"https://{{instance}}/search?q=%23{hashtag}&f=tweets")
    if not html:
        return []
    soup = BeautifulSoup(html, 'html.parser')
    found = []
    for item in soup.select('.timeline-item'):
        try:
            user_el = item.select_one('.username')
            body_el = item.select_one('.tweet-content')
            name_el = item.select_one('.fullname')
            if not (user_el and body_el):
                continue
            handle = user_el.text.strip().lstrip('@')
            found.append({
                'username': handle,
                'name': name_el.text.strip() if name_el else handle,
                'content': body_el.text.strip(),
            })
        except Exception:
            # malformed timeline entry - skip it
            continue
    return found
def get_user_profile(username):
    """fetch a twitter profile (bio, location, website, recent tweets) via nitter."""
    html = _scrape_page(f"https://{{instance}}/{username}")
    if not html:
        return None
    soup = BeautifulSoup(html, 'html.parser')
    try:
        def _text_of(selector):
            el = soup.select_one(selector)
            return el.text.strip() if el else ''

        website_el = soup.select_one('.profile-website a')
        # first 10 visible tweets give extra signal text
        recent = []
        for item in soup.select('.timeline-item')[:10]:
            body_el = item.select_one('.tweet-content')
            if body_el:
                recent.append(body_el.text.strip())
        return {
            'username': username,
            'bio': _text_of('.profile-bio'),
            'location': _text_of('.profile-location'),
            'website': website_el.get('href') if website_el else '',
            'recent_tweets': recent,
        }
    except Exception as e:
        print(f"  error parsing {username}: {e}")
        return None
def analyze_twitter_user(username, profile=None):
    """score a twitter user for alignment; returns a result dict or None.

    if `profile` is not supplied it is fetched via nitter first.
    """
    profile = profile or get_user_profile(username)
    if not profile:
        return None
    # combine bio + recent tweets into one corpus for signal analysis
    corpus = ' '.join([profile.get('bio', '')] + list(profile.get('recent_tweets', [])))
    text_score, positive_signals, negative_signals = analyze_text(corpus)
    # twitter is noisy: start low and cap confidence well below 1.0
    confidence = 0.25
    if len(positive_signals) >= 3:
        confidence += 0.2
    if profile.get('website'):
        confidence += 0.1
    if len(profile.get('recent_tweets', [])) >= 5:
        confidence += 0.1
    confidence = min(confidence, 0.7)  # cap lower for twitter
    reasons = []
    if positive_signals:
        reasons.append(f"signals: {', '.join(positive_signals[:5])}")
    if negative_signals:
        reasons.append(f"WARNING: {', '.join(negative_signals)}")
    return {
        'platform': 'twitter',
        'username': username,
        'url': f"https://twitter.com/{username}",
        'name': profile.get('name', username),
        'bio': profile.get('bio'),
        'location': profile.get('location'),
        'score': text_score,
        'confidence': confidence,
        'signals': positive_signals,
        'negative_signals': negative_signals,
        'reasons': reasons,
        'contact': {
            'twitter': username,
            'website': profile.get('website'),
        },
        'scraped_at': datetime.now().isoformat(),
    }
def scrape_twitter(db, limit_per_hashtag=50):
    """full twitter scrape via nitter.

    searches aligned hashtags, keeps users seen under 2+ *distinct*
    hashtags, analyzes them, and saves aligned results to the db.
    """
    print("scoutd/twitter: starting scrape via nitter...")
    all_users = {}
    for hashtag in ALIGNED_HASHTAGS:
        print(f"  #{hashtag}...")
        tweets = search_hashtag(hashtag)
        for tweet in tweets[:limit_per_hashtag]:
            username = tweet.get('username')
            if not username:
                continue
            entry = all_users.setdefault(username, {
                'username': username,
                'name': tweet.get('name'),
                'hashtags': [],
            })
            # BUGFIX: a user tweeting the same hashtag repeatedly used to get
            # it appended once per tweet, wrongly counting as "2+ hashtags" -
            # only record each hashtag once per user
            if hashtag not in entry['hashtags']:
                entry['hashtags'].append(hashtag)
        print(f"    found {len(tweets)} tweets")
    # prioritize users active under multiple distinct aligned hashtags
    multi_hashtag = {u: d for u, d in all_users.items() if len(d.get('hashtags', [])) >= 2}
    print(f"  {len(multi_hashtag)} users in 2+ aligned hashtags")
    # analyze and persist
    results = []
    for username, data in list(multi_hashtag.items())[:100]:  # limit to prevent rate limits
        try:
            result = analyze_twitter_user(username)
            if result and result['score'] > 0:
                results.append(result)
                db.save_human(result)
                if result['score'] >= 30:
                    print(f"  ★ @{username}: {result['score']} pts")
        except Exception as e:
            print(f"  error on {username}: {e}")
    print(f"scoutd/twitter: found {len(results)} aligned humans")
    return results

143
connectd/setup_user.py Normal file
View file

@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""
setup priority user - add yourself to get matches
usage:
python setup_user.py # interactive setup
python setup_user.py --show # show your profile
python setup_user.py --matches # show your matches
"""
import argparse
import json
from db import Database
from db.users import (init_users_table, add_priority_user, get_priority_users,
get_priority_user_matches)
def interactive_setup(db):
    """interactively collect a priority-user profile and store it in the db."""
    def ask(prompt):
        return input(prompt).strip()

    def ask_optional(prompt):
        # optional answers become None when left blank
        return ask(prompt) or None

    banner = "=" * 60
    print(banner)
    print("connectd priority user setup")
    print(banner)
    print("\nlink your profiles so connectd can find matches for YOU\n")
    user_data = {
        'name': ask("name: "),
        'email': ask("email (for notifications): "),
        'github': ask_optional("github username (optional): "),
        'reddit': ask_optional("reddit username (optional): "),
        'mastodon': ask_optional("mastodon handle e.g. user@instance (optional): "),
        'lobsters': ask_optional("lobste.rs username (optional): "),
        'matrix': ask_optional("matrix id e.g. @user:matrix.org (optional): "),
        'location': ask_optional("location (e.g. seattle, remote): "),
    }
    print("\nwhat are you interested in? (comma separated)")
    print("examples: self-hosting, cooperatives, solarpunk, home automation")
    raw_interests = ask("interests: ")
    user_data['interests'] = [i.strip() for i in raw_interests.split(',')] if raw_interests else []
    print("\nwhat kind of people are you looking to connect with?")
    user_data['looking_for'] = ask_optional("looking for: ")
    user_id = add_priority_user(db.conn, user_data)
    print(f"\n✓ added as priority user #{user_id}")
    print("connectd will now find matches for you")
def show_profile(db):
    """print each configured priority user's profile to stdout."""
    users = get_priority_users(db.conn)
    if not users:
        print("no priority users configured")
        print("run: python setup_user.py")
        return
    divider = "=" * 60
    # optional single-value fields whose label equals the column name
    simple_fields = ['github', 'reddit', 'mastodon', 'lobsters', 'matrix', 'location']
    for user in users:
        print(divider)
        print(f"priority user #{user['id']}: {user['name']}")
        print(divider)
        print(f"email: {user['email']}")
        for field in simple_fields:
            if user[field]:
                print(f"{field}: {user[field]}")
        if user['interests']:
            interests = user['interests']
            # interests may be stored as a json string in the db
            if isinstance(interests, str):
                interests = json.loads(interests)
            print(f"interests: {', '.join(interests)}")
        if user['looking_for']:
            print(f"looking for: {user['looking_for']}")
def show_matches(db):
    """print the top matches for each priority user."""
    users = get_priority_users(db.conn)
    if not users:
        print("no priority users configured")
        return
    for user in users:
        print(f"\n=== matches for {user['name']} ===\n")
        matches = get_priority_user_matches(db.conn, user['id'], limit=20)
        if not matches:
            print("no matches yet - run the daemon to discover people")
            continue
        for idx, match in enumerate(matches, 1):
            print(f"{idx}. {match['username']} ({match['platform']})")
            print(f"   score: {match['overlap_score']:.0f}")
            print(f"   url: {match['url']}")
            # overlap_reasons may be stored as a json string in the db
            why = match.get('overlap_reasons', '[]')
            if isinstance(why, str):
                why = json.loads(why)
            if why:
                print(f"   why: {why[0] if why else ''}")
            print()
def main():
    """cli entry point: setup, or inspect profile/matches via flags."""
    parser = argparse.ArgumentParser(description='setup priority user')
    parser.add_argument('--show', action='store_true', help='show your profile')
    parser.add_argument('--matches', action='store_true', help='show your matches')
    args = parser.parse_args()
    db = Database()
    # BUGFIX: close the db even when a subcommand raises
    try:
        init_users_table(db.conn)
        if args.show:
            show_profile(db)
        elif args.matches:
            show_matches(db)
        else:
            interactive_setup(db)
    finally:
        db.close()


if __name__ == '__main__':
    main()

5
repository.json Normal file
View file

@ -0,0 +1,5 @@
{
"name": "connectd add-ons",
"url": "https://github.com/sudoxnym/ha-addons",
"maintainer": "sudoxnym"
}