initial release: connectd add-on v1.1.0

This commit is contained in:
Your Name 2025-12-15 11:06:51 -06:00
commit 3c02ee85c2
45 changed files with 10301 additions and 0 deletions

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
*.pyc
__pycache__/
*.db
.DS_Store

16
README.md Normal file
View file

@ -0,0 +1,16 @@
# connectd add-ons for home assistant
## installation
1. go to **settings** → **add-ons** → **add-on store**
2. click the three dots in the top right → **repositories**
3. add: `https://github.com/sudoxnym/ha-addons`
4. find **connectd** in the store and install
## add-ons
### connectd
find isolated builders with aligned values. auto-discovers humans on github, mastodon, lemmy, discord, and more.
[![Open your Home Assistant instance and show the add add-on repository dialog with a specific repository URL pre-filled.](https://my.home-assistant.io/badges/supervisor_add_addon_repository.svg)](https://my.home-assistant.io/redirect/supervisor_add_addon_repository/?repository_url=https%3A%2F%2Fgithub.com%2Fsudoxnym%2Fha-addons)

28
connectd/Dockerfile Normal file
View file

@ -0,0 +1,28 @@
# base image is injected per-architecture by the HA add-on builder (see build.yaml)
ARG BUILD_FROM
FROM ${BUILD_FROM}
# install python deps from alpine packages (requests/bs4 come prebuilt)
RUN apk add --no-cache python3 py3-pip py3-requests py3-beautifulsoup4
# create app directory
WORKDIR /app
# copy requirements and install
COPY requirements.txt .
# --break-system-packages: alpine's python is PEP 668 "externally managed"
RUN pip3 install --no-cache-dir --break-system-packages -r requirements.txt
# copy app code
COPY api.py config.py daemon.py cli.py setup_user.py ./
COPY db/ db/
COPY scoutd/ scoutd/
COPY matchd/ matchd/
COPY introd/ introd/
# create data directory (db + cache live under the supervisor-mounted /data)
RUN mkdir -p /data/db /data/cache
# copy run script
COPY run.sh /
RUN chmod a+x /run.sh
CMD ["/run.sh"]

52
connectd/README.md Normal file
View file

@ -0,0 +1,52 @@
# connectd add-on for home assistant
find isolated builders with aligned values. auto-discovers humans on github, mastodon, lemmy, discord, and more.
## installation
1. add this repository to your home assistant add-on store
2. install the connectd add-on
3. configure your HOST_USER (github username) in the add-on settings
4. start the add-on
## configuration
### required
- **host_user**: your github username (connectd will auto-discover your profile)
### optional host info
- **host_name**: your display name
- **host_email**: your email
- **host_mastodon**: mastodon handle (@user@instance)
- **host_reddit**: reddit username
- **host_lemmy**: lemmy handle (@user@instance)
- **host_lobsters**: lobsters username
- **host_matrix**: matrix handle (@user:server)
- **host_discord**: discord user id
- **host_bluesky**: bluesky handle (handle.bsky.social)
- **host_location**: your location
- **host_interests**: comma-separated interests
- **host_looking_for**: what you're looking for
### api credentials
- **github_token**: for higher rate limits
- **groq_api_key**: for LLM-drafted intros
- **mastodon_token**: for DM delivery
- **discord_bot_token**: for discord discovery/delivery
## hacs integration
after starting the add-on, install the connectd integration via HACS:
1. add custom repository: `https://github.com/sudoxnym/connectd`
2. install connectd integration
3. add integration in HA settings
4. configure with host: `localhost`, port: `8099`
## sensors
- total humans, high score humans, active builders
- platform counts (github, mastodon, reddit, lemmy, discord, lobsters)
- priority matches, top humans
- countdown timers (next scout, match, intro)
- your personal score and profile

268
connectd/api.py Normal file
View file

@ -0,0 +1,268 @@
#!/usr/bin/env python3
"""
connectd/api.py - REST API for stats and control
exposes daemon stats for home assistant integration.
runs on port 8099 by default.
"""
import os
import json
import threading
from http.server import HTTPServer, BaseHTTPRequestHandler
from datetime import datetime
from db import Database
from db.users import get_priority_users, get_priority_user_matches, get_priority_user
API_PORT = int(os.environ.get('CONNECTD_API_PORT', 8099))
# module-level daemon state, shared between the daemon thread and API handlers
_daemon_state = {
    'running': False,
    'dry_run': False,
    'last_scout': None,
    'last_match': None,
    'last_intro': None,
    'last_lost': None,
    'intros_today': 0,
    'lost_intros_today': 0,
    'started_at': None,
}


def update_daemon_state(state_dict):
    """Merge state_dict into the shared daemon state (called by the daemon)."""
    _daemon_state.update(state_dict)


def get_daemon_state():
    """Return a shallow copy of the current daemon state."""
    return dict(_daemon_state)
class APIHandler(BaseHTTPRequestHandler):
    """REST API handler exposing daemon state and database stats as JSON.

    endpoints: /api/stats, /api/health, /api/state, /api/priority_matches,
    /api/top_humans, /api/user
    """

    def log_message(self, format, *args):
        """suppress default per-request stderr logging"""
        pass

    @staticmethod
    def _parse_json_field(value, default):
        """decode a JSON-encoded db column; non-string values pass through.

        empty strings decode to `default` (matches previous inline behavior).
        """
        if isinstance(value, str):
            return json.loads(value) if value else default
        return value

    def _send_json(self, data, status=200):
        """serialize `data` and send it as a JSON response"""
        self.send_response(status)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Access-Control-Allow-Origin', '*')
        self.end_headers()
        self.wfile.write(json.dumps(data).encode())

    def do_GET(self):
        """route GET requests by exact path"""
        routes = {
            '/api/stats': self._handle_stats,
            '/api/health': self._handle_health,
            '/api/state': self._handle_state,
            '/api/priority_matches': self._handle_priority_matches,
            '/api/top_humans': self._handle_top_humans,
            '/api/user': self._handle_user,
        }
        handler = routes.get(self.path)
        if handler is not None:
            handler()
        else:
            self._send_json({'error': 'not found'}, 404)

    def _handle_stats(self):
        """return database statistics"""
        try:
            db = Database()
            try:
                stats = db.stats()
            finally:
                # close even if stats() raises (previously leaked on error)
                db.close()
            self._send_json(stats)
        except Exception as e:
            self._send_json({'error': str(e)}, 500)

    def _handle_health(self):
        """return daemon health status (running flag + uptime)"""
        state = get_daemon_state()
        health = {
            'status': 'running' if state['running'] else 'stopped',
            'dry_run': state['dry_run'],
            'uptime_seconds': None,
        }
        if state['started_at']:
            uptime = datetime.now() - datetime.fromisoformat(state['started_at'])
            health['uptime_seconds'] = int(uptime.total_seconds())
        self._send_json(health)

    def _handle_state(self):
        """return full daemon state"""
        state = get_daemon_state()
        # convert datetimes to ISO strings for JSON serialization
        for key in ['last_scout', 'last_match', 'last_intro', 'last_lost', 'started_at']:
            if state[key] and isinstance(state[key], datetime):
                state[key] = state[key].isoformat()
        self._send_json(state)

    def _handle_priority_matches(self):
        """return priority matches for the HA sensor"""
        try:
            db = Database()
            try:
                users = get_priority_users(db.conn)
                if not users:
                    self._send_json({
                        'count': 0,
                        'new_count': 0,
                        'top_matches': [],
                    })
                    return
                # get matches for first priority user (the host)
                user = users[0]
                matches = get_priority_user_matches(db.conn, user['id'], limit=10)
            finally:
                db.close()
            new_count = sum(1 for m in matches if m.get('status') == 'new')
            top_matches = []
            for m in matches[:5]:
                overlap_reasons = self._parse_json_field(m.get('overlap_reasons', '[]'), [])
                top_matches.append({
                    'username': m.get('username'),
                    'platform': m.get('platform'),
                    'score': m.get('score', 0),
                    'overlap_score': m.get('overlap_score', 0),
                    'reasons': overlap_reasons[:3],
                    'url': m.get('url'),
                    'status': m.get('status', 'new'),
                })
            self._send_json({
                'count': len(matches),
                'new_count': new_count,
                'top_matches': top_matches,
            })
        except Exception as e:
            self._send_json({'error': str(e)}, 500)

    def _handle_top_humans(self):
        """return top scoring humans for the HA sensor"""
        try:
            db = Database()
            try:
                humans = db.get_all_humans(min_score=50, limit=5)
            finally:
                db.close()
            top_humans = []
            for h in humans:
                contact = self._parse_json_field(h.get('contact', '{}'), {})
                signals = self._parse_json_field(h.get('signals', '[]'), [])
                top_humans.append({
                    'username': h.get('username'),
                    'platform': h.get('platform'),
                    'score': h.get('score', 0),
                    'name': h.get('name'),
                    'signals': signals[:5],
                    # preferred delivery channel, in priority order
                    'contact_method': 'email' if contact.get('email') else
                                      'mastodon' if contact.get('mastodon') else
                                      'matrix' if contact.get('matrix') else 'manual',
                })
            self._send_json({
                'count': len(humans),
                'top_humans': top_humans,
            })
        except Exception as e:
            self._send_json({'error': str(e)}, 500)

    def _handle_user(self):
        """return priority user info for the HA sensor"""
        try:
            db = Database()
            try:
                users = get_priority_users(db.conn)
                if not users:
                    self._send_json({
                        'configured': False,
                        'score': 0,
                        'signals': [],
                        'match_count': 0,
                    })
                    return
                user = users[0]
                matches = get_priority_user_matches(db.conn, user['id'], limit=100)
            finally:
                db.close()
            signals = self._parse_json_field(user.get('signals', '[]'), [])
            interests = self._parse_json_field(user.get('interests', '[]'), [])
            self._send_json({
                'configured': True,
                'name': user.get('name'),
                'github': user.get('github'),
                'mastodon': user.get('mastodon'),
                'reddit': user.get('reddit'),
                'lobsters': user.get('lobsters'),
                'matrix': user.get('matrix'),
                'lemmy': user.get('lemmy'),
                'discord': user.get('discord'),
                'bluesky': user.get('bluesky'),
                'score': user.get('score', 0),
                'signals': signals[:10],
                'interests': interests,
                'location': user.get('location'),
                'bio': user.get('bio'),
                'match_count': len(matches),
                'new_match_count': sum(1 for m in matches if m.get('status') == 'new'),
            })
        except Exception as e:
            self._send_json({'error': str(e)}, 500)
def run_api_server():
    """Bind the HTTP API on all interfaces and serve requests forever (blocking)."""
    httpd = HTTPServer(('0.0.0.0', API_PORT), APIHandler)
    print(f"connectd api running on port {API_PORT}")
    httpd.serve_forever()
def start_api_thread():
    """Launch the API server on a background daemon thread; return the thread."""
    api_thread = threading.Thread(target=run_api_server, daemon=True)
    api_thread.start()
    return api_thread
if __name__ == '__main__':
    # standalone mode for testing: serve in the foreground (blocks forever)
    print(f"starting connectd api on port {API_PORT}...")
    run_api_server()

11
connectd/build.yaml Normal file
View file

@ -0,0 +1,11 @@
# per-architecture base images consumed as BUILD_FROM by the Dockerfile
build_from:
  amd64: ghcr.io/hassio-addons/base:15.0.8
  aarch64: ghcr.io/hassio-addons/base:15.0.8
  armv7: ghcr.io/hassio-addons/base:15.0.8
# OCI image metadata labels
labels:
  org.opencontainers.image.title: "connectd"
  org.opencontainers.image.description: "find isolated builders with aligned values"
  org.opencontainers.image.source: "https://github.com/sudoxnym/connectd"
  org.opencontainers.image.licenses: "MIT"
# default build arguments
args:
  BUILD_ARCH: amd64

878
connectd/cli.py Executable file
View file

@ -0,0 +1,878 @@
#!/usr/bin/env python3
"""
connectd - people discovery and matchmaking daemon
finds isolated builders and connects them
also finds LOST builders who need encouragement
usage:
connectd scout # run all scrapers
connectd scout --github # github only
connectd scout --reddit # reddit only
connectd scout --mastodon # mastodon only
connectd scout --lobsters # lobste.rs only
connectd scout --matrix # matrix only
connectd scout --lost # show lost builder stats after scout
connectd match # find all matches
connectd match --top 20 # show top 20 matches
connectd match --mine # show YOUR matches (priority user)
connectd match --lost # find matches for lost builders
connectd intro # generate intros for top matches
connectd intro --match 123 # generate intro for specific match
connectd intro --dry-run # preview intros without saving
connectd intro --lost # generate intros for lost builders
connectd review # interactive review queue
connectd send # send all approved intros
connectd send --export # export for manual sending
connectd daemon # run as continuous daemon
connectd daemon --oneshot # run once then exit
connectd daemon --dry-run # run but never send intros
connectd daemon --oneshot --dry-run # one cycle, preview only
connectd user # show your priority user profile
connectd user --setup # setup/update your profile
connectd user --matches # show matches found for you
connectd status # show database stats (including lost builders)
connectd lost # show lost builders ready for outreach
"""
import argparse
import sys
from pathlib import Path
# add parent to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from db import Database
from db.users import (init_users_table, add_priority_user, get_priority_users,
get_priority_user_matches, score_priority_user, auto_match_priority_user,
update_priority_user_profile)
from scoutd import scrape_github, scrape_reddit, scrape_mastodon, scrape_lobsters, scrape_matrix
from scoutd.deep import deep_scrape_github_user
from scoutd.lost import get_signal_descriptions
from introd.deliver import (deliver_intro, deliver_batch, get_delivery_stats,
review_manual_queue, determine_best_contact, load_manual_queue,
save_manual_queue)
from matchd import find_all_matches, generate_fingerprint
from matchd.rank import get_top_matches
from matchd.lost import find_matches_for_lost_builders, get_lost_match_summary
from introd import draft_intro
from introd.draft import draft_intros_for_match
from introd.lost_intro import draft_lost_intro, get_lost_intro_config
from introd.review import review_all_pending, get_pending_intros
from introd.send import send_all_approved, export_manual_intros
def cmd_scout(args, db):
    """run discovery scrapers.

    args: parsed CLI namespace (per-platform flags, --deep, --user, --lost)
    db: open Database instance

    With --user, deep-scrapes that single github user and returns.
    With no platform flag, runs every scraper. Lost-builder stats are
    always printed at the end (the --lost flag is kept for compatibility).
    """
    from scoutd.deep import deep_scrape_github_user, save_deep_profile
    print("=" * 60)
    print("connectd scout - discovering aligned humans")
    print("=" * 60)
    # deep scrape specific user
    if args.user:
        print(f"\ndeep scraping github user: {args.user}")
        profile = deep_scrape_github_user(args.user)
        if profile:
            save_deep_profile(db, profile)
            print(f"\n=== {profile['username']} ===")
            print(f"real name: {profile.get('real_name')}")
            print(f"location: {profile.get('location')}")
            print(f"company: {profile.get('company')}")
            print(f"email: {profile.get('email')}")
            print(f"twitter: {profile.get('twitter')}")
            print(f"mastodon: {profile.get('mastodon')}")
            print(f"orgs: {', '.join(profile.get('orgs', []))}")
            print(f"languages: {', '.join(list(profile.get('languages', {}).keys())[:5])}")
            print(f"topics: {', '.join(profile.get('topics', [])[:10])}")
            print(f"signals: {', '.join(profile.get('signals', []))}")
            print(f"score: {profile.get('score')}")
            if profile.get('linked_profiles'):
                print(f"linked profiles: {list(profile['linked_profiles'].keys())}")
        else:
            print("failed to scrape user")
        return
    # no platform flag at all => run every scraper
    run_all = not any([args.github, args.reddit, args.mastodon, args.lobsters, args.matrix, args.twitter, args.bluesky, args.lemmy, args.discord])
    if args.github or run_all:
        if args.deep:
            # deep scrape mode - slower but more thorough
            print("\nrunning DEEP github scrape (follows all links)...")
            from scoutd.github import get_repo_contributors
            from scoutd.signals import ECOSYSTEM_REPOS
            all_logins = set()
            for repo in ECOSYSTEM_REPOS[:5]:  # limit for deep mode
                contributors = get_repo_contributors(repo, per_page=20)
                for c in contributors:
                    login = c.get('login')
                    if login and not login.endswith('[bot]'):
                        all_logins.add(login)
                print(f" {repo}: {len(contributors)} contributors")
            print(f"\ndeep scraping {len(all_logins)} users...")
            for login in all_logins:
                try:
                    profile = deep_scrape_github_user(login)
                    if profile and profile.get('score', 0) > 0:
                        save_deep_profile(db, profile)
                        if profile['score'] >= 30:
                            print(f"{login}: {profile['score']} pts")
                            if profile.get('email'):
                                print(f" email: {profile['email']}")
                            if profile.get('mastodon'):
                                print(f" mastodon: {profile['mastodon']}")
                except Exception as e:
                    print(f" error on {login}: {e}")
        else:
            scrape_github(db)
    if args.reddit or run_all:
        scrape_reddit(db)
    if args.mastodon or run_all:
        scrape_mastodon(db)
    if args.lobsters or run_all:
        scrape_lobsters(db)
    if args.matrix or run_all:
        scrape_matrix(db)
    if args.twitter or run_all:
        from scoutd.twitter import scrape_twitter
        scrape_twitter(db)
    if args.bluesky or run_all:
        from scoutd.bluesky import scrape_bluesky
        scrape_bluesky(db)
    if args.lemmy or run_all:
        from scoutd.lemmy import scrape_lemmy
        scrape_lemmy(db)
    if args.discord or run_all:
        from scoutd.discord import scrape_discord
        scrape_discord(db)
    # show stats
    stats = db.stats()
    print("\n" + "=" * 60)
    print("SCOUT COMPLETE")
    print("=" * 60)
    print(f"total humans: {stats['total_humans']}")
    for platform, count in stats.get('by_platform', {}).items():
        print(f" {platform}: {count}")
    # lost builder stats — always shown now; the dead `if args.lost or True:`
    # guard has been removed (--lost flag kept for interface compatibility)
    print("\n--- lost builder stats ---")
    print(f"active builders: {stats.get('active_builders', 0)}")
    print(f"lost builders: {stats.get('lost_builders', 0)}")
    print(f"recovering builders: {stats.get('recovering_builders', 0)}")
    print(f"high lost score (40+): {stats.get('high_lost_score', 0)}")
    print(f"lost outreach sent: {stats.get('lost_outreach_sent', 0)}")
def cmd_match(args, db):
    """find and rank matches.

    modes (from args):
      --lost          pair lost builders with inspiring active builders
      --mine          show stored matches for the configured priority user(s)
      --top N (alone) display the existing top N matches without re-matching
      default         run full matching across all discovered humans
    """
    import json as json_mod
    print("=" * 60)
    print("connectd match - finding aligned pairs")
    print("=" * 60)
    # lost builder matching
    if args.lost:
        print("\n--- LOST BUILDER MATCHING ---")
        print("finding inspiring builders for lost souls...\n")
        matches, error = find_matches_for_lost_builders(db, limit=args.top or 20)
        if error:
            print(f"error: {error}")
            return
        if not matches:
            print("no lost builders ready for outreach")
            return
        print(f"found {len(matches)} lost builders with matching active builders\n")
        for i, match in enumerate(matches, 1):
            lost = match['lost_user']
            builder = match['inspiring_builder']
            lost_name = lost.get('name') or lost.get('username')
            builder_name = builder.get('name') or builder.get('username')
            print(f"{i}. {lost_name} ({lost.get('platform')}) → needs inspiration from")
            print(f" {builder_name} ({builder.get('platform')})")
            print(f" lost score: {lost.get('lost_potential_score', 0)} | values: {lost.get('score', 0)}")
            print(f" shared interests: {', '.join(match.get('shared_interests', []))}")
            print(f" builder has: {match.get('builder_repos', 0)} repos, {match.get('builder_stars', 0)} stars")
            print()
        return
    if args.mine:
        # show matches for priority user
        init_users_table(db.conn)
        users = get_priority_users(db.conn)
        if not users:
            print("no priority user configured. run: connectd user --setup")
            return
        for user in users:
            print(f"\n=== matches for {user['name']} ===\n")
            matches = get_priority_user_matches(db.conn, user['id'], limit=args.top or 20)
            if not matches:
                print("no matches yet - run: connectd scout && connectd match")
                continue
            for i, match in enumerate(matches, 1):
                print(f"{i}. {match['username']} ({match['platform']})")
                print(f" score: {match['overlap_score']:.0f}")
                print(f" url: {match['url']}")
                # overlap_reasons may be a JSON-encoded string from the db
                reasons = match.get('overlap_reasons', '[]')
                if isinstance(reasons, str):
                    reasons = json_mod.loads(reasons)
                if reasons:
                    print(f" why: {reasons[0]}")
                print()
        return
    # NOTE(review): `not args.mine` is always true here — the --mine branch
    # returned above; the extra check is redundant but harmless
    if args.top and not args.mine:
        # just show existing top matches
        matches = get_top_matches(db, limit=args.top)
    else:
        # run full matching
        matches = find_all_matches(db, min_score=args.min_score, min_overlap=args.min_overlap)
    print("\n" + "-" * 60)
    print(f"TOP {min(len(matches), args.top or 20)} MATCHES")
    print("-" * 60)
    for i, match in enumerate(matches[:args.top or 20], 1):
        human_a = match.get('human_a', {})
        human_b = match.get('human_b', {})
        print(f"\n{i}. {human_a.get('username')} <-> {human_b.get('username')}")
        print(f" platforms: {human_a.get('platform')} / {human_b.get('platform')}")
        print(f" overlap: {match.get('overlap_score', 0):.0f} pts")
        reasons = match.get('overlap_reasons', [])
        if isinstance(reasons, str):
            reasons = json_mod.loads(reasons)
        if reasons:
            print(f" why: {' | '.join(reasons[:3])}")
        if match.get('geographic_match'):
            print(f" location: compatible ✓")
def cmd_intro(args, db):
    """generate intro drafts.

    args flags: --dry-run (preview without saving), --lost (encouragement
    intros for lost builders), --match ID (single match), --limit N
    """
    import json as json_mod
    print("=" * 60)
    print("connectd intro - drafting introductions")
    print("=" * 60)
    if args.dry_run:
        print("*** DRY RUN MODE - previewing only ***\n")
    # lost builder intros - different tone entirely
    if args.lost:
        print("\n--- LOST BUILDER INTROS ---")
        print("drafting encouragement for lost souls...\n")
        matches, error = find_matches_for_lost_builders(db, limit=args.limit or 10)
        if error:
            print(f"error: {error}")
            return
        if not matches:
            print("no lost builders ready for outreach")
            return
        config = get_lost_intro_config()
        count = 0
        for match in matches:
            lost = match['lost_user']
            builder = match['inspiring_builder']
            lost_name = lost.get('name') or lost.get('username')
            builder_name = builder.get('name') or builder.get('username')
            # draft intro
            draft, error = draft_lost_intro(lost, builder, config)
            if error:
                print(f" error drafting intro for {lost_name}: {error}")
                continue
            if args.dry_run:
                print("=" * 60)
                print(f"TO: {lost_name} ({lost.get('platform')})")
                print(f"LOST SCORE: {lost.get('lost_potential_score', 0)}")
                print(f"INSPIRING: {builder_name} ({builder.get('url')})")
                print("-" * 60)
                print("MESSAGE:")
                print(draft)
                print("-" * 60)
                print("[DRY RUN - NOT SAVED]")
                print("=" * 60)
            else:
                # fixed: names ran together ("{lost_name}{builder_name}");
                # use the → separator as in the lost-match listing
                print(f" drafted intro for {lost_name} → {builder_name}")
            count += 1
        if args.dry_run:
            print(f"\npreviewed {count} lost builder intros (dry run)")
        else:
            print(f"\ndrafted {count} lost builder intros")
            print("these require manual review before sending")
        return
    if args.match:
        # specific match
        matches = [m for m in get_top_matches(db, limit=1000) if m.get('id') == args.match]
    else:
        # top matches
        matches = get_top_matches(db, limit=args.limit or 10)
    if not matches:
        print("no matches found")
        return
    print(f"generating intros for {len(matches)} matches...")
    count = 0
    for match in matches:
        intros = draft_intros_for_match(match)
        for intro in intros:
            recipient = intro['recipient_human']
            other = intro['other_human']
            if args.dry_run:
                # get contact info (may be a JSON-encoded string from the db)
                contact = recipient.get('contact', {})
                if isinstance(contact, str):
                    contact = json_mod.loads(contact)
                email = contact.get('email', 'no email')
                # get overlap reasons
                reasons = match.get('overlap_reasons', [])
                if isinstance(reasons, str):
                    reasons = json_mod.loads(reasons)
                reason_summary = ', '.join(reasons[:3]) if reasons else 'aligned values'
                # print preview
                print("\n" + "=" * 60)
                print(f"TO: {recipient.get('username')} ({recipient.get('platform')})")
                print(f"EMAIL: {email}")
                print(f"SUBJECT: you might want to meet {other.get('username')}")
                print(f"SCORE: {match.get('overlap_score', 0):.0f} ({reason_summary})")
                print("-" * 60)
                print("MESSAGE:")
                print(intro['draft'])
                print("-" * 60)
                print("[DRY RUN - NOT SENT]")
                print("=" * 60)
            else:
                print(f"\n {recipient.get('username')} ({intro['channel']})")
                # save to db
                db.save_intro(
                    match.get('id'),
                    recipient.get('id'),
                    intro['channel'],
                    intro['draft']
                )
            count += 1
    if args.dry_run:
        print(f"\npreviewed {count} intros (dry run - nothing saved)")
    else:
        print(f"\ngenerated {count} intro drafts")
        print("run 'connectd review' to approve before sending")
def cmd_review(args, db):
    """interactive review queue for pending intros (delegates to introd.review)"""
    review_all_pending(db)
def cmd_send(args, db):
    """send approved intros from the manual queue.

    --export FILE: dump pending (unapproved) intros to FILE as JSON and exit.
    Otherwise deliver every 'approved' queue item, marking each 'sent' or
    'failed' in the persisted queue.
    """
    import json as json_mod
    if args.export:
        # export manual queue to file for review
        queue = load_manual_queue()
        pending = [q for q in queue if q.get('status') == 'pending']
        with open(args.export, 'w') as f:
            # fixed: was `json.dump(...)` — `json` is never imported in this
            # module (only the local `json_mod` alias), so this raised NameError
            json_mod.dump(pending, f, indent=2)
        print(f"exported {len(pending)} pending intros to {args.export}")
        return
    # send all approved from manual queue
    queue = load_manual_queue()
    approved = [q for q in queue if q.get('status') == 'approved']
    if not approved:
        print("no approved intros to send")
        print("use 'connectd review' to approve intros first")
        return
    print(f"sending {len(approved)} approved intros...")
    for item in approved:
        match_data = item.get('match', {})
        intro_draft = item.get('draft', '')
        recipient = item.get('recipient', {})
        success, error, method = deliver_intro(
            {'human_b': recipient, **match_data},
            intro_draft,
            # idiomatic replacement for `args.dry_run if hasattr(...) else False`
            dry_run=getattr(args, 'dry_run', False)
        )
        status = 'ok' if success else f'failed: {error}'
        print(f" {recipient.get('username')}: {method} - {status}")
        # update queue status
        item['status'] = 'sent' if success else 'failed'
        item['error'] = error
    save_manual_queue(queue)
    # show stats
    stats = get_delivery_stats()
    print(f"\ndelivery stats: {stats['sent']} sent, {stats['failed']} failed")
def cmd_lost(args, db):
    """show lost builders ready for outreach.

    args: --min-score (lost-score threshold, default 40), --limit (default 50),
    --verbose (print the outreach philosophy footer)
    """
    import json as json_mod
    print("=" * 60)
    print("connectd lost - lost builders who need encouragement")
    print("=" * 60)
    # get lost builders
    lost_builders = db.get_lost_builders_for_outreach(
        min_lost_score=args.min_score or 40,
        min_values_score=20,
        limit=args.limit or 50
    )
    if not lost_builders:
        print("\nno lost builders ready for outreach")
        print("run 'connectd scout' to discover more")
        return
    print(f"\n{len(lost_builders)} lost builders ready for outreach:\n")
    for i, lost in enumerate(lost_builders, 1):
        name = lost.get('name') or lost.get('username')
        platform = lost.get('platform')
        lost_score = lost.get('lost_potential_score', 0)
        values_score = lost.get('score', 0)
        # parse lost signals (may be a JSON-encoded string from the db)
        lost_signals = lost.get('lost_signals', [])
        if isinstance(lost_signals, str):
            lost_signals = json_mod.loads(lost_signals) if lost_signals else []
        # get human-readable signal descriptions
        signal_descriptions = get_signal_descriptions(lost_signals)
        print(f"{i}. {name} ({platform})")
        print(f" lost score: {lost_score} | values score: {values_score}")
        print(f" url: {lost.get('url')}")
        if signal_descriptions:
            print(f" why lost: {', '.join(signal_descriptions[:3])}")
        print()
    if args.verbose:
        print("-" * 60)
        print("these people need encouragement, not networking.")
        print("the goal: show them someone like them made it.")
        print("-" * 60)
def cmd_status(args, db):
    """show database stats: priority users, humans per platform, matches,
    intros, and lost-builder counters."""
    import json as json_mod  # NOTE(review): unused here; kept for parity with sibling commands
    init_users_table(db.conn)
    stats = db.stats()
    print("=" * 60)
    print("connectd status")
    print("=" * 60)
    # priority users
    users = get_priority_users(db.conn)
    print(f"\npriority users: {len(users)}")
    for user in users:
        print(f" - {user['name']} ({user['email']})")
    print(f"\nhumans discovered: {stats['total_humans']}")
    print(f" high-score (50+): {stats['high_score_humans']}")
    print("\nby platform:")
    for platform, count in stats.get('by_platform', {}).items():
        print(f" {platform}: {count}")
    print(f"\nstranger matches: {stats['total_matches']}")
    print(f"intros created: {stats['total_intros']}")
    print(f"intros sent: {stats['sent_intros']}")
    # lost builder stats
    print("\n--- lost builder stats ---")
    print(f"active builders: {stats.get('active_builders', 0)}")
    print(f"lost builders: {stats.get('lost_builders', 0)}")
    print(f"recovering builders: {stats.get('recovering_builders', 0)}")
    print(f"high lost score (40+): {stats.get('high_lost_score', 0)}")
    print(f"lost outreach sent: {stats.get('lost_outreach_sent', 0)}")
    # priority user matches
    for user in users:
        matches = get_priority_user_matches(db.conn, user['id'])
        print(f"\nmatches for {user['name']}: {len(matches)}")
    # pending intros
    pending = get_pending_intros(db)
    print(f"\nintros pending review: {len(pending)}")
def cmd_daemon(args, db):
    """run as continuous daemon.

    --oneshot: run a single scout/match/intro cycle, then exit
    --dry-run: go through the motions but never send intros
    """
    from daemon import ConnectDaemon
    daemon = ConnectDaemon(dry_run=args.dry_run)
    if args.oneshot:
        print("running one cycle...")
        if args.dry_run:
            print("*** DRY RUN MODE - no intros will be sent ***")
        daemon.scout_cycle()
        daemon.match_priority_users()
        daemon.match_strangers()
        daemon.send_stranger_intros()
        print("done")
    else:
        # blocking: loops until killed
        daemon.run()
def cmd_user(args, db):
    """manage priority user profile.

    --setup: interactive prompt to create/update the profile
    --matches: list stored matches for each priority user
    (no flag): print the stored profile(s)
    """
    import json as json_mod
    init_users_table(db.conn)
    if args.setup:
        # interactive setup
        print("=" * 60)
        print("connectd priority user setup")
        print("=" * 60)
        print("\nlink your profiles so connectd finds matches for YOU\n")
        name = input("name: ").strip()
        email = input("email: ").strip()
        github = input("github username: ").strip() or None
        reddit = input("reddit username: ").strip() or None
        mastodon = input("mastodon (user@instance): ").strip() or None
        location = input("location (e.g. seattle): ").strip() or None
        print("\ninterests (comma separated):")
        interests_raw = input("> ").strip()
        interests = [i.strip() for i in interests_raw.split(',')] if interests_raw else []
        looking_for = input("looking for: ").strip() or None
        user_data = {
            'name': name, 'email': email, 'github': github,
            'reddit': reddit, 'mastodon': mastodon,
            'location': location, 'interests': interests,
            'looking_for': looking_for,
        }
        user_id = add_priority_user(db.conn, user_data)
        print(f"\n✓ added as priority user #{user_id}")
    elif args.matches:
        # show matches
        users = get_priority_users(db.conn)
        if not users:
            print("no priority user. run: connectd user --setup")
            return
        for user in users:
            print(f"\n=== matches for {user['name']} ===\n")
            matches = get_priority_user_matches(db.conn, user['id'], limit=20)
            if not matches:
                print("no matches yet")
                continue
            for i, match in enumerate(matches, 1):
                print(f"{i}. {match['username']} ({match['platform']})")
                print(f" {match['url']}")
                print(f" score: {match['overlap_score']:.0f}")
                print()
    else:
        # show profile
        users = get_priority_users(db.conn)
        if not users:
            print("no priority user configured")
            print("run: connectd user --setup")
            return
        for user in users:
            print("=" * 60)
            print(f"priority user #{user['id']}: {user['name']}")
            print("=" * 60)
            print(f"email: {user['email']}")
            if user['github']:
                print(f"github: {user['github']}")
            if user['reddit']:
                print(f"reddit: {user['reddit']}")
            if user['mastodon']:
                print(f"mastodon: {user['mastodon']}")
            if user['location']:
                print(f"location: {user['location']}")
            if user['interests']:
                # interests may be stored as a JSON-encoded string
                interests = json_mod.loads(user['interests']) if isinstance(user['interests'], str) else user['interests']
                print(f"interests: {', '.join(interests)}")
            if user['looking_for']:
                print(f"looking for: {user['looking_for']}")
def cmd_me(args, db):
    """auto-score and auto-match for the priority user, with optional groq intros.

    pipeline: [1] scrape github profile (unless --skip-scrape) →
    [2] score → [3] match → [4] display top matches, optionally drafting
    an LLM intro per match when --groq is set.
    """
    import json as json_mod
    init_users_table(db.conn)
    # get priority user
    users = get_priority_users(db.conn)
    if not users:
        print("no priority user configured")
        print("run: connectd user --setup")
        return
    user = users[0]  # first/main user
    print("=" * 60)
    print(f"connectd me - {user['name']}")
    print("=" * 60)
    # step 1: scrape github profile
    if user.get('github') and not args.skip_scrape:
        print(f"\n[1/4] scraping github profile: {user['github']}")
        profile = deep_scrape_github_user(user['github'], scrape_commits=False)
        if profile:
            print(f" repos: {len(profile.get('top_repos', []))}")
            print(f" languages: {', '.join(list(profile.get('languages', {}).keys())[:5])}")
        else:
            print(" failed to scrape (rate limited?)")
            profile = None
    else:
        print("\n[1/4] skipping github scrape (using saved profile)")
        # use saved profile if available (may be a JSON-encoded string)
        saved = user.get('scraped_profile')
        if saved:
            profile = json_mod.loads(saved) if isinstance(saved, str) else saved
            print(f" loaded saved profile: {len(profile.get('top_repos', []))} repos")
        else:
            profile = None
    # step 2: calculate score
    print(f"\n[2/4] calculating your score...")
    result = score_priority_user(db.conn, user['id'], profile)
    if result:
        print(f" score: {result['score']}")
        print(f" signals: {', '.join(sorted(result['signals'])[:10])}")
    # step 3: find matches
    print(f"\n[3/4] finding matches...")
    matches = auto_match_priority_user(db.conn, user['id'], min_overlap=args.min_overlap)
    print(f" found {len(matches)} matches")
    # step 4: show results (optionally with groq intros)
    print(f"\n[4/4] top matches:")
    print("-" * 60)
    limit = args.limit or 10
    for i, m in enumerate(matches[:limit], 1):
        human = m['human']
        shared = m['shared']
        print(f"\n{i}. {human.get('name') or human['username']} ({human['platform']})")
        print(f" {human.get('url', '')}")
        print(f" score: {human.get('score', 0):.0f} | overlap: {m['overlap_score']:.0f}")
        print(f" location: {human.get('location') or 'unknown'}")
        print(f" why: {', '.join(shared[:5])}")
        # groq intro draft (best-effort: any failure is reported, not fatal)
        if args.groq:
            try:
                from introd.groq_draft import draft_intro_with_llm
                match_data = {
                    'human_a': {'name': user['name'], 'username': user.get('github'),
                                'platform': 'github', 'signals': result.get('signals', []) if result else [],
                                'bio': user.get('bio'), 'location': user.get('location'),
                                'extra': profile or {}},
                    'human_b': human,
                    'overlap_score': m['overlap_score'],
                    'overlap_reasons': shared,
                }
                intro, err = draft_intro_with_llm(match_data, recipient='b')
                if intro:
                    print(f"\n --- groq draft ({intro.get('contact_method', 'manual')}) ---")
                    if intro.get('contact_info'):
                        print(f" deliver via: {intro['contact_info']}")
                    for line in intro['draft'].split('\n'):
                        print(f" {line}")
                    print(f" ------------------")
                elif err:
                    print(f" [groq error: {err}]")
            except Exception as e:
                print(f" [groq error: {e}]")
    # summary
    print("\n" + "=" * 60)
    print(f"your score: {result['score'] if result else 'unknown'}")
    print(f"matches found: {len(matches)}")
    if args.groq:
        print("groq intros: enabled")
    else:
        print("tip: add --groq to generate ai intro drafts")
def main():
    """parse the command line and dispatch to the matching cmd_* handler.

    the dispatch is table-driven (command name -> handler); every handler
    has the signature handler(args, db). parsers that take no extra
    options are registered without binding an unused local.
    """
    parser = argparse.ArgumentParser(
        description='connectd - people discovery and matchmaking daemon',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    subparsers = parser.add_subparsers(dest='command', help='commands')
    # scout command
    scout_parser = subparsers.add_parser('scout', help='discover aligned humans')
    scout_parser.add_argument('--github', action='store_true', help='github only')
    scout_parser.add_argument('--reddit', action='store_true', help='reddit only')
    scout_parser.add_argument('--mastodon', action='store_true', help='mastodon only')
    scout_parser.add_argument('--lobsters', action='store_true', help='lobste.rs only')
    scout_parser.add_argument('--matrix', action='store_true', help='matrix only')
    scout_parser.add_argument('--twitter', action='store_true', help='twitter/x via nitter')
    scout_parser.add_argument('--bluesky', action='store_true', help='bluesky/atproto')
    scout_parser.add_argument('--lemmy', action='store_true', help='lemmy (fediverse reddit)')
    scout_parser.add_argument('--discord', action='store_true', help='discord servers')
    scout_parser.add_argument('--deep', action='store_true', help='deep scrape - follow all links')
    scout_parser.add_argument('--user', type=str, help='deep scrape specific github user')
    scout_parser.add_argument('--lost', action='store_true', help='show lost builder stats')
    # match command
    match_parser = subparsers.add_parser('match', help='find and rank matches')
    match_parser.add_argument('--top', type=int, help='show top N matches')
    match_parser.add_argument('--mine', action='store_true', help='show YOUR matches')
    match_parser.add_argument('--lost', action='store_true', help='find matches for lost builders')
    match_parser.add_argument('--min-score', type=int, default=30, help='min human score')
    match_parser.add_argument('--min-overlap', type=int, default=20, help='min overlap score')
    # intro command
    intro_parser = subparsers.add_parser('intro', help='generate intro drafts')
    intro_parser.add_argument('--match', type=int, help='specific match id')
    intro_parser.add_argument('--limit', type=int, default=10, help='number of matches')
    intro_parser.add_argument('--dry-run', action='store_true', help='preview only, do not save')
    intro_parser.add_argument('--lost', action='store_true', help='generate intros for lost builders')
    # lost command - show lost builders ready for outreach
    lost_parser = subparsers.add_parser('lost', help='show lost builders who need encouragement')
    lost_parser.add_argument('--min-score', type=int, default=40, help='min lost score')
    lost_parser.add_argument('--limit', type=int, default=50, help='max results')
    lost_parser.add_argument('--verbose', '-v', action='store_true', help='show philosophy')
    # review command (no extra options)
    subparsers.add_parser('review', help='review intro queue')
    # send command
    send_parser = subparsers.add_parser('send', help='send approved intros')
    send_parser.add_argument('--export', type=str, help='export to file for manual sending')
    # status command (no extra options)
    subparsers.add_parser('status', help='show stats')
    # daemon command
    daemon_parser = subparsers.add_parser('daemon', help='run as continuous daemon')
    daemon_parser.add_argument('--oneshot', action='store_true', help='run once then exit')
    daemon_parser.add_argument('--dry-run', action='store_true', help='preview intros, do not send')
    # user command
    user_parser = subparsers.add_parser('user', help='manage priority user profile')
    user_parser.add_argument('--setup', action='store_true', help='setup/update profile')
    user_parser.add_argument('--matches', action='store_true', help='show your matches')
    # me command - auto score + match + optional groq intros
    me_parser = subparsers.add_parser('me', help='auto-score and match yourself')
    me_parser.add_argument('--groq', action='store_true', help='generate groq llama intro drafts')
    me_parser.add_argument('--skip-scrape', action='store_true', help='skip github scraping')
    me_parser.add_argument('--min-overlap', type=int, default=40, help='min overlap score')
    me_parser.add_argument('--limit', type=int, default=10, help='number of matches to show')
    args = parser.parse_args()
    if not args.command:
        parser.print_help()
        return
    # command name -> handler; args.command is always a registered name here
    handlers = {
        'scout': cmd_scout,
        'match': cmd_match,
        'intro': cmd_intro,
        'review': cmd_review,
        'send': cmd_send,
        'status': cmd_status,
        'daemon': cmd_daemon,
        'user': cmd_user,
        'me': cmd_me,
        'lost': cmd_lost,
    }
    # init database; always close it even if a handler raises
    db = Database()
    try:
        handlers[args.command](args, db)
    finally:
        db.close()
# script entry point: run the CLI only when executed directly, not on import
if __name__ == '__main__':
    main()

124
connectd/config.py Normal file
View file

@ -0,0 +1,124 @@
"""
connectd/config.py - central configuration
all configurable settings in one place.
"""
import os
from pathlib import Path
# base paths
BASE_DIR = Path(__file__).parent
DB_DIR = BASE_DIR / 'db'
DATA_DIR = BASE_DIR / 'data'
CACHE_DIR = DB_DIR / 'cache'
# ensure directories exist
DATA_DIR.mkdir(exist_ok=True)
CACHE_DIR.mkdir(exist_ok=True)
# === DAEMON CONFIG ===
# NOTE(review): daemon.py defines its own copies of these four values
# instead of importing them - confirm which copy is authoritative.
SCOUT_INTERVAL = 3600 * 4  # full scout every 4 hours
MATCH_INTERVAL = 3600  # check matches every hour
INTRO_INTERVAL = 3600 * 2  # send intros every 2 hours
MAX_INTROS_PER_DAY = 20  # rate limit builder-to-builder outreach
# === MATCHING CONFIG ===
MIN_OVERLAP_PRIORITY = 30  # min score for priority user matches
MIN_OVERLAP_STRANGERS = 50  # higher bar for stranger intros
MIN_HUMAN_SCORE = 25  # min values score to be considered
# === LOST BUILDER CONFIG ===
# these people need encouragement, not networking.
# the goal isn't to recruit them - it's to show them the door exists.
# read via get_lost_config(); mutated in place via update_lost_config().
LOST_CONFIG = {
    # detection thresholds
    'min_lost_score': 40,  # minimum lost_potential_score
    'min_values_score': 20,  # must have SOME values alignment
    # outreach settings
    'enabled': True,
    'max_per_day': 5,  # lower volume, higher care
    'require_review': False,  # fully autonomous
    'cooldown_days': 90,  # don't spam struggling people
    # matching settings
    'min_builder_score': 50,  # inspiring builders must be active
    'min_match_overlap': 10,  # must have SOME shared interests
    # LLM drafting
    'use_llm': True,
    'llm_temperature': 0.7,  # be genuine, not robotic
    # message guidelines (for LLM prompt)
    'tone': 'genuine, not salesy',
    'max_words': 150,  # they don't have energy for long messages
    'no_pressure': True,  # never pushy
    'sign_off': '- connectd',
}
# === API CREDENTIALS ===
# all credentials from environment variables - no defaults.
# empty string means "not configured"; callers must tolerate that.
GROQ_API_KEY = os.environ.get('GROQ_API_KEY', '')
GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
GROQ_MODEL = os.environ.get('GROQ_MODEL', 'llama-3.1-70b-versatile')
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '')
MASTODON_TOKEN = os.environ.get('MASTODON_TOKEN', '')
MASTODON_INSTANCE = os.environ.get('MASTODON_INSTANCE', '')
BLUESKY_HANDLE = os.environ.get('BLUESKY_HANDLE', '')
BLUESKY_APP_PASSWORD = os.environ.get('BLUESKY_APP_PASSWORD', '')
MATRIX_HOMESERVER = os.environ.get('MATRIX_HOMESERVER', '')
MATRIX_USER_ID = os.environ.get('MATRIX_USER_ID', '')
MATRIX_ACCESS_TOKEN = os.environ.get('MATRIX_ACCESS_TOKEN', '')
DISCORD_BOT_TOKEN = os.environ.get('DISCORD_BOT_TOKEN', '')
DISCORD_TARGET_SERVERS = os.environ.get('DISCORD_TARGET_SERVERS', '')
# lemmy (for authenticated access to private instance)
LEMMY_INSTANCE = os.environ.get('LEMMY_INSTANCE', '')
LEMMY_USERNAME = os.environ.get('LEMMY_USERNAME', '')
LEMMY_PASSWORD = os.environ.get('LEMMY_PASSWORD', '')
# email (for sending intros)
# NOTE(review): SMTP_PORT raises ValueError at import if the env var is
# set to a non-integer - presumably acceptable fail-fast; confirm.
SMTP_HOST = os.environ.get('SMTP_HOST', '')
SMTP_PORT = int(os.environ.get('SMTP_PORT', '465'))
SMTP_USER = os.environ.get('SMTP_USER', '')
SMTP_PASS = os.environ.get('SMTP_PASS', '')
# === HOST USER CONFIG ===
# the person running connectd - gets priority matching
HOST_USER = os.environ.get('HOST_USER', '')  # alias like sudoxnym
HOST_NAME = os.environ.get('HOST_NAME', '')
HOST_EMAIL = os.environ.get('HOST_EMAIL', '')
HOST_GITHUB = os.environ.get('HOST_GITHUB', '')
HOST_MASTODON = os.environ.get('HOST_MASTODON', '')  # user@instance
HOST_REDDIT = os.environ.get('HOST_REDDIT', '')
HOST_LEMMY = os.environ.get('HOST_LEMMY', '')  # user@instance
HOST_LOBSTERS = os.environ.get('HOST_LOBSTERS', '')
HOST_MATRIX = os.environ.get('HOST_MATRIX', '')  # @user:server
HOST_DISCORD = os.environ.get('HOST_DISCORD', '')  # user id
HOST_BLUESKY = os.environ.get('HOST_BLUESKY', '')  # handle.bsky.social
HOST_LOCATION = os.environ.get('HOST_LOCATION', '')
HOST_INTERESTS = os.environ.get('HOST_INTERESTS', '')  # comma separated
HOST_LOOKING_FOR = os.environ.get('HOST_LOOKING_FOR', '')
def get_lost_config():
    """return a defensive copy of the lost-builder settings."""
    return dict(LOST_CONFIG)
def update_lost_config(updates):
    """merge *updates* into the lost-builder settings; return a copy.

    mutates the module-level LOST_CONFIG dict in place, so no global
    statement is needed (we never rebind the name).
    """
    LOST_CONFIG.update(updates)
    return dict(LOST_CONFIG)

72
connectd/config.yaml Normal file
View file

@ -0,0 +1,72 @@
name: connectd
version: "1.1.0"
slug: connectd
description: "find isolated builders with aligned values. auto-discover humans on github, mastodon, lemmy, discord, and more."
url: "https://github.com/sudoxnym/connectd"
arch:
- amd64
- aarch64
- armv7
startup: application
boot: auto
ports:
8099/tcp: 8099
ports_description:
8099/tcp: "connectd API (for HACS integration)"
map:
- config:rw
options:
  host_user: ""
  host_name: ""
  host_email: ""
  host_github: ""
  host_mastodon: ""
  host_reddit: ""
  host_lemmy: ""
  host_lobsters: ""
  host_matrix: ""
  host_discord: ""
  host_bluesky: ""
  host_location: ""
  host_interests: ""
  host_looking_for: ""
  github_token: ""
  groq_api_key: ""
  mastodon_token: ""
  mastodon_instance: ""
  discord_bot_token: ""
  discord_target_servers: ""
  lemmy_instance: ""
  lemmy_username: ""
  lemmy_password: ""
  smtp_host: ""
  smtp_port: 465
  smtp_user: ""
  smtp_pass: ""
schema:
  host_user: str?
  host_name: str?
  host_email: email?
  host_github: str?
  host_mastodon: str?
  host_reddit: str?
  host_lemmy: str?
  host_lobsters: str?
  host_matrix: str?
  host_discord: str?
  host_bluesky: str?
  host_location: str?
  host_interests: str?
  host_looking_for: str?
  github_token: str?
  groq_api_key: str?
  mastodon_token: str?
  mastodon_instance: str?
  discord_bot_token: str?
  discord_target_servers: str?
  lemmy_instance: str?
  lemmy_username: str?
  lemmy_password: str?
  smtp_host: str?
  smtp_port: int?
  smtp_user: str?
  smtp_pass: str?
image: sudoxreboot/connectd-addon-{arch}

546
connectd/daemon.py Normal file
View file

@ -0,0 +1,546 @@
#!/usr/bin/env python3
"""
connectd daemon - continuous discovery and matchmaking
two modes of operation:
1. priority matching: find matches FOR hosts who run connectd
2. altruistic matching: connect strangers to each other
runs continuously, respects rate limits, sends intros automatically
"""
import time
import json
import signal
import sys
from datetime import datetime, timedelta
from pathlib import Path
from db import Database
from db.users import (init_users_table, get_priority_users, save_priority_match,
get_priority_user_matches, discover_host_user)
from scoutd import scrape_github, scrape_reddit, scrape_mastodon, scrape_lobsters, scrape_lemmy, scrape_discord
from config import HOST_USER
from scoutd.github import analyze_github_user, get_github_user
from scoutd.signals import analyze_text
from matchd.fingerprint import generate_fingerprint, fingerprint_similarity
from matchd.overlap import find_overlap
from matchd.lost import find_matches_for_lost_builders
from introd.draft import draft_intro, summarize_human, summarize_overlap
from introd.lost_intro import draft_lost_intro, get_lost_intro_config
from introd.send import send_email
from introd.deliver import deliver_intro, determine_best_contact
from config import get_lost_config
from api import start_api_thread, update_daemon_state
# daemon config
# NOTE(review): these duplicate the values in config.py (SCOUT_INTERVAL etc.);
# confirm which copy is authoritative before changing either.
SCOUT_INTERVAL = 3600 * 4  # full scout every 4 hours
MATCH_INTERVAL = 3600  # check matches every hour
INTRO_INTERVAL = 3600 * 2  # send intros every 2 hours
LOST_INTERVAL = 3600 * 6  # lost builder outreach every 6 hours (lower volume)
MAX_INTROS_PER_DAY = 20  # rate limit outreach
MIN_OVERLAP_PRIORITY = 30  # min score for priority user matches
MIN_OVERLAP_STRANGERS = 50  # higher bar for stranger intros
class ConnectDaemon:
    """continuous discovery and matchmaking daemon.

    four cycles run on independent timers inside run():
      - scout: discover humans on all platforms
      - match: priority-user matching + stranger-to-stranger matching
      - intro: email intros between matched strangers
      - lost:  low-volume encouragement outreach to lost builders
    state is mirrored to the HTTP api (port 8099) for the HA integration.
    """

    def __init__(self, dry_run=False):
        # dry_run: preview intros instead of sending them
        self.db = Database()
        init_users_table(self.db.conn)
        self.running = True
        self.dry_run = dry_run
        self.started_at = datetime.now()
        # last-run timestamps per cycle; None means "never ran"
        self.last_scout = None
        self.last_match = None
        self.last_intro = None
        self.last_lost = None
        # daily rate-limit counters, reset when the date rolls over
        self.intros_today = 0
        self.lost_intros_today = 0
        self.today = datetime.now().date()
        # handle shutdown gracefully
        signal.signal(signal.SIGINT, self._shutdown)
        signal.signal(signal.SIGTERM, self._shutdown)
        # auto-discover host user from env
        if HOST_USER:
            self.log(f"HOST_USER set: {HOST_USER}")
            discover_host_user(self.db.conn, HOST_USER)
        # update API state
        self._update_api_state()

    def _shutdown(self, signum, frame):
        """signal handler: flag the main loop to exit after its current sleep."""
        print("\nconnectd: shutting down...")
        self.running = False
        self._update_api_state()

    def _update_api_state(self):
        """update API state for HA integration"""
        now = datetime.now()

        # calculate countdowns - if no cycle has run, use started_at
        def secs_until(last, interval):
            base = last if last else self.started_at
            next_run = base + timedelta(seconds=interval)
            remaining = (next_run - now).total_seconds()
            return max(0, int(remaining))

        update_daemon_state({
            'running': self.running,
            'dry_run': self.dry_run,
            'last_scout': self.last_scout.isoformat() if self.last_scout else None,
            'last_match': self.last_match.isoformat() if self.last_match else None,
            'last_intro': self.last_intro.isoformat() if self.last_intro else None,
            'last_lost': self.last_lost.isoformat() if self.last_lost else None,
            'intros_today': self.intros_today,
            'lost_intros_today': self.lost_intros_today,
            'started_at': self.started_at.isoformat(),
            'countdown_scout': secs_until(self.last_scout, SCOUT_INTERVAL),
            'countdown_match': secs_until(self.last_match, MATCH_INTERVAL),
            'countdown_intro': secs_until(self.last_intro, INTRO_INTERVAL),
            'countdown_lost': secs_until(self.last_lost, LOST_INTERVAL),
        })

    def log(self, msg):
        """timestamped log"""
        print(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {msg}")

    def reset_daily_limits(self):
        """reset daily intro count when the calendar date changes"""
        if datetime.now().date() != self.today:
            self.today = datetime.now().date()
            self.intros_today = 0
            self.lost_intros_today = 0
            self.log("reset daily intro limits")

    def scout_cycle(self):
        """run discovery on all platforms; each scraper fails independently"""
        self.log("starting scout cycle...")
        try:
            scrape_github(self.db, limit_per_source=30)
        except Exception as e:
            self.log(f"github scout error: {e}")
        try:
            scrape_reddit(self.db, limit_per_sub=30)
        except Exception as e:
            self.log(f"reddit scout error: {e}")
        try:
            scrape_mastodon(self.db, limit_per_instance=30)
        except Exception as e:
            self.log(f"mastodon scout error: {e}")
        try:
            scrape_lobsters(self.db)
        except Exception as e:
            self.log(f"lobsters scout error: {e}")
        try:
            scrape_lemmy(self.db, limit_per_community=30)
        except Exception as e:
            self.log(f"lemmy scout error: {e}")
        try:
            scrape_discord(self.db, limit_per_channel=50)
        except Exception as e:
            self.log(f"discord scout error: {e}")
        self.last_scout = datetime.now()
        stats = self.db.stats()
        self.log(f"scout complete: {stats['total_humans']} humans in db")

    def match_priority_users(self):
        """find matches for priority users (hosts)"""
        priority_users = get_priority_users(self.db.conn)
        if not priority_users:
            return
        self.log(f"matching for {len(priority_users)} priority users...")
        humans = self.db.get_all_humans(min_score=20, limit=500)
        for puser in priority_users:
            # build the priority user's signal list from their linked profiles
            puser_signals = []
            puser_text = []
            if puser.get('bio'):
                puser_text.append(puser['bio'])
            if puser.get('interests'):
                interests = json.loads(puser['interests']) if isinstance(puser['interests'], str) else puser['interests']
                puser_signals.extend(interests)
            if puser.get('looking_for'):
                puser_text.append(puser['looking_for'])
            # analyze their linked github if available
            if puser.get('github'):
                gh_user = analyze_github_user(puser['github'])
                if gh_user:
                    puser_signals.extend(gh_user.get('signals', []))
            # score free text (bio + looking_for) into additional signals.
            # (a local fingerprint dict was previously built here but never
            # used or persisted, so it has been removed.)
            if puser_text:
                _, text_signals, _ = analyze_text(' '.join(puser_text))
                puser_signals.extend(text_signals)
            # find matches
            matches_found = 0
            for human in humans:
                # skip if it's their own profile on another platform
                human_user = human.get('username', '').lower()
                if puser.get('github') and human_user == puser['github'].lower():
                    continue
                if puser.get('reddit') and human_user == puser['reddit'].lower():
                    continue
                if puser.get('mastodon') and human_user == puser['mastodon'].lower().split('@')[0]:
                    continue
                # calculate overlap: 10 points per shared signal
                human_signals = human.get('signals', [])
                if isinstance(human_signals, str):
                    human_signals = json.loads(human_signals)
                shared = set(puser_signals) & set(human_signals)
                overlap_score = len(shared) * 10
                # location bonus
                if puser.get('location') and human.get('location'):
                    if 'seattle' in human['location'].lower() or 'pnw' in human['location'].lower():
                        overlap_score += 20
                if overlap_score >= MIN_OVERLAP_PRIORITY:
                    overlap_data = {
                        'overlap_score': overlap_score,
                        'overlap_reasons': [f"shared: {', '.join(list(shared)[:5])}"] if shared else [],
                    }
                    save_priority_match(self.db.conn, puser['id'], human['id'], overlap_data)
                    matches_found += 1
            if matches_found:
                self.log(f"  found {matches_found} matches for {puser['name'] or puser['email']}")

    def match_strangers(self):
        """find matches between discovered humans (altruistic)"""
        self.log("matching strangers...")
        humans = self.db.get_all_humans(min_score=40, limit=200)
        if len(humans) < 2:
            # NOTE(review): returning here leaves last_match unset, so the
            # match cycle re-runs every loop until >= 2 humans exist - confirm
            # that retry behavior is intended.
            return
        # generate fingerprints
        fingerprints = {}
        for human in humans:
            fp = generate_fingerprint(human)
            fingerprints[human['id']] = fp
        # find pairs
        matches_found = 0
        from itertools import combinations
        for human_a, human_b in combinations(humans, 2):
            # skip same platform same user
            if human_a['platform'] == human_b['platform']:
                if human_a['username'] == human_b['username']:
                    continue
            fp_a = fingerprints.get(human_a['id'])
            fp_b = fingerprints.get(human_b['id'])
            overlap = find_overlap(human_a, human_b, fp_a, fp_b)
            if overlap['overlap_score'] >= MIN_OVERLAP_STRANGERS:
                # save match
                self.db.save_match(human_a['id'], human_b['id'], overlap)
                matches_found += 1
        if matches_found:
            self.log(f"found {matches_found} stranger matches")
        self.last_match = datetime.now()

    def send_stranger_intros(self):
        """send intros to connect strangers (or preview in dry-run mode)"""
        self.reset_daily_limits()
        if not self.dry_run and self.intros_today >= MAX_INTROS_PER_DAY:
            self.log("daily intro limit reached")
            return
        # get unsent matches, best overlap first
        c = self.db.conn.cursor()
        c.execute('''SELECT m.*,
                     ha.id as a_id, ha.username as a_user, ha.platform as a_platform,
                     ha.name as a_name, ha.url as a_url, ha.contact as a_contact,
                     ha.signals as a_signals, ha.extra as a_extra,
                     hb.id as b_id, hb.username as b_user, hb.platform as b_platform,
                     hb.name as b_name, hb.url as b_url, hb.contact as b_contact,
                     hb.signals as b_signals, hb.extra as b_extra
                     FROM matches m
                     JOIN humans ha ON m.human_a_id = ha.id
                     JOIN humans hb ON m.human_b_id = hb.id
                     WHERE m.status = 'pending'
                     ORDER BY m.overlap_score DESC
                     LIMIT 10''')
        matches = c.fetchall()
        if self.dry_run:
            self.log(f"DRY RUN: previewing {len(matches)} potential intros")
        for match in matches:
            if not self.dry_run and self.intros_today >= MAX_INTROS_PER_DAY:
                break
            match = dict(match)
            # build human dicts
            human_a = {
                'id': match['a_id'],
                'username': match['a_user'],
                'platform': match['a_platform'],
                'name': match['a_name'],
                'url': match['a_url'],
                'contact': match['a_contact'],
                'signals': match['a_signals'],
                'extra': match['a_extra'],
            }
            human_b = {
                'id': match['b_id'],
                'username': match['b_user'],
                'platform': match['b_platform'],
                'name': match['b_name'],
                'url': match['b_url'],
                'contact': match['b_contact'],
                'signals': match['b_signals'],
                'extra': match['b_extra'],
            }
            match_data = {
                'id': match['id'],
                'human_a': human_a,
                'human_b': human_b,
                'overlap_score': match['overlap_score'],
                'overlap_reasons': match['overlap_reasons'],
            }
            # try to send intro to whichever side has an email address
            for recipient, other in [(human_a, human_b), (human_b, human_a)]:
                contact = recipient.get('contact', {})
                if isinstance(contact, str):
                    contact = json.loads(contact)
                email = contact.get('email')
                if not email:
                    continue
                # draft intro
                intro = draft_intro(match_data, recipient='a' if recipient == human_a else 'b')
                # parse overlap reasons for display
                reasons = match['overlap_reasons']
                if isinstance(reasons, str):
                    reasons = json.loads(reasons)
                reason_summary = ', '.join(reasons[:3]) if reasons else 'aligned values'
                if self.dry_run:
                    # print preview
                    print("\n" + "=" * 60)
                    print(f"TO: {recipient['username']} ({recipient['platform']})")
                    print(f"EMAIL: {email}")
                    print(f"SUBJECT: you might want to meet {other['username']}")
                    print(f"SCORE: {match['overlap_score']:.0f} ({reason_summary})")
                    print("-" * 60)
                    print("MESSAGE:")
                    print(intro['draft'])
                    print("-" * 60)
                    print("[DRY RUN - NOT SENT]")
                    print("=" * 60)
                    break
                else:
                    # actually send
                    success, error = send_email(
                        email,
                        f"connectd: you might want to meet {other['username']}",
                        intro['draft']
                    )
                    if success:
                        self.log(f"sent intro to {recipient['username']} ({email})")
                        self.intros_today += 1
                        # mark match as intro_sent.
                        # (parameterized instead of SQL double quotes - SQLite
                        # only accepted "intro_sent" via a quoting misfeature.)
                        c.execute('UPDATE matches SET status = ? WHERE id = ?',
                                  ('intro_sent', match['id']))
                        self.db.conn.commit()
                        break
                    else:
                        self.log(f"failed to send to {email}: {error}")
        self.last_intro = datetime.now()

    def send_lost_builder_intros(self):
        """
        reach out to lost builders - different tone, lower volume.
        these people need encouragement, not networking.
        """
        self.reset_daily_limits()
        lost_config = get_lost_config()
        if not lost_config.get('enabled', True):
            return
        max_per_day = lost_config.get('max_per_day', 5)
        if not self.dry_run and self.lost_intros_today >= max_per_day:
            self.log("daily lost builder intro limit reached")
            return
        # find lost builders with matching active builders
        matches, error = find_matches_for_lost_builders(
            self.db,
            min_lost_score=lost_config.get('min_lost_score', 40),
            min_values_score=lost_config.get('min_values_score', 20),
            limit=max_per_day - self.lost_intros_today
        )
        if error:
            self.log(f"lost builder matching error: {error}")
            return
        if not matches:
            self.log("no lost builders ready for outreach")
            return
        if self.dry_run:
            self.log(f"DRY RUN: previewing {len(matches)} lost builder intros")
        for match in matches:
            if not self.dry_run and self.lost_intros_today >= max_per_day:
                break
            lost = match['lost_user']
            builder = match['inspiring_builder']
            lost_name = lost.get('name') or lost.get('username')
            builder_name = builder.get('name') or builder.get('username')
            # draft intro
            draft, draft_error = draft_lost_intro(lost, builder, lost_config)
            if draft_error:
                self.log(f"error drafting lost intro for {lost_name}: {draft_error}")
                continue
            # determine best contact method (activity-based)
            method, contact_info = determine_best_contact(lost)
            if self.dry_run:
                print("\n" + "=" * 60)
                print("LOST BUILDER OUTREACH")
                print("=" * 60)
                print(f"TO: {lost_name} ({lost.get('platform')})")
                print(f"DELIVERY: {method}{contact_info}")
                print(f"LOST SCORE: {lost.get('lost_potential_score', 0)}")
                print(f"VALUES SCORE: {lost.get('score', 0)}")
                print(f"INSPIRING BUILDER: {builder_name}")
                print(f"SHARED INTERESTS: {', '.join(match.get('shared_interests', []))}")
                print("-" * 60)
                print("MESSAGE:")
                print(draft)
                print("-" * 60)
                print("[DRY RUN - NOT SENT]")
                print("=" * 60)
            else:
                # build match data for unified delivery
                match_data = {
                    'human_a': builder,  # inspiring builder
                    'human_b': lost,  # lost builder (recipient)
                    'overlap_score': match.get('match_score', 0),
                    'overlap_reasons': match.get('shared_interests', []),
                }
                success, error, delivery_method = deliver_intro(match_data, draft)
                if success:
                    self.log(f"sent lost builder intro to {lost_name} via {delivery_method}")
                    self.lost_intros_today += 1
                    self.db.mark_lost_outreach(lost['id'])
                else:
                    self.log(f"failed to reach {lost_name} via {delivery_method}: {error}")
        self.last_lost = datetime.now()
        self.log(f"lost builder cycle complete: {self.lost_intros_today} sent today")

    def run(self):
        """main daemon loop"""
        self.log("connectd daemon starting...")
        # start API server
        start_api_thread()
        self.log("api server started on port 8099")
        if self.dry_run:
            self.log("*** DRY RUN MODE - no intros will be sent ***")
        self.log(f"scout interval: {SCOUT_INTERVAL}s")
        self.log(f"match interval: {MATCH_INTERVAL}s")
        self.log(f"intro interval: {INTRO_INTERVAL}s")
        self.log(f"lost interval: {LOST_INTERVAL}s")
        self.log(f"max intros/day: {MAX_INTROS_PER_DAY}")
        # initial scout
        self.scout_cycle()
        self._update_api_state()
        # elapsed-time checks use total_seconds(): timedelta.seconds is only
        # the sub-day remainder (0-86399), so a gap longer than 24h would
        # silently wrap and the cycle would never fire.
        while self.running:
            now = datetime.now()
            # scout cycle
            if not self.last_scout or (now - self.last_scout).total_seconds() >= SCOUT_INTERVAL:
                self.scout_cycle()
                self._update_api_state()
            # match cycle
            if not self.last_match or (now - self.last_match).total_seconds() >= MATCH_INTERVAL:
                self.match_priority_users()
                self.match_strangers()
                self._update_api_state()
            # intro cycle
            if not self.last_intro or (now - self.last_intro).total_seconds() >= INTRO_INTERVAL:
                self.send_stranger_intros()
                self._update_api_state()
            # lost builder cycle
            if not self.last_lost or (now - self.last_lost).total_seconds() >= LOST_INTERVAL:
                self.send_lost_builder_intros()
                self._update_api_state()
            # sleep between checks
            time.sleep(60)
        self.log("connectd daemon stopped")
        self.db.close()
def run_daemon(dry_run=False):
    """entry point: construct the daemon and run its main loop."""
    ConnectDaemon(dry_run=dry_run).run()
# script entry point; sys is already imported at module top, so the
# redundant local re-import was removed.
if __name__ == '__main__':
    dry_run = '--dry-run' in sys.argv
    run_daemon(dry_run=dry_run)

375
connectd/db/__init__.py Normal file
View file

@ -0,0 +1,375 @@
"""
connectd database layer
sqlite storage for humans, fingerprints, matches, intros
"""
import os
import sqlite3
import json
from datetime import datetime
from pathlib import Path
# use env var for DB path (docker) or default to local
# DB_PATH is the default sqlite file; Database(path=...) can still override it
DB_PATH = Path(os.environ.get('DB_PATH', Path(__file__).parent / 'connectd.db'))
class Database:
    def __init__(self, path=None):
        """open (or create) the sqlite database and ensure the schema exists.

        path: optional db file path; falls back to module-level DB_PATH.
        """
        self.path = path or DB_PATH
        self.conn = sqlite3.connect(self.path)
        # Row factory lets callers do dict(row) on query results
        self.conn.row_factory = sqlite3.Row
        self._init_tables()
    def _init_tables(self):
        """create all tables if missing and apply additive column migrations.

        idempotent: CREATE TABLE IF NOT EXISTS plus ALTER TABLE attempts that
        swallow the 'duplicate column' OperationalError on re-run.
        """
        c = self.conn.cursor()
        # humans table - all discovered people, unique per (platform, username).
        # signals/negative_signals/reasons/contact/extra hold JSON-encoded text.
        c.execute('''CREATE TABLE IF NOT EXISTS humans (
            id INTEGER PRIMARY KEY,
            platform TEXT NOT NULL,
            username TEXT NOT NULL,
            url TEXT,
            name TEXT,
            bio TEXT,
            location TEXT,
            score REAL DEFAULT 0,
            confidence REAL DEFAULT 0,
            signals TEXT,
            negative_signals TEXT,
            reasons TEXT,
            contact TEXT,
            extra TEXT,
            fingerprint_id INTEGER,
            scraped_at TEXT,
            updated_at TEXT,
            lost_potential_score REAL DEFAULT 0,
            lost_signals TEXT,
            user_type TEXT DEFAULT 'none',
            last_lost_outreach TEXT,
            UNIQUE(platform, username)
        )''')
        # migration: add new columns if they don't exist
        # (pre-existing databases created before the lost-builder feature)
        try:
            c.execute('ALTER TABLE humans ADD COLUMN lost_potential_score REAL DEFAULT 0')
        except sqlite3.OperationalError:
            pass  # column exists
        try:
            c.execute('ALTER TABLE humans ADD COLUMN lost_signals TEXT')
        except sqlite3.OperationalError:
            pass
        try:
            c.execute('ALTER TABLE humans ADD COLUMN user_type TEXT DEFAULT "none"')
        except sqlite3.OperationalError:
            pass
        try:
            c.execute('ALTER TABLE humans ADD COLUMN last_lost_outreach TEXT')
        except sqlite3.OperationalError:
            pass
        # fingerprints table - values profiles (one per human)
        c.execute('''CREATE TABLE IF NOT EXISTS fingerprints (
            id INTEGER PRIMARY KEY,
            human_id INTEGER,
            values_vector TEXT,
            skills TEXT,
            interests TEXT,
            location_pref TEXT,
            availability TEXT,
            generated_at TEXT,
            FOREIGN KEY(human_id) REFERENCES humans(id)
        )''')
        # matches table - paired humans; a pair is stored once (UNIQUE a,b)
        c.execute('''CREATE TABLE IF NOT EXISTS matches (
            id INTEGER PRIMARY KEY,
            human_a_id INTEGER,
            human_b_id INTEGER,
            overlap_score REAL,
            overlap_reasons TEXT,
            complementary_skills TEXT,
            geographic_match INTEGER,
            status TEXT DEFAULT 'pending',
            created_at TEXT,
            reviewed_at TEXT,
            FOREIGN KEY(human_a_id) REFERENCES humans(id),
            FOREIGN KEY(human_b_id) REFERENCES humans(id),
            UNIQUE(human_a_id, human_b_id)
        )''')
        # intros table - outreach attempts (draft -> approved -> sent -> response)
        c.execute('''CREATE TABLE IF NOT EXISTS intros (
            id INTEGER PRIMARY KEY,
            match_id INTEGER,
            recipient_human_id INTEGER,
            channel TEXT,
            draft TEXT,
            status TEXT DEFAULT 'draft',
            approved_by TEXT,
            approved_at TEXT,
            sent_at TEXT,
            response TEXT,
            response_at TEXT,
            FOREIGN KEY(match_id) REFERENCES matches(id),
            FOREIGN KEY(recipient_human_id) REFERENCES humans(id)
        )''')
        # cross-platform links - same person identified on two platforms
        c.execute('''CREATE TABLE IF NOT EXISTS cross_platform (
            id INTEGER PRIMARY KEY,
            human_a_id INTEGER,
            human_b_id INTEGER,
            confidence REAL,
            reason TEXT,
            FOREIGN KEY(human_a_id) REFERENCES humans(id),
            FOREIGN KEY(human_b_id) REFERENCES humans(id),
            UNIQUE(human_a_id, human_b_id)
        )''')
        self.conn.commit()
    def save_human(self, data):
        """save or update a human record.

        data: dict with the column fields plus arbitrary extra keys; the
        non-column keys are serialized into the 'extra' JSON blob.
        returns the rowid of the inserted/replaced row.

        NOTE(review): INSERT OR REPLACE deletes and re-inserts on a
        (platform, username) conflict, which assigns a NEW id - any
        matches/fingerprints rows pointing at the old id go stale. confirm
        whether an upsert (ON CONFLICT DO UPDATE) was intended.
        """
        c = self.conn.cursor()
        # fields to exclude from extra json
        # NOTE(review): 'scraped_at', 'extra' and 'updated_at' are not in this
        # list, so if present in data they are duplicated inside the extra
        # blob - presumably harmless; verify consumers of extra.
        exclude_fields = ['platform', 'username', 'url', 'name', 'bio',
                          'location', 'score', 'confidence', 'signals',
                          'negative_signals', 'reasons', 'contact',
                          'lost_potential_score', 'lost_signals', 'user_type']
        c.execute('''INSERT OR REPLACE INTO humans
                     (platform, username, url, name, bio, location, score, confidence,
                     signals, negative_signals, reasons, contact, extra, scraped_at, updated_at,
                     lost_potential_score, lost_signals, user_type)
                     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
                  (data.get('platform'),
                   data.get('username'),
                   data.get('url'),
                   data.get('name'),
                   data.get('bio'),
                   data.get('location'),
                   data.get('score', 0),
                   data.get('confidence', 0),
                   json.dumps(data.get('signals', [])),
                   json.dumps(data.get('negative_signals', [])),
                   json.dumps(data.get('reasons', [])),
                   json.dumps(data.get('contact', {})),
                   json.dumps({k: v for k, v in data.items() if k not in exclude_fields}),
                   data.get('scraped_at', datetime.now().isoformat()),
                   datetime.now().isoformat(),
                   data.get('lost_potential_score', 0),
                   json.dumps(data.get('lost_signals', [])),
                   data.get('user_type', 'none')))
        self.conn.commit()
        return c.lastrowid
def get_human(self, platform, username):
"""get a human by platform and username"""
c = self.conn.cursor()
c.execute('SELECT * FROM humans WHERE platform = ? AND username = ?',
(platform, username))
row = c.fetchone()
return dict(row) if row else None
def get_human_by_id(self, human_id):
"""get a human by id"""
c = self.conn.cursor()
c.execute('SELECT * FROM humans WHERE id = ?', (human_id,))
row = c.fetchone()
return dict(row) if row else None
def get_all_humans(self, min_score=0, limit=1000):
"""get all humans above score threshold"""
c = self.conn.cursor()
c.execute('''SELECT * FROM humans
WHERE score >= ?
ORDER BY score DESC, confidence DESC
LIMIT ?''', (min_score, limit))
return [dict(row) for row in c.fetchall()]
def get_humans_by_platform(self, platform, min_score=0, limit=500):
    """humans on one platform with score >= min_score, best first."""
    cur = self.conn.execute('''SELECT * FROM humans
                 WHERE platform = ? AND score >= ?
                 ORDER BY score DESC
                 LIMIT ?''', (platform, min_score, limit))
    return [dict(r) for r in cur.fetchall()]
def get_lost_builders(self, min_lost_score=40, min_values_score=20, limit=100):
    """get lost builders who need encouragement.

    fix: the original WHERE clause read
        user_type = 'lost' OR user_type = 'both' AND lost_potential_score >= ? ...
    and in SQL, AND binds tighter than OR, so every 'lost' row matched
    regardless of either score threshold. the OR is now parenthesized
    (matching get_lost_builders_for_outreach) so both thresholds apply.
    """
    c = self.conn.cursor()
    c.execute('''SELECT * FROM humans
                 WHERE (user_type = 'lost' OR user_type = 'both')
                 AND lost_potential_score >= ?
                 AND score >= ?
                 ORDER BY lost_potential_score DESC, score DESC
                 LIMIT ?''', (min_lost_score, min_values_score, limit))
    return [dict(row) for row in c.fetchall()]
def get_lost_builders_for_outreach(self, min_lost_score=40, min_values_score=20,
                                   cooldown_days=90, limit=50):
    """lost builders eligible for outreach: never contacted, or past cooldown."""
    query = '''SELECT * FROM humans
                 WHERE (user_type = 'lost' OR user_type = 'both')
                 AND lost_potential_score >= ?
                 AND score >= ?
                 AND (last_lost_outreach IS NULL
                      OR datetime(last_lost_outreach) < datetime('now', '-' || ? || ' days'))
                 ORDER BY lost_potential_score DESC, score DESC
                 LIMIT ?'''
    cur = self.conn.execute(query, (min_lost_score, min_values_score, cooldown_days, limit))
    return [dict(r) for r in cur.fetchall()]
def get_active_builders(self, min_score=50, limit=100):
    """active builders (user_type 'builder') above min_score, best first."""
    cur = self.conn.execute('''SELECT * FROM humans
                 WHERE user_type = 'builder'
                 AND score >= ?
                 ORDER BY score DESC, confidence DESC
                 LIMIT ?''', (min_score, limit))
    return [dict(r) for r in cur.fetchall()]
def mark_lost_outreach(self, human_id):
    """record the moment we reached out to a lost builder."""
    stamp = datetime.now().isoformat()
    self.conn.execute('''UPDATE humans SET last_lost_outreach = ? WHERE id = ?''',
                      (stamp, human_id))
    self.conn.commit()
def save_fingerprint(self, human_id, fingerprint_data):
    """store (or replace) a human's fingerprint and link it back; returns its id."""
    cur = self.conn.cursor()
    payload = (human_id,
               json.dumps(fingerprint_data.get('values_vector', {})),
               json.dumps(fingerprint_data.get('skills', [])),
               json.dumps(fingerprint_data.get('interests', [])),
               fingerprint_data.get('location_pref'),
               fingerprint_data.get('availability'),
               datetime.now().isoformat())
    cur.execute('''INSERT OR REPLACE INTO fingerprints
                 (human_id, values_vector, skills, interests, location_pref, availability, generated_at)
                 VALUES (?, ?, ?, ?, ?, ?, ?)''', payload)
    fid = cur.lastrowid
    # point the human at its freshly written fingerprint
    cur.execute('UPDATE humans SET fingerprint_id = ? WHERE id = ?', (fid, human_id))
    self.conn.commit()
    return fid
def get_fingerprint(self, human_id):
    """fetch the fingerprint row for a human; None when none exists."""
    row = self.conn.execute(
        'SELECT * FROM fingerprints WHERE human_id = ?', (human_id,)).fetchone()
    return None if row is None else dict(row)
def save_match(self, human_a_id, human_b_id, match_data):
    """persist a pending match between two humans; returns the match id."""
    geo = 1 if match_data.get('geographic_match') else 0
    cur = self.conn.cursor()
    cur.execute('''INSERT OR REPLACE INTO matches
                 (human_a_id, human_b_id, overlap_score, overlap_reasons,
                 complementary_skills, geographic_match, status, created_at)
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)''',
                (human_a_id, human_b_id,
                 match_data.get('overlap_score', 0),
                 json.dumps(match_data.get('overlap_reasons', [])),
                 json.dumps(match_data.get('complementary_skills', [])),
                 geo,
                 'pending',
                 datetime.now().isoformat()))
    self.conn.commit()
    return cur.lastrowid
def get_matches(self, status=None, limit=100):
    """matches ordered by overlap score; optionally filtered to one status."""
    # a falsy status (None or '') means "no filter", mirroring the api module
    if status:
        cur = self.conn.execute('''SELECT * FROM matches WHERE status = ?
                     ORDER BY overlap_score DESC LIMIT ?''', (status, limit))
    else:
        cur = self.conn.execute('''SELECT * FROM matches
                     ORDER BY overlap_score DESC LIMIT ?''', (limit,))
    return [dict(r) for r in cur.fetchall()]
def save_intro(self, match_id, recipient_id, channel, draft):
    """store a new intro draft (status 'draft'); returns its id."""
    cur = self.conn.cursor()
    cur.execute('''INSERT INTO intros
                 (match_id, recipient_human_id, channel, draft, status)
                 VALUES (?, ?, ?, ?, 'draft')''',
                (match_id, recipient_id, channel, draft))
    self.conn.commit()
    return cur.lastrowid
def get_pending_intros(self, limit=50):
    """newest intro drafts still awaiting human approval."""
    cur = self.conn.execute('''SELECT * FROM intros WHERE status = 'draft'
                 ORDER BY id DESC LIMIT ?''', (limit,))
    return [dict(r) for r in cur.fetchall()]
def approve_intro(self, intro_id, approved_by='human'):
    """flip an intro to 'approved' and record who approved it and when."""
    stamp = datetime.now().isoformat()
    self.conn.execute('''UPDATE intros SET status = 'approved',
                 approved_by = ?, approved_at = ? WHERE id = ?''',
                      (approved_by, stamp, intro_id))
    self.conn.commit()
def mark_intro_sent(self, intro_id):
    """record that an intro actually went out."""
    self.conn.execute('''UPDATE intros SET status = 'sent', sent_at = ? WHERE id = ?''',
                      (datetime.now().isoformat(), intro_id))
    self.conn.commit()
def stats(self):
    """gather database statistics as a plain dict.

    fix: the sent-intros query used WHERE status = "sent"; double quotes
    are identifier syntax in SQL and only work here via sqlite's legacy
    double-quoted-string fallback. use a single-quoted literal like the
    rest of the file.
    """
    c = self.conn.cursor()

    def count(query):
        # helper: run a single COUNT(*) query and return the number
        c.execute(query)
        return c.fetchone()[0]

    stats = {}
    stats['total_humans'] = count('SELECT COUNT(*) FROM humans')
    c.execute('SELECT platform, COUNT(*) FROM humans GROUP BY platform')
    stats['by_platform'] = {row[0]: row[1] for row in c.fetchall()}
    stats['high_score_humans'] = count('SELECT COUNT(*) FROM humans WHERE score >= 50')
    stats['total_matches'] = count('SELECT COUNT(*) FROM matches')
    stats['total_intros'] = count('SELECT COUNT(*) FROM intros')
    stats['sent_intros'] = count("SELECT COUNT(*) FROM intros WHERE status = 'sent'")
    # lost builder stats
    stats['active_builders'] = count("SELECT COUNT(*) FROM humans WHERE user_type = 'builder'")
    stats['lost_builders'] = count("SELECT COUNT(*) FROM humans WHERE user_type = 'lost'")
    stats['recovering_builders'] = count("SELECT COUNT(*) FROM humans WHERE user_type = 'both'")
    stats['high_lost_score'] = count('SELECT COUNT(*) FROM humans WHERE lost_potential_score >= 40')
    stats['lost_outreach_sent'] = count('SELECT COUNT(*) FROM humans WHERE last_lost_outreach IS NOT NULL')
    return stats
def close(self):
    """close the underlying sqlite connection; the object is unusable afterwards."""
    self.conn.close()

510
connectd/db/users.py Normal file
View file

@ -0,0 +1,510 @@
"""
priority users - people who host connectd get direct matching
"""
import sqlite3
import json
from datetime import datetime
from pathlib import Path
# default sqlite database location, next to this module
DB_PATH = Path(__file__).parent / 'connectd.db'
# map user-friendly interests to signal terms
# keys are labels users type; values are the internal signal names used by
# scoring/matching. an empty list (e.g. 'esports-venue') accepts the label
# but contributes no signals -- presumably intentional; TODO confirm
INTEREST_TO_SIGNALS = {
    'self-hosting': ['selfhosted', 'home_automation'],
    'home-assistant': ['home_automation'],
    'intentional-community': ['community', 'cooperative'],
    'cooperatives': ['cooperative', 'community'],
    'solarpunk': ['solarpunk'],
    'privacy': ['privacy', 'local_first'],
    'local-first': ['local_first', 'privacy'],
    'queer-friendly': ['queer'],
    'anti-capitalism': ['cooperative', 'decentralized', 'community'],
    'esports-venue': [],
    'foss': ['foss'],
    'decentralized': ['decentralized'],
    'federated': ['federated_chat'],
    'mesh': ['mesh'],
}
def init_users_table(conn):
    """create the priority-user tables if they do not exist.

    also migrates older databases by adding columns introduced after the
    initial release.

    fix: the migration loop used a bare `except:`, which would also hide
    real failures (locked db, syntax errors, KeyboardInterrupt). sqlite
    reports the expected "duplicate column name" case as OperationalError,
    so catch only that.
    """
    c = conn.cursor()
    c.execute('''CREATE TABLE IF NOT EXISTS priority_users (
        id INTEGER PRIMARY KEY,
        name TEXT,
        email TEXT UNIQUE,
        github TEXT,
        reddit TEXT,
        mastodon TEXT,
        lobsters TEXT,
        matrix TEXT,
        lemmy TEXT,
        discord TEXT,
        bluesky TEXT,
        location TEXT,
        bio TEXT,
        interests TEXT,
        looking_for TEXT,
        created_at TEXT,
        active INTEGER DEFAULT 1,
        score REAL DEFAULT 0,
        signals TEXT,
        scraped_profile TEXT,
        last_scored_at TEXT
    )''')
    # add missing columns to an existing (older) table; duplicate-column
    # errors mean the column is already there and are the expected no-op
    for col in ['lemmy', 'discord', 'bluesky']:
        try:
            c.execute(f'ALTER TABLE priority_users ADD COLUMN {col} TEXT')
        except sqlite3.OperationalError:
            pass  # column already exists
    # matches specifically for priority users
    c.execute('''CREATE TABLE IF NOT EXISTS priority_matches (
        id INTEGER PRIMARY KEY,
        priority_user_id INTEGER,
        matched_human_id INTEGER,
        overlap_score REAL,
        overlap_reasons TEXT,
        status TEXT DEFAULT 'new',
        notified_at TEXT,
        viewed_at TEXT,
        FOREIGN KEY(priority_user_id) REFERENCES priority_users(id),
        FOREIGN KEY(matched_human_id) REFERENCES humans(id)
    )''')
    conn.commit()
def add_priority_user(conn, user_data):
    """insert (or replace, keyed on unique email) a priority user."""
    simple_fields = ('name', 'email', 'github', 'reddit', 'mastodon', 'lobsters',
                     'matrix', 'lemmy', 'discord', 'bluesky', 'location', 'bio')
    values = [user_data.get(f) for f in simple_fields]
    values.append(json.dumps(user_data.get('interests', [])))
    values.append(user_data.get('looking_for'))
    values.append(datetime.now().isoformat())
    cur = conn.cursor()
    cur.execute('''INSERT OR REPLACE INTO priority_users
                 (name, email, github, reddit, mastodon, lobsters, matrix, lemmy, discord, bluesky,
                 location, bio, interests, looking_for, created_at)
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''', values)
    conn.commit()
    return cur.lastrowid
def get_priority_users(conn):
    """all priority users still marked active."""
    rows = conn.execute('SELECT * FROM priority_users WHERE active = 1').fetchall()
    return [dict(r) for r in rows]
def get_priority_user(conn, user_id):
    """fetch one priority user by id as a dict; None when missing."""
    row = conn.execute('SELECT * FROM priority_users WHERE id = ?', (user_id,)).fetchone()
    return None if row is None else dict(row)
def save_priority_match(conn, priority_user_id, human_id, overlap_data):
    """record a match for a priority user; duplicate inserts are ignored."""
    params = (priority_user_id, human_id,
              overlap_data.get('overlap_score', 0),
              json.dumps(overlap_data.get('overlap_reasons', [])))
    cur = conn.cursor()
    cur.execute('''INSERT OR IGNORE INTO priority_matches
                 (priority_user_id, matched_human_id, overlap_score, overlap_reasons, status)
                 VALUES (?, ?, ?, ?, 'new')''', params)
    conn.commit()
    return cur.lastrowid
def get_priority_user_matches(conn, priority_user_id, status=None, limit=50):
    """matches for a priority user joined with the matched human's row.

    note: `pm.*, h.*` produces duplicate column names (e.g. `id`); dict()
    keeps the last one, which is the human's column.
    """
    # a falsy status means "all statuses"
    if status:
        cur = conn.execute('''SELECT pm.*, h.* FROM priority_matches pm
                     JOIN humans h ON pm.matched_human_id = h.id
                     WHERE pm.priority_user_id = ? AND pm.status = ?
                     ORDER BY pm.overlap_score DESC
                     LIMIT ?''', (priority_user_id, status, limit))
    else:
        cur = conn.execute('''SELECT pm.*, h.* FROM priority_matches pm
                     JOIN humans h ON pm.matched_human_id = h.id
                     WHERE pm.priority_user_id = ?
                     ORDER BY pm.overlap_score DESC
                     LIMIT ?''', (priority_user_id, limit))
    return [dict(r) for r in cur.fetchall()]
def mark_match_viewed(conn, match_id):
    """flag a priority match as seen and stamp the time."""
    now = datetime.now().isoformat()
    conn.execute('''UPDATE priority_matches SET status = 'viewed', viewed_at = ?
                 WHERE id = ?''', (now, match_id))
    conn.commit()
def expand_interests_to_signals(interests):
    """translate user-facing interest labels into internal signal terms."""
    out = set()
    for raw in interests:
        key = raw.lower().strip()
        # known labels expand to their mapped signals; unknown labels pass
        # through unchanged as a signal of their own
        out.update(INTEREST_TO_SIGNALS.get(key, [key]))
    # baseline signals every priority user is assumed to share
    out.update(['foss', 'decentralized', 'federated_chat', 'containers', 'unix', 'selfhosted'])
    return list(out)
def score_priority_user(conn, user_id, scraped_profile=None):
    """
    calculate a score for a priority user based on:
    - their stated interests
    - their scraped github profile (if available)
    - their repos and activity

    persists score/signals/profile back onto the row and returns
    {'score': int, 'signals': list[str]}, or None when the id is unknown.
    interest bonuses are cumulative (one interest string can hit several
    keyword rules).
    """
    c = conn.cursor()
    c.execute('SELECT * FROM priority_users WHERE id = ?', (user_id,))
    row = c.fetchone()
    if not row:
        return None
    user = dict(row)
    score = 0
    signals = set()
    # 1. score from stated interests
    interests = user.get('interests')
    # interests column holds a json-encoded list; decode defensively
    if isinstance(interests, str):
        interests = json.loads(interests) if interests else []
    for interest in interests:
        interest_lower = interest.lower()
        # high-value interests (substring matches, so e.g. 'solarpunk art' counts)
        if 'solarpunk' in interest_lower:
            score += 30
            signals.add('solarpunk')
        if 'queer' in interest_lower:
            score += 30
            signals.add('queer')
        if 'cooperative' in interest_lower or 'intentional' in interest_lower:
            score += 20
            signals.add('cooperative')
        if 'privacy' in interest_lower:
            score += 10
            signals.add('privacy')
        if 'self-host' in interest_lower or 'selfhost' in interest_lower:
            score += 15
            signals.add('selfhosted')
        if 'home-assistant' in interest_lower:
            score += 15
            signals.add('home_automation')
        if 'foss' in interest_lower or 'open source' in interest_lower:
            score += 10
            signals.add('foss')
    # 2. score from scraped profile
    if scraped_profile:
        # repos: more public repos -> higher activity score (tiered)
        repos = scraped_profile.get('top_repos', [])
        if len(repos) >= 20:
            score += 20
        elif len(repos) >= 10:
            score += 10
        elif len(repos) >= 5:
            score += 5
        # languages
        languages = scraped_profile.get('languages', {})
        if 'Python' in languages or 'Rust' in languages:
            score += 5
            signals.add('modern_lang')
        # topics from repos (each aligned topic adds 10)
        topics = scraped_profile.get('topics', [])
        for topic in topics:
            if topic in ['self-hosted', 'home-assistant', 'privacy', 'foss']:
                score += 10
                signals.add(topic.replace('-', '_'))
        # followers (tiered reach bonus)
        followers = scraped_profile.get('followers', 0)
        if followers >= 100:
            score += 15
        elif followers >= 50:
            score += 10
        elif followers >= 10:
            score += 5
    # 3. add expanded signals (stated interests mapped to signal terms)
    expanded = expand_interests_to_signals(interests)
    signals.update(expanded)
    # update user row with the freshly computed values
    c.execute('''UPDATE priority_users
                 SET score = ?, signals = ?, scraped_profile = ?, last_scored_at = ?
                 WHERE id = ?''',
              (score, json.dumps(list(signals)), json.dumps(scraped_profile) if scraped_profile else None,
               datetime.now().isoformat(), user_id))
    conn.commit()
    return {'score': score, 'signals': list(signals)}
def auto_match_priority_user(conn, user_id, min_overlap=40):
    """
    automatically find and save matches for a priority user
    uses relationship filtering to skip already-connected people

    clears the user's previous priority_matches, rescans all humans with
    score >= 25, and persists the top 50 by overlap score. returns the
    full match list (not just the saved 50), each entry
    {'human', 'overlap_score', 'shared'}.
    """
    from scoutd.deep import check_already_connected
    c = conn.cursor()
    # get user
    c.execute('SELECT * FROM priority_users WHERE id = ?', (user_id,))
    row = c.fetchone()
    if not row:
        return []
    user = dict(row)
    # get user signals (signals column is json text)
    user_signals = set()
    if user.get('signals'):
        signals = json.loads(user['signals']) if isinstance(user['signals'], str) else user['signals']
        user_signals.update(signals)
    # also expand interests
    if user.get('interests'):
        interests = json.loads(user['interests']) if isinstance(user['interests'], str) else user['interests']
        user_signals.update(expand_interests_to_signals(interests))
    # clear old matches before recomputing
    c.execute('DELETE FROM priority_matches WHERE priority_user_id = ?', (user_id,))
    conn.commit()
    # get all humans above the candidate floor
    c.execute('SELECT * FROM humans WHERE score >= 25')
    columns = [d[0] for d in c.description]
    matches = []
    for row in c.fetchall():
        human = dict(zip(columns, row))
        # skip own profiles (github/reddit usernames matching the host)
        username = (human.get('username') or '').lower()
        if user.get('github') and username == user['github'].lower():
            continue
        if user.get('reddit') and username == user.get('reddit', '').lower():
            continue
        # check if already connected (only the github identity is checked here)
        user_human = {'username': user.get('github'), 'platform': 'github', 'extra': {}}
        connected, reason = check_already_connected(user_human, human)
        if connected:
            continue
        # get human signals (json text in db)
        human_signals = human.get('signals', [])
        if isinstance(human_signals, str):
            human_signals = json.loads(human_signals) if human_signals else []
        # calculate overlap: 10 points per shared signal
        shared = user_signals & set(human_signals)
        overlap_score = len(shared) * 10
        # high-value bonuses (stack on top of the shared-signal points)
        if 'queer' in human_signals:
            overlap_score += 40
            shared.add('queer (rare!)')
        if 'solarpunk' in human_signals:
            overlap_score += 30
            shared.add('solarpunk (rare!)')
        if 'cooperative' in human_signals:
            overlap_score += 20
            shared.add('cooperative (values)')
        # location bonus -- NOTE(review): the region list is hardcoded to the
        # pacific northwest; other hosts get no location bonus. confirm intended
        location = (human.get('location') or '').lower()
        user_location = (user.get('location') or '').lower()
        if user_location and location:
            if any(x in location for x in ['seattle', 'portland', 'pnw', 'washington', 'oregon']):
                if 'seattle' in user_location or 'pnw' in user_location:
                    overlap_score += 25
                    shared.add('PNW location!')
        if overlap_score >= min_overlap:
            matches.append({
                'human': human,
                'overlap_score': overlap_score,
                'shared': list(shared),
            })
    # sort and save top matches
    matches.sort(key=lambda x: x['overlap_score'], reverse=True)
    for m in matches[:50]:  # save top 50
        save_priority_match(conn, user_id, m['human']['id'], {
            'overlap_score': m['overlap_score'],
            'overlap_reasons': m['shared'],
        })
    return matches
def update_priority_user_profile(conn, user_id, profile_data):
    """apply non-empty fields from profile_data onto an existing priority user.

    only whitelisted column names are interpolated into the sql, so the
    dynamic SET clause cannot be injected through user data.
    """
    simple_fields = ['name', 'email', 'github', 'reddit', 'mastodon', 'lobsters',
                     'matrix', 'lemmy', 'discord', 'bluesky', 'location', 'bio', 'looking_for']
    assignments = []
    params = []
    for field in simple_fields:
        value = profile_data.get(field)
        if value:  # skip missing and falsy values, as before
            assignments.append(f'{field} = ?')
            params.append(value)
    if 'interests' in profile_data:
        assignments.append('interests = ?')
        params.append(json.dumps(profile_data['interests']))
    if assignments:
        params.append(user_id)
        conn.cursor().execute(
            f'''UPDATE priority_users SET {', '.join(assignments)} WHERE id = ?''', params)
        conn.commit()
    return True
def discover_host_user(conn, alias):
    """
    auto-discover a host user by their alias (username).
    scrapes github and discovers all connected social handles.
    also merges in HOST_ env vars from config for manual overrides.
    returns the priority user id

    side effects: creates or updates the priority_users row, re-scores it,
    and prints progress to stdout.
    """
    from scoutd.github import analyze_github_user
    from config import (HOST_NAME, HOST_EMAIL, HOST_GITHUB, HOST_MASTODON,
                        HOST_REDDIT, HOST_LEMMY, HOST_LOBSTERS, HOST_MATRIX,
                        HOST_DISCORD, HOST_BLUESKY, HOST_LOCATION, HOST_INTERESTS, HOST_LOOKING_FOR)
    print(f"connectd: discovering host user '{alias}'...")
    # scrape github for full profile
    profile = analyze_github_user(alias)
    if not profile:
        print(f" could not find github user '{alias}'")
        # still create from env vars if no github found (minimal skeleton profile)
        profile = {'name': HOST_NAME or alias, 'bio': '', 'location': HOST_LOCATION,
                   'contact': {}, 'extra': {'handles': {}}, 'topics': [], 'signals': []}
    print(f" found: {profile.get('name')} ({alias})")
    print(f" score: {profile.get('score', 0)}, signals: {len(profile.get('signals', []))}")
    # extract contact info discovered during scraping
    contact = profile.get('contact', {})
    handles = profile.get('extra', {}).get('handles', {})
    # merge in HOST_ env vars (override discovered values)
    if HOST_MASTODON:
        handles['mastodon'] = HOST_MASTODON
    if HOST_REDDIT:
        handles['reddit'] = HOST_REDDIT
    if HOST_LEMMY:
        handles['lemmy'] = HOST_LEMMY
    if HOST_LOBSTERS:
        handles['lobsters'] = HOST_LOBSTERS
    if HOST_MATRIX:
        handles['matrix'] = HOST_MATRIX
    if HOST_DISCORD:
        handles['discord'] = HOST_DISCORD
    if HOST_BLUESKY:
        handles['bluesky'] = HOST_BLUESKY
    # check if user already exists (keyed on github alias)
    c = conn.cursor()
    c.execute('SELECT id FROM priority_users WHERE github = ?', (alias,))
    existing = c.fetchone()
    # parse HOST_INTERESTS if provided (comma-separated, overrides scraped topics)
    interests = profile.get('topics', [])
    if HOST_INTERESTS:
        interests = [i.strip() for i in HOST_INTERESTS.split(',') if i.strip()]
    # env vars win over discovered values throughout
    user_data = {
        'name': HOST_NAME or profile.get('name') or alias,
        'email': HOST_EMAIL or contact.get('email'),
        'github': HOST_GITHUB or alias,
        'reddit': handles.get('reddit'),
        'mastodon': handles.get('mastodon') or contact.get('mastodon'),
        'lobsters': handles.get('lobsters'),
        'matrix': handles.get('matrix') or contact.get('matrix'),
        'lemmy': handles.get('lemmy') or contact.get('lemmy'),
        'discord': handles.get('discord'),
        'bluesky': handles.get('bluesky') or contact.get('bluesky'),
        'location': HOST_LOCATION or profile.get('location'),
        'bio': profile.get('bio'),
        'interests': interests,
        'looking_for': HOST_LOOKING_FOR,
    }
    if existing:
        # update existing user in place
        user_id = existing['id']
        update_priority_user_profile(conn, user_id, user_data)
        print(f" updated existing priority user (id={user_id})")
    else:
        # create new user
        user_id = add_priority_user(conn, user_data)
        print(f" created new priority user (id={user_id})")
    # score the user from the scraped github data
    scraped_profile = {
        'top_repos': profile.get('extra', {}).get('top_repos', []),
        'languages': profile.get('languages', {}),
        'topics': profile.get('topics', []),
        'followers': profile.get('extra', {}).get('followers', 0),
    }
    score_result = score_priority_user(conn, user_id, scraped_profile)
    print(f" scored: {score_result.get('score')}, {len(score_result.get('signals', []))} signals")
    # print discovered handles
    print(f" discovered handles:")
    for platform, handle in handles.items():
        print(f" {platform}: {handle}")
    return user_id
def get_host_user(conn):
    """return the first active priority user (the host), or None."""
    active = get_priority_users(conn)
    if not active:
        return None
    return active[0]

BIN
connectd/icon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

View file

@ -0,0 +1,10 @@
"""
introd - outreach module
drafts intros, queues for human review, sends via appropriate channel
"""
from .draft import draft_intro
from .review import get_pending_intros, approve_intro, reject_intro
from .send import send_intro
__all__ = ['draft_intro', 'get_pending_intros', 'approve_intro', 'reject_intro', 'send_intro']

509
connectd/introd/deliver.py Normal file
View file

@ -0,0 +1,509 @@
"""
introd/deliver.py - intro delivery via multiple channels
supports:
- email (smtp)
- mastodon dm (if they allow dms)
- bluesky dm (via AT Protocol)
- matrix dm (creates DM room and sends message)
- github issue (opens intro as issue on their most active repo)
- manual queue (for review before sending)
contact method is determined by ACTIVITY-BASED SELECTION:
- picks the platform where the user is MOST ACTIVE
- verified handles (from rel="me" links) get a bonus
NOTE: reddit is NOT a delivery method - it's discovery only.
reddit-discovered users are contacted via their external links.
"""
import os
import json
import smtplib
import requests
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime
from pathlib import Path
# config from env - no hardcoded credentials
# smtp (email delivery); port 465 matches the SMTP_SSL usage below
SMTP_HOST = os.environ.get('SMTP_HOST', '')
SMTP_PORT = int(os.environ.get('SMTP_PORT', 465))
SMTP_USER = os.environ.get('SMTP_USER', '')
SMTP_PASS = os.environ.get('SMTP_PASS', '')
FROM_EMAIL = os.environ.get('FROM_EMAIL', '')
# per-platform credentials; an empty value disables that delivery channel
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '')
MASTODON_TOKEN = os.environ.get('MASTODON_TOKEN', '')
MASTODON_INSTANCE = os.environ.get('MASTODON_INSTANCE', '')
BLUESKY_HANDLE = os.environ.get('BLUESKY_HANDLE', '')
BLUESKY_APP_PASSWORD = os.environ.get('BLUESKY_APP_PASSWORD', '')
MATRIX_HOMESERVER = os.environ.get('MATRIX_HOMESERVER', '')
MATRIX_USER_ID = os.environ.get('MATRIX_USER_ID', '')
MATRIX_ACCESS_TOKEN = os.environ.get('MATRIX_ACCESS_TOKEN', '')
# delivery log: json files under <package>/data/ tracking sent/failed/queued intros
DELIVERY_LOG = Path(__file__).parent.parent / 'data' / 'delivery_log.json'
MANUAL_QUEUE = Path(__file__).parent.parent / 'data' / 'manual_queue.json'
def load_delivery_log():
    """read delivery history from disk; empty structure on first run."""
    if not DELIVERY_LOG.exists():
        return {'sent': [], 'failed': [], 'queued': []}
    return json.loads(DELIVERY_LOG.read_text())
def save_delivery_log(log):
    """save delivery history, creating the data directory on first use"""
    DELIVERY_LOG.parent.mkdir(parents=True, exist_ok=True)
    DELIVERY_LOG.write_text(json.dumps(log, indent=2))
def load_manual_queue():
    """read the manual review queue from disk; empty list on first run."""
    if not MANUAL_QUEUE.exists():
        return []
    return json.loads(MANUAL_QUEUE.read_text())
def save_manual_queue(queue):
    """save manual review queue, creating the data directory on first use"""
    MANUAL_QUEUE.parent.mkdir(parents=True, exist_ok=True)
    MANUAL_QUEUE.write_text(json.dumps(queue, indent=2))
def already_contacted(recipient_id):
    """True when an intro has already been sent to this recipient id."""
    history = load_delivery_log()
    return any(entry.get('recipient_id') == recipient_id
               for entry in history.get('sent', []))
def send_email(to_email, subject, body, dry_run=False):
    """send an intro as a plain-text + html email over smtp/ssl.

    returns (success, error); error is None on success.

    fix: the html alternative previously interpolated the raw body into
    markup, so '<', '>' or '&' in an intro draft would corrupt the html
    part (or inject markup). the body is now escaped before the newline ->
    <br> conversion.
    """
    if dry_run:
        print(f" [dry run] would email {to_email}")
        print(f" subject: {subject}")
        print(f" body preview: {body[:100]}...")
        return True, "dry run"
    try:
        import html  # stdlib; local import keeps module namespace unchanged
        msg = MIMEMultipart('alternative')
        msg['Subject'] = subject
        msg['From'] = FROM_EMAIL
        msg['To'] = to_email
        # plain text part
        text_part = MIMEText(body, 'plain')
        msg.attach(text_part)
        # html version (simple): escape first, then turn newlines into <br>
        html_body = html.escape(body).replace('\n', '<br>')
        html_part = MIMEText(f"<html><body><p>{html_body}</p></body></html>", 'html')
        msg.attach(html_part)
        with smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) as server:
            server.login(SMTP_USER, SMTP_PASS)
            # NOTE(review): envelope sender is SMTP_USER while the From header
            # is FROM_EMAIL -- presumably intentional for smtp auth; confirm
            server.sendmail(SMTP_USER, to_email, msg.as_string())
        return True, None
    except Exception as e:
        return False, str(e)
def create_github_issue(owner, repo, title, body, dry_run=False):
    """open an intro as a github issue; returns (success, url_or_error)."""
    if not GITHUB_TOKEN:
        return False, "GITHUB_TOKEN not set"
    if dry_run:
        print(f" [dry run] would create issue on {owner}/{repo}")
        print(f" title: {title}")
        return True, "dry run"
    try:
        resp = requests.post(
            f"https://api.github.com/repos/{owner}/{repo}/issues",
            headers={
                'Authorization': f'token {GITHUB_TOKEN}',
                'Accept': 'application/vnd.github.v3+json',
            },
            json={
                'title': title,
                'body': body,
                'labels': ['introduction', 'community'],
            },
            timeout=30,
        )
        # 201 Created is the only success response for issue creation
        if resp.status_code != 201:
            return False, f"github api error: {resp.status_code} - {resp.text}"
        return True, resp.json().get('html_url')
    except Exception as e:
        return False, str(e)
def send_mastodon_dm(recipient_acct, message, dry_run=False):
    """deliver an intro as a direct-visibility mastodon status."""
    if not MASTODON_TOKEN:
        return False, "MASTODON_TOKEN not set"
    if dry_run:
        print(f" [dry run] would DM {recipient_acct}")
        print(f" message preview: {message[:100]}...")
        return True, "dry run"
    try:
        # a mastodon "dm" is a status with direct visibility that mentions
        # the recipient
        resp = requests.post(
            f"https://{MASTODON_INSTANCE}/api/v1/statuses",
            headers={
                'Authorization': f'Bearer {MASTODON_TOKEN}',
                'Content-Type': 'application/json',
            },
            json={
                'status': f"@{recipient_acct} {message}",
                'visibility': 'direct',
            },
            timeout=30,
        )
        if resp.status_code in (200, 201):
            return True, resp.json().get('url')
        return False, f"mastodon api error: {resp.status_code} - {resp.text}"
    except Exception as e:
        return False, str(e)
def send_bluesky_dm(recipient_handle, message, dry_run=False):
    """send bluesky direct message via AT Protocol.

    flow: create a session, resolve the recipient handle to a DID, fetch
    the 1:1 conversation, then post the message into it. returns
    (success, detail); detail is an error string on failure.
    """
    if not BLUESKY_APP_PASSWORD:
        return False, "BLUESKY_APP_PASSWORD not set"
    if dry_run:
        print(f" [dry run] would DM {recipient_handle} on bluesky")
        print(f" message preview: {message[:100]}...")
        return True, "dry run"
    try:
        # authenticate with bluesky
        auth_url = "https://bsky.social/xrpc/com.atproto.server.createSession"
        auth_resp = requests.post(
            auth_url,
            json={
                'identifier': BLUESKY_HANDLE,
                'password': BLUESKY_APP_PASSWORD,
            },
            timeout=30,
        )
        if auth_resp.status_code != 200:
            return False, f"bluesky auth failed: {auth_resp.status_code}"
        auth_data = auth_resp.json()
        access_token = auth_data.get('accessJwt')
        did = auth_data.get('did')  # NOTE(review): own DID, currently unused
        # resolve recipient DID from their handle
        resolve_url = f"https://bsky.social/xrpc/com.atproto.identity.resolveHandle"
        resolve_resp = requests.get(
            resolve_url,
            params={'handle': recipient_handle.lstrip('@')},
            timeout=30,
        )
        if resolve_resp.status_code != 200:
            return False, f"couldn't resolve handle {recipient_handle}"
        recipient_did = resolve_resp.json().get('did')
        # create chat/DM (using convo namespace)
        # first get or create conversation
        convo_url = "https://bsky.social/xrpc/chat.bsky.convo.getConvoForMembers"
        convo_resp = requests.get(
            convo_url,
            headers={'Authorization': f'Bearer {access_token}'},
            params={'members': [recipient_did]},
            timeout=30,
        )
        if convo_resp.status_code != 200:
            # try creating conversation
            return False, f"couldn't get/create conversation: {convo_resp.status_code}"
        convo_id = convo_resp.json().get('convo', {}).get('id')
        # send message into the conversation
        msg_url = "https://bsky.social/xrpc/chat.bsky.convo.sendMessage"
        msg_resp = requests.post(
            msg_url,
            headers={
                'Authorization': f'Bearer {access_token}',
                'Content-Type': 'application/json',
            },
            json={
                'convoId': convo_id,
                'message': {'text': message},
            },
            timeout=30,
        )
        if msg_resp.status_code in [200, 201]:
            return True, f"sent to {recipient_handle}"
        else:
            return False, f"bluesky dm failed: {msg_resp.status_code} - {msg_resp.text}"
    except Exception as e:
        return False, str(e)
def send_matrix_dm(recipient_mxid, message, dry_run=False):
    """send matrix direct message.

    creates a fresh private DM room inviting the recipient, then posts the
    message into it. returns (success, detail); detail is an error string
    on failure. NOTE(review): a new room is created on every call -- the
    "check if we already have a DM room" step mentioned below is not
    implemented, so repeated sends to the same person create new rooms.
    """
    if not MATRIX_ACCESS_TOKEN:
        return False, "MATRIX_ACCESS_TOKEN not set"
    if dry_run:
        print(f" [dry run] would DM {recipient_mxid} on matrix")
        print(f" message preview: {message[:100]}...")
        return True, "dry run"
    try:
        # create or get direct room with recipient
        # first, check if we already have a DM room
        headers = {'Authorization': f'Bearer {MATRIX_ACCESS_TOKEN}'}
        # create a new DM room
        create_room_resp = requests.post(
            f'{MATRIX_HOMESERVER}/_matrix/client/v3/createRoom',
            headers=headers,
            json={
                'is_direct': True,
                'invite': [recipient_mxid],
                'preset': 'trusted_private_chat',
            },
            timeout=30,
        )
        if create_room_resp.status_code not in [200, 201]:
            return False, f"matrix room creation failed: {create_room_resp.status_code} - {create_room_resp.text}"
        room_id = create_room_resp.json().get('room_id')
        # send message to room; the PUT endpoint requires a client-chosen
        # transaction id, derived here from the current time in ms
        import time
        txn_id = str(int(time.time() * 1000))
        msg_resp = requests.put(
            f'{MATRIX_HOMESERVER}/_matrix/client/v3/rooms/{room_id}/send/m.room.message/{txn_id}',
            headers=headers,
            json={
                'msgtype': 'm.text',
                'body': message,
            },
            timeout=30,
        )
        if msg_resp.status_code in [200, 201]:
            return True, f"sent to {recipient_mxid} in {room_id}"
        else:
            return False, f"matrix send failed: {msg_resp.status_code} - {msg_resp.text}"
    except Exception as e:
        return False, str(e)
def add_to_manual_queue(intro_data):
    """append an intro to the manual review queue with pending status."""
    entry = dict(intro_data)
    entry['queued_at'] = datetime.now().isoformat()
    entry['status'] = 'pending'
    queue = load_manual_queue()
    queue.append(entry)
    save_manual_queue(queue)
    return True
def determine_best_contact(human):
    """pick the delivery channel where this human is most active."""
    from introd.groq_draft import determine_contact_method as activity_based_contact
    method, info = activity_based_contact(human)
    # github_issue comes back as an "owner/repo" string; split it into the
    # dict shape the delivery layer expects
    if method == 'github_issue' and isinstance(info, str) and '/' in info:
        owner, repo = info.split('/', 1)
        return method, {'owner': owner, 'repo': repo}
    return method, info
def deliver_intro(match_data, intro_draft, dry_run=False):
    """
    deliver an intro via the best available method
    match_data: {human_a, human_b, overlap_score, overlap_reasons}
    intro_draft: the text to send (from groq)

    returns (success, error, method). every attempt is appended to the
    delivery log (sent or failed); a recipient is never contacted twice.
    """
    recipient = match_data.get('human_b', {})
    # recipient identity key, e.g. "github:alice"
    recipient_id = f"{recipient.get('platform')}:{recipient.get('username')}"
    # check if already contacted
    if already_contacted(recipient_id):
        return False, "already contacted", None
    # determine contact method (activity-based selection)
    method, contact_info = determine_best_contact(recipient)
    log = load_delivery_log()
    result = {
        'recipient_id': recipient_id,
        'recipient_name': recipient.get('name') or recipient.get('username'),
        'method': method,
        'contact_info': contact_info,
        'overlap_score': match_data.get('overlap_score'),
        'timestamp': datetime.now().isoformat(),
    }
    success = False
    error = None
    # dispatch on the chosen channel; each sender returns (success, error)
    if method == 'email':
        subject = f"someone you might want to know - connectd"
        success, error = send_email(contact_info, subject, intro_draft, dry_run)
    elif method == 'mastodon':
        success, error = send_mastodon_dm(contact_info, intro_draft, dry_run)
    elif method == 'bluesky':
        success, error = send_bluesky_dm(contact_info, intro_draft, dry_run)
    elif method == 'matrix':
        success, error = send_matrix_dm(contact_info, intro_draft, dry_run)
    elif method == 'discord':
        from scoutd.discord import send_discord_dm
        success, error = send_discord_dm(contact_info, intro_draft, dry_run)
    elif method == 'lemmy':
        from scoutd.lemmy import send_lemmy_dm
        success, error = send_lemmy_dm(contact_info, intro_draft, dry_run)
    elif method == 'github_issue':
        owner = contact_info.get('owner')
        repo = contact_info.get('repo')
        title = "community introduction from connectd"
        # format for github
        github_body = f"""hey {recipient.get('name') or recipient.get('username')},
{intro_draft}
---
*this is an automated introduction from [connectd](https://github.com/connectd-daemon), a daemon that finds isolated builders with aligned values and connects them. if this feels spammy, i apologize - you can close this issue and we won't reach out again.*
"""
        success, error = create_github_issue(owner, repo, title, github_body, dry_run)
    elif method == 'manual':
        # no automatic channel available: add to review queue instead
        add_to_manual_queue({
            'match': match_data,
            'draft': intro_draft,
            'recipient': recipient,
        })
        success = True
        error = "added to manual queue"
    # log result (success and failure lists feed get_delivery_stats)
    result['success'] = success
    result['error'] = error
    if success:
        log['sent'].append(result)
    else:
        log['failed'].append(result)
    save_delivery_log(log)
    return success, error, method
def deliver_batch(matches_with_intros, dry_run=False):
    """
    deliver intros for a batch of matches

    matches_with_intros: list of {match_data, intro_draft} (older entries may
    use the {match, draft} key names instead)
    returns: list of per-recipient result dicts
    """
    results = []
    for entry in matches_with_intros:
        match_data = entry.get('match_data') or entry.get('match')
        intro_draft = entry.get('intro_draft') or entry.get('draft')
        if not (match_data and intro_draft):
            continue
        ok, err, channel = deliver_intro(match_data, intro_draft, dry_run)
        username = match_data.get('human_b', {}).get('username')
        results.append({
            'recipient': username,
            'method': channel,
            'success': ok,
            'error': err,
        })
        print(f"  {username}: {channel} - {'ok' if ok else err}")
    return results
def get_delivery_stats():
    """get delivery statistics

    returns counts of sent/failed/queued deliveries, the number of pending
    manual-review items, and a per-method breakdown of successful sends.
    """
    log = load_delivery_log()
    queue = load_manual_queue()
    sent = log.get('sent', [])
    # tally successful sends per method in a single pass instead of
    # re-scanning the sent list once per method; seed the original four keys
    # so callers always find them, but also count newer channels (bluesky,
    # matrix, discord, lemmy) that were previously dropped from the breakdown
    by_method = {'email': 0, 'mastodon': 0, 'github_issue': 0, 'manual': 0}
    for entry in sent:
        method = entry.get('method')
        if method:
            by_method[method] = by_method.get(method, 0) + 1
    return {
        'sent': len(sent),
        'failed': len(log.get('failed', [])),
        'queued': len(log.get('queued', [])),
        'manual_pending': len([q for q in queue if q.get('status') == 'pending']),
        'by_method': by_method,
    }
def review_manual_queue():
    """review and process manual queue

    prints every pending manual-outreach item and returns the pending list
    (returns None when the queue is empty).
    """
    pending = [item for item in load_manual_queue() if item.get('status') == 'pending']
    if not pending:
        print("no items in manual queue")
        return
    print(f"\n{len(pending)} items pending review:\n")
    for idx, item in enumerate(pending, 1):
        who = item.get('recipient', {})
        match_info = item.get('match', {})
        print(f"[{idx}] {who.get('name') or who.get('username')}")
        print(f"    platform: {who.get('platform')}")
        print(f"    url: {who.get('url')}")
        print(f"    overlap: {match_info.get('overlap_score')}")
        print(f"    draft preview: {item.get('draft', '')[:80]}...")
        print()
    return pending

210
connectd/introd/draft.py Normal file
View file

@ -0,0 +1,210 @@
"""
introd/draft.py - AI writes intro messages referencing both parties' work
"""
import json
# intro template - transparent about being AI, neutral third party
# placeholders (filled via str.format in draft_intro): recipient_name,
# recipient_summary, other_name, other_summary, overlap_summary, other_url
INTRO_TEMPLATE = """hi {recipient_name},
i'm an AI that connects isolated builders working on similar things.
you're building: {recipient_summary}
{other_name} is building: {other_summary}
overlap: {overlap_summary}
thought you might benefit from knowing each other.
their work: {other_url}
no pitch. just connection. ignore if not useful.
- connectd
"""
# shorter version for platforms with character limits
# (used for mastodon/reddit channels; same placeholders minus the outro)
SHORT_TEMPLATE = """hi {recipient_name} - i'm an AI connecting aligned builders.
you: {recipient_summary}
{other_name}: {other_summary}
overlap: {overlap_summary}
their work: {other_url}
no pitch, just connection.
"""
def summarize_human(human_data):
    """generate a brief summary of what someone is building/interested in

    human_data: dict with platform, signals (list or JSON string), and extra
    (dict or JSON string) as produced by the scouts.
    returns: a short ' | '-joined summary string. never raises on missing or
    empty fields; falls back to "builder on <platform>".
    """
    parts = []
    # platform context
    platform = human_data.get('platform', '')
    # signals/interests - may be stored as a JSON string in the db; an empty
    # string would crash json.loads, so guard it (matches groq_draft's style)
    signals = human_data.get('signals', [])
    if isinstance(signals, str):
        signals = json.loads(signals) if signals else []
    # extra data - same JSON-string guard
    extra = human_data.get('extra', {})
    if isinstance(extra, str):
        extra = json.loads(extra) if extra else {}
    # build summary based on available data
    topics = extra.get('topics', [])
    languages = list(extra.get('languages', {}).keys())[:3]
    repo_count = extra.get('repo_count', 0)
    subreddits = extra.get('subreddits', [])
    if platform == 'github':
        if topics:
            parts.append(f"working on {', '.join(topics[:3])}")
        if languages:
            parts.append(f"using {', '.join(languages)}")
        if repo_count > 10:
            parts.append(f"({repo_count} repos)")
    elif platform == 'reddit':
        if subreddits:
            parts.append(f"active in r/{', r/'.join(subreddits[:3])}")
    elif platform == 'mastodon':
        instance = extra.get('instance', '')
        if instance:
            parts.append(f"on {instance}")
    elif platform == 'lobsters':
        karma = extra.get('karma', 0)
        if karma > 50:
            parts.append(f"active on lobste.rs ({karma} karma)")
    # add key signals (the values-aligned subset worth surfacing)
    key_signals = [s for s in signals if s in ['selfhosted', 'privacy', 'cooperative',
                                              'solarpunk', 'intentional_community',
                                              'home_automation', 'foss']]
    if key_signals:
        parts.append(f"interested in {', '.join(key_signals[:3])}")
    if not parts:
        parts.append(f"builder on {platform}")
    return ' | '.join(parts)
def summarize_overlap(overlap_data):
    """generate overlap summary

    overlap_data: match dict with overlap_reasons (list or JSON string) and
    optionally shared_signals.
    returns: a short human-readable overlap string with a generic fallback.
    """
    reasons = overlap_data.get('overlap_reasons', [])
    if isinstance(reasons, str):
        # may be stored as a JSON string; an empty string means no reasons
        # (json.loads('') would raise)
        reasons = json.loads(reasons) if reasons else []
    if reasons:
        return ' | '.join(reasons[:3])
    # fallback
    shared = overlap_data.get('shared_signals', [])
    if shared:
        return f"shared interests: {', '.join(shared[:3])}"
    return "aligned values and interests"
def draft_intro(match_data, recipient='a'):
    """
    draft an intro message for a match

    match_data: dict with human_a, human_b, overlap info
    recipient: 'a' or 'b' - who receives this intro
    returns: dict with draft text, channel, channel_address, and metadata
    """
    if recipient == 'a':
        recipient_human = match_data['human_a']
        other_human = match_data['human_b']
    else:
        recipient_human = match_data['human_b']
        other_human = match_data['human_a']
    # get names
    recipient_name = recipient_human.get('name') or recipient_human.get('username', 'friend')
    other_name = other_human.get('name') or other_human.get('username', 'someone')
    # generate summaries
    recipient_summary = summarize_human(recipient_human)
    other_summary = summarize_human(other_human)
    overlap_summary = summarize_overlap(match_data)
    # other's url
    other_url = other_human.get('url', '')
    # determine best channel; contact may be a JSON string from the db and an
    # empty string would crash json.loads, so guard it
    contact = recipient_human.get('contact', {})
    if isinstance(contact, str):
        contact = json.loads(contact) if contact else {}
    channel = None
    channel_address = None
    # prefer email if available
    if contact.get('email'):
        channel = 'email'
        channel_address = contact['email']
    # github issue/discussion
    elif recipient_human.get('platform') == 'github':
        channel = 'github'
        channel_address = recipient_human.get('url')
    # mastodon DM
    elif recipient_human.get('platform') == 'mastodon':
        channel = 'mastodon'
        channel_address = recipient_human.get('username')
    # reddit message
    elif recipient_human.get('platform') == 'reddit':
        channel = 'reddit'
        channel_address = recipient_human.get('username')
    else:
        channel = 'manual'
        channel_address = recipient_human.get('url')
    # choose template based on channel (short form for char-limited platforms)
    if channel in ['mastodon', 'reddit']:
        template = SHORT_TEMPLATE
    else:
        template = INTRO_TEMPLATE
    # render draft
    draft = template.format(
        recipient_name=recipient_name.split()[0] if recipient_name else 'friend',  # first name only
        recipient_summary=recipient_summary,
        other_name=other_name.split()[0] if other_name else 'someone',
        other_summary=other_summary,
        overlap_summary=overlap_summary,
        other_url=other_url,
    )
    return {
        'recipient_human': recipient_human,
        'other_human': other_human,
        'channel': channel,
        'channel_address': channel_address,
        'draft': draft,
        'overlap_score': match_data.get('overlap_score', 0),
        'match_id': match_data.get('id'),
    }
def draft_intros_for_match(match_data):
    """
    draft intros for both parties in a match

    returns: [intro_for_a, intro_for_b] in that order
    """
    return [draft_intro(match_data, recipient=side) for side in ('a', 'b')]

View file

@ -0,0 +1,437 @@
"""
introd/groq_draft.py - groq llama 4 maverick for smart intro drafting
uses groq api to generate personalized, natural intro messages
that don't sound like ai-generated slop
"""
import os
import json
import requests
from datetime import datetime
# groq api configuration, overridable via environment variables
GROQ_API_KEY = os.environ.get('GROQ_API_KEY', '')
GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
# NOTE(review): the module docstring mentions "llama 4 maverick" but the
# default model here is llama-3.1-70b-versatile - confirm which is intended
MODEL = os.environ.get('GROQ_MODEL', 'llama-3.1-70b-versatile')
def determine_contact_method(human):
    """
    determine best contact method based on WHERE THEY'RE MOST ACTIVE

    don't use fixed hierarchy - analyze activity per platform:
    - count posts/commits/activity
    - weight by recency (last 30 days matters more)
    - contact them where they already are
    - fall back to email only if no social activity

    human: discovered-person dict (platform, username, extra, contact, ...)
    returns: (method, info) where method is one of 'github_issue', 'mastodon',
             'bluesky', 'twitter', 'lobsters', 'matrix', 'lemmy', 'email', or
             'manual'; info is the channel-specific handle/address (None for
             'manual').
    """
    from datetime import datetime, timedelta
    # extra/contact may be JSON strings when loaded from the db
    extra = human.get('extra', {})
    if isinstance(extra, str):
        extra = json.loads(extra) if extra else {}
    # handle nested extra.extra from old save format
    if 'extra' in extra and isinstance(extra['extra'], dict):
        extra = {**extra, **extra['extra']}
    contact = human.get('contact', {})
    if isinstance(contact, str):
        contact = json.loads(contact) if contact else {}
    # collect activity scores per platform
    activity_scores = {}
    now = datetime.now()
    thirty_days_ago = now - timedelta(days=30)
    ninety_days_ago = now - timedelta(days=90)
    # github activity
    github_username = human.get('username') if human.get('platform') == 'github' else extra.get('github')
    if github_username:
        github_score = 0
        top_repos = extra.get('top_repos', [])
        for repo in top_repos:
            # recent commits weight more
            pushed_at = repo.get('pushed_at', '')
            if pushed_at:
                try:
                    push_date = datetime.fromisoformat(pushed_at.replace('Z', '+00:00')).replace(tzinfo=None)
                    if push_date > thirty_days_ago:
                        github_score += 10  # very recent
                    elif push_date > ninety_days_ago:
                        github_score += 5  # somewhat recent
                    else:
                        github_score += 1  # old but exists
                except (ValueError, TypeError):
                    # unparsable timestamp still proves the repo exists;
                    # narrow except so ctrl-c etc. are not swallowed
                    github_score += 1
            # stars indicate engagement
            github_score += min(repo.get('stars', 0) // 10, 5)
        # commit activity from deep scrape
        commit_count = extra.get('commit_count', 0)
        github_score += min(commit_count // 10, 20)
        if github_score > 0:
            # point at the top repo when one is named, otherwise just the
            # user; .get avoids a KeyError on repo records without 'name'
            top_repo_name = top_repos[0].get('name') if top_repos else None
            activity_scores['github_issue'] = {
                'score': github_score,
                'info': f"{github_username}/{top_repo_name}" if top_repo_name else github_username
            }
    # mastodon activity
    mastodon_handle = extra.get('mastodon') or contact.get('mastodon')
    if mastodon_handle:
        mastodon_score = 0
        statuses_count = extra.get('mastodon_statuses', 0) or human.get('statuses_count', 0)
        # high post count = active user
        if statuses_count > 1000:
            mastodon_score += 30
        elif statuses_count > 500:
            mastodon_score += 20
        elif statuses_count > 100:
            mastodon_score += 10
        elif statuses_count > 0:
            mastodon_score += 5
        # platform bonus for fediverse (values-aligned)
        mastodon_score += 10
        # bonus if handle was discovered via rel="me" or similar verification
        # (having a handle linked from their website = they want to be contacted there)
        handles = extra.get('handles', {})
        if handles.get('mastodon') == mastodon_handle:
            mastodon_score += 15  # verified handle bonus
        if mastodon_score > 0:
            activity_scores['mastodon'] = {'score': mastodon_score, 'info': mastodon_handle}
    # bluesky activity
    bluesky_handle = extra.get('bluesky') or contact.get('bluesky')
    if bluesky_handle:
        bluesky_score = 0
        posts_count = extra.get('bluesky_posts', 0) or human.get('posts_count', 0)
        if posts_count > 500:
            bluesky_score += 25
        elif posts_count > 100:
            bluesky_score += 15
        elif posts_count > 0:
            bluesky_score += 5
        # newer platform, slightly lower weight
        bluesky_score += 5
        if bluesky_score > 0:
            activity_scores['bluesky'] = {'score': bluesky_score, 'info': bluesky_handle}
    # twitter activity
    twitter_handle = extra.get('twitter') or contact.get('twitter')
    if twitter_handle:
        twitter_score = 0
        tweets_count = extra.get('twitter_tweets', 0)
        if tweets_count > 1000:
            twitter_score += 20
        elif tweets_count > 100:
            twitter_score += 10
        elif tweets_count > 0:
            twitter_score += 5
        # if we found them via twitter hashtags, they're active there
        if human.get('platform') == 'twitter':
            twitter_score += 15
        if twitter_score > 0:
            activity_scores['twitter'] = {'score': twitter_score, 'info': twitter_handle}
    # NOTE: reddit is DISCOVERY ONLY, not a contact method
    # we find users on reddit but reach out via their external links (github, mastodon, etc.)
    # reddit-only users go to manual_queue for review
    # lobsters activity
    lobsters_username = extra.get('lobsters') or contact.get('lobsters')
    if lobsters_username or human.get('platform') == 'lobsters':
        lobsters_score = 0
        lobsters_username = lobsters_username or human.get('username')
        karma = extra.get('lobsters_karma', 0) or human.get('karma', 0)
        # lobsters is invite-only, high signal
        lobsters_score += 15
        if karma > 100:
            lobsters_score += 15
        elif karma > 50:
            lobsters_score += 10
        elif karma > 0:
            lobsters_score += 5
        if lobsters_score > 0:
            activity_scores['lobsters'] = {'score': lobsters_score, 'info': lobsters_username}
    # matrix activity
    matrix_id = extra.get('matrix') or contact.get('matrix')
    if matrix_id:
        matrix_score = 0
        # matrix users are typically privacy-conscious and technical
        matrix_score += 15  # platform bonus for decentralized chat
        # bonus if handle was discovered via rel="me" verification
        handles = extra.get('handles', {})
        if handles.get('matrix') == matrix_id:
            matrix_score += 10  # verified handle bonus
        if matrix_score > 0:
            activity_scores['matrix'] = {'score': matrix_score, 'info': matrix_id}
    # lemmy activity (fediverse)
    lemmy_username = human.get('username') if human.get('platform') == 'lemmy' else extra.get('lemmy')
    if lemmy_username:
        lemmy_score = 0
        # lemmy is fediverse - high values alignment
        lemmy_score += 20  # fediverse platform bonus
        post_count = extra.get('post_count', 0)
        comment_count = extra.get('comment_count', 0)
        if post_count > 100:
            lemmy_score += 15
        elif post_count > 50:
            lemmy_score += 10
        elif post_count > 10:
            lemmy_score += 5
        if comment_count > 500:
            lemmy_score += 10
        elif comment_count > 100:
            lemmy_score += 5
        if lemmy_score > 0:
            activity_scores['lemmy'] = {'score': lemmy_score, 'info': lemmy_username}
    # pick highest activity platform
    if activity_scores:
        best_platform = max(activity_scores.items(), key=lambda x: x[1]['score'])
        return best_platform[0], best_platform[1]['info']
    # fall back to email ONLY if no social activity detected
    email = extra.get('email') or contact.get('email')
    # also check emails list
    if not email:
        emails = extra.get('emails') or contact.get('emails') or []
        for e in emails:
            if e and '@' in e and 'noreply' not in e.lower():
                email = e
                break
    if email and '@' in email and 'noreply' not in email.lower():
        return 'email', email
    # last resort: manual
    return 'manual', None
def draft_intro_with_llm(match_data, recipient='a', dry_run=False):
    """
    use groq llama 4 maverick to draft a personalized intro

    match_data should contain:
    - human_a: the first person
    - human_b: the second person
    - overlap_score: numeric score
    - overlap_reasons: list of why they match
    recipient: 'a' or 'b' - who we're writing to

    returns: (result_dict, None) on success or (None, error_message) on
    failure; result_dict carries the draft text plus the contact method
    chosen for the recipient.

    NOTE(review): dry_run is accepted but unused here - the groq api call
    is made regardless; confirm whether a dry run should skip the request.
    """
    if not GROQ_API_KEY:
        return None, "GROQ_API_KEY not set"
    # determine recipient and other person
    if recipient == 'a':
        to_person = match_data.get('human_a', {})
        other_person = match_data.get('human_b', {})
    else:
        to_person = match_data.get('human_b', {})
        other_person = match_data.get('human_a', {})
    # build context
    to_name = to_person.get('name') or to_person.get('username', 'friend')
    other_name = other_person.get('name') or other_person.get('username', 'someone')
    # signals/overlap_reasons may be JSON strings when loaded from the db;
    # empty strings are treated as empty lists
    to_signals = to_person.get('signals', [])
    if isinstance(to_signals, str):
        to_signals = json.loads(to_signals) if to_signals else []
    other_signals = other_person.get('signals', [])
    if isinstance(other_signals, str):
        other_signals = json.loads(other_signals) if other_signals else []
    overlap_reasons = match_data.get('overlap_reasons', [])
    if isinstance(overlap_reasons, str):
        overlap_reasons = json.loads(overlap_reasons) if overlap_reasons else []
    # parse extra data
    to_extra = to_person.get('extra', {})
    other_extra = other_person.get('extra', {})
    if isinstance(to_extra, str):
        to_extra = json.loads(to_extra) if to_extra else {}
    if isinstance(other_extra, str):
        other_extra = json.loads(other_extra) if other_extra else {}
    # build profile summaries (plain text injected into the user prompt)
    to_profile = f"""
name: {to_name}
platform: {to_person.get('platform', 'unknown')}
bio: {to_person.get('bio') or 'no bio'}
location: {to_person.get('location') or 'unknown'}
signals: {', '.join(to_signals[:8])}
repos: {len(to_extra.get('top_repos', []))} public repos
languages: {', '.join(to_extra.get('languages', {}).keys())}
"""
    other_profile = f"""
name: {other_name}
platform: {other_person.get('platform', 'unknown')}
bio: {other_person.get('bio') or 'no bio'}
location: {other_person.get('location') or 'unknown'}
signals: {', '.join(other_signals[:8])}
repos: {len(other_extra.get('top_repos', []))} public repos
languages: {', '.join(other_extra.get('languages', {}).keys())}
url: {other_person.get('url', '')}
"""
    # build prompt - system prompt sets tone rules, user prompt carries data
    system_prompt = """you are connectd, an ai that connects isolated builders who share values but don't know each other yet.
your job is to write a short, genuine intro message to one person about another person they might want to know.
rules:
- be brief (3-5 sentences max)
- be genuine, not salesy or fake
- focus on WHY they might want to connect, not just WHAT they have in common
- don't be cringe or use buzzwords
- lowercase preferred (casual tone)
- no emojis unless the person's profile suggests they'd like them
- mention specific things from their profiles, not generic "you both like open source"
- end with a simple invitation, not a hard sell
- sign off as "- connectd" (lowercase)
bad examples:
- "I noticed you're both passionate about..." (too formal)
- "You two would be PERFECT for each other!" (too salesy)
- "As a fellow privacy enthusiast..." (cringe)
good examples:
- "hey, saw you're building X. there's someone else working on similar stuff in Y who might be interesting to know."
- "you might want to check out Z's work on federated systems - similar approach to what you're doing with A."
"""
    user_prompt = f"""write an intro message to {to_name} about {other_name}.
RECIPIENT ({to_name}):
{to_profile}
INTRODUCING ({other_name}):
{other_profile}
WHY THEY MATCH (overlap score {match_data.get('overlap_score', 0)}):
{', '.join(overlap_reasons[:5])}
write a short intro message. remember: lowercase, genuine, not salesy."""
    try:
        response = requests.post(
            GROQ_API_URL,
            headers={
                'Authorization': f'Bearer {GROQ_API_KEY}',
                'Content-Type': 'application/json',
            },
            json={
                'model': MODEL,
                'messages': [
                    {'role': 'system', 'content': system_prompt},
                    {'role': 'user', 'content': user_prompt},
                ],
                'temperature': 0.7,
                'max_tokens': 300,
            },
            timeout=30,
        )
        if response.status_code != 200:
            return None, f"groq api error: {response.status_code} - {response.text}"
        data = response.json()
        draft = data['choices'][0]['message']['content'].strip()
        # determine contact method for recipient
        contact_method, contact_info = determine_contact_method(to_person)
        return {
            'draft': draft,
            'model': MODEL,
            'to': to_name,
            'about': other_name,
            'overlap_score': match_data.get('overlap_score', 0),
            'contact_method': contact_method,
            'contact_info': contact_info,
            'generated_at': datetime.now().isoformat(),
        }, None
    except Exception as e:
        # network errors and malformed responses are reported, not raised
        return None, f"groq error: {str(e)}"
def draft_intro_batch(matches, dry_run=False):
    """
    draft intros for multiple matches

    drafts both directions (to a about b, and to b about a) for each match.
    returns: list of {match, intro_to_a, intro_to_b, errors} dicts
    """
    batch = []
    for match in matches:
        to_a, err_a = draft_intro_with_llm(match, recipient='a', dry_run=dry_run)
        to_b, err_b = draft_intro_with_llm(match, recipient='b', dry_run=dry_run)
        batch.append({
            'match': match,
            'intro_to_a': to_a,
            'intro_to_b': to_b,
            'errors': [err_a, err_b],
        })
    return batch
def test_groq_connection():
    """test that groq api is working; returns (ok, message)"""
    if not GROQ_API_KEY:
        return False, "GROQ_API_KEY not set"
    payload = {
        'model': MODEL,
        'messages': [{'role': 'user', 'content': 'say "ok" and nothing else'}],
        'max_tokens': 10,
    }
    auth_headers = {
        'Authorization': f'Bearer {GROQ_API_KEY}',
        'Content-Type': 'application/json',
    }
    try:
        response = requests.post(
            GROQ_API_URL,
            headers=auth_headers,
            json=payload,
            timeout=10,
        )
    except Exception as e:
        return False, f"groq connection error: {str(e)}"
    if response.status_code == 200:
        return True, "groq api working"
    return False, f"groq api error: {response.status_code}"

View file

@ -0,0 +1,250 @@
"""
introd/lost_intro.py - intro drafting for lost builders
different tone than builder-to-builder intros.
these people need encouragement, not networking.
the goal isn't to recruit them. it's to show them the door exists.
they take it or they don't. but they'll know someone saw them.
"""
import os
import json
import requests
from datetime import datetime
# groq api configuration, overridable via environment variables
GROQ_API_KEY = os.environ.get('GROQ_API_KEY', '')
GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
MODEL = os.environ.get('GROQ_MODEL', 'llama-3.1-70b-versatile')
# static fallback template used when no groq key is set or use_llm is off;
# placeholders: name, interests, builder_name, builder_url, builder_description
LOST_INTRO_TEMPLATE = """hey {name},
i'm connectd. i'm a daemon that finds people who might need a nudge.
i noticed you're interested in {interests}. you ask good questions. you clearly get it.
but maybe you haven't built anything yet. or you started and stopped. or you don't think you can.
that's okay. most people don't.
but some people do. here's one: {builder_name} ({builder_url})
{builder_description}
they started where you are. look at what they built.
you're not behind. you're just not started yet.
no pressure. just wanted you to know someone noticed.
- connectd"""
# system prompt for the LLM path (draft_with_llm) - sets tone and guard rails
SYSTEM_PROMPT = """you are connectd, a daemon that finds isolated builders with aligned values and connects them.
right now you're reaching out to someone who has POTENTIAL but hasn't found it yet. maybe they gave up, maybe they're stuck, maybe they don't believe they can do it.
your job is to:
1. acknowledge where they are without being condescending
2. point them to an active builder who could inspire them
3. be genuine, not salesy or motivational-speaker-y
4. keep it short - these people are tired, don't overwhelm them
5. use lowercase, be human, no corporate bullshit
6. make it clear there's no pressure, no follow-up spam
you're not recruiting. you're not selling. you're just showing them a door.
the template structure:
- acknowledge them (you noticed something about them)
- normalize where they are (most people don't build things)
- show them someone who did (the builder)
- brief encouragement (you're not behind, just not started)
- sign off with no pressure
do NOT:
- be preachy or lecture them
- use motivational cliches ("you got this!", "believe in yourself!")
- make promises about outcomes
- be too long - they don't have energy for long messages
- make them feel bad about where they are"""
def draft_lost_intro(lost_user, inspiring_builder, config=None):
    """
    draft an intro for a lost builder, pairing them with an inspiring active builder.

    lost_user: the person who needs a nudge
    inspiring_builder: an active builder with similar interests who could inspire them
    config: optional dict; config['use_llm'] (default True) selects the groq
            path when an api key is configured

    returns: (draft_text, error) - error is None on success
    """
    config = config or {}
    # gather info about lost user
    lost_name = lost_user.get('name') or lost_user.get('username', 'there')
    lost_interests = extract_interests(lost_user)
    # gather info about inspiring builder
    builder_name = inspiring_builder.get('name') or inspiring_builder.get('username')
    builder_url = inspiring_builder.get('url') or f"https://github.com/{inspiring_builder.get('username')}"
    builder_description = create_builder_description(inspiring_builder)
    # use LLM to personalize when a key exists and config allows it
    if GROQ_API_KEY and config.get('use_llm', True):
        return draft_with_llm(lost_user, inspiring_builder, lost_interests, builder_description)
    # fallback to static template
    return LOST_INTRO_TEMPLATE.format(
        name=lost_name,
        interests=', '.join(lost_interests[:3]) if lost_interests else 'building things',
        builder_name=builder_name,
        builder_url=builder_url,
        builder_description=builder_description,
    ), None
def extract_interests(user):
    """extract interests from user profile

    pulls from extra.topics/aligned_topics, subreddit activity, and bio
    keywords; returns at most 5 interests with a generic fallback when
    nothing is found.
    """
    interests = []
    # from topics/tags (extra may be a JSON string from the db)
    extra = user.get('extra', {})
    if isinstance(extra, str):
        try:
            extra = json.loads(extra)
        except ValueError:
            # malformed JSON - treat as no extra data; narrowed from a bare
            # except so unrelated errors are not swallowed
            extra = {}
    topics = extra.get('topics', []) or extra.get('aligned_topics', [])
    interests.extend(topics[:5])
    # from subreddits, skipping the generic "stuck/aspiring" communities
    subreddits = user.get('subreddits', [])
    for sub in subreddits[:3]:
        if sub.lower() not in ['learnprogramming', 'findapath', 'getdisciplined']:
            interests.append(sub)
    # from bio keywords
    bio = user.get('bio') or ''
    bio_lower = bio.lower()
    interest_keywords = [
        'rust', 'python', 'javascript', 'go', 'linux', 'self-hosting', 'homelab',
        'privacy', 'security', 'open source', 'foss', 'decentralized', 'ai', 'ml',
        'web dev', 'backend', 'frontend', 'devops', 'data', 'automation',
    ]
    for kw in interest_keywords:
        if kw in bio_lower and kw not in interests:
            interests.append(kw)
    return interests[:5] if interests else ['technology', 'building things']
def create_builder_description(builder):
    """create a brief description of what the builder has done

    builds a sentence from top repos, topics, and values signals; falls
    back to a generic line when nothing is known.
    """
    extra = builder.get('extra', {})
    if isinstance(extra, str):
        try:
            extra = json.loads(extra)
        except ValueError:
            # malformed JSON - treat as no extra data; narrowed from a bare
            # except so unrelated errors are not swallowed
            extra = {}
    parts = []
    # what they build
    repos = extra.get('top_repos', [])[:3]
    if repos:
        repo_names = [r.get('name') for r in repos if r.get('name')]
        if repo_names:
            parts.append(f"they've built things like {', '.join(repo_names[:2])}")
    # their focus
    topics = extra.get('aligned_topics', []) or extra.get('topics', [])
    if topics:
        parts.append(f"they work on {', '.join(topics[:3])}")
    # their vibe (substring match over the stringified signals list)
    signals = builder.get('signals', [])
    if 'self-hosted' in str(signals).lower():
        parts.append("they're into self-hosting and owning their own infrastructure")
    if 'privacy' in str(signals).lower():
        parts.append("they care about privacy")
    if 'community' in str(signals).lower():
        parts.append("they're community-focused")
    if parts:
        return '. '.join(parts) + '.'
    else:
        return "they're building cool stuff in the open."
def draft_with_llm(lost_user, inspiring_builder, interests, builder_description):
    """use the groq LLM to draft a personalized lost-builder intro

    returns: (draft_text, None) on success or (None, error_message)
    """
    lost_name = lost_user.get('name') or lost_user.get('username', 'there')
    lost_signals = lost_user.get('lost_signals', [])
    lost_bio = lost_user.get('bio', '')
    builder_name = inspiring_builder.get('name') or inspiring_builder.get('username')
    builder_url = inspiring_builder.get('url') or f"https://github.com/{inspiring_builder.get('username')}"
    user_prompt = f"""draft an intro for this lost builder:
LOST USER:
- name: {lost_name}
- interests: {', '.join(interests)}
- signals detected: {', '.join(lost_signals[:5]) if lost_signals else 'general stuck/aspiring patterns'}
- bio: {lost_bio[:200] if lost_bio else 'none'}
INSPIRING BUILDER TO SHOW THEM:
- name: {builder_name}
- url: {builder_url}
- what they do: {builder_description}
write a short, genuine message. no fluff. no motivational cliches. just human.
keep it under 150 words.
use lowercase.
end with "- connectd"
"""
    payload = {
        'model': MODEL,
        'messages': [
            {'role': 'system', 'content': SYSTEM_PROMPT},
            {'role': 'user', 'content': user_prompt},
        ],
        'temperature': 0.7,
        'max_tokens': 500,
    }
    auth_headers = {
        'Authorization': f'Bearer {GROQ_API_KEY}',
        'Content-Type': 'application/json',
    }
    try:
        resp = requests.post(GROQ_API_URL, headers=auth_headers, json=payload, timeout=30)
        if resp.status_code != 200:
            return None, f"llm error: {resp.status_code}"
        content = resp.json()['choices'][0]['message']['content']
        return content.strip(), None
    except Exception as e:
        return None, str(e)
def get_lost_intro_config():
    """get configuration for lost builder outreach"""
    return dict(
        enabled=True,
        max_per_day=5,        # lower volume, higher care
        require_review=True,  # always manual approval
        cooldown_days=90,     # don't spam struggling people
        min_lost_score=40,
        min_values_score=20,
        use_llm=True,
    )

126
connectd/introd/review.py Normal file
View file

@ -0,0 +1,126 @@
"""
introd/review.py - human approval queue before sending
"""
import json
from datetime import datetime
def get_pending_intros(db, limit=50):
    """
    get all intros pending human review

    db: database wrapper exposing get_pending_intros / get_human_by_id
    returns: list of intro dicts with the recipient record attached
    """
    pending = []
    for row in db.get_pending_intros(limit=limit):
        human_id = row.get('recipient_human_id')
        pending.append({
            'id': row['id'],
            'match_id': row.get('match_id'),
            # attach the full recipient record when we know who it is
            'recipient': db.get_human_by_id(human_id) if human_id else None,
            'channel': row.get('channel'),
            'draft': row.get('draft'),
            'status': row.get('status'),
        })
    return pending
def approve_intro(db, intro_id, approved_by='human'):
    """
    approve an intro for sending

    intro_id: database id of the intro
    approved_by: who approved it (recorded for the audit trail)
    """
    db.approve_intro(intro_id, approved_by)
    message = "introd: approved intro {} by {}".format(intro_id, approved_by)
    print(message)
def reject_intro(db, intro_id, reason=None):
    """
    reject an intro (won't be sent)

    records the rejection (with the optional reason) in the approved_by
    column as an audit note, and the rejection time in approved_at.
    """
    note = f"rejected: {reason}" if reason else "rejected"
    cursor = db.conn.cursor()
    cursor.execute('''UPDATE intros SET status = 'rejected',
                 approved_at = ?, approved_by = ? WHERE id = ?''',
                   (datetime.now().isoformat(), note, intro_id))
    db.conn.commit()
    print(f"introd: rejected intro {intro_id}")
def review_intro_interactive(db, intro):
    """
    interactive review of a single intro

    prints the draft plus recipient context, then loops on stdin until the
    reviewer picks an action.
    returns: 'approve', 'reject', 'edit', or 'skip'
    """
    banner = "=" * 60
    print("\n" + banner)
    print("INTRO FOR REVIEW")
    print(banner)
    recipient = intro.get('recipient', {})
    print(f"\nRecipient: {recipient.get('name') or recipient.get('username')}")
    print(f"Platform: {recipient.get('platform')}")
    print(f"Channel: {intro.get('channel')}")
    print(f"\n--- DRAFT ---")
    print(intro.get('draft'))
    print("--- END ---\n")
    while True:
        choice = input("[a]pprove / [r]eject / [s]kip / [e]dit? ").strip().lower()
        if choice in ('a', 'approve'):
            approve_intro(db, intro['id'])
            return 'approve'
        if choice in ('r', 'reject'):
            reason = input("reason (optional): ").strip()
            reject_intro(db, intro['id'], reason)
            return 'reject'
        if choice in ('s', 'skip'):
            return 'skip'
        if choice in ('e', 'edit'):
            # editing is not wired up yet; keep prompting
            print("editing not yet implemented - approve or reject")
        else:
            print("invalid choice")
def review_all_pending(db):
    """
    interactive review of all pending intros

    walks every pending intro through review_intro_interactive, asking after
    each one whether to continue, then prints a summary.
    """
    intros = get_pending_intros(db)
    if not intros:
        print("no pending intros to review")
        return
    print(f"\n{len(intros)} intros pending review\n")
    counts = {'approve': 0, 'reject': 0, 'skip': 0}
    for intro in intros:
        outcome = review_intro_interactive(db, intro)
        # anything that is not an explicit approve/reject counts as skipped
        if outcome not in ('approve', 'reject'):
            outcome = 'skip'
        counts[outcome] += 1
        cont = input("\ncontinue reviewing? [y/n] ").strip().lower()
        if cont != 'y':
            break
    print(f"\nreview complete: {counts['approve']} approved, {counts['reject']} rejected, {counts['skip']} skipped")

216
connectd/introd/send.py Normal file
View file

@ -0,0 +1,216 @@
"""
introd/send.py - actually deliver intros via appropriate channel
"""
import smtplib
import requests
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime
import os
# email config (from env)
SMTP_HOST = os.environ.get('SMTP_HOST', '')
SMTP_PORT = int(os.environ.get('SMTP_PORT', '465'))  # 465 = implicit-SSL SMTP, matches SMTP_SSL below
SMTP_USER = os.environ.get('SMTP_USER', '')
SMTP_PASS = os.environ.get('SMTP_PASS', '')
FROM_EMAIL = os.environ.get('FROM_EMAIL', '')
def send_email(to_email, subject, body):
    """send email via SMTP

    builds a plain-text MIME message and delivers it over an implicit-SSL
    SMTP connection using the module-level SMTP_* settings.
    returns: (success, error) tuple
    """
    message = MIMEMultipart()
    message['From'] = FROM_EMAIL
    message['To'] = to_email
    message['Subject'] = subject
    message.attach(MIMEText(body, 'plain'))
    try:
        with smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) as smtp:
            smtp.login(SMTP_USER, SMTP_PASS)
            smtp.send_message(message)
    except Exception as exc:
        return False, str(exc)
    return True, None
def send_github_issue(repo_url, title, body):
    """
    create a github issue (requires GITHUB_TOKEN)
    note: only works if you have write access to the repo
    typically won't work for random users - fallback to manual

    currently always declines to post; returns (False, reason).
    """
    # https://github.com/owner/repo -> owner/repo
    segments = repo_url.rstrip('/').split('/')
    if len(segments) < 2:
        return False, "invalid github url"
    owner, repo = segments[-2], segments[-1]
    if not os.environ.get('GITHUB_TOKEN'):
        return False, "no github token"
    # deliberately not automated: opening issues on strangers' repos
    # is invasive, so surface the info for manual outreach instead
    return False, "github issues not automated - use manual outreach"
def send_mastodon_dm(instance, username, message):
    """stub: mastodon DMs need an oauth-authorized account, which is not
    wired up yet - always defers to the manual outreach queue."""
    return False, "mastodon DMs not automated - use manual outreach"
def send_reddit_message(username, subject, body):
    """stub: reddit messaging needs oauth credentials, which are not
    wired up yet - always defers to the manual outreach queue."""
    return False, "reddit messages not automated - use manual outreach"
def send_intro(db, intro_id):
    """
    send an approved intro

    looks up the intro row, verifies it is in 'approved' status, resolves
    the recipient, then dispatches via the intro's channel (email is the
    only channel that actually sends today; the others are stubs). on any
    failure the intro is flipped to 'manual_needed' so it shows up in the
    manual-export queue.

    returns: (success, error_message)
    """
    # get intro from db
    c = db.conn.cursor()
    c.execute('SELECT * FROM intros WHERE id = ?', (intro_id,))
    row = c.fetchone()
    if not row:
        return False, "intro not found"
    intro = dict(row)
    # only 'approved' intros may be dispatched
    if intro['status'] != 'approved':
        return False, f"intro not approved (status: {intro['status']})"
    channel = intro.get('channel')
    draft = intro.get('draft')
    # get recipient info
    recipient = db.get_human_by_id(intro['recipient_human_id'])
    if not recipient:
        return False, "recipient not found"
    success = False
    error = None
    if channel == 'email':
        # get email from contact (stored either as dict or JSON string)
        import json
        contact = recipient.get('contact', {})
        if isinstance(contact, str):
            contact = json.loads(contact)
        email = contact.get('email')
        if email:
            success, error = send_email(
                email,
                "connection: aligned builder intro",
                draft
            )
        else:
            error = "no email address"
    elif channel == 'github':
        # send_github_issue currently always returns False -> manual queue
        success, error = send_github_issue(
            recipient.get('url'),
            "connection: aligned builder intro",
            draft
        )
    elif channel == 'mastodon':
        # stub: not automated yet -> manual queue
        success, error = send_mastodon_dm(
            recipient.get('instance'),
            recipient.get('username'),
            draft
        )
    elif channel == 'reddit':
        # stub: not automated yet -> manual queue
        success, error = send_reddit_message(
            recipient.get('username'),
            "connection: aligned builder intro",
            draft
        )
    else:
        error = f"unknown channel: {channel}"
    # update status
    if success:
        db.mark_intro_sent(intro_id)
        print(f"introd: sent intro {intro_id} via {channel}")
    else:
        # mark as needs manual sending
        # NOTE(review): this overwrites approved_at with the failure time -
        # confirm that is intentional rather than a copy/paste slip
        c.execute('''UPDATE intros SET status = 'manual_needed',
            approved_at = ? WHERE id = ?''',
            (datetime.now().isoformat(), intro_id))
        db.conn.commit()
        print(f"introd: intro {intro_id} needs manual send ({error})")
    return success, error
def send_all_approved(db):
    """
    send all approved intros

    dispatches every intro currently in 'approved' status via send_intro
    and prints a sent/failed summary.
    """
    cursor = db.conn.cursor()
    cursor.execute('SELECT id FROM intros WHERE status = "approved"')
    approved_rows = cursor.fetchall()
    if not approved_rows:
        print("no approved intros to send")
        return
    print(f"sending {len(approved_rows)} approved intros...")
    outcomes = [send_intro(db, record['id'])[0] for record in approved_rows]
    sent = sum(1 for ok in outcomes if ok)
    failed = len(outcomes) - sent
    print(f"sent: {sent}, failed/manual: {failed}")
def export_manual_intros(db, output_file='manual_intros.txt'):
    """
    export intros that need manual sending to a text file

    writes one banner-delimited section per intro (recipient, platform,
    url, channel, then the draft text) for approved or manual_needed rows.
    """
    cursor = db.conn.cursor()
    cursor.execute('''SELECT i.*, h.username, h.platform, h.url
                 FROM intros i
                 JOIN humans h ON i.recipient_human_id = h.id
                 WHERE i.status IN ('approved', 'manual_needed')''')
    pending = cursor.fetchall()
    if not pending:
        print("no intros to export")
        return
    sections = []
    for entry in pending:
        sections.append(
            "=" * 60 + "\n"
            + f"TO: {entry['username']} ({entry['platform']})\n"
            + f"URL: {entry['url']}\n"
            + f"CHANNEL: {entry['channel']}\n"
            + "-" * 60 + "\n"
            + entry['draft'] + "\n"
            + "\n"
        )
    with open(output_file, 'w') as out:
        out.write("".join(sections))
    print(f"exported {len(pending)} intros to {output_file}")

BIN
connectd/logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

View file

@ -0,0 +1,10 @@
"""
matchd - pairing module
generates fingerprints, finds overlaps, ranks matches
"""
from .fingerprint import generate_fingerprint
from .overlap import find_overlap
from .rank import rank_matches, find_all_matches
__all__ = ['generate_fingerprint', 'find_overlap', 'rank_matches', 'find_all_matches']

View file

@ -0,0 +1,210 @@
"""
matchd/fingerprint.py - generate values profiles for humans
"""
import json
from collections import defaultdict
# values dimensions we track
VALUES_DIMENSIONS = [
    'privacy',           # surveillance concern, degoogle, self-hosted
    'decentralization',  # p2p, fediverse, local-first
    'cooperation',       # coops, mutual aid, community
    'queer_friendly',    # lgbtq+, pronouns
    'environmental',     # solarpunk, degrowth, sustainability
    'anticapitalist',    # post-capitalism, worker ownership
    'builder',           # creates vs consumes
    'pnw_oriented',      # pacific northwest connection
]
# skill categories
SKILL_CATEGORIES = [
    'backend',    # python, go, rust, databases
    'frontend',   # js, react, css
    'devops',     # docker, k8s, linux admin
    'hardware',   # electronics, embedded, iot
    'design',     # ui/ux, graphics
    'community',  # organizing, facilitation
    'writing',    # documentation, content
]
# signal to dimension mapping
SIGNAL_TO_DIMENSION = {
    'privacy': 'privacy',
    'selfhosted': 'privacy',
    'degoogle': 'privacy',
    'decentralized': 'decentralization',
    'local_first': 'decentralization',
    'p2p': 'decentralization',
    'federated_chat': 'decentralization',
    'foss': 'decentralization',
    'cooperative': 'cooperation',
    'community': 'cooperation',
    'mutual_aid': 'cooperation',
    'intentional_community': 'cooperation',
    'queer': 'queer_friendly',
    'pronouns': 'queer_friendly',
    'blm': 'queer_friendly',
    'acab': 'queer_friendly',
    'solarpunk': 'environmental',
    'anticapitalist': 'anticapitalist',
    'pnw': 'pnw_oriented',
    'pnw_state': 'pnw_oriented',
    'remote': 'pnw_oriented',
    'home_automation': 'builder',
    'modern_lang': 'builder',
    'unix': 'builder',
    'containers': 'builder',
}
# language to skill mapping
LANGUAGE_TO_SKILL = {
    'python': 'backend',
    'go': 'backend',
    'rust': 'backend',
    'java': 'backend',
    'ruby': 'backend',
    'php': 'backend',
    'javascript': 'frontend',
    'typescript': 'frontend',
    'html': 'frontend',
    'css': 'frontend',
    'vue': 'frontend',
    'shell': 'devops',
    'dockerfile': 'devops',
    'nix': 'devops',
    'hcl': 'devops',
    'c': 'hardware',
    'c++': 'hardware',
    'arduino': 'hardware',
    'verilog': 'hardware',
}
def generate_fingerprint(human_data):
    """
    generate a values fingerprint for a human

    input: human dict from database (has signals, languages, etc)
    output: fingerprint dict with values_vector, skills, interests,
            location_pref and availability
    """
    # stored columns may be raw JSON strings - decode them first
    raw_signals = human_data.get('signals', [])
    signals = json.loads(raw_signals) if isinstance(raw_signals, str) else raw_signals
    raw_extra = human_data.get('extra', {})
    extra = json.loads(raw_extra) if isinstance(raw_extra, str) else raw_extra
    languages = extra.get('languages', {})
    topics = extra.get('topics', [])

    # tally signal hits per values dimension
    hits = defaultdict(float)
    for sig in signals:
        dim = SIGNAL_TO_DIMENSION.get(sig)
        if dim:
            hits[dim] += 1.0
    # scale to 0-1 by the strongest dimension, then zero-fill the rest
    peak = max(hits.values()) if hits else 1
    values_vector = {dim: min(count / peak, 1.0) for dim, count in hits.items()}
    for dim in VALUES_DIMENSIONS:
        values_vector.setdefault(dim, 0.0)

    # skill weights from the repo language distribution, normalized so the
    # dominant skill is 1.0
    skills = defaultdict(float)
    repo_total = sum(languages.values()) if languages else 1
    for lang, count in languages.items():
        category = LANGUAGE_TO_SKILL.get(lang.lower())
        if category:
            skills[category] += count / repo_total
    if skills:
        top = max(skills.values())
        skills = {cat: min(weight / top, 1.0) for cat, weight in skills.items()}

    # interests from topics and signals (deduplicated)
    interests = list(set(topics + signals))

    # location preference: explicit signals win over free-text location
    location_pref = None
    if 'pnw' in signals or 'pnw_state' in signals:
        location_pref = 'pnw'
    elif 'remote' in signals:
        location_pref = 'remote'
    elif human_data.get('location'):
        place = human_data['location'].lower()
        if any(x in place for x in ['seattle', 'portland', 'washington', 'oregon', 'pnw', 'cascadia']):
            location_pref = 'pnw'

    # availability (based on hireable flag if present)
    availability = 'open' if extra.get('hireable') else None

    return {
        'human_id': human_data.get('id'),
        'values_vector': dict(values_vector),
        'skills': dict(skills),
        'interests': interests,
        'location_pref': location_pref,
        'availability': availability,
    }
def fingerprint_similarity(fp_a, fp_b):
    """
    similarity of two fingerprints on a 0-1 scale.

    blends cosine similarity of the values vectors (50%), jaccard overlap
    of interests (30%), and a coarse location-compatibility score (20%).
    """
    vec_a = fp_a.get('values_vector', {})
    vec_b = fp_b.get('values_vector', {})
    dims = set(vec_a) | set(vec_b)
    if not dims:
        return 0.0
    # cosine similarity over the union of dimensions
    dot = sum(vec_a.get(d, 0) * vec_b.get(d, 0) for d in dims)
    norm_a = sum(x * x for x in vec_a.values()) ** 0.5
    norm_b = sum(x * x for x in vec_b.values()) ** 0.5
    values_sim = dot / (norm_a * norm_b) if norm_a and norm_b else 0.0

    # jaccard overlap of interests
    set_a = set(fp_a.get('interests', []))
    set_b = set(fp_b.get('interests', []))
    interest_sim = len(set_a & set_b) / len(set_a | set_b) if (set_a or set_b) else 0.0

    # coarse location compatibility
    pref_a = fp_a.get('location_pref')
    pref_b = fp_b.get('location_pref')
    if pref_a is not None and pref_a == pref_b:
        loc_sim = 1.0
    elif 'remote' in (pref_a, pref_b):
        loc_sim = 0.5
    elif 'pnw' in (pref_a, pref_b):
        loc_sim = 0.3
    else:
        loc_sim = 0.0

    # weighted combination
    return (values_sim * 0.5) + (interest_sim * 0.3) + (loc_sim * 0.2)

199
connectd/matchd/lost.py Normal file
View file

@ -0,0 +1,199 @@
"""
matchd/lost.py - lost builder matching
lost builders don't get matched to each other (both need energy).
they get matched to ACTIVE builders who can inspire them.
the goal: show them someone like them who made it.
"""
import json
from .overlap import find_overlap, is_same_person
def find_inspiring_builder(lost_user, active_builders, db=None):
    """
    find an active builder who could inspire a lost builder.
    criteria:
    - shared interests (they need to relate to this person)
    - active builder has shipped real work (proof it's possible)
    - similar background signals if possible
    - NOT the same person across platforms

    returns (candidate_dict, None) on success, or (None, reason) when no
    builder clears the minimum-overlap threshold. the `db` parameter is
    accepted but currently unused.
    """
    if not active_builders:
        return None, "no active builders available"
    # parse lost user data (signals/extra may be stored as JSON strings)
    lost_signals = lost_user.get('signals', [])
    if isinstance(lost_signals, str):
        lost_signals = json.loads(lost_signals) if lost_signals else []
    lost_extra = lost_user.get('extra', {})
    if isinstance(lost_extra, str):
        lost_extra = json.loads(lost_extra) if lost_extra else {}
    # lost user interests
    lost_interests = set()
    lost_interests.update(lost_signals)
    lost_interests.update(lost_extra.get('topics', []))
    lost_interests.update(lost_extra.get('aligned_topics', []))
    # also include subreddits if from reddit (shows interests)
    subreddits = lost_user.get('subreddits', [])
    if isinstance(subreddits, str):
        subreddits = json.loads(subreddits) if subreddits else []
    lost_interests.update(subreddits)
    # score each active builder
    candidates = []
    for builder in active_builders:
        # skip if same person (cross-platform)
        if is_same_person(lost_user, builder):
            continue
        # get builder signals (same JSON-or-parsed handling as above)
        builder_signals = builder.get('signals', [])
        if isinstance(builder_signals, str):
            builder_signals = json.loads(builder_signals) if builder_signals else []
        builder_extra = builder.get('extra', {})
        if isinstance(builder_extra, str):
            builder_extra = json.loads(builder_extra) if builder_extra else {}
        # builder interests
        builder_interests = set()
        builder_interests.update(builder_signals)
        builder_interests.update(builder_extra.get('topics', []))
        builder_interests.update(builder_extra.get('aligned_topics', []))
        # calculate match score: 10 points per shared interest
        shared_interests = lost_interests & builder_interests
        match_score = len(shared_interests) * 10
        # bonus for high-value shared signals (+15 each)
        high_value_signals = ['privacy', 'selfhosted', 'home_automation', 'foss',
                              'solarpunk', 'cooperative', 'decentralized', 'queer']
        for signal in shared_interests:
            if signal in high_value_signals:
                match_score += 15
        # bonus if builder has shipped real work (proof it's possible)
        repos = builder_extra.get('top_repos', [])
        if len(repos) >= 5:
            match_score += 20  # they've built things
        elif len(repos) >= 2:
            match_score += 10
        # bonus for high stars (visible success)
        total_stars = sum(r.get('stars', 0) for r in repos) if repos else 0
        if total_stars >= 100:
            match_score += 15
        elif total_stars >= 20:
            match_score += 5
        # bonus for similar location (relatable); only pnw is recognized
        lost_loc = (lost_user.get('location') or '').lower()
        builder_loc = (builder.get('location') or '').lower()
        if lost_loc and builder_loc:
            pnw_keywords = ['seattle', 'portland', 'washington', 'oregon', 'pnw']
            if any(k in lost_loc for k in pnw_keywords) and any(k in builder_loc for k in pnw_keywords):
                match_score += 10
        # minimum threshold - need SOMETHING in common
        if match_score < 10:
            continue
        candidates.append({
            'builder': builder,
            'match_score': match_score,
            'shared_interests': list(shared_interests)[:5],
            'repos_count': len(repos),
            'total_stars': total_stars,
        })
    if not candidates:
        return None, "no matching active builders found"
    # sort by match score, return best
    candidates.sort(key=lambda x: x['match_score'], reverse=True)
    best = candidates[0]
    return best, None
def find_matches_for_lost_builders(db, min_lost_score=40, min_values_score=20, limit=10):
    """
    find inspiring builder matches for all lost builders ready for outreach.

    returns (matches, error): matches is a list of dicts pairing each lost
    builder with their best inspiring builder; error is None on success.
    """
    lost = db.get_lost_builders_for_outreach(
        min_lost_score=min_lost_score,
        min_values_score=min_values_score,
        limit=limit
    )
    if not lost:
        return [], "no lost builders ready for outreach"
    actives = db.get_active_builders(min_score=50, limit=200)
    if not actives:
        return [], "no active builders available"
    pairings = []
    for lost_user in lost:
        best, _err = find_inspiring_builder(lost_user, actives, db)
        if not best:
            continue
        pairings.append({
            'lost_user': lost_user,
            'inspiring_builder': best['builder'],
            'match_score': best['match_score'],
            'shared_interests': best['shared_interests'],
            'builder_repos': best['repos_count'],
            'builder_stars': best['total_stars'],
        })
    return pairings, None
def get_lost_match_summary(match_data):
    """
    get a human-readable summary of a lost builder match.

    expects the dict shape produced by find_matches_for_lost_builders
    ('lost_user', 'inspiring_builder', 'match_score', 'shared_interests',
    'builder_repos', 'builder_stars'); missing keys fall back to defaults.
    returns the summary with surrounding whitespace stripped.
    """
    lost = match_data['lost_user']
    builder = match_data['inspiring_builder']
    lost_name = lost.get('name') or lost.get('username', 'someone')
    builder_name = builder.get('name') or builder.get('username', 'a builder')
    # (removed dead code: a 'lost_signals' field used to be parsed here but
    # was never used, and no caller ever supplies that key)
    shared = match_data.get('shared_interests', [])
    summary = f"""
lost builder: {lost_name} ({lost.get('platform')})
lost score: {lost.get('lost_potential_score', 0)}
values score: {lost.get('score', 0)}
url: {lost.get('url')}
inspiring builder: {builder_name} ({builder.get('platform')})
score: {builder.get('score', 0)}
repos: {match_data.get('builder_repos', 0)}
stars: {match_data.get('builder_stars', 0)}
url: {builder.get('url')}
match score: {match_data.get('match_score', 0)}
shared interests: {', '.join(shared) if shared else 'values alignment'}
this lost builder needs to see that someone like them made it.
"""
    return summary.strip()

150
connectd/matchd/overlap.py Normal file
View file

@ -0,0 +1,150 @@
"""
matchd/overlap.py - find pairs with alignment
"""
import json
from .fingerprint import fingerprint_similarity
def find_overlap(human_a, human_b, fp_a=None, fp_b=None):
    """
    analyze overlap between two humans.

    returns a dict with the combined overlap score, shared values/topics,
    complementary language skills, geographic compatibility, and (when
    both fingerprints are supplied) the fingerprint similarity.
    """
    def decode(value):
        # db columns may hold JSON strings instead of parsed objects
        return json.loads(value) if isinstance(value, str) else value

    signals_a = decode(human_a.get('signals', []))
    signals_b = decode(human_b.get('signals', []))
    extra_a = decode(human_a.get('extra', {}))
    extra_b = decode(human_b.get('extra', {}))

    shared_signals = list(set(signals_a) & set(signals_b))
    shared_topics = list(set(extra_a.get('topics', [])) & set(extra_b.get('topics', [])))

    # symmetric difference: languages each could teach the other
    langs_a = set(extra_a.get('languages', {}).keys())
    langs_b = set(extra_b.get('languages', {}).keys())
    complementary_langs = list(langs_a ^ langs_b)

    # geographic compatibility from free-text location + signals
    loc_a = (human_a.get('location') or '').lower()
    loc_b = (human_b.get('location') or '').lower()
    pnw_keywords = ['seattle', 'portland', 'washington', 'oregon', 'pnw', 'cascadia', 'pacific northwest']
    remote_keywords = ['remote', 'anywhere', 'distributed']
    a_pnw = any(k in loc_a for k in pnw_keywords) or 'pnw' in signals_a
    b_pnw = any(k in loc_b for k in pnw_keywords) or 'pnw' in signals_b
    a_remote = any(k in loc_a for k in remote_keywords) or 'remote' in signals_a
    b_remote = any(k in loc_b for k in remote_keywords) or 'remote' in signals_b

    geographic_match, geo_reason = False, None
    if a_pnw and b_pnw:
        geographic_match, geo_reason = True, 'both in pnw'
    elif (a_pnw or b_pnw) and (a_remote or b_remote):
        geographic_match, geo_reason = True, 'pnw + remote compatible'
    elif a_remote and b_remote:
        geographic_match, geo_reason = True, 'both remote-friendly'

    # weighted base score: values > interests > skills, plus a geo bonus
    base_score = len(shared_signals) * 10
    base_score += len(shared_topics) * 5
    if complementary_langs:
        base_score += min(len(complementary_langs), 5) * 3
    if geographic_match:
        base_score += 20

    fp_score = fingerprint_similarity(fp_a, fp_b) * 50 if fp_a and fp_b else 0
    total_score = base_score + fp_score

    overlap_reasons = []
    if shared_signals:
        overlap_reasons.append(f"shared values: {', '.join(shared_signals[:5])}")
    if shared_topics:
        overlap_reasons.append(f"shared interests: {', '.join(shared_topics[:5])}")
    if geo_reason:
        overlap_reasons.append(geo_reason)
    if complementary_langs:
        overlap_reasons.append(f"complementary skills: {', '.join(complementary_langs[:5])}")

    return {
        'overlap_score': total_score,
        'shared_signals': shared_signals,
        'shared_topics': shared_topics,
        'complementary_skills': complementary_langs,
        'geographic_match': geographic_match,
        'geo_reason': geo_reason,
        'overlap_reasons': overlap_reasons,
        'fingerprint_similarity': fp_score / 50 if fp_a and fp_b else None,
    }
def is_same_person(human_a, human_b):
    """
    check if two records might be the same person (cross-platform)

    heuristics: matching username stems (text before any @instance suffix),
    cross-referenced github handles, or identical contact emails.
    same-platform records are assumed to be distinct people.
    """
    # same platform = definitely different records
    if human_a['platform'] == human_b['platform']:
        return False
    # check username similarity on the stem before any @instance suffix.
    # `or ''` also guards against an explicit None username, which the old
    # .get('username', '') form would crash on.
    user_a = (human_a.get('username') or '').lower().split('@')[0]
    user_b = (human_b.get('username') or '').lower().split('@')[0]
    # bug fix: require non-empty stems - previously two records with
    # missing/empty usernames compared '' == '' and were always flagged
    # as the same person
    if user_a and user_a == user_b:
        return True
    contact_a = human_a.get('contact', {})
    contact_b = human_b.get('contact', {})
    if isinstance(contact_a, str):
        contact_a = json.loads(contact_a)
    if isinstance(contact_b, str):
        contact_b = json.loads(contact_b)
    # github cross-reference (only when the handle is actually present)
    gh_a = contact_a.get('github')
    gh_b = contact_b.get('github')
    if gh_a and gh_a == gh_b:
        return True
    if (gh_a and gh_a == user_b) or (gh_b and gh_b == user_a):
        return True
    # email cross-reference
    if contact_a.get('email') and contact_a.get('email') == contact_b.get('email'):
        return True
    return False

137
connectd/matchd/rank.py Normal file
View file

@ -0,0 +1,137 @@
"""
matchd/rank.py - score and rank match quality
"""
from itertools import combinations
from .fingerprint import generate_fingerprint
from .overlap import find_overlap, is_same_person
from scoutd.deep import check_already_connected
def rank_matches(matches):
    """
    rank a list of matches by quality.

    annotates each match dict in place with 'quality_score' (the overlap
    score scaled by geo / fingerprint / skill multipliers) and returns a
    new list sorted best-first.
    """
    for entry in matches:
        score = entry.get('overlap_score', 0)
        multipliers = []
        # geographic proximity makes a real-world connection likelier
        if entry.get('geographic_match'):
            multipliers.append(1.2)
        # strong values-fingerprint alignment
        similarity = entry.get('fingerprint_similarity')
        if similarity and similarity > 0.7:
            multipliers.append(1.3)
        # complementary skills mean they can help each other
        if len(entry.get('complementary_skills', [])) >= 3:
            multipliers.append(1.1)
        for factor in multipliers:
            score *= factor
        entry['quality_score'] = score
    return sorted(matches, key=lambda e: e['quality_score'], reverse=True)
def find_all_matches(db, min_score=30, min_overlap=20):
    """
    find all potential matches from database.

    generates and persists a fingerprint per human, then scores every
    unordered pair, skipping likely-duplicate identities and people who
    already know each other. pairs at or above min_overlap are saved and
    the full set is returned ranked best-first.
    """
    print("matchd: finding all potential matches...")
    humans = db.get_all_humans(min_score=min_score)
    print(f"  {len(humans)} humans to match")
    # generate fingerprints once up front, persisting each
    fingerprints = {}
    for person in humans:
        fingerprint = generate_fingerprint(person)
        fingerprints[person['id']] = fingerprint
        db.save_fingerprint(person['id'], fingerprint)
    print(f"  generated {len(fingerprints)} fingerprints")
    matches = []
    checked = 0
    skipped_same = 0
    skipped_connected = 0
    for left, right in combinations(humans, 2):
        checked += 1
        # skip if likely same person (cross-platform duplicate)
        if is_same_person(left, right):
            skipped_same += 1
            continue
        # skip if already connected (same org, company, co-contributors)
        connected, _reason = check_already_connected(left, right)
        if connected:
            skipped_connected += 1
            continue
        overlap = find_overlap(left, right,
                               fingerprints.get(left['id']),
                               fingerprints.get(right['id']))
        if overlap['overlap_score'] >= min_overlap:
            matches.append({'human_a': left, 'human_b': right, **overlap})
            db.save_match(left['id'], right['id'], overlap)
        if checked % 1000 == 0:
            print(f"  checked {checked} pairs, {len(matches)} matches so far...")
    print(f"  checked {checked} pairs")
    print(f"  skipped {skipped_same} (same person), {skipped_connected} (already connected)")
    print(f"  found {len(matches)} potential matches")
    return rank_matches(matches)
def get_top_matches(db, limit=50):
    """
    get top matches from database, resolving both humans for each row.
    rows whose humans can no longer be found are silently dropped.
    """
    results = []
    for record in db.get_matches(limit=limit):
        first = db.get_human_by_id(record['human_a_id'])
        second = db.get_human_by_id(record['human_b_id'])
        if not (first and second):
            continue
        results.append({
            'id': record['id'],
            'human_a': first,
            'human_b': second,
            'overlap_score': record['overlap_score'],
            'overlap_reasons': record['overlap_reasons'],
            'geographic_match': record['geographic_match'],
            'status': record['status'],
        })
    return results

3
connectd/repository.yaml Normal file
View file

@ -0,0 +1,3 @@
name: connectd add-ons
url: https://github.com/sudoxnym/connectd
maintainer: sudoxnym

View file

@ -0,0 +1,2 @@
requests>=2.28.0
beautifulsoup4>=4.12.0

45
connectd/run.sh Normal file
View file

@ -0,0 +1,45 @@
#!/usr/bin/with-contenv bashio
# shellcheck shell=bash
# add-on entrypoint: copies the home assistant add-on options into
# environment variables, points the daemon at persistent /data storage,
# then execs the python daemon (so it receives signals directly).
# read options from add-on config
# -- host identity: who the daemon is finding connections FOR --
export HOST_USER=$(bashio::config 'host_user')
export HOST_NAME=$(bashio::config 'host_name')
export HOST_EMAIL=$(bashio::config 'host_email')
export HOST_MASTODON=$(bashio::config 'host_mastodon')
export HOST_REDDIT=$(bashio::config 'host_reddit')
export HOST_LEMMY=$(bashio::config 'host_lemmy')
export HOST_LOBSTERS=$(bashio::config 'host_lobsters')
export HOST_MATRIX=$(bashio::config 'host_matrix')
export HOST_DISCORD=$(bashio::config 'host_discord')
export HOST_BLUESKY=$(bashio::config 'host_bluesky')
export HOST_LOCATION=$(bashio::config 'host_location')
export HOST_INTERESTS=$(bashio::config 'host_interests')
export HOST_LOOKING_FOR=$(bashio::config 'host_looking_for')
# -- API credentials consumed by the scoutd/introd modules --
export GITHUB_TOKEN=$(bashio::config 'github_token')
export GROQ_API_KEY=$(bashio::config 'groq_api_key')
export MASTODON_TOKEN=$(bashio::config 'mastodon_token')
export MASTODON_INSTANCE=$(bashio::config 'mastodon_instance')
export DISCORD_BOT_TOKEN=$(bashio::config 'discord_bot_token')
export DISCORD_TARGET_SERVERS=$(bashio::config 'discord_target_servers')
export LEMMY_INSTANCE=$(bashio::config 'lemmy_instance')
export LEMMY_USERNAME=$(bashio::config 'lemmy_username')
export LEMMY_PASSWORD=$(bashio::config 'lemmy_password')
# -- outbound email, read by introd/send.py --
export SMTP_HOST=$(bashio::config 'smtp_host')
export SMTP_PORT=$(bashio::config 'smtp_port')
export SMTP_USER=$(bashio::config 'smtp_user')
export SMTP_PASS=$(bashio::config 'smtp_pass')
# set data paths (/data persists across add-on restarts)
export DB_PATH=/data/db/connectd.db
export CACHE_DIR=/data/cache
bashio::log.info "starting connectd daemon..."
bashio::log.info "HOST_USER: ${HOST_USER}"
cd /app
exec python3 daemon.py

View file

@ -0,0 +1,29 @@
"""
scoutd - discovery module
finds humans across platforms
"""
from .github import scrape_github, get_github_user
from .reddit import scrape_reddit
from .mastodon import scrape_mastodon
from .lobsters import scrape_lobsters
from .matrix import scrape_matrix
from .twitter import scrape_twitter
from .bluesky import scrape_bluesky
from .lemmy import scrape_lemmy
from .discord import scrape_discord, send_discord_dm
from .deep import (
deep_scrape_github_user, check_already_connected, save_deep_profile,
determine_contact_method, get_cached_orgs, cache_orgs,
get_emails_from_commit_history, scrape_website_for_emails,
)
__all__ = [
'scrape_github', 'scrape_reddit', 'scrape_mastodon', 'scrape_lobsters',
'scrape_matrix', 'scrape_twitter', 'scrape_bluesky', 'scrape_lemmy',
'scrape_discord', 'send_discord_dm',
'get_github_user', 'deep_scrape_github_user',
'check_already_connected', 'save_deep_profile', 'determine_contact_method',
'get_cached_orgs', 'cache_orgs', 'get_emails_from_commit_history',
'scrape_website_for_emails',
]

216
connectd/scoutd/bluesky.py Normal file
View file

@ -0,0 +1,216 @@
"""
scoutd/bluesky.py - bluesky/atproto discovery
bluesky has an open API via AT Protocol - no auth needed for public data
many twitter refugees landed here, good source for aligned builders
"""
import requests
import json
import time
from datetime import datetime
from pathlib import Path
from .signals import analyze_text
# shared request headers for all bluesky calls
HEADERS = {'User-Agent': 'connectd/1.0', 'Accept': 'application/json'}
# on-disk response cache, kept under the package's db/cache tree
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'bluesky'
# public bluesky API (appview host; no auth needed for public data)
BSKY_API = 'https://public.api.bsky.app'
# hashtags to search
ALIGNED_HASHTAGS = [
    'selfhosted', 'homelab', 'homeassistant', 'foss', 'opensource',
    'privacy', 'solarpunk', 'cooperative', 'mutualaid', 'localfirst',
    'indieweb', 'smallweb', 'permacomputing', 'techworkers', 'coops',
]
def _api_get(endpoint, params=None):
    """rate-limited API request with on-disk caching (1h TTL)

    cache filenames are derived with sha1 so they are stable across
    interpreter restarts - the previous builtin hash() of the key is
    salted per-process (PYTHONHASHSEED), which silently defeated the
    cache on every restart. returns parsed JSON, or None on error.
    """
    import hashlib  # local import: only needed for cache-key digests
    url = f"{BSKY_API}{endpoint}"
    cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}"
    digest = hashlib.sha1(cache_key.encode('utf-8')).hexdigest()[:16]
    cache_file = CACHE_DIR / f"{digest}.json"
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    if cache_file.exists():
        try:
            data = json.loads(cache_file.read_text())
            if time.time() - data.get('_cached_at', 0) < 3600:
                return data.get('_data')
        except Exception:
            pass  # corrupt/unreadable cache entry: fall through and refetch
    time.sleep(0.5)  # rate limit
    try:
        resp = requests.get(url, headers=HEADERS, params=params, timeout=30)
        resp.raise_for_status()
        result = resp.json()
        cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result}))
        return result
    except requests.exceptions.RequestException as e:
        print(f"  bluesky api error: {e}")
        return None
def search_posts(query, limit=50):
    """search public posts matching query (API caps at 100 per request)"""
    payload = _api_get('/xrpc/app.bsky.feed.searchPosts', {
        'q': query,
        'limit': min(limit, 100),
    })
    return payload.get('posts', []) if payload else []
def get_profile(handle):
    """fetch a public actor profile by handle (e.g., user.bsky.social);
    returns None when the lookup fails"""
    return _api_get('/xrpc/app.bsky.actor.getProfile', {'actor': handle})
def get_author_feed(handle, limit=30):
    """fetch a user's recent feed items; [] when the lookup fails"""
    payload = _api_get('/xrpc/app.bsky.feed.getAuthorFeed', {
        'actor': handle,
        'limit': limit,
    })
    return payload.get('feed', []) if payload else []
def analyze_bluesky_user(handle):
    """analyze a bluesky user for alignment

    scores bio + display name + recent post text via analyze_text, adds
    platform/activity bonuses, and returns the standard human-record dict
    (or None when the profile cannot be fetched).
    """
    profile = get_profile(handle)
    if not profile:
        return None
    # collect text: bio first, then display name, then recent posts
    description = profile.get('description', '')
    display_name = profile.get('displayName', '')
    corpus = [chunk for chunk in (description, display_name) if chunk]
    for item in get_author_feed(handle, limit=20):
        body = item.get('post', {}).get('record', {}).get('text', '')
        if body:
            corpus.append(body)
    text_score, positive_signals, negative_signals = analyze_text(' '.join(corpus))
    # bluesky bonus (decentralized, values-aligned platform choice)
    total_score = text_score + 10
    # activity bonus
    followers = profile.get('followersCount', 0)
    posts_count = profile.get('postsCount', 0)
    if posts_count >= 100:
        total_score += 5
    if followers >= 100:
        total_score += 5
    # confidence: base for bluesky (better signal than twitter), bumped by
    # how much text we saw, signal density, and posting activity
    confidence = 0.35
    if len(corpus) > 5:
        confidence += 0.2
    if len(positive_signals) >= 3:
        confidence += 0.2
    if posts_count >= 50:
        confidence += 0.1
    confidence = min(confidence, 0.85)
    reasons = ['on bluesky (atproto)']
    if positive_signals:
        reasons.append(f"signals: {', '.join(positive_signals[:5])}")
    if negative_signals:
        reasons.append(f"WARNING: {', '.join(negative_signals)}")
    return {
        'platform': 'bluesky',
        'username': handle,
        'url': f"https://bsky.app/profile/{handle}",
        'name': display_name or handle,
        'bio': description,
        'score': total_score,
        'confidence': confidence,
        'signals': positive_signals,
        'negative_signals': negative_signals,
        'followers': followers,
        'posts_count': posts_count,
        'reasons': reasons,
        'contact': {
            'bluesky': handle,
        },
        'scraped_at': datetime.now().isoformat(),
    }
def scrape_bluesky(db, limit_per_hashtag=30):
    """full bluesky scrape

    searches each aligned hashtag, collects post authors, keeps users who
    appear under 2+ DISTINCT hashtags, analyzes up to 100 of them, and
    persists every scoring result via db.save_human. returns the results.
    """
    print("scoutd/bluesky: starting scrape...")
    all_users = {}
    for hashtag in ALIGNED_HASHTAGS:
        print(f"  #{hashtag}...")
        # search for hashtag
        posts = search_posts(f"#{hashtag}", limit=limit_per_hashtag)
        for post in posts:
            author = post.get('author', {})
            handle = author.get('handle')
            if not handle:
                continue
            entry = all_users.setdefault(handle, {
                'handle': handle,
                'display_name': author.get('displayName'),
                'hashtags': [],
            })
            # bug fix: a user with several posts under ONE hashtag used to
            # get that hashtag appended once per post, wrongly passing the
            # "2+ aligned hashtags" filter below - record each tag once
            if hashtag not in entry['hashtags']:
                entry['hashtags'].append(hashtag)
        print(f"  found {len(posts)} posts")
    # prioritize users seen in multiple distinct hashtags
    multi_hashtag = {h: d for h, d in all_users.items() if len(d.get('hashtags', [])) >= 2}
    print(f"  {len(multi_hashtag)} users in 2+ aligned hashtags")
    # analyze (cap at 100 to bound API traffic)
    results = []
    for handle in list(multi_hashtag.keys())[:100]:
        try:
            result = analyze_bluesky_user(handle)
            if result and result['score'] > 0:
                results.append(result)
                db.save_human(result)
                if result['score'] >= 30:
                    print(f"  ★ @{handle}: {result['score']} pts")
        except Exception as e:
            print(f"  error on {handle}: {e}")
    print(f"scoutd/bluesky: found {len(results)} aligned humans")
    return results

966
connectd/scoutd/deep.py Normal file
View file

@ -0,0 +1,966 @@
"""
scoutd/deep.py - deep profile discovery
when we find someone, follow ALL their links to build complete picture
github profile -> mastodon link -> scrape mastodon
-> website -> scrape for more links
-> twitter handle -> note it
-> email -> store it
email discovery sources:
- github profile (if public)
- git commit history
- personal website/blog contact page
- README "contact me" sections
- mastodon/twitter bio
fallback contact methods if no email:
- github_issue: open issue on their repo
- mastodon: DM if allowed
- manual: pending contact queue for review
also filters out people who clearly already know each other
(same org, co-contributors to same repos)
"""
import re
import json
import requests
import time
import subprocess
import tempfile
import shutil
from datetime import datetime
from urllib.parse import urlparse
from pathlib import Path
from .signals import analyze_text
from .github import get_github_user, get_user_repos, _api_get as github_api
from .mastodon import analyze_mastodon_user, _api_get as mastodon_api
from .handles import discover_all_handles, extract_handles_from_text, scrape_website_for_handles
# local cache for org memberships
ORG_CACHE_FILE = Path(__file__).parent.parent / 'data' / 'org_cache.json'
# in-memory mirror of the cache file; None = not loaded yet (see load_org_cache)
_org_cache = None
# patterns to find social links in text
MASTODON_PATTERN = r'@([a-zA-Z0-9_]+)@([a-zA-Z0-9.-]+\.[a-z]{2,})'
TWITTER_PATTERN = r'(?:twitter\.com/|x\.com/)([a-zA-Z0-9_]+)'
GITHUB_PATTERN = r'github\.com/([a-zA-Z0-9_-]+)'
MATRIX_PATTERN = r'@([a-zA-Z0-9_]+):([a-zA-Z0-9.-]+)'
EMAIL_PATTERN = r'\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b'
# known mastodon instances for validation (MASTODON_PATTERN also matches
# matrix-like @user@domain strings, so extract_links_from_text cross-checks
# against this list)
KNOWN_INSTANCES = [
    'mastodon.social', 'fosstodon.org', 'tech.lgbt', 'social.coop',
    'hackers.town', 'hachyderm.io', 'infosec.exchange', 'chaos.social',
    'mas.to', 'mstdn.social', 'mastodon.online', 'universeodon.com',
    'mathstodon.xyz', 'ruby.social', 'functional.cafe', 'types.pl',
]
# contact page patterns for website scraping
CONTACT_PAGE_PATHS = [
    '/contact', '/contact/', '/contact.html',
    '/about', '/about/', '/about.html',
    '/connect', '/reach-out', '/hire', '/hire-me',
]
# patterns to find emails in contact sections
# (first pattern: plain address after a "contact:" style label - 1 group;
#  second pattern: "user [at] domain [dot] tld" obfuscation - 3 groups)
CONTACT_SECTION_PATTERNS = [
    r'(?:contact|email|reach|mail)[:\s]+([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
    r'([a-zA-Z0-9._%+-]+)\s*(?:\[at\]|\(at\)|@)\s*([a-zA-Z0-9.-]+)\s*(?:\[dot\]|\(dot\)|\.)\s*([a-zA-Z]{2,})',
]
def load_org_cache():
    """load org membership cache from disk (lazy, memoized in _org_cache).

    returns:
        dict of shape {'users': {username: [orgs]}, 'updated': {username: iso-ts}};
        falls back to an empty cache on any read/parse failure rather than raising.
    """
    global _org_cache
    if _org_cache is not None:
        return _org_cache
    try:
        ORG_CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
        if ORG_CACHE_FILE.exists():
            with open(ORG_CACHE_FILE) as f:
                _org_cache = json.load(f)
        else:
            _org_cache = {'users': {}, 'updated': {}}
    except (OSError, ValueError):
        # was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit;
        # ValueError covers json.JSONDecodeError for a corrupt cache file
        _org_cache = {'users': {}, 'updated': {}}
    return _org_cache
def save_org_cache():
    """save org membership cache to disk (best-effort; never raises)."""
    global _org_cache
    if _org_cache is None:
        # nothing was ever loaded/modified
        return
    try:
        ORG_CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
        with open(ORG_CACHE_FILE, 'w') as f:
            json.dump(_org_cache, f, indent=2)
    except (OSError, TypeError):
        # was a bare `except:`; TypeError covers non-serializable cache entries,
        # OSError covers disk/permission problems - stay best-effort either way
        pass
def get_cached_orgs(username):
    """get orgs from cache if available and fresh (< 7 days old)

    returns the cached org list, or None when the user is unknown
    or the cached entry is stale.
    """
    cache = load_org_cache()
    if username not in cache['users']:
        return None
    stamp = cache['updated'].get(username)
    if not stamp:
        # no timestamp recorded -> treat as stale
        return None
    age = datetime.now() - datetime.fromisoformat(stamp)
    return cache['users'][username] if age.days < 7 else None
def cache_orgs(username, orgs):
    """cache org membership for a user and persist immediately"""
    cache = load_org_cache()
    now_iso = datetime.now().isoformat()
    cache['users'][username] = orgs
    cache['updated'][username] = now_iso
    save_org_cache()
def get_emails_from_commit_history(repo_url, limit=50):
    """
    clone a repo (shallow) and extract unique author emails from git log.

    args:
        repo_url: clone URL (https) or local path
        limit: max commits to inspect
    returns:
        list of lowercased author emails with bot/noreply addresses filtered
        out; empty list on clone/log failure (including git not installed)
    """
    bot_markers = (
        'noreply', 'no-reply', 'dependabot', 'github-actions',
        'renovate', 'greenkeeper', 'snyk-bot', 'users.noreply.github',
    )
    emails = set()
    try:
        # temp dir is removed automatically, even on failure
        with tempfile.TemporaryDirectory() as tmpdir:
            # shallow clone with limited depth keeps this fast on big repos
            result = subprocess.run(
                ['git', 'clone', '--depth', '50', '--single-branch', repo_url, tmpdir],
                capture_output=True,
                text=True,
                timeout=30
            )
            if result.returncode != 0:
                return []
            # %ae = author email, one per line
            result = subprocess.run(
                ['git', 'log', f'--max-count={limit}', '--format=%ae'],
                cwd=tmpdir,
                capture_output=True,
                text=True,
                timeout=10
            )
            if result.returncode == 0:
                for email in result.stdout.strip().split('\n'):
                    email = email.strip().lower()
                    # filter out bot/noreply emails
                    if email and not any(marker in email for marker in bot_markers):
                        emails.add(email)
    except Exception:
        # was `except (subprocess.TimeoutExpired, Exception)` - the tuple was
        # redundant since Exception already covers TimeoutExpired; this also
        # catches FileNotFoundError when git is not installed
        pass
    return list(emails)
def scrape_website_for_emails(url, timeout=10):
    """
    scrape a personal website for email addresses.

    checks the main page plus the common contact/about pages listed in
    CONTACT_PAGE_PATHS. returns a list of lowercased emails
    (noreply/example addresses excluded); empty list when the url doesn't
    look like a personal site.
    """
    emails = set()
    if not is_personal_website(url):
        return []
    headers = {'User-Agent': 'connectd/1.0 (looking for contact info)'}
    # normalize url
    if not url.startswith('http'):
        url = 'https://' + url
    base_url = url.rstrip('/')
    # pages to check
    pages_to_check = [base_url] + [base_url + path for path in CONTACT_PAGE_PATHS]
    for page_url in pages_to_check:
        try:
            resp = requests.get(page_url, timeout=timeout, headers=headers)
            if resp.status_code == 200:
                text = resp.text
                # standard email pattern
                for match in re.finditer(EMAIL_PATTERN, text):
                    email = match.group(0).lower()
                    if not any(x in email for x in ['noreply', 'no-reply', 'example.com', 'users.noreply']):
                        emails.add(email)
                # obfuscated email patterns like "user [at] domain [dot] com"
                for pattern in CONTACT_SECTION_PATTERNS:
                    for match in re.finditer(pattern, text, re.IGNORECASE):
                        if len(match.groups()) == 3:
                            email = f"{match.group(1)}@{match.group(2)}.{match.group(3)}".lower()
                            emails.add(email)
                        elif len(match.groups()) == 1:
                            emails.add(match.group(1).lower())
                # mailto: links
                for match in re.finditer(r'mailto:([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', text):
                    emails.add(match.group(1).lower())
        except Exception:
            # was a bare `except:` which also swallowed KeyboardInterrupt;
            # a bad page should only skip that page, not abort the scrape
            continue
    return list(emails)
def extract_emails_from_readme(text):
    """
    extract emails from README text, looking for contact sections.
    handles both plain addresses and "[at]/[dot]" obfuscation.
    """
    if not text:
        return []
    found = set()
    junk_markers = ('noreply', 'no-reply', 'example.com')
    # pull out contact-ish sections, then scan just those for addresses
    section_patterns = (
        r'(?:##?\s*)?(?:contact|reach|email|get in touch|connect)[^\n]*\n([^\n#]+)',
        r'(?:email|contact|reach me)[:\s]+([^\n]+)',
    )
    for section_pattern in section_patterns:
        for section_match in re.finditer(section_pattern, text, re.IGNORECASE):
            section_text = section_match.group(1)
            for email_match in re.finditer(EMAIL_PATTERN, section_text):
                candidate = email_match.group(0).lower()
                if not any(marker in candidate for marker in junk_markers):
                    found.add(candidate)
    # "user [at] domain [dot] com" style obfuscation, scanned over the whole text
    obfuscated = r'([a-zA-Z0-9._%+-]+)\s*(?:\[at\]|\(at\))\s*([a-zA-Z0-9.-]+)\s*(?:\[dot\]|\(dot\))\s*([a-zA-Z]{2,})'
    for m in re.finditer(obfuscated, text, re.IGNORECASE):
        found.add(f"{m.group(1)}@{m.group(2)}.{m.group(3)}".lower())
    return list(found)
def get_mastodon_dm_allowed(handle):
    """check if a mastodon user allows DMs"""
    profile = get_mastodon_profile(handle)
    if not profile:
        return False
    # locked accounts require follow approval - treat DMs as closed
    if profile.get('locked'):
        return False
    bio = (profile.get('note') or profile.get('summary') or '').lower()
    # NOTE(review): both the explicit "dms open" markers and the unlocked
    # default return True, so the bio scan does not currently change the
    # result - kept to preserve the original's intent/shape
    open_markers = ('dms open', 'dm me', 'message me', 'dms welcome')
    if any(marker in bio for marker in open_markers):
        return True
    # default: assume open if not locked
    return True
def determine_contact_method(profile):
    """
    determine the best way to contact someone.

    returns (method, details) where method is one of:
    - 'email': direct email contact
    - 'github_issue': open issue on their repo
    - 'mastodon': DM on mastodon
    - 'manual': needs manual review
    """
    # 1. a single known-good email wins outright
    direct = profile.get('email')
    if direct:
        return 'email', {'email': direct}
    # 2. otherwise pick from the harvested email list,
    #    preferring anything that isn't a github/noreply/work address
    candidates = profile.get('emails') or []
    if candidates:
        work_markers = ['github', 'noreply', '@company', '@corp']
        personal = [e for e in candidates
                    if not any(marker in e.lower() for marker in work_markers)]
        chosen = personal[0] if personal else candidates[0]
        return 'email', {'email': chosen}
    # 3. mastodon DM, first handle that accepts them
    masto = profile.get('mastodon')
    if masto:
        handles = masto if isinstance(masto, list) else [masto]
        for handle in handles:
            if get_mastodon_dm_allowed(handle):
                return 'mastodon', {'handle': handle}
    # 4. github issue on their best repo (needs issues-worthy popularity)
    repos = profile.get('top_repos')
    if repos:
        for repo in sorted(repos, key=lambda r: r.get('stars', 0), reverse=True):
            if repo.get('stars', 0) < 10:
                continue
            repo_name = repo.get('name')
            if repo_name:
                return 'github_issue', {
                    'repo': f"{profile['username']}/{repo_name}",
                    'stars': repo.get('stars'),
                }
    # 5. nothing usable - queue for a human to look at
    return 'manual', {
        'reason': 'no email, mastodon, or suitable repo found',
        'available': {
            'twitter': profile.get('twitter'),
            'websites': profile.get('websites'),
            'matrix': profile.get('matrix'),
        }
    }
def extract_links_from_text(text):
    """extract social links from bio/readme text

    returns a dict with keys mastodon/twitter/github/matrix/email/websites,
    each a deduped list; empty dict for empty input.
    """
    if not text:
        return {}
    found = {key: [] for key in ('mastodon', 'twitter', 'github', 'matrix', 'email', 'websites')}
    # mastodon handles - only accept known instances or fediverse-looking domains
    fedi_hints = ['mastodon', 'social', 'fedi', '.town', '.cafe']
    for m in re.finditer(MASTODON_PATTERN, text):
        user, instance = m.groups()
        inst = instance.lower()
        if inst in KNOWN_INSTANCES or any(hint in inst for hint in fedi_hints):
            found['mastodon'].append(f"{user}@{instance}")
    # twitter
    for m in re.finditer(TWITTER_PATTERN, text, re.IGNORECASE):
        found['twitter'].append(m.group(1))
    # github (for cross-referencing)
    for m in re.finditer(GITHUB_PATTERN, text, re.IGNORECASE):
        found['github'].append(m.group(1))
    # matrix
    for m in re.finditer(MATRIX_PATTERN, text):
        user, server = m.groups()
        found['matrix'].append(f"@{user}:{server}")
    # email, minus obvious non-personal addresses
    junk_markers = ['noreply', 'no-reply', 'example.com', 'users.noreply']
    for m in re.finditer(EMAIL_PATTERN, text):
        addr = m.group(0)
        if not any(marker in addr.lower() for marker in junk_markers):
            found['email'].append(addr)
    # websites (http/https links that aren't social platforms)
    platform_domains = ['github.com', 'twitter.com', 'mastodon', 'linkedin.com', 't.co']
    for m in re.finditer(r'https?://([a-zA-Z0-9.-]+\.[a-z]{2,})[/\w.-]*', text):
        domain = m.group(1).lower()
        if not any(marker in domain for marker in platform_domains):
            found['websites'].append(m.group(0))
    # dedupe every bucket
    return {key: list(set(values)) for key, values in found.items()}
def is_personal_website(url):
    """check if URL looks like a personal website vs corporate site"""
    domain = urlparse(url).netloc.lower()
    # big platforms and corporate sites are never "personal"
    corporate = (
        'github.com', 'gitlab.com', 'bitbucket.org',
        'twitter.com', 'x.com', 'linkedin.com', 'facebook.com',
        'youtube.com', 'medium.com', 'dev.to', 'hashnode.com',
        'wedo.com', 'google.com', 'microsoft.com', 'apple.com',
        'amazon.com', 'stackoverflow.com', 'reddit.com',
    )
    for marker in corporate:
        if marker in domain:
            return False
    # hacker-ish TLDs are a strong personal signal
    if domain.endswith(('.io', '.dev', '.me', '.co', '.xyz', '.page', '.codes', '.software')):
        return True
    # a short two-label domain like name.com also reads as personal
    labels = domain.replace('www.', '').split('.')
    return len(labels) == 2 and len(labels[0]) < 20
def scrape_website_for_links(url, timeout=10):
    """scrape a personal website for more social links.

    returns the same dict shape as extract_links_from_text; empty dict for
    non-personal sites or on any fetch/parse error.
    """
    if not is_personal_website(url):
        return {}
    try:
        resp = requests.get(url, timeout=timeout, headers={'User-Agent': 'connectd/1.0'})
        resp.raise_for_status()
        return extract_links_from_text(resp.text)
    except Exception:
        # was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit
        return {}
def get_mastodon_profile(handle):
    """
    fetch mastodon profile from handle like user@instance.

    tries webfinger first (resolves the activitypub profile document),
    then falls back to the instance's accounts/lookup API.
    returns the profile dict, or None on bad handle / any failure.
    """
    if '@' not in handle:
        return None
    parts = handle.split('@')
    if len(parts) == 2:
        user, instance = parts
    elif len(parts) == 3 and parts[0] == '':
        # @user@instance format (leading @ produces an empty first element)
        user, instance = parts[1], parts[2]
    else:
        return None
    # try to look up via webfinger
    try:
        webfinger_url = f"https://{instance}/.well-known/webfinger"
        resp = requests.get(
            webfinger_url,
            params={'resource': f'acct:{user}@{instance}'},
            timeout=10,
            headers={'Accept': 'application/json'}
        )
        if resp.status_code == 200:
            data = resp.json()
            # find the profile link
            for link in data.get('links', []):
                if link.get('type') == 'application/activity+json':
                    profile_url = link.get('href')
                    # fetch the profile
                    profile_resp = requests.get(
                        profile_url,
                        timeout=10,
                        headers={'Accept': 'application/activity+json'}
                    )
                    if profile_resp.status_code == 200:
                        return profile_resp.json()
    except Exception:
        # was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit
        pass
    # fallback: try direct API
    try:
        search_url = f"https://{instance}/api/v1/accounts/lookup"
        resp = requests.get(search_url, params={'acct': user}, timeout=10)
        if resp.status_code == 200:
            return resp.json()
    except Exception:
        pass
    return None
def deep_scrape_github_user(login, scrape_commits=True):
    """
    deep scrape a github user - follow all links, build complete profile
    email discovery sources:
    1. github profile (if public)
    2. git commit history (if scrape_commits=True)
    3. personal website/blog contact pages
    4. README "contact me" sections
    5. mastodon bio

    returns a profile dict (see keys below) or None when the user can't
    be fetched. makes many network calls; intended for batch scouting.
    """
    print(f" deep scraping {login}...")
    user = get_github_user(login)
    if not user:
        # user not found or api error - nothing to build on
        return None
    repos = get_user_repos(login, per_page=50)
    # collect all text to search for links
    all_text = []
    readme_text = None
    if user.get('bio'):
        all_text.append(user['bio'])
    if user.get('blog'):
        all_text.append(user['blog'])
    if user.get('company'):
        all_text.append(user['company'])
    # check readme of profile repo (username/username)
    for branch in ['main', 'master']:
        readme_url = f"https://raw.githubusercontent.com/{login}/{login}/{branch}/README.md"
        try:
            resp = requests.get(readme_url, timeout=10)
            if resp.status_code == 200:
                readme_text = resp.text
                all_text.append(readme_text)
                break
        except:
            pass
    # extract links from all collected text
    combined_text = '\n'.join(all_text)
    found_links = extract_links_from_text(combined_text)
    # ensure all keys exist (extract_links_from_text returns {} for empty text)
    for key in ['email', 'twitter', 'github', 'matrix', 'mastodon', 'websites']:
        if key not in found_links:
            found_links[key] = []
    # add explicit github fields
    if user.get('email'):
        found_links['email'].append(user['email'])
    if user.get('twitter_username'):
        found_links['twitter'].append(user['twitter_username'])
    if user.get('blog'):
        found_links['websites'].append(user['blog'])
    # EMAIL DISCOVERY: extract emails from README contact sections
    if readme_text:
        readme_emails = extract_emails_from_readme(readme_text)
        found_links['email'].extend(readme_emails)
        if readme_emails:
            print(f" found {len(readme_emails)} email(s) in README")
    # dedupe
    for key in found_links:
        found_links[key] = list(set(found_links[key]))
    # now follow the links to gather more data
    profile = {
        'source': 'github',
        'username': login,
        'url': f"https://github.com/{login}",
        'real_name': user.get('name'),
        'bio': user.get('bio'),
        'location': user.get('location'),
        'company': user.get('company'),
        'hireable': user.get('hireable'),
        'created_at': user.get('created_at'),
        'public_repos': user.get('public_repos'),
        'followers': user.get('followers'),
        # contact points
        'email': found_links['email'][0] if found_links['email'] else user.get('email'),
        'emails': list(found_links['email']),
        'twitter': found_links['twitter'][0] if found_links['twitter'] else user.get('twitter_username'),
        'mastodon': found_links['mastodon'],
        'matrix': found_links['matrix'],
        'websites': found_links['websites'],
        # cross-platform profiles we find
        'linked_profiles': {},
        # repos and languages
        'top_repos': [],
        'languages': {},
        'topics': [],
        'orgs': [],
        # contact method (will be determined at end)
        'contact_method': None,
        'contact_details': None,
    }
    # analyze repos (first 30, skipping forks)
    top_starred_repo = None
    for repo in repos[:30]:
        if not repo.get('fork'):
            repo_info = {
                'name': repo.get('name'),
                'description': repo.get('description'),
                'stars': repo.get('stargazers_count'),
                'language': repo.get('language'),
                'topics': repo.get('topics', []),
                'html_url': repo.get('html_url'),
                'pushed_at': repo.get('pushed_at'),  # for activity-based contact selection
            }
            profile['top_repos'].append(repo_info)
            # track top starred for commit email scraping
            if not top_starred_repo or repo.get('stargazers_count', 0) > top_starred_repo.get('stars', 0):
                top_starred_repo = repo_info
            if repo.get('language'):
                lang = repo['language']
                profile['languages'][lang] = profile['languages'].get(lang, 0) + 1
            profile['topics'].extend(repo.get('topics', []))
    profile['topics'] = list(set(profile['topics']))
    # get orgs - check cache first to avoid the extra api call
    cached_orgs = get_cached_orgs(login)
    if cached_orgs is not None:
        print(f" using cached orgs: {cached_orgs}")
        profile['orgs'] = cached_orgs
    else:
        orgs_url = f"https://api.github.com/users/{login}/orgs"
        orgs_data = github_api(orgs_url) or []
        profile['orgs'] = [o.get('login') for o in orgs_data]
        # cache for future use
        cache_orgs(login, profile['orgs'])
        if profile['orgs']:
            print(f" fetched & cached orgs: {profile['orgs']}")
    # EMAIL DISCOVERY: scrape commit history from top repo
    # (only when we have found no email anywhere else - cloning is expensive)
    if scrape_commits and top_starred_repo and not profile['emails']:
        repo_url = f"https://github.com/{login}/{top_starred_repo['name']}.git"
        print(f" checking commit history in {top_starred_repo['name']}...")
        commit_emails = get_emails_from_commit_history(repo_url)
        if commit_emails:
            print(f" found {len(commit_emails)} email(s) in commits")
            profile['emails'].extend(commit_emails)
    # follow mastodon links
    for masto_handle in found_links['mastodon'][:2]:  # limit to 2
        print(f" following mastodon: {masto_handle}")
        masto_profile = get_mastodon_profile(masto_handle)
        if masto_profile:
            profile['linked_profiles']['mastodon'] = {
                'handle': masto_handle,
                'display_name': masto_profile.get('display_name') or masto_profile.get('name'),
                'bio': masto_profile.get('note') or masto_profile.get('summary'),
                'followers': masto_profile.get('followers_count'),
                'url': masto_profile.get('url'),
                'locked': masto_profile.get('locked', False),
            }
            # extract more links from mastodon bio
            masto_bio = masto_profile.get('note') or masto_profile.get('summary') or ''
            masto_links = extract_links_from_text(masto_bio)
            profile['emails'].extend(masto_links.get('email', []))
            profile['websites'].extend(masto_links.get('websites', []))
    # EMAIL DISCOVERY: scrape personal website for contact info
    for website in found_links['websites'][:2]:  # check up to 2 sites
        print(f" following website: {website}")
        # basic link extraction
        site_links = scrape_website_for_links(website)
        if site_links.get('mastodon') and not profile['mastodon']:
            profile['mastodon'] = site_links['mastodon']
        # enhanced email discovery - check contact pages
        website_emails = scrape_website_for_emails(website)
        if website_emails:
            print(f" found {len(website_emails)} email(s) on website")
            profile['emails'].extend(website_emails)
    # dedupe emails and pick best one
    profile['emails'] = list(set(profile['emails']))
    # rank emails by preference (higher score = more likely a real personal inbox)
    def email_score(email):
        email_lower = email.lower()
        score = 0
        # prefer personal domains
        if any(x in email_lower for x in ['@gmail', '@proton', '@hey.com', '@fastmail']):
            score += 10
        # deprioritize github emails
        if 'github' in email_lower:
            score -= 20
        # deprioritize noreply
        if 'noreply' in email_lower:
            score -= 50
        # prefer emails matching username
        if login.lower() in email_lower:
            score += 5
        return score
    if profile['emails']:
        profile['emails'].sort(key=email_score, reverse=True)
        profile['email'] = profile['emails'][0]
    # COMPREHENSIVE HANDLE DISCOVERY
    # find ALL social handles from website, README, rel="me" links, etc.
    discovered_handles, discovered_emails = discover_all_handles(user)
    # merge discovered handles into profile
    profile['handles'] = discovered_handles
    # update individual fields from discovered handles
    # (only fill fields the earlier link extraction left empty)
    if discovered_handles.get('mastodon') and not profile.get('mastodon'):
        profile['mastodon'] = discovered_handles['mastodon']
    if discovered_handles.get('twitter') and not profile.get('twitter'):
        profile['twitter'] = discovered_handles['twitter']
    if discovered_handles.get('bluesky'):
        profile['bluesky'] = discovered_handles['bluesky']
    if discovered_handles.get('matrix') and not profile.get('matrix'):
        profile['matrix'] = discovered_handles['matrix']
    if discovered_handles.get('linkedin'):
        profile['linkedin'] = discovered_handles['linkedin']
    if discovered_handles.get('youtube'):
        profile['youtube'] = discovered_handles['youtube']
    if discovered_handles.get('discord'):
        profile['discord'] = discovered_handles['discord']
    if discovered_handles.get('telegram'):
        profile['telegram'] = discovered_handles['telegram']
    # merge discovered emails
    for email in discovered_emails:
        if email not in profile['emails']:
            profile['emails'].append(email)
    print(f" handles found: {list(discovered_handles.keys())}")
    # determine best contact method
    contact_method, contact_details = determine_contact_method(profile)
    profile['contact_method'] = contact_method
    profile['contact_details'] = contact_details
    print(f" contact method: {contact_method}")
    # analyze all text for signals
    all_profile_text = ' '.join([
        profile.get('bio') or '',
        profile.get('company') or '',
        profile.get('location') or '',
        ' '.join(profile.get('topics', [])),
    ])
    for linked in profile.get('linked_profiles', {}).values():
        if linked.get('bio'):
            all_profile_text += ' ' + linked['bio']
    text_score, signals, negative = analyze_text(all_profile_text)
    profile['signals'] = signals
    profile['negative_signals'] = negative
    profile['score'] = text_score
    # add builder score (prolific non-fork activity)
    if len(repos) > 20:
        profile['score'] += 15
    elif len(repos) > 10:
        profile['score'] += 10
    # add topic alignment
    from .signals import TARGET_TOPICS
    aligned_topics = set(profile['topics']) & set(TARGET_TOPICS)
    profile['score'] += len(aligned_topics) * 10
    profile['aligned_topics'] = list(aligned_topics)
    profile['scraped_at'] = datetime.now().isoformat()
    return profile
def check_mutual_github_follows(user_a, user_b):
    """check if user_a follows user_b on github.

    NOTE: despite the name this checks ONE direction only; callers invoke it
    twice (a->b and b->a) for mutuality. uses the unauthenticated
    following endpoint where 204 means "follows". network errors are
    treated as "not following".
    """
    url = f"https://api.github.com/users/{user_a}/following/{user_b}"
    try:
        resp = requests.get(url, timeout=10, headers={'Accept': 'application/vnd.github.v3+json'})
        if resp.status_code == 204:  # 204 = follows
            return True
    except Exception:
        # was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit
        pass
    return False
def check_shared_repo_contributions(user_a, user_b):
    """
    check if two users have contributed to the same repos
    returns (bool, list of shared repos)

    stub: real contributor data would require querying
    GET /repos/{owner}/{repo}/contributors for each top repo; for now the
    org/top_repos overlap checks in check_already_connected cover this case.
    """
    shared_repos = []
    return bool(shared_repos), shared_repos
def check_github_interactions(user_a, user_b):
    """
    check if users have had public interactions
    (comments on each other's issues/PRs)

    stub - the search-API queries this needs are expensive, so it is only
    worth wiring up for high-score matches:
      GET /search/issues?q=author:{user_a}+commenter:{user_b}
      GET /search/issues?q=author:{user_b}+commenter:{user_a}
    """
    return False
def check_already_connected(human_a, human_b, deep_check=False):
    """
    check if two humans are likely already connected
    (same org, co-contributors, mutual follows, interactions)
    connectd's job is connecting ISOLATED builders, not re-introducing coworkers

    returns (connected: bool, reason: str | None)
    """
    def _extra(human):
        # extra may be stored as a json string in the db
        raw = human.get('extra', {})
        if isinstance(raw, str):
            return json.loads(raw) if raw else {}
        return raw

    extra_a = _extra(human_a)
    extra_b = _extra(human_b)

    def _orgs(human, extra):
        # stored orgs, topped up with fresher data from the org cache
        found = set(extra.get('orgs', []))
        if human.get('platform') == 'github':
            cached = get_cached_orgs(human.get('username', ''))
            if cached:
                found.update(cached)
        return found

    # 1. same github org
    shared_orgs = _orgs(human_a, extra_a) & _orgs(human_b, extra_b)
    if shared_orgs:
        return True, f"same org: {', '.join(list(shared_orgs)[:3])}"

    # 2. same company (substring match catches "@acme" vs "acme inc")
    company_a = (extra_a.get('company') or '').lower().strip('@').strip()
    company_b = (extra_b.get('company') or '').lower().strip('@').strip()
    if company_a and company_b and len(company_a) > 2:
        if company_a == company_b or company_a in company_b or company_b in company_a:
            return True, f"same company: {company_a or company_b}"

    # 3. co-contributors to the same significant repos (from stored top_repos)
    def _big_repos(extra):
        return {r.get('name', '').lower()
                for r in extra.get('top_repos', [])
                if r.get('stars', 0) > 50}

    shared_repos = _big_repos(extra_a) & _big_repos(extra_b)
    if len(shared_repos) >= 2:
        return True, f"co-contributors: {', '.join(list(shared_repos)[:3])}"

    # 4. expensive api checks - only when explicitly requested
    if deep_check:
        user_a = human_a.get('username', '')
        user_b = human_b.get('username', '')
        if human_a.get('platform') == 'github' and human_b.get('platform') == 'github':
            if check_mutual_github_follows(user_a, user_b):
                return True, "mutual github follows"
            if check_mutual_github_follows(user_b, user_a):
                return True, "mutual github follows"

    return False, None
def save_deep_profile(db, profile):
    """save a deep-scraped profile to the database.

    converts the deep_scrape_github_user output into the standard human
    record. IMPORTANT: the extra field carries ALL data needed for
    activity-based contact selection later. returns the saved record.
    """
    linked = profile.get('linked_profiles')
    contact = {
        'email': profile.get('email'),
        'emails': profile.get('emails', []),
        'twitter': profile.get('twitter'),
        'mastodon': profile.get('mastodon'),
        'matrix': profile.get('matrix'),
        'websites': profile.get('websites'),
        'contact_method': profile.get('contact_method'),
        'contact_details': profile.get('contact_details'),
    }
    extra = {
        # identity
        'real_name': profile.get('real_name'),
        'company': profile.get('company'),
        'hireable': profile.get('hireable'),
        'orgs': profile.get('orgs'),
        # github activity (for activity-based contact)
        'top_repos': profile.get('top_repos'),
        'languages': profile.get('languages'),
        'topics': profile.get('topics'),
        'aligned_topics': profile.get('aligned_topics'),
        'followers': profile.get('followers'),
        'public_repos': profile.get('public_repos'),
        'commit_count': len(profile.get('emails', [])),  # rough proxy
        # cross-platform links (for activity-based contact)
        'email': profile.get('email'),
        'emails': profile.get('emails', []),
        'twitter': profile.get('twitter'),
        'mastodon': profile.get('mastodon'),
        'matrix': profile.get('matrix'),
        'bluesky': profile.get('bluesky'),
        'reddit': profile.get('reddit'),
        'lobsters': profile.get('lobsters'),
        'linkedin': profile.get('linkedin'),
        'youtube': profile.get('youtube'),
        'discord': profile.get('discord'),
        'telegram': profile.get('telegram'),
        'linked_profiles': linked,
        # ALL discovered handles (comprehensive)
        'handles': profile.get('handles', {}),
        # activity counts (populated by platform scrapers)
        'mastodon_statuses': profile.get('mastodon_statuses', 0),
        'twitter_tweets': profile.get('twitter_tweets', 0),
        'reddit_activity': profile.get('reddit_activity', 0),
        'reddit_karma': profile.get('reddit_karma', 0),
        'lobsters_karma': profile.get('lobsters_karma', 0),
        'bluesky_posts': profile.get('bluesky_posts', 0),
    }
    human_data = {
        'platform': profile['source'],
        'username': profile['username'],
        'url': profile['url'],
        'name': profile.get('real_name'),
        'bio': profile.get('bio'),
        'location': profile.get('location'),
        'score': profile.get('score', 0),
        # a confirmed second-platform presence raises confidence
        'confidence': 0.8 if linked else 0.5,
        'signals': profile.get('signals', []),
        'negative_signals': profile.get('negative_signals', []),
        'reasons': [],
        'contact': contact,
        'extra': extra,
        'scraped_at': profile.get('scraped_at'),
    }
    # build the human-readable reasons summary
    reasons = human_data['reasons']
    if profile.get('signals'):
        reasons.append(f"signals: {', '.join(profile['signals'][:5])}")
    if profile.get('aligned_topics'):
        reasons.append(f"topics: {', '.join(profile['aligned_topics'][:5])}")
    if linked:
        reasons.append(f"also on: {', '.join(list(linked.keys()))}")
    if profile.get('location'):
        reasons.append(f"location: {profile['location']}")
    if profile.get('contact_method'):
        reasons.append(f"contact: {profile['contact_method']}")
    db.save_human(human_data)
    return human_data

323
connectd/scoutd/discord.py Normal file
View file

@ -0,0 +1,323 @@
"""
scoutd/discord.py - discord discovery
discord requires a bot token to read messages.
target servers: programming help, career transition, indie hackers, etc.
SETUP:
1. create discord app at discord.com/developers
2. add bot, get token
3. join target servers with bot
4. set DISCORD_BOT_TOKEN env var
"""
import requests
import json
import time
import os
from datetime import datetime
from pathlib import Path
from .signals import analyze_text
from .lost import (
analyze_social_for_lost_signals,
classify_user,
)
# empty token means scraping is disabled (see get_headers / scrape_discord)
DISCORD_BOT_TOKEN = os.environ.get('DISCORD_BOT_TOKEN', '')
DISCORD_API = 'https://discord.com/api/v10'
# default server IDs - values-aligned communities
# bot must be invited to these servers to scout them
# invite links for reference (use numeric IDs below):
# - self-hosted: discord.gg/self-hosted
# - foss-dev: discord.gg/foss-developers-group
# - grapheneos: discord.gg/grapheneos
# - queer-coded: discord.me/queer-coded
# - homelab: discord.gg/homelab
# - esphome: discord.gg/n9sdw7pnsn
# - home-assistant: discord.gg/home-assistant
# - linuxserver: discord.gg/linuxserver
# - proxmox-scripts: discord.gg/jsYVk5JBxq
DEFAULT_SERVERS = [
    # self-hosted / foss / privacy
    '693469700109369394',  # self-hosted (selfhosted.show)
    '920089648842293248',  # foss developers group
    '1176414688112820234',  # grapheneos
    # queer tech
    '925804557001437184',  # queer coded
    # home automation / homelab
    # note: these are large servers, bot needs to be invited
    # '330944238910963714',  # home assistant (150k+ members)
    # '429907082951524364',  # esphome (35k members)
    # '478094546522079232',  # homelab (35k members)
    # '354974912613449730',  # linuxserver.io (41k members)
]
# merge env var servers with defaults (comma-separated numeric IDs);
# note: set() union means the resulting order is unspecified
_env_servers = os.environ.get('DISCORD_TARGET_SERVERS', '').split(',')
_env_servers = [s.strip() for s in _env_servers if s.strip()]
TARGET_SERVERS = list(set(DEFAULT_SERVERS + _env_servers))
# channels to focus on (keywords in channel name)
TARGET_CHANNEL_KEYWORDS = [
    'help', 'career', 'jobs', 'learning', 'beginner',
    'general', 'introductions', 'showcase', 'projects',
]
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'discord'
# created eagerly at import time so scrapers can write without checking
CACHE_DIR.mkdir(parents=True, exist_ok=True)
def get_headers():
    """get discord api headers, or None when no bot token is configured"""
    token = DISCORD_BOT_TOKEN
    if not token:
        return None
    return {
        'Authorization': f'Bot {token}',
        'Content-Type': 'application/json',
    }
def get_guild_channels(guild_id):
    """get channels in a guild; [] on missing token or any api failure"""
    headers = get_headers()
    if not headers:
        return []
    url = f'{DISCORD_API}/guilds/{guild_id}/channels'
    try:
        resp = requests.get(url, headers=headers, timeout=30)
        # non-200 usually means the bot isn't in the guild
        return resp.json() if resp.status_code == 200 else []
    except Exception:
        return []
def get_channel_messages(channel_id, limit=100):
    """get recent messages from a channel; [] on missing token or failure"""
    headers = get_headers()
    if not headers:
        return []
    url = f'{DISCORD_API}/channels/{channel_id}/messages'
    try:
        resp = requests.get(url, headers=headers, params={'limit': limit}, timeout=30)
        return resp.json() if resp.status_code == 200 else []
    except Exception:
        return []
def get_user_info(user_id):
    """get discord user info; None on missing token or any api failure"""
    headers = get_headers()
    if not headers:
        return None
    url = f'{DISCORD_API}/users/{user_id}'
    try:
        resp = requests.get(url, headers=headers, timeout=30)
        return resp.json() if resp.status_code == 200 else None
    except Exception:
        return None
def analyze_discord_user(user_data, messages=None):
    """analyze a discord user for values alignment and lost signals.

    scores up to 20 of the supplied messages and runs the lost-builder
    classifier over the same text; returns a standard human record dict.
    """
    username = user_data.get('username', '')
    display_name = user_data.get('global_name') or username
    user_id = user_data.get('id')
    # score message content, skipping trivially short messages
    texts = []
    signal_hits = []
    total_score = 0
    for msg in (messages or [])[:20]:
        content = msg.get('content', '')
        if not content or len(content) < 20:
            continue
        texts.append(content)
        score, signals, _ = analyze_text(content)
        signal_hits.extend(signals)
        total_score += score
    unique_signals = list(set(signal_hits))
    message_count = len(messages) if messages else 0
    # lost builder detection works off the same message texts
    lost_signals, lost_weight = analyze_social_for_lost_signals(
        {'bio': '', 'message_count': message_count},
        [{'text': t} for t in texts],
    )
    user_type = classify_user(lost_weight, 50, total_score)
    return {
        'platform': 'discord',
        'username': username,
        'url': f"https://discord.com/users/{user_id}",
        'name': display_name,
        'bio': '',
        'location': None,
        'score': total_score,
        # confidence grows with distinct signals, capped at 0.8
        'confidence': min(0.8, 0.2 + len(unique_signals) * 0.1),
        'signals': unique_signals,
        'negative_signals': [],
        'reasons': [],
        'contact': {'discord': f"{username}#{user_data.get('discriminator', '0')}"},
        'extra': {
            'user_id': user_id,
            'message_count': message_count,
        },
        'lost_potential_score': lost_weight,
        'lost_signals': lost_signals,
        'user_type': user_type,
    }
def scrape_discord(db, limit_per_channel=50):
    """scrape discord servers for aligned builders

    walks every guild in TARGET_SERVERS, pulls recent messages from text
    channels whose names match TARGET_CHANNEL_KEYWORDS, scores each non-bot
    author once per run, and persists promising profiles via db.save_human().

    :param db: storage object exposing save_human(profile_dict)
    :param limit_per_channel: max messages fetched per channel
    :returns: number of humans saved
    """
    if not DISCORD_BOT_TOKEN:
        print("discord: DISCORD_BOT_TOKEN not set, skipping")
        return 0
    if not TARGET_SERVERS or TARGET_SERVERS == ['']:
        print("discord: DISCORD_TARGET_SERVERS not set, skipping")
        return 0
    print("scouting discord...")
    found = 0
    lost_found = 0
    seen_users = set()  # discord user ids already analyzed this run
    for guild_id in TARGET_SERVERS:
        if not guild_id:
            continue
        guild_id = guild_id.strip()
        channels = get_guild_channels(guild_id)
        if not channels:
            print(f" guild {guild_id}: no access or no channels")
            continue
        # filter to relevant channels
        target_channels = []
        for ch in channels:
            if ch.get('type') != 0: # text channels only
                continue
            name = ch.get('name', '').lower()
            if any(kw in name for kw in TARGET_CHANNEL_KEYWORDS):
                target_channels.append(ch)
        print(f" guild {guild_id}: {len(target_channels)} relevant channels")
        for channel in target_channels[:5]: # limit channels per server
            messages = get_channel_messages(channel['id'], limit=limit_per_channel)
            if not messages:
                continue
            # group messages by user
            user_messages = {}
            for msg in messages:
                author = msg.get('author', {})
                if author.get('bot'):
                    continue
                user_id = author.get('id')
                if not user_id or user_id in seen_users:
                    continue
                if user_id not in user_messages:
                    user_messages[user_id] = {'user': author, 'messages': []}
                user_messages[user_id]['messages'].append(msg)
            # analyze each user
            for user_id, data in user_messages.items():
                if user_id in seen_users:
                    continue
                seen_users.add(user_id)
                result = analyze_discord_user(data['user'], data['messages'])
                if not result:
                    continue
                # save when either the values score or the lost-builder score clears its threshold
                if result['score'] >= 20 or result.get('lost_potential_score', 0) >= 30:
                    db.save_human(result)
                    found += 1
                    if result.get('user_type') in ['lost', 'both']:
                        lost_found += 1
            time.sleep(1) # rate limit between channels
        time.sleep(2) # between guilds
    print(f"discord: found {found} humans ({lost_found} lost builders)")
    return found
def send_discord_dm(user_id, message, dry_run=False):
    """open (or reuse) a DM channel with a discord user and deliver a message

    returns (ok, detail); detail carries the error reason on failure.
    """
    if not DISCORD_BOT_TOKEN:
        return False, "DISCORD_BOT_TOKEN not set"
    if dry_run:
        print(f" [dry run] would DM discord user {user_id}")
        return True, "dry run"
    headers = get_headers()
    try:
        # step 1: ask discord for a DM channel with this recipient
        channel_resp = requests.post(
            f'{DISCORD_API}/users/@me/channels',
            headers=headers,
            json={'recipient_id': user_id},
            timeout=30
        )
        if channel_resp.status_code not in [200, 201]:
            return False, f"couldn't create DM channel: {channel_resp.status_code}"
        dm_channel_id = channel_resp.json().get('id')
        # step 2: post the message into that channel
        send_resp = requests.post(
            f'{DISCORD_API}/channels/{dm_channel_id}/messages',
            headers=headers,
            json={'content': message},
            timeout=30
        )
        if send_resp.status_code not in [200, 201]:
            return False, f"send failed: {send_resp.status_code}"
        return True, f"sent to {user_id}"
    except Exception as e:
        return False, str(e)

330
connectd/scoutd/github.py Normal file
View file

@ -0,0 +1,330 @@
"""
scoutd/github.py - github discovery
scrapes repos, bios, commit patterns to find aligned builders
also detects lost builders - people with potential who haven't started yet
"""
import requests
import json
import time
import os
from datetime import datetime
from pathlib import Path
from collections import defaultdict
from .signals import analyze_text, TARGET_TOPICS, ECOSYSTEM_REPOS
from .lost import (
analyze_github_for_lost_signals,
analyze_text_for_lost_signals,
classify_user,
get_signal_descriptions,
)
from .handles import discover_all_handles
# rate limit: 60/hr unauthenticated, 5000/hr with token
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '')
# default github REST v3 headers; the token is attached only when configured
HEADERS = {'Accept': 'application/vnd.github.v3+json'}
if GITHUB_TOKEN:
    HEADERS['Authorization'] = f'token {GITHUB_TOKEN}'
# on-disk response cache used by _api_get (created lazily on first request)
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'github'
def _api_get(url, params=None):
    """rate-limited GET against the github api with a 1-hour file cache

    :param url: full api URL
    :param params: optional query params (part of the cache key)
    :returns: parsed json, or None on any request error
    """
    import hashlib  # local import; keeps the module's import surface unchanged
    cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}"
    # use a content hash for the cache filename: builtin hash() is salted per
    # interpreter process (PYTHONHASHSEED), so the old scheme produced a new
    # filename every run and the cache never survived a restart
    digest = hashlib.sha256(cache_key.encode('utf-8')).hexdigest()[:16]
    cache_file = CACHE_DIR / f"{digest}.json"
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    # check cache (1 hour expiry)
    if cache_file.exists():
        try:
            data = json.loads(cache_file.read_text())
            if time.time() - data.get('_cached_at', 0) < 3600:
                return data.get('_data')
        except (OSError, ValueError):
            # unreadable or corrupt cache entry: fall through to a fresh fetch
            pass
    # rate limit: authenticated clients get 5000 req/hr, anonymous only 60
    time.sleep(0.5 if GITHUB_TOKEN else 2)
    try:
        resp = requests.get(url, headers=HEADERS, params=params, timeout=30)
        resp.raise_for_status()
        result = resp.json()
        # cache
        cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result}))
        return result
    except requests.exceptions.RequestException as e:
        print(f" github api error: {e}")
        return None
def search_repos_by_topic(topic, per_page=100):
    """search repos carrying a given topic tag, most-starred first"""
    payload = _api_get(
        'https://api.github.com/search/repositories',
        {'q': f'topic:{topic}', 'sort': 'stars', 'order': 'desc', 'per_page': per_page},
    )
    if not payload:
        return []
    return payload.get('items', [])
def get_repo_contributors(repo_full_name, per_page=100):
    """list top contributors for a repo; empty list on api failure"""
    contributors = _api_get(
        f'https://api.github.com/repos/{repo_full_name}/contributors',
        {'per_page': per_page},
    )
    return contributors if contributors else []
def get_github_user(login):
    """fetch the full github profile for a login (None on failure)"""
    return _api_get(f'https://api.github.com/users/{login}')
def get_user_repos(login, per_page=100):
    """fetch a user's repos, most recently pushed first; empty list on failure"""
    repos = _api_get(
        f'https://api.github.com/users/{login}/repos',
        {'per_page': per_page, 'sort': 'pushed'},
    )
    return repos or []
def analyze_github_user(login):
    """
    analyze a github user for values alignment

    combines four sub-scores (bio/repo-text signals, aligned topics, repo
    count, hireable flag), runs lost-builder detection, and performs deep
    handle discovery across linked sites.

    :param login: github username
    :returns: profile dict with score, confidence, signals, contact info,
              lost-builder fields; None when the user can't be fetched
    """
    user = get_github_user(login)
    if not user:
        return None
    repos = get_user_repos(login)
    # collect text corpus
    text_parts = []
    if user.get('bio'):
        text_parts.append(user['bio'])
    if user.get('company'):
        text_parts.append(user['company'])
    if user.get('location'):
        text_parts.append(user['location'])
    # analyze repos
    all_topics = []
    languages = defaultdict(int)
    total_stars = 0
    for repo in repos:
        if repo.get('description'):
            text_parts.append(repo['description'])
        if repo.get('topics'):
            all_topics.extend(repo['topics'])
        if repo.get('language'):
            languages[repo['language']] += 1
        total_stars += repo.get('stargazers_count', 0)
    full_text = ' '.join(text_parts)
    # analyze signals
    text_score, positive_signals, negative_signals = analyze_text(full_text)
    # topic alignment
    aligned_topics = set(all_topics) & set(TARGET_TOPICS)
    topic_score = len(aligned_topics) * 10
    # builder score (repos indicate building, not just talking)
    builder_score = 0
    if len(repos) > 20:
        builder_score = 15
    elif len(repos) > 10:
        builder_score = 10
    elif len(repos) > 5:
        builder_score = 5
    # hireable bonus
    hireable_score = 5 if user.get('hireable') else 0
    # total score
    total_score = text_score + topic_score + builder_score + hireable_score
    # === LOST BUILDER DETECTION ===
    # build profile dict for lost analysis
    profile_for_lost = {
        'bio': user.get('bio'),
        'repos': repos,
        'public_repos': user.get('public_repos', len(repos)),
        'followers': user.get('followers', 0),
        'following': user.get('following', 0),
        'extra': {
            'top_repos': repos[:10],
        },
    }
    # analyze for lost signals
    lost_signals, lost_weight = analyze_github_for_lost_signals(profile_for_lost)
    # also check text for lost language patterns
    text_lost_signals, text_lost_weight = analyze_text_for_lost_signals(full_text)
    for sig in text_lost_signals:
        if sig not in lost_signals:
            lost_signals.append(sig)
    # text weight is added once on top of the profile-based weight
    lost_weight += text_lost_weight
    lost_potential_score = lost_weight
    # classify: builder, lost, both, or none
    user_type = classify_user(lost_potential_score, builder_score, total_score)
    # confidence based on data richness
    confidence = 0.3
    if user.get('bio'):
        confidence += 0.15
    if len(repos) > 5:
        confidence += 0.15
    if len(text_parts) > 5:
        confidence += 0.15
    if user.get('email') or user.get('blog') or user.get('twitter_username'):
        confidence += 0.15
    if total_stars > 100:
        confidence += 0.1
    confidence = min(confidence, 1.0)
    # build reasons
    reasons = []
    if positive_signals:
        reasons.append(f"signals: {', '.join(positive_signals[:5])}")
    if aligned_topics:
        # NOTE(review): aligned_topics is a set, so the sampled five are in
        # arbitrary order across runs
        reasons.append(f"topics: {', '.join(list(aligned_topics)[:5])}")
    if builder_score > 0:
        reasons.append(f"builder ({len(repos)} repos)")
    if negative_signals:
        reasons.append(f"WARNING: {', '.join(negative_signals)}")
    # add lost reasons if applicable
    if user_type == 'lost' or user_type == 'both':
        lost_descriptions = get_signal_descriptions(lost_signals)
        if lost_descriptions:
            reasons.append(f"LOST SIGNALS: {', '.join(lost_descriptions[:3])}")
    # === DEEP HANDLE DISCOVERY ===
    # follow blog links, scrape websites, find ALL social handles
    handles, discovered_emails = discover_all_handles(user)
    # merge discovered emails with github email
    all_emails = discovered_emails or []
    if user.get('email'):
        all_emails.append(user['email'])
    all_emails = list(set(e for e in all_emails if e and 'noreply' not in e.lower()))
    return {
        'platform': 'github',
        'username': login,
        'url': f"https://github.com/{login}",
        'name': user.get('name'),
        'bio': user.get('bio'),
        'location': user.get('location'),
        'score': total_score,
        'confidence': confidence,
        'signals': positive_signals,
        'negative_signals': negative_signals,
        'topics': list(aligned_topics),
        'languages': dict(languages),
        'repo_count': len(repos),
        'total_stars': total_stars,
        'reasons': reasons,
        'contact': {
            # NOTE(review): all_emails comes from a set, so which address is
            # "first" is arbitrary across runs
            'email': all_emails[0] if all_emails else None,
            'emails': all_emails,
            'blog': user.get('blog'),
            'twitter': user.get('twitter_username') or handles.get('twitter'),
            'mastodon': handles.get('mastodon'),
            'bluesky': handles.get('bluesky'),
            'matrix': handles.get('matrix'),
            'lemmy': handles.get('lemmy'),
        },
        'extra': {
            'topics': list(aligned_topics),
            'languages': dict(languages),
            'repo_count': len(repos),
            'total_stars': total_stars,
            'hireable': user.get('hireable', False),
            'handles': handles, # all discovered handles
        },
        'hireable': user.get('hireable', False),
        'scraped_at': datetime.now().isoformat(),
        # lost builder fields
        'lost_potential_score': lost_potential_score,
        'lost_signals': lost_signals,
        'user_type': user_type, # 'builder', 'lost', 'both', 'none'
    }
def scrape_github(db, limit_per_source=50):
    """
    full github scrape

    collects candidate logins from ecosystem repo contributors and from the
    owners of repos tagged with target topics, then analyzes each candidate
    and saves any with a positive score via db.save_human().

    :param db: storage object exposing save_human(profile_dict)
    :param limit_per_source: contributors fetched per ecosystem repo
    :returns: list of analyzed users
    """
    print("scoutd/github: starting scrape...")
    all_logins = set()
    # 1. ecosystem repo contributors
    print(" scraping ecosystem repo contributors...")
    for repo in ECOSYSTEM_REPOS:
        contributors = get_repo_contributors(repo, per_page=limit_per_source)
        for c in contributors:
            login = c.get('login')
            if login and not login.endswith('[bot]'):
                all_logins.add(login)
        print(f" {repo}: {len(contributors)} contributors")
    # 2. topic repos
    print(" scraping topic repos...")
    for topic in TARGET_TOPICS[:10]:
        repos = search_repos_by_topic(topic, per_page=30)
        for repo in repos:
            owner = repo.get('owner', {}).get('login')
            if owner and not owner.endswith('[bot]'):
                all_logins.add(owner)
        print(f" #{topic}: {len(repos)} repos")
    print(f" found {len(all_logins)} unique users to analyze")
    # analyze each
    results = []
    builders_found = 0
    lost_found = 0
    for i, login in enumerate(all_logins):
        if i % 20 == 0:
            print(f" analyzing... {i}/{len(all_logins)}")
        try:
            result = analyze_github_user(login)
            if result and result['score'] > 0:
                results.append(result)
                db.save_human(result)
                user_type = result.get('user_type', 'none')
                if user_type == 'builder':
                    builders_found += 1
                    if result['score'] >= 50:
                        print(f"{login}: {result['score']} pts, {result['confidence']:.0%} conf")
                elif user_type == 'lost':
                    lost_found += 1
                    lost_score = result.get('lost_potential_score', 0)
                    if lost_score >= 40:
                        print(f" 💔 {login}: lost_score={lost_score}, values={result['score']} pts")
                elif user_type == 'both':
                    builders_found += 1
                    lost_found += 1
                    print(f"{login}: recovering builder (lost={result.get('lost_potential_score', 0)}, active={result['score']})")
        except Exception as e:
            # keep the sweep alive: one bad profile shouldn't kill the run
            print(f" error on {login}: {e}")
    print(f"scoutd/github: found {len(results)} aligned humans")
    print(f" - {builders_found} active builders")
    print(f" - {lost_found} lost builders (need encouragement)")
    return results

507
connectd/scoutd/handles.py Normal file
View file

@ -0,0 +1,507 @@
"""
scoutd/handles.py - comprehensive social handle discovery
finds ALL social handles from:
- github bio/profile
- personal websites (rel="me", footers, contact pages, json-ld)
- README files
- linktree/bio.link/carrd pages
- any linked pages
stores structured handle data for activity-based contact selection
"""
import re
import json
import requests
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
# generic browser-ish UA so personal sites don't block the scraper outright
HEADERS = {'User-Agent': 'Mozilla/5.0 (compatible; connectd/1.0)'}
# platform URL patterns -> (platform, handle_extractor)
# NOTE(review): patterns are tried in dict insertion order by
# extract_handle_from_url, and the generic fediverse patterns (any-host /@user
# or /u/user) can match non-fediverse URLs such as medium.com/@user before the
# more specific platform entries are reached - verify platform labels downstream
PLATFORM_PATTERNS = {
    # fediverse
    'mastodon': [
        (r'https?://([^/]+)/@([^/?#]+)', lambda m: f"@{m.group(2)}@{m.group(1)}"),
        (r'https?://([^/]+)/users/([^/?#]+)', lambda m: f"@{m.group(2)}@{m.group(1)}"),
        (r'https?://mastodon\.social/@([^/?#]+)', lambda m: f"@{m.group(1)}@mastodon.social"),
    ],
    'pixelfed': [
        (r'https?://pixelfed\.social/@([^/?#]+)', lambda m: f"@{m.group(1)}@pixelfed.social"),
        (r'https?://([^/]*pixelfed[^/]*)/@([^/?#]+)', lambda m: f"@{m.group(2)}@{m.group(1)}"),
    ],
    'lemmy': [
        (r'https?://([^/]+)/u/([^/?#]+)', lambda m: f"@{m.group(2)}@{m.group(1)}"),
        (r'https?://lemmy\.([^/]+)/u/([^/?#]+)', lambda m: f"@{m.group(2)}@lemmy.{m.group(1)}"),
    ],
    # mainstream
    'twitter': [
        (r'https?://(?:www\.)?(?:twitter|x)\.com/([^/?#]+)', lambda m: f"@{m.group(1)}"),
    ],
    'bluesky': [
        (r'https?://bsky\.app/profile/([^/?#]+)', lambda m: m.group(1)),
        (r'https?://([^.]+)\.bsky\.social', lambda m: f"{m.group(1)}.bsky.social"),
    ],
    'threads': [
        (r'https?://(?:www\.)?threads\.net/@([^/?#]+)', lambda m: f"@{m.group(1)}"),
    ],
    'instagram': [
        (r'https?://(?:www\.)?instagram\.com/([^/?#]+)', lambda m: f"@{m.group(1)}"),
    ],
    'facebook': [
        (r'https?://(?:www\.)?facebook\.com/([^/?#]+)', lambda m: m.group(1)),
    ],
    'linkedin': [
        (r'https?://(?:www\.)?linkedin\.com/in/([^/?#]+)', lambda m: m.group(1)),
        (r'https?://(?:www\.)?linkedin\.com/company/([^/?#]+)', lambda m: f"company/{m.group(1)}"),
    ],
    # dev platforms
    'github': [
        (r'https?://(?:www\.)?github\.com/([^/?#]+)', lambda m: m.group(1)),
    ],
    'gitlab': [
        (r'https?://(?:www\.)?gitlab\.com/([^/?#]+)', lambda m: m.group(1)),
    ],
    'codeberg': [
        (r'https?://codeberg\.org/([^/?#]+)', lambda m: m.group(1)),
    ],
    'sourcehut': [
        (r'https?://sr\.ht/~([^/?#]+)', lambda m: f"~{m.group(1)}"),
        (r'https?://git\.sr\.ht/~([^/?#]+)', lambda m: f"~{m.group(1)}"),
    ],
    # chat
    'matrix': [
        (r'https?://matrix\.to/#/(@[^:]+:[^/?#]+)', lambda m: m.group(1)),
    ],
    'discord': [
        (r'https?://discord\.gg/([^/?#]+)', lambda m: f"invite/{m.group(1)}"),
        (r'https?://discord\.com/invite/([^/?#]+)', lambda m: f"invite/{m.group(1)}"),
    ],
    'telegram': [
        (r'https?://t\.me/([^/?#]+)', lambda m: f"@{m.group(1)}"),
    ],
    # content
    'youtube': [
        (r'https?://(?:www\.)?youtube\.com/@([^/?#]+)', lambda m: f"@{m.group(1)}"),
        (r'https?://(?:www\.)?youtube\.com/c(?:hannel)?/([^/?#]+)', lambda m: m.group(1)),
    ],
    'twitch': [
        (r'https?://(?:www\.)?twitch\.tv/([^/?#]+)', lambda m: m.group(1)),
    ],
    'substack': [
        (r'https?://([^.]+)\.substack\.com', lambda m: m.group(1)),
    ],
    'medium': [
        (r'https?://(?:www\.)?medium\.com/@([^/?#]+)', lambda m: f"@{m.group(1)}"),
        (r'https?://([^.]+)\.medium\.com', lambda m: m.group(1)),
    ],
    'devto': [
        (r'https?://dev\.to/([^/?#]+)', lambda m: m.group(1)),
    ],
    # funding
    'kofi': [
        (r'https?://ko-fi\.com/([^/?#]+)', lambda m: m.group(1)),
    ],
    'patreon': [
        (r'https?://(?:www\.)?patreon\.com/([^/?#]+)', lambda m: m.group(1)),
    ],
    'liberapay': [
        (r'https?://liberapay\.com/([^/?#]+)', lambda m: m.group(1)),
    ],
    'github_sponsors': [
        (r'https?://github\.com/sponsors/([^/?#]+)', lambda m: m.group(1)),
    ],
    # link aggregators (we'll parse these specially)
    'linktree': [
        (r'https?://linktr\.ee/([^/?#]+)', lambda m: m.group(1)),
    ],
    'biolink': [
        (r'https?://bio\.link/([^/?#]+)', lambda m: m.group(1)),
    ],
    'carrd': [
        (r'https?://([^.]+)\.carrd\.co', lambda m: m.group(1)),
    ],
}
# fediverse handle pattern: @user@instance
FEDIVERSE_HANDLE_PATTERN = re.compile(r'@([\w.-]+)@([\w.-]+\.[\w]+)')
# email pattern
EMAIL_PATTERN = re.compile(r'\b([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b')
# known fediverse instances (for context-free handle detection)
KNOWN_FEDIVERSE_INSTANCES = [
    'mastodon.social', 'mastodon.online', 'mstdn.social', 'mas.to',
    'tech.lgbt', 'fosstodon.org', 'hackers.town', 'social.coop',
    'kolektiva.social', 'solarpunk.moe', 'wandering.shop',
    'elekk.xyz', 'cybre.space', 'octodon.social', 'chaos.social',
    'infosec.exchange', 'ruby.social', 'phpc.social', 'toot.cafe',
    'mstdn.io', 'pixelfed.social', 'lemmy.ml', 'lemmy.world',
    'kbin.social', 'pleroma.site', 'akkoma.dev',
]
def extract_handle_from_url(url):
    """map a URL onto a known platform

    returns (platform, handle) for the first matching PLATFORM_PATTERNS
    entry, or (None, None) when nothing matches.
    """
    for platform_name, pattern_list in PLATFORM_PATTERNS.items():
        for regex, to_handle in pattern_list:
            hit = re.match(regex, url, re.I)
            if hit:
                return platform_name, to_handle(hit)
    return None, None
def extract_fediverse_handles(text):
    """find @user@instance.tld patterns in text"""
    return [
        f"@{user}@{instance}"
        for user, instance in FEDIVERSE_HANDLE_PATTERN.findall(text)
    ]
def extract_emails(text):
    """collect email addresses from text, dropping obvious non-personal ones"""
    blocked = ('noreply', 'no-reply', 'donotreply', 'example.com')
    return [
        address
        for address in EMAIL_PATTERN.findall(text)
        if not any(token in address.lower() for token in blocked)
    ]
def scrape_page(url, timeout=15):
    """GET a page and return (BeautifulSoup, raw_html)

    returns (None, None) on any network, http, or parse failure.
    """
    try:
        resp = requests.get(url, headers=HEADERS, timeout=timeout, allow_redirects=True)
        resp.raise_for_status()
        html = resp.text
        return BeautifulSoup(html, 'html.parser'), html
    except Exception:
        return None, None
def extract_rel_me_links(soup):
    """collect href targets of rel="me" anchors (identity verification links)"""
    if not soup:
        return []
    anchors = soup.find_all('a', rel=lambda rel: rel and 'me' in rel)
    return [a.get('href') for a in anchors if a.get('href')]
def extract_social_links_from_page(soup, base_url=None):
    """pull every recognizable social-platform link out of a parsed page

    returns a list of {'platform', 'handle', 'url'} dicts.
    """
    found = []
    if not soup:
        return found
    for anchor in soup.find_all('a', href=True):
        target = anchor['href']
        # resolve relative links against the page they were scraped from
        if base_url and not target.startswith('http'):
            target = urljoin(base_url, target)
        platform, handle = extract_handle_from_url(target)
        if platform:
            found.append({'platform': platform, 'handle': handle, 'url': target})
    return found
def extract_json_ld(soup):
    """extract social-profile handles from JSON-LD structured data

    reads the `sameAs` URLs out of every ld+json script on the page.
    handles both a single top-level object and the equally common
    list-of-objects form (the old code silently dropped lists).

    :returns: {platform: handle}
    """
    data = {}
    if not soup:
        return data
    for script in soup.find_all('script', type='application/ld+json'):
        if not script.string:
            # empty or externally-loaded script tag: nothing to parse
            continue
        try:
            ld = json.loads(script.string)
        except ValueError:
            continue
        nodes = ld if isinstance(ld, list) else [ld]
        for node in nodes:
            if not isinstance(node, dict):
                continue
            # look for sameAs links (social profiles)
            same_as = node.get('sameAs', [])
            if isinstance(same_as, str):
                same_as = [same_as]
            for url in same_as:
                platform, handle = extract_handle_from_url(url)
                if platform:
                    data[platform] = handle
    return data
def scrape_linktree(url):
    """scrape a linktree/bio.link/carrd page for all links"""
    collected = {}
    soup, raw = scrape_page(url)
    if not soup:
        return collected
    # aggregator pages render plain anchors, so the generic extractor works;
    # skip links that just point at another aggregator
    aggregators = ('linktree', 'biolink', 'carrd')
    for link in extract_social_links_from_page(soup, url):
        if link['platform'] not in aggregators:
            collected[link['platform']] = link['handle']
    # fediverse handles sometimes appear only as text, not anchors
    if raw:
        fedi_handles = extract_fediverse_handles(raw)
        if fedi_handles:
            collected['mastodon'] = fedi_handles[0]
    return collected
def scrape_website_for_handles(url, follow_links=True):
    """
    comprehensive website scrape for social handles
    checks:
    - rel="me" links
    - social links in page
    - json-ld structured data
    - /about and /contact pages
    - fediverse handles in text
    - emails

    :param url: site to scrape
    :param follow_links: also probe /about, /contact, /links, /social
    :returns: (handles_dict, emails_list); both empty if the page is unreachable
    """
    handles = {}
    emails = []
    soup, raw = scrape_page(url)
    if not soup:
        return handles, emails
    # 1. rel="me" links (most authoritative)
    rel_me = extract_rel_me_links(soup)
    for link in rel_me:
        platform, handle = extract_handle_from_url(link)
        if platform and platform not in handles:
            handles[platform] = handle
    # 2. all social links on page
    social_links = extract_social_links_from_page(soup, url)
    for link in social_links:
        if link['platform'] not in handles:
            handles[link['platform']] = link['handle']
    # 3. json-ld structured data
    json_ld = extract_json_ld(soup)
    for platform, handle in json_ld.items():
        if platform not in handles:
            handles[platform] = handle
    # 4. fediverse handles in text
    if raw:
        fedi = extract_fediverse_handles(raw)
        if fedi and 'mastodon' not in handles:
            handles['mastodon'] = fedi[0]
        # emails
        emails = extract_emails(raw)
    # 5. follow links to /about, /contact
    if follow_links:
        parsed = urlparse(url)
        base = f"{parsed.scheme}://{parsed.netloc}"
        for path in ['/about', '/contact', '/links', '/social']:
            try:
                sub_soup, sub_raw = scrape_page(base + path)
                if sub_soup:
                    sub_links = extract_social_links_from_page(sub_soup, base)
                    for link in sub_links:
                        if link['platform'] not in handles:
                            handles[link['platform']] = link['handle']
                if sub_raw:
                    fedi = extract_fediverse_handles(sub_raw)
                    if fedi and 'mastodon' not in handles:
                        handles['mastodon'] = fedi[0]
                    emails.extend(extract_emails(sub_raw))
            except:
                # best-effort probing: a broken sub-page must not kill the scrape
                pass
    # 6. check for linktree etc in links and follow them
    for platform in ['linktree', 'biolink', 'carrd']:
        if platform in handles:
            # this is actually a link aggregator, scrape it
            link_url = None
            for link in social_links:
                if link['platform'] == platform:
                    link_url = link['url']
                    break
            if link_url:
                aggregator_handles = scrape_linktree(link_url)
                for p, h in aggregator_handles.items():
                    if p not in handles:
                        handles[p] = h
            del handles[platform] # remove the aggregator itself
    return handles, list(set(emails))
def extract_handles_from_text(text):
    """extract handles from plain text (bio, README, etc)"""
    found = {}
    if not text:
        return found
    # fediverse handles
    fedi = extract_fediverse_handles(text)
    if fedi:
        found['mastodon'] = fedi[0]
    # URLs embedded in the text, with trailing punctuation stripped
    for url_match in re.finditer(r'https?://[^\s<>"\']+', text):
        candidate = url_match.group(0).rstrip('.,;:!?)')
        platform, handle = extract_handle_from_url(candidate)
        if platform and platform not in found:
            found[platform] = handle
    # twitter-style @mentions, only when the text mentions twitter/x at all
    lowered = text.lower()
    if 'twitter' in lowered or 'x.com' in lowered:
        for mention in re.finditer(r'(?:^|[^\w])@(\w{1,15})(?:[^\w]|$)', text):
            if 'twitter' not in found:
                found['twitter'] = f"@{mention.group(1)}"
    # matrix ids (@user:homeserver)
    for mx in re.finditer(r'@([\w.-]+):([\w.-]+)', text):
        if 'matrix' not in found:
            found['matrix'] = f"@{mx.group(1)}:{mx.group(2)}"
    return found
def scrape_github_readme(username):
    """scrape a user's profile README (the username/username repo)

    tries the `main` branch first, then `master` (the old code duplicated
    the whole fetch block per branch and swallowed all errors bare).

    :returns: (handles_dict, emails_list); both empty when no README exists
    """
    for branch in ('main', 'master'):
        url = f"https://raw.githubusercontent.com/{username}/{username}/{branch}/README.md"
        try:
            resp = requests.get(url, headers=HEADERS, timeout=10)
        except requests.exceptions.RequestException:
            # network failure on this branch: still worth trying the next one
            continue
        if resp.status_code == 200:
            text = resp.text
            return extract_handles_from_text(text), extract_emails(text)
    return {}, []
def discover_all_handles(github_profile):
    """
    comprehensive handle discovery from a github profile dict
    github_profile should contain:
    - username
    - bio
    - blog (website URL)
    - twitter_username
    - etc.

    :returns: (handles_dict, emails_list) aggregated from the bio, the
              linked website, the profile README, and the profile email
    """
    handles = {}
    emails = []
    username = github_profile.get('login') or github_profile.get('username')
    print(f" discovering handles for {username}...")
    # 1. github bio
    bio = github_profile.get('bio', '')
    if bio:
        bio_handles = extract_handles_from_text(bio)
        handles.update(bio_handles)
        emails.extend(extract_emails(bio))
    # 2. twitter from github profile
    twitter = github_profile.get('twitter_username')
    if twitter and 'twitter' not in handles:
        handles['twitter'] = f"@{twitter}"
    # 3. website from github profile
    website = github_profile.get('blog')
    if website:
        # github stores the blog field verbatim; normalize missing scheme
        if not website.startswith('http'):
            website = f"https://{website}"
        print(f" scraping website: {website}")
        site_handles, site_emails = scrape_website_for_handles(website)
        for p, h in site_handles.items():
            if p not in handles:
                handles[p] = h
        emails.extend(site_emails)
    # 4. profile README
    if username:
        print(f" checking profile README...")
        readme_handles, readme_emails = scrape_github_readme(username)
        for p, h in readme_handles.items():
            if p not in handles:
                handles[p] = h
        emails.extend(readme_emails)
    # 5. email from github profile
    github_email = github_profile.get('email')
    if github_email:
        emails.append(github_email)
    # dedupe emails
    emails = list(set(e for e in emails if e and '@' in e and 'noreply' not in e.lower()))
    print(f" found {len(handles)} handles, {len(emails)} emails")
    return handles, emails
def merge_handles(existing, new):
    """merge new handles into existing, preferring more specific handles

    mutates and returns `existing`; a longer handle wins because e.g.
    "@user@instance" is more specific than a bare "@user".
    """
    for platform, handle in new.items():
        if platform in existing:
            if len(handle) > len(existing[platform]):
                existing[platform] = handle
        else:
            existing[platform] = handle
    return existing

322
connectd/scoutd/lemmy.py Normal file
View file

@ -0,0 +1,322 @@
"""
scoutd/lemmy.py - lemmy (fediverse reddit) discovery
lemmy is federated so we hit multiple instances.
great for finding lost builders in communities like:
- /c/programming, /c/technology, /c/linux
- /c/antiwork, /c/workreform (lost builders!)
- /c/selfhosted, /c/privacy, /c/opensource
supports authenticated access for private instances and DM delivery.
"""
import requests
import json
import time
import os
from datetime import datetime
from pathlib import Path
from .signals import analyze_text
from .lost import (
analyze_social_for_lost_signals,
analyze_text_for_lost_signals,
classify_user,
)
# auth config from environment
LEMMY_INSTANCE = os.environ.get('LEMMY_INSTANCE', '')
LEMMY_USERNAME = os.environ.get('LEMMY_USERNAME', '')
LEMMY_PASSWORD = os.environ.get('LEMMY_PASSWORD', '')
# auth token cache (process-wide jwt set by get_auth_token)
_auth_token = None
# popular lemmy instances
LEMMY_INSTANCES = [
    'lemmy.ml',
    'lemmy.world',
    'programming.dev',
    'lemm.ee',
    'sh.itjust.works',
]
# communities to scout (format: community@instance or just community for local)
TARGET_COMMUNITIES = [
    # builder communities
    'programming',
    'selfhosted',
    'linux',
    'opensource',
    'privacy',
    'technology',
    'webdev',
    'rust',
    'python',
    'golang',
    # lost builder communities (people struggling, stuck, seeking)
    'antiwork',
    'workreform',
    'careerguidance',
    'cscareerquestions',
    'learnprogramming',
    'findapath',
]
# scrape cache location; created eagerly at import time (side effect)
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'lemmy'
CACHE_DIR.mkdir(parents=True, exist_ok=True)
def get_auth_token(instance=None):
    """login to a lemmy instance and return its jwt, cached per process

    NOTE(review): the cache is instance-agnostic - once a token exists it is
    returned even when a different `instance` is requested later; confirm
    callers only ever use the configured LEMMY_INSTANCE.
    """
    global _auth_token
    if _auth_token:
        return _auth_token
    target = instance or LEMMY_INSTANCE
    if not all([target, LEMMY_USERNAME, LEMMY_PASSWORD]):
        return None
    try:
        resp = requests.post(
            f"https://{target}/api/v3/user/login",
            json={
                'username_or_email': LEMMY_USERNAME,
                'password': LEMMY_PASSWORD,
            },
            timeout=30,
        )
        if resp.status_code != 200:
            return None
        _auth_token = resp.json().get('jwt')
        return _auth_token
    except Exception as e:
        print(f"lemmy auth error: {e}")
        return None
def send_lemmy_dm(recipient_username, message, dry_run=False):
    """send a private message via lemmy

    resolves the recipient's numeric id through the configured instance,
    then posts a private_message with the cached auth token.

    :param recipient_username: "user" (local) or "user@instance" (federated)
    :param message: message body
    :param dry_run: when True, only print what would be sent
    :returns: (ok, error_string_or_None)
    """
    if not LEMMY_INSTANCE:
        return False, "LEMMY_INSTANCE not configured"
    if dry_run:
        print(f"[dry run] would send lemmy DM to {recipient_username}")
        return True, None
    token = get_auth_token()
    if not token:
        return False, "failed to authenticate with lemmy"
    try:
        # parse recipient - could be username@instance or just username
        if '@' in recipient_username:
            username, instance = recipient_username.split('@', 1)
        else:
            username = recipient_username
            instance = LEMMY_INSTANCE
        # get recipient user id
        user_url = f"https://{LEMMY_INSTANCE}/api/v3/user"
        resp = requests.get(user_url, params={'username': f"{username}@{instance}"}, timeout=30)
        if resp.status_code != 200:
            # try without instance suffix for local users
            resp = requests.get(user_url, params={'username': username}, timeout=30)
        if resp.status_code != 200:
            return False, f"could not find user {recipient_username}"
        recipient_id = resp.json().get('person_view', {}).get('person', {}).get('id')
        if not recipient_id:
            return False, "could not get recipient id"
        # send DM
        dm_url = f"https://{LEMMY_INSTANCE}/api/v3/private_message"
        resp = requests.post(dm_url,
            headers={'Authorization': f'Bearer {token}'},
            json={
                'content': message,
                'recipient_id': recipient_id,
            },
            timeout=30
        )
        if resp.status_code == 200:
            return True, None
        else:
            return False, f"lemmy DM error: {resp.status_code} - {resp.text}"
    except Exception as e:
        return False, f"lemmy DM error: {str(e)}"
def get_community_posts(instance, community, limit=50, sort='New'):
    """fetch posts from a lemmy community; empty list on any failure"""
    try:
        resp = requests.get(
            f"https://{instance}/api/v3/post/list",
            params={
                'community_name': community,
                'sort': sort,
                'limit': limit,
            },
            timeout=30,
        )
        if resp.status_code == 200:
            return resp.json().get('posts', [])
    except Exception:
        pass
    return []
def get_user_profile(instance, username):
    """fetch a lemmy user profile; None on any failure"""
    try:
        resp = requests.get(
            f"https://{instance}/api/v3/user",
            params={'username': username},
            timeout=30,
        )
        if resp.status_code == 200:
            return resp.json()
    except Exception:
        pass
    return None
def analyze_lemmy_user(instance, username, posts=None):
    """analyze a lemmy user for values alignment and lost signals

    scores the user's bio plus up to 10 of the supplied posts and runs
    lost-builder detection over the same corpus.

    :param instance: lemmy instance hostname
    :param username: local username on that instance
    :param posts: optional list of post_view dicts already fetched
    :returns: connectd human-profile dict, or None if the profile fetch fails
    """
    profile = get_user_profile(instance, username)
    if not profile:
        return None
    person = profile.get('person_view', {}).get('person', {})
    counts = profile.get('person_view', {}).get('counts', {})
    bio = person.get('bio', '') or ''
    display_name = person.get('display_name') or person.get('name', username)
    # analyze bio
    bio_score, bio_signals, bio_reasons = analyze_text(bio)
    # analyze posts if provided
    post_signals = []
    post_text = []
    if posts:
        for post in posts[:10]:
            post_data = post.get('post', {})
            title = post_data.get('name', '')
            body = post_data.get('body', '')
            post_text.append(f"{title} {body}")
            _, signals, _ = analyze_text(f"{title} {body}")
            post_signals.extend(signals)
    all_signals = list(set(bio_signals + post_signals))
    # each post signal adds a flat 5 points on top of the bio score
    total_score = bio_score + len(post_signals) * 5
    # lost builder detection
    profile_for_lost = {
        'bio': bio,
        'post_count': counts.get('post_count', 0),
        'comment_count': counts.get('comment_count', 0),
    }
    posts_for_lost = [{'text': t} for t in post_text]
    lost_signals, lost_weight = analyze_social_for_lost_signals(profile_for_lost, posts_for_lost)
    lost_potential_score = lost_weight
    user_type = classify_user(lost_potential_score, 50, total_score)
    return {
        'platform': 'lemmy',
        'username': f"{username}@{instance}",
        'url': f"https://{instance}/u/{username}",
        'name': display_name,
        'bio': bio,
        'location': None,
        'score': total_score,
        'confidence': min(0.9, 0.3 + len(all_signals) * 0.1),
        'signals': all_signals,
        'negative_signals': [],
        'reasons': bio_reasons,
        'contact': {},
        'extra': {
            'instance': instance,
            'post_count': counts.get('post_count', 0),
            'comment_count': counts.get('comment_count', 0),
        },
        'lost_potential_score': lost_potential_score,
        'lost_signals': lost_signals,
        'user_type': user_type,
    }
def scrape_lemmy(db, limit_per_community=30):
    """scrape lemmy instances for aligned builders

    walks LEMMY_INSTANCES (the user's configured LEMMY_INSTANCE first),
    pulls recent posts from each TARGET_COMMUNITIES community, groups
    posts by author, analyzes each author once, and saves anyone whose
    alignment score >= 20 or lost_potential_score >= 30.

    args:
        db: database handle exposing save_human(dict)
        limit_per_community: posts fetched per community per instance

    returns:
        number of humans saved
    """
    print("scouting lemmy...")
    found = 0
    lost_found = 0
    seen_users = set()
    # build instance list - user's instance first if configured
    instances = list(LEMMY_INSTANCES)
    if LEMMY_INSTANCE and LEMMY_INSTANCE not in instances:
        instances.insert(0, LEMMY_INSTANCE)
    for instance in instances:
        print(f" instance: {instance}")
        for community in TARGET_COMMUNITIES:
            posts = get_community_posts(instance, community, limit=limit_per_community)
            if not posts:
                continue
            print(f" /c/{community}: {len(posts)} posts")
            # group posts by user
            user_posts = {}
            for post in posts:
                creator = post.get('creator', {})
                username = creator.get('name')
                if not username:
                    continue
                user_key = f"{username}@{instance}"
                if user_key in seen_users:
                    continue
                user_posts.setdefault(user_key, []).append(post)
            # analyze each user
            # (bug fix: loop variable renamed from `posts`, which shadowed
            # the community post list fetched above)
            for user_key, author_posts in user_posts.items():
                username = user_key.split('@')[0]
                if user_key in seen_users:
                    continue
                seen_users.add(user_key)
                result = analyze_lemmy_user(instance, username, author_posts)
                if not result:
                    continue
                if result['score'] >= 20 or result.get('lost_potential_score', 0) >= 30:
                    db.save_human(result)
                    found += 1
                    if result.get('user_type') in ['lost', 'both']:
                        lost_found += 1
                        print(f" {result['username']}: {result['score']:.0f} (lost: {result['lost_potential_score']:.0f})")
                    elif result['score'] >= 40:
                        print(f" {result['username']}: {result['score']:.0f}")
                time.sleep(0.5)  # rate limit
            time.sleep(1)  # between communities
        time.sleep(2)  # between instances
    print(f"lemmy: found {found} humans ({lost_found} lost builders)")
    return found

169
connectd/scoutd/lobsters.py Normal file
View file

@ -0,0 +1,169 @@
"""
scoutd/lobsters.py - lobste.rs discovery
high-signal invite-only tech community
"""
import requests
import json
import time
from datetime import datetime
from pathlib import Path
from .signals import analyze_text
# identify ourselves and request json from the lobste.rs endpoints
HEADERS = {'User-Agent': 'connectd/1.0', 'Accept': 'application/json'}
# on-disk response cache used by _api_get (1 hour ttl)
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'lobsters'
# story tags considered values-aligned; their submitters become candidates
ALIGNED_TAGS = ['privacy', 'security', 'distributed', 'rust', 'linux', 'culture', 'practices']
def _api_get(url, params=None):
    """cached, rate-limited GET returning parsed json (None on error)

    responses are cached on disk for one hour. the cache filename is
    derived from a stable md5 digest of url+params; the previous
    implementation used the builtin hash(), whose value for strings is
    randomized per process (PYTHONHASHSEED), so the cache never survived
    a restart.
    """
    import hashlib  # local import: keeps the stable-key fix self-contained
    cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}"
    digest = hashlib.md5(cache_key.encode('utf-8')).hexdigest()[:16]
    cache_file = CACHE_DIR / f"{digest}.json"
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    if cache_file.exists():
        try:
            data = json.loads(cache_file.read_text())
            if time.time() - data.get('_cached_at', 0) < 3600:
                return data.get('_data')
        except Exception:  # corrupt cache entry: fall through and refetch
            pass
    time.sleep(2)
    try:
        resp = requests.get(url, headers=HEADERS, params=params, timeout=30)
        resp.raise_for_status()
        result = resp.json()
        cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result}))
        return result
    except requests.exceptions.RequestException as e:
        print(f" lobsters api error: {e}")
        return None
def get_stories_by_tag(tag):
    """fetch the recent story list for a lobste.rs tag (empty list on failure)"""
    stories = _api_get(f'https://lobste.rs/t/{tag}.json')
    return stories if stories else []
def get_newest_stories():
    """fetch the lobste.rs newest-stories feed (empty list on failure)"""
    feed = _api_get('https://lobste.rs/newest.json')
    return feed if feed else []
def get_user(username):
    """fetch a lobste.rs user profile dict (None on failure)"""
    profile_url = f'https://lobste.rs/u/{username}.json'
    return _api_get(profile_url)
def analyze_lobsters_user(username):
    """score a lobste.rs account for values alignment.

    combines bio text signals with an invite-only base bonus, karma
    tiers, and presence of github/homepage links. returns a human dict
    or None when the profile can't be fetched.
    """
    user = get_user(username)
    if not user:
        return None
    about_parts = []
    if user.get('about'):
        about_parts.append(user['about'])
    text_score, positive_signals, negative_signals = analyze_text(' '.join(about_parts))
    karma = user.get('karma', 0)
    # invite-only community: every account starts with a base bonus of 15
    score = text_score + 15
    # karma tiers
    if karma > 100:
        score += 10
    elif karma > 50:
        score += 5
    # linked github / homepage each add a little
    if user.get('github_username'):
        score += 5
    if user.get('homepage'):
        score += 5
    # confidence starts higher than on open platforms (invite-only)
    confidence = 0.4
    if about_parts:
        confidence += 0.2
    if karma > 50:
        confidence += 0.2
    confidence = min(confidence, 0.9)
    reasons = ['on lobste.rs (invite-only)']
    if karma > 50:
        reasons.append(f"active ({karma} karma)")
    if positive_signals:
        reasons.append(f"signals: {', '.join(positive_signals[:5])}")
    if negative_signals:
        reasons.append(f"WARNING: {', '.join(negative_signals)}")
    return {
        'platform': 'lobsters',
        'username': username,
        'url': f"https://lobste.rs/u/{username}",
        'score': score,
        'confidence': confidence,
        'signals': positive_signals,
        'negative_signals': negative_signals,
        'karma': karma,
        'reasons': reasons,
        'contact': {
            'github': user.get('github_username'),
            'twitter': user.get('twitter_username'),
            'homepage': user.get('homepage'),
        },
        'scraped_at': datetime.now().isoformat(),
    }
def scrape_lobsters(db):
    """discover aligned humans on lobste.rs and persist them via db.save_human.

    candidate usernames come from submitters on aligned tag feeds plus
    the newest-stories feed; each candidate is analyzed once and saved
    when their score is positive. returns the list of saved human dicts.
    """
    print("scoutd/lobsters: starting scrape...")
    candidates = set()
    # submitters of stories under aligned tags
    for tag in ALIGNED_TAGS:
        print(f" tag: {tag}...")
        for story in get_stories_by_tag(tag):
            name = story.get('submitter_user', {}).get('username')
            if name:
                candidates.add(name)
    # plus submitters from the newest feed
    print(" newest stories...")
    for story in get_newest_stories():
        name = story.get('submitter_user', {}).get('username')
        if name:
            candidates.add(name)
    print(f" {len(candidates)} unique users to analyze")
    results = []
    for username in candidates:
        try:
            result = analyze_lobsters_user(username)
            if result and result['score'] > 0:
                results.append(result)
                db.save_human(result)
                if result['score'] >= 30:
                    print(f"{username}: {result['score']} pts")
        except Exception as e:
            print(f" error on {username}: {e}")
    print(f"scoutd/lobsters: found {len(results)} aligned humans")
    return results

491
connectd/scoutd/lost.py Normal file
View file

@ -0,0 +1,491 @@
"""
scoutd/lost.py - lost builder detection
finds people with potential who haven't found it yet, gave up, or are too beaten down to try.
these aren't failures. they're seeds that never got water.
detection signals:
- github: forked but never modified, starred many but built nothing, learning repos abandoned
- reddit/forums: "i wish i could...", stuck asking beginner questions for years, helping others but never sharing
- social: retoots builders but never posts own work, imposter syndrome language, isolation signals
- profiles: bio says what they WANT to be, "aspiring" for 2+ years, empty portfolios
the goal isn't to recruit them. it's to show them the door exists.
"""
import re
from datetime import datetime, timedelta
from collections import defaultdict
# signal definitions with weights
# each entry: weight (points added to lost_potential_score), category
# ('github' | 'language' | 'behavior' | 'community' | 'profile'), a
# human-readable description, and — for language/profile signals — regex
# patterns matched against lowercased text by analyze_text_for_lost_signals
# (each signal counts at most once per text, regardless of pattern hits).
LOST_SIGNALS = {
    # github signals (detected structurally in analyze_github_for_lost_signals,
    # not via patterns)
    'forked_never_modified': {
        'weight': 15,
        'category': 'github',
        'description': 'forked repos but never pushed changes',
    },
    'starred_many_built_nothing': {
        'weight': 20,
        'category': 'github',
        'description': 'starred 50+ repos but has 0-2 own repos',
    },
    'account_no_repos': {
        'weight': 10,
        'category': 'github',
        'description': 'account exists but no public repos',
    },
    'inactivity_bursts': {
        'weight': 15,
        'category': 'github',
        'description': 'long gaps then brief activity bursts',
    },
    'only_issues_comments': {
        'weight': 12,
        'category': 'github',
        'description': 'only activity is issues/comments on others work',
    },
    'abandoned_learning_repos': {
        'weight': 18,
        'category': 'github',
        'description': 'learning/tutorial repos that were never finished',
    },
    'readme_only_repos': {
        'weight': 10,
        'category': 'github',
        'description': 'repos with just README, no actual code',
    },
    # language signals (from posts/comments/bio)
    'wish_i_could': {
        'weight': 12,
        'category': 'language',
        'description': '"i wish i could..." language',
        'patterns': [
            r'i wish i could',
            r'i wish i knew how',
            r'wish i had the (time|energy|motivation|skills?)',
        ],
    },
    'someday_want': {
        'weight': 10,
        'category': 'language',
        'description': '"someday i want to..." language',
        'patterns': [
            r'someday i (want|hope|plan) to',
            r'one day i\'ll',
            r'eventually i\'ll',
            r'when i have time i\'ll',
        ],
    },
    'stuck_beginner': {
        'weight': 20,
        'category': 'language',
        'description': 'asking beginner questions for years',
        'patterns': [
            r'still (trying|learning|struggling) (to|with)',
            r'can\'t seem to (get|understand|figure)',
            r'been trying for (months|years)',
        ],
    },
    'self_deprecating': {
        'weight': 15,
        'category': 'language',
        'description': 'self-deprecating about abilities',
        'patterns': [
            r'i\'m (not smart|too dumb|not good) enough',
            r'i (suck|am terrible) at',
            r'i\'ll never be able to',
            r'people like me (can\'t|don\'t)',
            r'i\'m just not (a|the) (type|kind)',
        ],
    },
    'no_energy': {
        'weight': 18,
        'category': 'language',
        'description': '"how do people have energy" posts',
        'patterns': [
            r'how do (people|you|they) have (the )?(energy|time|motivation)',
            r'where do (people|you|they) find (the )?(energy|motivation)',
            r'i\'m (always|constantly) (tired|exhausted|drained)',
            r'no (energy|motivation) (left|anymore)',
        ],
    },
    'imposter_syndrome': {
        'weight': 15,
        'category': 'language',
        'description': 'imposter syndrome language',
        'patterns': [
            r'imposter syndrome',
            r'feel like (a |an )?(fraud|fake|imposter)',
            r'don\'t (belong|deserve)',
            r'everyone else (seems|is) (so much )?(better|smarter)',
            r'they\'ll (find out|realize) i\'m',
        ],
    },
    'should_really': {
        'weight': 8,
        'category': 'language',
        'description': '"i should really..." posts',
        'patterns': [
            r'i (should|need to) really',
            r'i keep (meaning|wanting) to',
            r'i\'ve been (meaning|wanting) to',
        ],
    },
    'isolation_signals': {
        'weight': 20,
        'category': 'language',
        'description': 'isolation/loneliness language',
        'patterns': [
            r'no one (understands|gets it|to talk to)',
            r'(feel|feeling) (so )?(alone|isolated|lonely)',
            r'don\'t have anyone (to|who)',
            r'wish i (had|knew) (someone|people)',
        ],
    },
    # behavior signal: detected from help/share ratios, not patterns
    'enthusiasm_for_others': {
        'weight': 10,
        'category': 'behavior',
        'description': 'celebrates others but dismissive of self',
    },
    # subreddit/community signals
    'stuck_communities': {
        'weight': 15,
        'category': 'community',
        'description': 'active in stuck/struggling communities',
        'subreddits': [
            'learnprogramming',
            'findapath',
            'getdisciplined',
            'getmotivated',
            'decidingtobebetter',
            'selfimprovement',
            'adhd',
            'depression',
            'anxiety',
        ],
    },
    # profile signals
    'aspirational_bio': {
        'weight': 12,
        'category': 'profile',
        'description': 'bio says what they WANT to be',
        'patterns': [
            r'aspiring',
            r'future',
            r'want(ing)? to (be|become)',
            r'learning to',
            r'trying to (become|be|learn)',
            r'hoping to',
        ],
    },
    'empty_portfolio': {
        'weight': 15,
        'category': 'profile',
        'description': 'links to empty portfolio sites',
    },
    'long_aspiring': {
        'weight': 20,
        'category': 'profile',
        'description': '"aspiring" in bio for 2+ years',
    },
}
# subreddits that indicate someone might be stuck
# values are per-subreddit weights summed by analyze_reddit_for_lost_signals,
# which flags 'stuck_communities' once the sum reaches 20 (capped at 30)
STUCK_SUBREDDITS = {
    'learnprogramming': 8,
    'findapath': 15,
    'getdisciplined': 12,
    'getmotivated': 10,
    'decidingtobebetter': 12,
    'selfimprovement': 8,
    'adhd': 10,
    'depression': 15,
    'anxiety': 12,
    'socialanxiety': 12,
    'neet': 20,
    'lostgeneration': 15,
    'antiwork': 5,  # could be aligned OR stuck
    'careerguidance': 8,
    'cscareerquestions': 5,
}
def analyze_text_for_lost_signals(text):
    """match text against the LOST_SIGNALS regex patterns.

    returns (signal_names, total_weight). each signal counts at most
    once no matter how many of its patterns match; signals without a
    'patterns' key are skipped (they're detected structurally elsewhere).
    """
    if not text:
        return [], 0
    lowered = text.lower()
    hits = []
    weight = 0
    for name, spec in LOST_SIGNALS.items():
        patterns = spec.get('patterns')
        if not patterns:
            continue
        if any(re.search(pattern, lowered) for pattern in patterns):
            hits.append(name)
            weight += spec['weight']
    return hits, weight
def analyze_github_for_lost_signals(profile):
    """analyze a github profile dict for lost-builder signals.

    looks at ownership-vs-consumption patterns (forks, learning repos,
    readme-only repos), the follow ratio, and bio text.

    args:
        profile: github profile dict with optional keys 'repos' /
            'top_repos', 'public_repos', 'following', 'bio'

    returns:
        (signal_names, total_weight)
    """
    # fix: removed unused locals (`extra`, `forked_modified`, `followers`)
    signals_found = []
    total_weight = 0
    if not profile:
        return signals_found, total_weight
    repos = profile.get('repos', []) or profile.get('top_repos', [])
    public_repos = profile.get('public_repos', len(repos))
    following = profile.get('following', 0)
    # starred many but built nothing
    # (we'd need to fetch starred count separately, approximate with following ratio)
    if public_repos <= 2 and following > 50:
        signals_found.append('starred_many_built_nothing')
        total_weight += LOST_SIGNALS['starred_many_built_nothing']['weight']
    # account but no repos
    if public_repos == 0:
        signals_found.append('account_no_repos')
        total_weight += LOST_SIGNALS['account_no_repos']['weight']
    # check repos for signals
    forked_count = 0
    learning_repos = 0
    readme_only = 0
    learning_keywords = ['learning', 'tutorial', 'course', 'practice', 'exercise',
                         'bootcamp', 'udemy', 'freecodecamp', 'odin', 'codecademy']
    for repo in repos:
        name = (repo.get('name') or '').lower()
        description = (repo.get('description') or '').lower()
        language = repo.get('language')
        is_fork = repo.get('fork', False)
        # forked but never modified
        # (if pushed_at is close to created_at, never modified —
        # simplified: just count forks for now)
        if is_fork:
            forked_count += 1
        # learning/tutorial repos
        if any(kw in name or kw in description for kw in learning_keywords):
            learning_repos += 1
        # readme only (no language detected usually means no code)
        if not language and not is_fork:
            readme_only += 1
    if forked_count >= 5 and public_repos - forked_count <= 2:
        signals_found.append('forked_never_modified')
        total_weight += LOST_SIGNALS['forked_never_modified']['weight']
    if learning_repos >= 3:
        signals_found.append('abandoned_learning_repos')
        total_weight += LOST_SIGNALS['abandoned_learning_repos']['weight']
    if readme_only >= 2:
        signals_found.append('readme_only_repos')
        total_weight += LOST_SIGNALS['readme_only_repos']['weight']
    # check bio for lost language patterns
    bio = profile.get('bio') or ''
    bio_signals, bio_weight = analyze_text_for_lost_signals(bio)
    signals_found.extend(bio_signals)
    total_weight += bio_weight
    # aspirational bio check (dedup: the text pass above may already have found it)
    bio_lower = bio.lower()
    if any(re.search(p, bio_lower) for p in LOST_SIGNALS['aspirational_bio']['patterns']):
        if 'aspirational_bio' not in signals_found:
            signals_found.append('aspirational_bio')
            total_weight += LOST_SIGNALS['aspirational_bio']['weight']
    return signals_found, total_weight
def analyze_reddit_for_lost_signals(activity, subreddits):
    """score reddit activity for lost-builder signals.

    combines stuck-subreddit membership weight, language patterns in
    post/comment text, and a helps-others-but-never-shares heuristic.
    returns (signal_names, total_weight).
    """
    signals = []
    weight = 0
    # stuck-community membership, weighted per subreddit
    stuck_weight = sum(STUCK_SUBREDDITS.get(sub.lower(), 0) for sub in subreddits)
    if stuck_weight >= 20:
        signals.append('stuck_communities')
        weight += min(stuck_weight, 30)  # cap at 30
    # language patterns across all titles and bodies
    chunks = []
    for item in activity:
        if item.get('title'):
            chunks.append(item['title'])
        if item.get('body'):
            chunks.append(item['body'])
    text_signals, text_weight = analyze_text_for_lost_signals(' '.join(chunks))
    signals.extend(text_signals)
    weight += text_weight
    # helping others without ever sharing own work
    help_phrases = ('try this', 'you could', 'have you tried', 'i recommend')
    share_phrases = ('i built', 'i made', 'my project', 'check out my', 'i created')
    helping = 0
    sharing = 0
    for item in activity:
        body = (item.get('body') or '').lower()
        title = (item.get('title') or '').lower()
        if any(p in body for p in help_phrases):
            helping += 1
        if any(p in body + title for p in share_phrases):
            sharing += 1
    if helping >= 5 and sharing == 0:
        signals.append('enthusiasm_for_others')
        weight += LOST_SIGNALS['enthusiasm_for_others']['weight']
    return signals, weight
def analyze_social_for_lost_signals(profile, posts):
    """score a social (mastodon-style) profile for lost-builder signals.

    checks bio text, each post's text, and the boosts-others-but-never-
    shares-own-work pattern. signal names are deduplicated and each
    weight counted once. returns (signal_names, total_weight).
    """
    found = []
    weight = 0
    # bio (mastodon uses 'note', other callers pass 'bio')
    bio_text = profile.get('bio') or profile.get('note') or ''
    bio_hits, bio_weight = analyze_text_for_lost_signals(bio_text)
    found.extend(bio_hits)
    weight += bio_weight
    boosts = 0
    originals = 0
    own_work = 0
    share_phrases = ('i built', 'i made', 'my project', 'working on', 'just shipped')
    for post in posts:
        body = (post.get('content') or '').lower()
        if post.get('reblog') is not None or post.get('repost'):
            boosts += 1
        else:
            originals += 1
        # sharing own work?
        if any(phrase in body for phrase in share_phrases):
            own_work += 1
        # language patterns, deduped against everything found so far
        hits, _ = analyze_text_for_lost_signals(body)
        for sig in hits:
            if sig not in found:
                found.append(sig)
                weight += LOST_SIGNALS[sig]['weight']
    # boosts builders but never posts own work
    if boosts >= 10 and own_work == 0:
        found.append('enthusiasm_for_others')
        weight += LOST_SIGNALS['enthusiasm_for_others']['weight']
    return found, weight
def calculate_lost_potential_score(signals_found):
    """sum the weights of all recognized signals (unknown names ignored)"""
    return sum(
        LOST_SIGNALS[sig]['weight']
        for sig in signals_found
        if sig in LOST_SIGNALS
    )
def classify_user(lost_score, builder_score, values_score):
    """classify a user from their score mix.

    returns one of:
        'builder' - actively shipping (high builder score, low lost score)
        'lost'    - aligned values but stuck (priority outreach)
        'both'    - mixed signals, possibly a recovering builder
        'none'    - neither profile fits
    """
    if builder_score >= 50 and lost_score < 30:
        return 'builder'
    if lost_score >= 40 and values_score >= 20:
        return 'lost'
    if min(lost_score, builder_score) >= 30:
        return 'both'
    return 'none'
def get_signal_descriptions(signals_found):
    """map detected signal names to their human-readable descriptions"""
    return [
        LOST_SIGNALS[sig]['description']
        for sig in signals_found
        if sig in LOST_SIGNALS
    ]
def should_outreach_lost(user_data, config=None):
    """decide whether a lost builder qualifies for outreach.

    checks, in order: minimum lost_potential_score, minimum values
    alignment score, then the per-user cooldown window. even when every
    check passes, outreach still requires a human review.

    args:
        user_data: human dict with 'lost_potential_score', 'score', and
            optionally 'last_lost_outreach' (isoformat timestamp)
        config: optional overrides: min_lost_score (default 40),
            min_values_score (default 20), cooldown_days (default 90)

    returns:
        (allowed: bool, reason: str)
    """
    config = config or {}
    lost_score = user_data.get('lost_potential_score', 0)
    values_score = user_data.get('score', 0)  # regular alignment score
    # minimum thresholds
    min_lost = config.get('min_lost_score', 40)
    min_values = config.get('min_values_score', 20)
    if lost_score < min_lost:
        return False, 'lost_score too low'
    if values_score < min_values:
        return False, 'values_score too low'
    # check cooldown
    last_outreach = user_data.get('last_lost_outreach')
    if last_outreach:
        cooldown_days = config.get('cooldown_days', 90)
        last_dt = datetime.fromisoformat(last_outreach)
        if datetime.now() - last_dt < timedelta(days=cooldown_days):
            # bug fix: message previously hard-coded "90 days" even when
            # cooldown_days was overridden via config
            return False, f'cooldown active ({cooldown_days} days)'
    # always require manual review for lost outreach
    return True, 'requires_review'

290
connectd/scoutd/mastodon.py Normal file
View file

@ -0,0 +1,290 @@
"""
scoutd/mastodon.py - fediverse discovery
scrapes high-signal instances: tech.lgbt, social.coop, fosstodon, hackers.town
also detects lost builders - social isolation, imposter syndrome, struggling folks
"""
import requests
import json
import time
import re
from datetime import datetime
from pathlib import Path
from .signals import analyze_text, ALIGNED_INSTANCES
from .lost import (
    LOST_SIGNALS,
    analyze_social_for_lost_signals,
    analyze_text_for_lost_signals,
    classify_user,
    get_signal_descriptions,
)
# identify ourselves and request json from mastodon endpoints
HEADERS = {'User-Agent': 'connectd/1.0', 'Accept': 'application/json'}
# on-disk response cache used by _api_get (1 hour ttl)
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'mastodon'
# hashtags whose public timelines we sample for candidate accounts
TARGET_HASHTAGS = [
    'selfhosted', 'homelab', 'homeassistant', 'foss', 'opensource',
    'privacy', 'solarpunk', 'cooperative', 'cohousing', 'mutualaid',
    'intentionalcommunity', 'degoogle', 'fediverse', 'indieweb',
]
def _api_get(url, params=None):
    """cached, rate-limited GET returning parsed json (None on error)

    responses are cached on disk for one hour. the cache filename is
    derived from a stable md5 digest of url+params; the previous
    implementation used the builtin hash(), whose value for strings is
    randomized per process (PYTHONHASHSEED), so the cache never survived
    a restart.
    """
    import hashlib  # local import: keeps the stable-key fix self-contained
    cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}"
    digest = hashlib.md5(cache_key.encode('utf-8')).hexdigest()[:16]
    cache_file = CACHE_DIR / f"{digest}.json"
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    if cache_file.exists():
        try:
            data = json.loads(cache_file.read_text())
            if time.time() - data.get('_cached_at', 0) < 3600:
                return data.get('_data')
        except Exception:  # corrupt cache entry: fall through and refetch
            pass
    time.sleep(1)
    try:
        resp = requests.get(url, headers=HEADERS, params=params, timeout=30)
        resp.raise_for_status()
        result = resp.json()
        cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result}))
        return result
    except requests.exceptions.RequestException as e:
        print(f" mastodon api error: {e}")
        return None
def strip_html(text):
    """replace html tags with single spaces; empty string for falsy input"""
    if not text:
        return ''
    return re.sub(r'<[^>]+>', ' ', text)
def get_instance_directory(instance, limit=40):
    """fetch local accounts from an instance's public profile directory"""
    directory_url = f'https://{instance}/api/v1/directory'
    accounts = _api_get(directory_url, {'limit': limit, 'local': 'true'})
    return accounts if accounts else []
def get_hashtag_timeline(instance, hashtag, limit=40):
    """fetch recent public posts for a hashtag on one instance"""
    timeline_url = f'https://{instance}/api/v1/timelines/tag/{hashtag}'
    posts = _api_get(timeline_url, {'limit': limit})
    return posts if posts else []
def get_user_statuses(instance, user_id, limit=30):
    """fetch a user's recent original posts (reblogs excluded)"""
    statuses_url = f'https://{instance}/api/v1/accounts/{user_id}/statuses'
    statuses = _api_get(statuses_url, {'limit': limit, 'exclude_reblogs': 'true'})
    return statuses if statuses else []
def analyze_mastodon_user(account, instance):
    """analyze a mastodon account for alignment and lost-builder signals.

    pulls bio, display name, profile fields and recent posts, scores the
    combined text, then runs lost-builder detection over the same data.

    args:
        account: mastodon account dict (api/v1 shape)
        instance: hostname the account was discovered on

    returns:
        human dict ready for db.save_human
    """
    acct = account.get('acct', '')
    if '@' not in acct:
        acct = f"{acct}@{instance}"
    # collect text
    text_parts = []
    bio = strip_html(account.get('note', ''))
    if bio:
        text_parts.append(bio)
    display_name = account.get('display_name', '')
    if display_name:
        text_parts.append(display_name)
    # profile fields
    for field in account.get('fields', []):
        if field.get('name'):
            text_parts.append(field['name'])
        if field.get('value'):
            text_parts.append(strip_html(field['value']))
    # fetch recent posts ONCE and reuse below
    # (fix: previously fetched twice — here and again for lost analysis)
    user_id = account.get('id')
    statuses = get_user_statuses(instance, user_id) if user_id else []
    for status in statuses:
        content = strip_html(status.get('content', ''))
        if content:
            text_parts.append(content)
    full_text = ' '.join(text_parts)
    text_score, positive_signals, negative_signals = analyze_text(full_text)
    # instance bonus
    instance_bonus = ALIGNED_INSTANCES.get(instance, 0)
    total_score = text_score + instance_bonus
    # pronouns bonus
    if re.search(r'\b(they/them|she/her|he/him|xe/xem)\b', full_text, re.I):
        total_score += 10
        positive_signals.append('pronouns')
    # activity level
    statuses_count = account.get('statuses_count', 0)
    followers = account.get('followers_count', 0)
    if statuses_count > 100:
        total_score += 5
    # === LOST BUILDER DETECTION ===
    profile_for_lost = {
        'bio': bio,
        'note': account.get('note'),
    }
    # NOTE(review): statuses were requested with exclude_reblogs, so the
    # boost-heavy heuristic inside analyze_social_for_lost_signals rarely
    # fires here — confirm whether reblogs should be included
    posts_for_lost = [
        {'content': strip_html(s.get('content', '')), 'reblog': s.get('reblog')}
        for s in statuses
    ]
    lost_signals, lost_weight = analyze_social_for_lost_signals(profile_for_lost, posts_for_lost)
    # merge signals found in the combined text
    # (fix: only add the weight of signals NOT already counted; the old code
    # deduped the names but added the full text weight, double-counting)
    text_lost_signals, _ = analyze_text_for_lost_signals(full_text)
    for sig in text_lost_signals:
        if sig not in lost_signals:
            lost_signals.append(sig)
            lost_weight += LOST_SIGNALS[sig]['weight']
    lost_potential_score = lost_weight
    # classify: builder, lost, both, or none
    # for mastodon, we use statuses_count as a proxy for builder activity
    builder_activity = 10 if statuses_count > 100 else 5 if statuses_count > 50 else 0
    user_type = classify_user(lost_potential_score, builder_activity, total_score)
    # confidence
    confidence = 0.3
    if len(text_parts) > 5:
        confidence += 0.2
    if statuses_count > 50:
        confidence += 0.2
    if len(positive_signals) > 3:
        confidence += 0.2
    confidence = min(confidence, 0.9)
    reasons = []
    if instance in ALIGNED_INSTANCES:
        reasons.append(f"on {instance}")
    if positive_signals:
        reasons.append(f"signals: {', '.join(positive_signals[:5])}")
    if negative_signals:
        reasons.append(f"WARNING: {', '.join(negative_signals)}")
    # add lost reasons if applicable
    if user_type == 'lost' or user_type == 'both':
        lost_descriptions = get_signal_descriptions(lost_signals)
        if lost_descriptions:
            reasons.append(f"LOST SIGNALS: {', '.join(lost_descriptions[:3])}")
    return {
        'platform': 'mastodon',
        'username': acct,
        'url': account.get('url'),
        'name': display_name,
        'bio': bio,
        'instance': instance,
        'score': total_score,
        'confidence': confidence,
        'signals': positive_signals,
        'negative_signals': negative_signals,
        'statuses_count': statuses_count,
        'followers': followers,
        'reasons': reasons,
        'scraped_at': datetime.now().isoformat(),
        # lost builder fields
        'lost_potential_score': lost_potential_score,
        'lost_signals': lost_signals,
        'user_type': user_type,
    }
def scrape_mastodon(db, limit_per_instance=40):
    """full mastodon scrape

    collects candidate accounts from aligned-instance directories and
    hashtag timelines, dedupes them, analyzes each, and saves anyone
    with a positive score via db.save_human. returns the saved dicts.
    """
    print("scoutd/mastodon: starting scrape...")
    all_accounts = []
    # 1. instance directories
    print(" scraping instance directories...")
    for instance in ALIGNED_INSTANCES:
        accounts = get_instance_directory(instance, limit=limit_per_instance)
        for acct in accounts:
            # remember which instance each account came from
            acct['_instance'] = instance
            all_accounts.append(acct)
        print(f" {instance}: {len(accounts)} users")
    # 2. hashtag timelines
    print(" scraping hashtags...")
    seen = set()
    for tag in TARGET_HASHTAGS[:8]:
        for instance in ['fosstodon.org', 'tech.lgbt', 'social.coop']:
            posts = get_hashtag_timeline(instance, tag, limit=20)
            for post in posts:
                account = post.get('account', {})
                acct = account.get('acct', '')
                if '@' not in acct:
                    acct = f"{acct}@{instance}"
                if acct not in seen:
                    seen.add(acct)
                    account['_instance'] = instance
                    all_accounts.append(account)
    # dedupe (first occurrence wins)
    unique = {}
    for acct in all_accounts:
        key = acct.get('acct', acct.get('id', ''))
        if key not in unique:
            unique[key] = acct
    print(f" {len(unique)} unique accounts to analyze")
    # analyze
    results = []
    builders_found = 0
    lost_found = 0
    for acct_data in unique.values():
        instance = acct_data.get('_instance', 'mastodon.social')
        try:
            result = analyze_mastodon_user(acct_data, instance)
            if result and result['score'] > 0:
                results.append(result)
                db.save_human(result)
                user_type = result.get('user_type', 'none')
                if user_type == 'builder':
                    builders_found += 1
                    if result['score'] >= 40:
                        print(f" ★ @{result['username']}: {result['score']} pts")
                elif user_type == 'lost':
                    lost_found += 1
                    lost_score = result.get('lost_potential_score', 0)
                    if lost_score >= 40:
                        print(f" 💔 @{result['username']}: lost_score={lost_score}, values={result['score']} pts")
                elif user_type == 'both':
                    # counts toward both tallies: a recovering builder
                    builders_found += 1
                    lost_found += 1
                    print(f" ⚡ @{result['username']}: recovering builder")
        except Exception as e:
            print(f" error: {e}")
    print(f"scoutd/mastodon: found {len(results)} aligned humans")
    print(f" - {builders_found} active builders")
    print(f" - {lost_found} lost builders (need encouragement)")
    return results

196
connectd/scoutd/matrix.py Normal file
View file

@ -0,0 +1,196 @@
"""
scoutd/matrix.py - matrix room membership discovery
finds users in multiple aligned public rooms
"""
import requests
import json
import time
from datetime import datetime
from pathlib import Path
from collections import defaultdict
from .signals import analyze_text
# identify ourselves and request json from matrix endpoints
HEADERS = {'User-Agent': 'connectd/1.0', 'Accept': 'application/json'}
# on-disk response cache used by _api_get (1 hour ttl)
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'matrix'
# public matrix rooms to check membership
# (membership endpoints usually require auth; see get_room_members)
ALIGNED_ROOMS = [
    '#homeassistant:matrix.org',
    '#esphome:matrix.org',
    '#selfhosted:matrix.org',
    '#privacy:matrix.org',
    '#solarpunk:matrix.org',
    '#cooperative:matrix.org',
    '#foss:matrix.org',
    '#linux:matrix.org',
]
# homeservers to query
HOMESERVERS = [
    'matrix.org',
    'matrix.envs.net',
    'tchncs.de',
]
def _api_get(url, params=None):
    """cached, rate-limited GET returning parsed json (None on error)

    responses are cached on disk for one hour. the cache filename is
    derived from a stable md5 digest of url+params; the previous
    implementation used the builtin hash(), whose value for strings is
    randomized per process (PYTHONHASHSEED), so the cache never survived
    a restart.
    """
    import hashlib  # local import: keeps the stable-key fix self-contained
    cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}"
    digest = hashlib.md5(cache_key.encode('utf-8')).hexdigest()[:16]
    cache_file = CACHE_DIR / f"{digest}.json"
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    if cache_file.exists():
        try:
            data = json.loads(cache_file.read_text())
            if time.time() - data.get('_cached_at', 0) < 3600:
                return data.get('_data')
        except Exception:  # corrupt cache entry: fall through and refetch
            pass
    time.sleep(1)
    try:
        resp = requests.get(url, headers=HEADERS, params=params, timeout=30)
        resp.raise_for_status()
        result = resp.json()
        cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result}))
        return result
    except requests.exceptions.RequestException:
        # matrix apis often fail, don't spam errors
        return None
def get_room_members(homeserver, room_alias):
    """
    get members of a public room
    note: most matrix servers don't expose this publicly
    this is a best-effort scrape

    resolves the alias to a room id, then asks /members; an empty list
    is the common outcome since that endpoint usually requires auth.

    returns:
        list of {'user_id', 'display_name'} dicts for joined members
    """
    # resolve room alias to id first
    try:
        alias_url = f'https://{homeserver}/_matrix/client/r0/directory/room/{room_alias}'
        alias_data = _api_get(alias_url)
        if not alias_data or 'room_id' not in alias_data:
            return []
        room_id = alias_data['room_id']
        # try to get members (usually requires auth)
        members_url = f'https://{homeserver}/_matrix/client/r0/rooms/{room_id}/members'
        members_data = _api_get(members_url)
        if members_data and 'chunk' in members_data:
            members = []
            for event in members_data['chunk']:
                if event.get('type') == 'm.room.member' and event.get('content', {}).get('membership') == 'join':
                    user_id = event.get('state_key')
                    display_name = event.get('content', {}).get('displayname')
                    if user_id:
                        members.append({'user_id': user_id, 'display_name': display_name})
            return members
    except Exception:
        # bug fix: was a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt; best-effort semantics kept
        pass
    return []
def get_public_rooms(homeserver, limit=100):
    """fetch a homeserver's public rooms directory (empty list on failure)"""
    directory_url = f'https://{homeserver}/_matrix/client/r0/publicRooms'
    data = _api_get(directory_url, {'limit': limit})
    if not data:
        return []
    return data.get('chunk', [])
def analyze_matrix_user(user_id, rooms_joined, display_name=None):
    """score a matrix user from aligned-room membership overlap.

    10 points per aligned room plus a multi-room bonus, with a small
    text score from the display name when available.
    """
    n_rooms = len(rooms_joined)
    room_score = n_rooms * 10
    # multi-room bonus
    if n_rooms >= 4:
        room_score += 20
    elif n_rooms >= 2:
        room_score += 10
    # display name text analysis, when we have one
    text_score = 0
    signals = []
    if display_name:
        text_score, signals, _ = analyze_text(display_name)
    confidence = 0.3
    if n_rooms >= 3:
        confidence += 0.3
    if display_name:
        confidence += 0.1
    confidence = min(confidence, 0.8)
    reasons = [f"in {n_rooms} aligned rooms: {', '.join(rooms_joined[:3])}"]
    if signals:
        reasons.append(f"signals: {', '.join(signals[:3])}")
    return {
        'platform': 'matrix',
        'username': user_id,
        'url': f"https://matrix.to/#/{user_id}",
        'name': display_name,
        'score': room_score + text_score,
        'confidence': confidence,
        'signals': signals,
        'rooms': rooms_joined,
        'reasons': reasons,
        'scraped_at': datetime.now().isoformat(),
    }
def scrape_matrix(db):
    """
    matrix scrape - limited due to auth requirements
    best effort on public room data

    scans public-room directories for aligned rooms (log only), then
    tries member listings for a few ALIGNED_ROOMS; users present in 2+
    aligned rooms are scored and saved via db.save_human.
    """
    print("scoutd/matrix: starting scrape (limited - most apis require auth)...")
    user_rooms = defaultdict(list)
    # try to get public room directories
    for homeserver in HOMESERVERS:
        print(f" checking {homeserver} public rooms...")
        rooms = get_public_rooms(homeserver, limit=50)
        for room in rooms:
            room_alias = room.get('canonical_alias', '')
            # check if it matches any aligned room patterns
            # NOTE(review): matches are only printed, never collected — confirm intended
            aligned_keywords = ['homeassistant', 'selfhosted', 'privacy', 'linux', 'foss', 'cooperative']
            if any(kw in room_alias.lower() or kw in room.get('name', '').lower() for kw in aligned_keywords):
                print(f" found aligned room: {room_alias or room.get('name')}")
    # try to get members from aligned rooms (usually fails without auth)
    for room_alias in ALIGNED_ROOMS[:3]:  # limit attempts
        for homeserver in HOMESERVERS[:1]:  # just try matrix.org
            members = get_room_members(homeserver, room_alias)
            if members:
                print(f" {room_alias}: {len(members)} members")
                for member in members:
                    user_rooms[member['user_id']].append(room_alias)
    # filter for multi-room users
    multi_room = {u: rooms for u, rooms in user_rooms.items() if len(rooms) >= 2}
    print(f" {len(multi_room)} users in 2+ aligned rooms")
    # analyze
    results = []
    for user_id, rooms in multi_room.items():
        try:
            result = analyze_matrix_user(user_id, rooms)
            if result and result['score'] > 0:
                results.append(result)
                db.save_human(result)
        except Exception as e:
            print(f" error: {e}")
    print(f"scoutd/matrix: found {len(results)} aligned humans (limited by auth)")
    return results

503
connectd/scoutd/reddit.py Normal file
View file

@ -0,0 +1,503 @@
"""
scoutd/reddit.py - reddit discovery (DISCOVERY ONLY, NOT OUTREACH)
reddit is a SIGNAL SOURCE, not a contact channel.
flow:
1. scrape reddit for users active in target subs
2. extract their reddit profile
3. look for links TO other platforms (github, mastodon, website, etc.)
4. add to scout database with reddit as signal source
5. reach out via their OTHER platforms, never reddit
if reddit user has no external links:
- add to manual_queue with note "reddit-only, needs manual review"
also detects lost builders - stuck in learnprogramming for years, imposter syndrome, etc.
"""
import requests
import json
import time
import re
from datetime import datetime
from pathlib import Path
from collections import defaultdict
from .signals import analyze_text, ALIGNED_SUBREDDITS, NEGATIVE_SUBREDDITS
from .lost import (
analyze_reddit_for_lost_signals,
analyze_text_for_lost_signals,
classify_user,
get_signal_descriptions,
STUCK_SUBREDDITS,
)
# descriptive user agent per reddit API etiquette
HEADERS = {'User-Agent': 'connectd:v1.0 (community discovery)'}
# on-disk cache for API responses (same layout as the other scrapers)
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'reddit'
# patterns for extracting external platform links
# each platform maps to a list of regexes tried in order; the first pattern
# that matches wins (see extract_external_links)
PLATFORM_PATTERNS = {
    'github': [
        r'github\.com/([a-zA-Z0-9_-]+)',
        r'gh:\s*@?([a-zA-Z0-9_-]+)',
    ],
    'mastodon': [
        # generic fediverse handle @user@instance (two capture groups)
        r'@([a-zA-Z0-9_]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
        r'mastodon\.social/@([a-zA-Z0-9_]+)',
        r'fosstodon\.org/@([a-zA-Z0-9_]+)',
        r'hachyderm\.io/@([a-zA-Z0-9_]+)',
        r'tech\.lgbt/@([a-zA-Z0-9_]+)',
    ],
    'twitter': [
        r'twitter\.com/([a-zA-Z0-9_]+)',
        r'x\.com/([a-zA-Z0-9_]+)',
        r'(?:^|\s)@([a-zA-Z0-9_]{1,15})(?:\s|$)',  # bare @handle
    ],
    'bluesky': [
        r'bsky\.app/profile/([a-zA-Z0-9_.-]+)',
        r'([a-zA-Z0-9_-]+)\.bsky\.social',
    ],
    'website': [
        # any http(s) URL; reddit/imgur hosts are filtered out by the caller
        r'https?://([a-zA-Z0-9_-]+\.[a-zA-Z]{2,}[a-zA-Z0-9./_-]*)',
    ],
    'matrix': [
        # @user:homeserver (two capture groups)
        r'@([a-zA-Z0-9_-]+):([a-zA-Z0-9.-]+)',
    ],
}
def _api_get(url, params=None):
    """rate-limited, disk-cached GET against the reddit JSON api.

    responses are cached for one hour. returns the parsed json payload,
    or None on request failure.
    """
    import hashlib  # stdlib; used for stable cache filenames (see below)
    cache_key = f"{url}_{json.dumps(params or {}, sort_keys=True)}"
    # BUGFIX: str hash() is randomized per process (PYTHONHASHSEED), which made
    # cache filenames unstable across runs so the cache never hit after a
    # restart - use a stable content digest instead.
    digest = hashlib.sha1(cache_key.encode('utf-8')).hexdigest()[:16]
    cache_file = CACHE_DIR / f"{digest}.json"
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    if cache_file.exists():
        try:
            data = json.loads(cache_file.read_text())
            if time.time() - data.get('_cached_at', 0) < 3600:
                return data.get('_data')
        except (OSError, ValueError):
            # unreadable/corrupt cache entry - fall through and refetch
            pass
    time.sleep(2)  # reddit rate limit
    try:
        resp = requests.get(url, headers=HEADERS, params=params, timeout=30)
        resp.raise_for_status()
        result = resp.json()
        cache_file.write_text(json.dumps({'_cached_at': time.time(), '_data': result}))
        return result
    except requests.exceptions.RequestException as e:
        print(f"  reddit api error: {e}")
        return None
def extract_external_links(text):
    """pull links/handles for other platforms out of free text.

    returns a dict mapping platform name -> handle or url. for each
    platform, only the first matching pattern (and first usable match)
    is kept.
    """
    found = {}
    if not text:
        return found
    for platform, patterns in PLATFORM_PATTERNS.items():
        for pattern in patterns:
            hits = re.findall(pattern, text, re.IGNORECASE)
            if not hits:
                continue
            first = hits[0]
            if platform == 'mastodon' and isinstance(first, tuple):
                # full fediverse handle: @user@instance
                found[platform] = f"@{first[0]}@{first[1]}"
            elif platform == 'matrix' and isinstance(first, tuple):
                found[platform] = f"@{first[0]}:{first[1]}"
            elif platform == 'website':
                # skip links back to reddit/imgur - we want *external* sites
                blocked = ('reddit', 'imgur', 'redd.it', 'i.redd')
                for hit in hits:
                    if not any(b in hit.lower() for b in blocked):
                        found[platform] = f"https://{hit}"
                        break
            else:
                found[platform] = first
            break  # first matching pattern wins for this platform
    return found
def get_user_profile(username):
    """fetch a reddit user's about page (bio, karma, etc); None on failure."""
    data = _api_get(f'https://www.reddit.com/user/{username}/about.json')
    if not data or 'data' not in data:
        return None
    raw = data['data']
    # bio/title live under the user's profile "subreddit" object
    subreddit_info = raw.get('subreddit', {})
    return {
        'username': username,
        'name': raw.get('name'),
        'bio': subreddit_info.get('public_description', ''),
        'title': subreddit_info.get('title', ''),
        'icon': raw.get('icon_img'),
        'created_utc': raw.get('created_utc'),
        'total_karma': raw.get('total_karma', 0),
        'link_karma': raw.get('link_karma', 0),
        'comment_karma': raw.get('comment_karma', 0),
    }
def get_subreddit_users(subreddit, limit=100):
    """collect usernames of recent posters and commenters in a subreddit."""
    skip = ('[deleted]', 'AutoModerator')
    users = set()
    # pull both the newest posts and the newest comments
    for endpoint in ('new', 'comments'):
        data = _api_get(f'https://www.reddit.com/r/{subreddit}/{endpoint}.json',
                        {'limit': limit})
        if not data or 'data' not in data:
            continue
        for child in data['data'].get('children', []):
            author = child['data'].get('author')
            if author and author not in skip:
                users.add(author)
    return users
def get_user_activity(username):
    """fetch a user's recent posts and comments (up to 100 each) as flat dicts."""
    activity = []
    # submitted posts
    data = _api_get(f'https://www.reddit.com/user/{username}/submitted.json',
                    {'limit': 100})
    if data and 'data' in data:
        for child in data['data'].get('children', []):
            item = child['data']
            activity.append({
                'type': 'post',
                'subreddit': item.get('subreddit'),
                'title': item.get('title', ''),
                'body': item.get('selftext', ''),
                'score': item.get('score', 0),
            })
    # comments
    data = _api_get(f'https://www.reddit.com/user/{username}/comments.json',
                    {'limit': 100})
    if data and 'data' in data:
        for child in data['data'].get('children', []):
            item = child['data']
            activity.append({
                'type': 'comment',
                'subreddit': item.get('subreddit'),
                'body': item.get('body', ''),
                'score': item.get('score', 0),
            })
    return activity
def analyze_reddit_user(username):
    """
    analyze a reddit user for alignment and extract external platform links.

    reddit is DISCOVERY ONLY - we find users here but contact them elsewhere.

    returns a result dict (platform/score/signals/external_links/...) or
    None when the user has no visible activity.
    """
    activity = get_user_activity(username)
    if not activity:
        return None
    # get profile for bio
    profile = get_user_profile(username)
    # count subreddit activity and gather all text for signal analysis
    sub_activity = defaultdict(int)
    text_parts = []
    total_karma = 0
    for item in activity:
        # subreddit may be missing/None for some items - guard before lower()
        sub = (item.get('subreddit') or '').lower()
        if sub:
            sub_activity[sub] += 1
        if item.get('title'):
            text_parts.append(item['title'])
        if item.get('body'):
            text_parts.append(item['body'])
        total_karma += item.get('score', 0)
    full_text = ' '.join(text_parts)
    text_score, positive_signals, negative_signals = analyze_text(full_text)
    # EXTRACT EXTERNAL LINKS - this is the key part
    # check profile bio first; bio links take precedence over activity links
    external_links = {}
    if profile:
        bio_text = f"{profile.get('bio', '')} {profile.get('title', '')}"
        external_links.update(extract_external_links(bio_text))
    # also scan posts/comments for links (people often share their github etc)
    activity_links = extract_external_links(full_text)
    for platform, link in activity_links.items():
        external_links.setdefault(platform, link)
    # subreddit scoring.
    # BUGFIX: sub_activity keys are lowercased above, but ALIGNED_SUBREDDITS
    # contains mixed-case keys (e.g. 'PrivacyGuides', 'Seattle'), so those
    # subs never matched - compare case-insensitively.
    aligned_weights = {name.lower(): weight for name, weight in ALIGNED_SUBREDDITS.items()}
    negative_subs = {name.lower() for name in NEGATIVE_SUBREDDITS}
    sub_score = 0
    aligned_subs = []
    for sub, count in sub_activity.items():
        weight = aligned_weights.get(sub, 0)
        if weight > 0:
            sub_score += weight * min(count, 5)  # cap per-sub contribution
            aligned_subs.append(sub)
    # multi-sub bonus
    if len(aligned_subs) >= 5:
        sub_score += 30
    elif len(aligned_subs) >= 3:
        sub_score += 15
    # negative sub penalty (set membership instead of rebuilding a list per sub)
    for sub in sub_activity:
        if sub in negative_subs:
            sub_score -= 50
            negative_signals.append(f"r/{sub}")
    total_score = text_score + sub_score
    # bonus if they have external links (we can actually contact them)
    if external_links.get('github'):
        total_score += 10
        positive_signals.append('has github')
    if external_links.get('mastodon'):
        total_score += 10
        positive_signals.append('has mastodon')
    if external_links.get('website'):
        total_score += 5
        positive_signals.append('has website')
    # === LOST BUILDER DETECTION ===
    # reddit is HIGH SIGNAL for lost builders - stuck in learnprogramming,
    # imposter syndrome posts, "i wish i could" language, etc.
    subreddits_list = list(sub_activity.keys())
    lost_signals, lost_weight = analyze_reddit_for_lost_signals(activity, subreddits_list)
    # also check full text for lost patterns (already done partially in
    # analyze_reddit_for_lost_signals); weights intentionally accumulate
    text_lost_signals, text_lost_weight = analyze_text_for_lost_signals(full_text)
    for sig in text_lost_signals:
        if sig not in lost_signals:
            lost_signals.append(sig)
    lost_weight += text_lost_weight
    lost_potential_score = lost_weight
    # classify: builder, lost, both, or none
    # for reddit, builder_score is based on having external links + high karma
    builder_activity = 0
    if external_links.get('github'):
        builder_activity += 20
    if total_karma > 1000:
        builder_activity += 15
    elif total_karma > 500:
        builder_activity += 10
    user_type = classify_user(lost_potential_score, builder_activity, total_score)
    # confidence grows with evidence volume and contactability, capped at 0.95
    confidence = 0.3
    if len(activity) > 20:
        confidence += 0.2
    if len(aligned_subs) >= 2:
        confidence += 0.2
    if len(text_parts) > 10:
        confidence += 0.2
    # higher confidence if we have contact methods
    if external_links:
        confidence += 0.1
    confidence = min(confidence, 0.95)
    reasons = []
    if aligned_subs:
        reasons.append(f"active in: {', '.join(aligned_subs[:5])}")
    if positive_signals:
        reasons.append(f"signals: {', '.join(positive_signals[:5])}")
    if negative_signals:
        reasons.append(f"WARNING: {', '.join(negative_signals)}")
    if external_links:
        reasons.append(f"external: {', '.join(external_links.keys())}")
    # add lost reasons if applicable
    if user_type in ('lost', 'both'):
        lost_descriptions = get_signal_descriptions(lost_signals)
        if lost_descriptions:
            reasons.append(f"LOST SIGNALS: {', '.join(lost_descriptions[:3])}")
    # reddit-only users (no external links) need manual review for outreach
    reddit_only = len(external_links) == 0
    if reddit_only:
        reasons.append("REDDIT-ONLY: needs manual review for outreach")
    return {
        'platform': 'reddit',
        'username': username,
        'url': f"https://reddit.com/u/{username}",
        'score': total_score,
        'confidence': confidence,
        'signals': positive_signals,
        'negative_signals': negative_signals,
        'subreddits': aligned_subs,
        'activity_count': len(activity),
        'karma': total_karma,
        'reasons': reasons,
        'scraped_at': datetime.now().isoformat(),
        # external platform links for outreach
        'external_links': external_links,
        'reddit_only': reddit_only,
        'extra': {
            'github': external_links.get('github'),
            'mastodon': external_links.get('mastodon'),
            'twitter': external_links.get('twitter'),
            'bluesky': external_links.get('bluesky'),
            'website': external_links.get('website'),
            'matrix': external_links.get('matrix'),
            'reddit_karma': total_karma,
            'reddit_activity': len(activity),
        },
        # lost builder fields
        'lost_potential_score': lost_potential_score,
        'lost_signals': lost_signals,
        'user_type': user_type,
    }
def scrape_reddit(db, limit_per_sub=50):
    """
    full reddit scrape - DISCOVERY ONLY
    finds aligned users, extracts external links for outreach.
    reddit-only users go to manual queue.

    args:
        db: database handle exposing save_human(dict)
        limit_per_sub: max listing size requested per subreddit endpoint
    returns:
        list of result dicts for users that scored > 0
    """
    print("scoutd/reddit: starting scrape (discovery only, not outreach)...")
    # find users in multiple aligned subs: username -> set of subs seen in
    user_subs = defaultdict(set)
    # aligned subs - active builders
    priority_subs = ['intentionalcommunity', 'cohousing', 'selfhosted',
                     'homeassistant', 'solarpunk', 'cooperatives', 'privacy',
                     'localllama', 'homelab', 'degoogle', 'pihole', 'unraid']
    # lost builder subs - people who need encouragement
    # these folks might be stuck, but they have aligned interests
    lost_subs = ['learnprogramming', 'findapath', 'getdisciplined',
                 'careerguidance', 'cscareerquestions', 'decidingtobebetter']
    # scrape both - we want to find lost builders with aligned interests
    all_subs = priority_subs + lost_subs
    for sub in all_subs:
        print(f"  scraping r/{sub}...")
        users = get_subreddit_users(sub, limit=limit_per_sub)
        for user in users:
            user_subs[user].add(sub)
        print(f"    found {len(users)} users")
    # filter for multi-sub users (2+ target subs = stronger signal)
    multi_sub = {u: subs for u, subs in user_subs.items() if len(subs) >= 2}
    print(f"  {len(multi_sub)} users in 2+ aligned subs")
    # analyze each candidate; counters feed the summary printed at the end
    results = []
    reddit_only_count = 0
    external_link_count = 0
    builders_found = 0
    lost_found = 0
    for username in multi_sub:
        try:
            result = analyze_reddit_user(username)
            if result and result['score'] > 0:
                results.append(result)
                db.save_human(result)
                user_type = result.get('user_type', 'none')
                # track lost builders - reddit is high signal for these
                if user_type == 'lost':
                    lost_found += 1
                    lost_score = result.get('lost_potential_score', 0)
                    if lost_score >= 40:
                        print(f"  💔 u/{username}: lost_score={lost_score}, values={result['score']} pts")
                    # lost builders also go to manual queue if reddit-only
                    if result.get('reddit_only'):
                        _add_to_manual_queue(result)
                elif user_type == 'builder':
                    builders_found += 1
                elif user_type == 'both':
                    builders_found += 1
                    lost_found += 1
                    print(f"  ⚡ u/{username}: recovering builder")
                # track external links (reachable vs reddit-only)
                if result.get('reddit_only'):
                    reddit_only_count += 1
                    # add high-value users to manual queue for review
                    if result['score'] >= 50 and user_type != 'lost':  # lost already added above
                        _add_to_manual_queue(result)
                        print(f"  📋 u/{username}: {result['score']} pts (reddit-only → manual queue)")
                else:
                    external_link_count += 1
                    if result['score'] >= 50 and user_type == 'builder':
                        links = list(result.get('external_links', {}).keys())
                        print(f"  ★ u/{username}: {result['score']} pts → {', '.join(links)}")
        except Exception as e:
            print(f"  error on {username}: {e}")
    print(f"scoutd/reddit: found {len(results)} aligned humans")
    print(f"  - {builders_found} active builders")
    print(f"  - {lost_found} lost builders (need encouragement)")
    print(f"  - {external_link_count} with external links (reachable)")
    print(f"  - {reddit_only_count} reddit-only (manual queue)")
    return results
def _add_to_manual_queue(result):
    """add a reddit-only user to the manual review queue (idempotent).

    the queue is a json list on disk; entries are deduped by
    (platform, username). json and Path are already imported at module
    level, so the old shadowing local imports were removed.
    """
    queue_file = Path(__file__).parent.parent / 'data' / 'manual_queue.json'
    queue_file.parent.mkdir(parents=True, exist_ok=True)
    queue = []
    if queue_file.exists():
        try:
            queue = json.loads(queue_file.read_text())
        except (OSError, ValueError):
            # unreadable/corrupt queue file: start fresh rather than crash
            queue = []
    # skip if this user is already queued
    for entry in queue:
        if entry.get('username') == result['username'] and entry.get('platform') == 'reddit':
            return
    queue.append({
        'platform': 'reddit',
        'username': result['username'],
        'url': result['url'],
        'score': result['score'],
        'subreddits': result.get('subreddits', []),
        'signals': result.get('signals', []),
        'reasons': result.get('reasons', []),
        'note': 'reddit-only user - no external links found. DM manually if promising.',
        'queued_at': datetime.now().isoformat(),
        'status': 'pending',
    })
    queue_file.write_text(json.dumps(queue, indent=2))

158
connectd/scoutd/signals.py Normal file
View file

@ -0,0 +1,158 @@
"""
shared signal patterns for all scrapers
"""
import re
# positive signals - what we're looking for
# each entry: (regex, signal name, score weight)
POSITIVE_PATTERNS = [
    # values
    (r'\b(solarpunk|cyberpunk)\b', 'solarpunk', 10),
    (r'\b(anarchis[tm]|mutual.?aid)\b', 'mutual_aid', 10),
    (r'\b(cooperative|collective|worker.?owned?|coop|co.?op)\b', 'cooperative', 15),
    (r'\b(community|commons)\b', 'community', 5),
    (r'\b(intentional.?community|cohousing|commune)\b', 'intentional_community', 20),
    # queer-friendly
    (r'\b(queer|lgbtq?|trans|nonbinary|enby|genderqueer)\b', 'queer', 15),
    (r'\b(they/them|she/her|he/him|xe/xem|any.?pronouns)\b', 'pronouns', 10),
    (r'\bblm\b', 'blm', 5),
    (r'\b(acab|1312)\b', 'acab', 5),
    # tech values
    (r'\b(privacy|surveillance|anti.?surveillance)\b', 'privacy', 10),
    (r'\b(self.?host(?:ed|ing)?|homelab|home.?server)\b', 'selfhosted', 15),
    (r'\b(local.?first|offline.?first)\b', 'local_first', 15),
    (r'\b(decentralized?|federation|federated|fediverse)\b', 'decentralized', 10),
    (r'\b(foss|libre|open.?source|copyleft)\b', 'foss', 10),
    (r'\b(home.?assistant|home.?automation)\b', 'home_automation', 10),
    (r'\b(mesh|p2p|peer.?to.?peer)\b', 'p2p', 10),
    (r'\b(matrix|xmpp|irc)\b', 'federated_chat', 5),
    (r'\b(degoogle|de.?google)\b', 'degoogle', 10),
    # location/availability
    (r'\b(seattle|portland|pnw|cascadia|pacific.?northwest)\b', 'pnw', 20),
    (r'\b(washington|oregon)\b', 'pnw_state', 10),
    (r'\b(remote|anywhere|relocate|looking.?to.?move)\b', 'remote', 10),
    # anti-capitalism
    (r'\b(anti.?capitalis[tm]|post.?capitalis[tm]|degrowth)\b', 'anticapitalist', 10),
    # neurodivergent (often overlaps with our values)
    (r'\b(neurodivergent|adhd|autistic|autism)\b', 'neurodivergent', 5),
    # technical skills (bonus for builders)
    (r'\b(rust|go|python|typescript)\b', 'modern_lang', 3),
    (r'\b(linux|bsd|nixos)\b', 'unix', 3),
    (r'\b(kubernetes|docker|podman)\b', 'containers', 3),
]
# negative signals - red flags (weights are negative)
NEGATIVE_PATTERNS = [
    (r'\b(qanon|maga|trump|wwg1wga)\b', 'maga', -50),
    (r'\b(covid.?hoax|plandemic|5g.?conspiracy)\b', 'conspiracy', -50),
    (r'\b(nwo|illuminati|deep.?state)\b', 'conspiracy', -30),
    (r'\b(anti.?vax|antivax)\b', 'antivax', -30),
    (r'\b(sovereign.?citizen)\b', 'sovcit', -40),
    (r'\b(crypto.?bro|web3|nft|blockchain|bitcoin|ethereum)\b', 'crypto', -15),
    (r'\b(conservative|republican)\b', 'conservative', -20),
    (r'\b(free.?speech.?absolutist)\b', 'freeze_peach', -20),
]
# target topics for repo discovery
TARGET_TOPICS = [
    'local-first', 'self-hosted', 'privacy', 'mesh-network',
    'cooperative', 'solarpunk', 'decentralized', 'p2p',
    'fediverse', 'activitypub', 'matrix-org', 'homeassistant',
    'esphome', 'open-source-hardware', 'right-to-repair',
    'mutual-aid', 'commons', 'degoogle', 'privacy-tools',
]
# ecosystem repos - high signal contributors
ECOSYSTEM_REPOS = [
    'home-assistant/core',
    'esphome/esphome',
    'matrix-org/synapse',
    'LemmyNet/lemmy',
    'mastodon/mastodon',
    'owncast/owncast',
    'nextcloud/server',
    'immich-app/immich',
    'jellyfin/jellyfin',
    'navidrome/navidrome',
    'paperless-ngx/paperless-ngx',
    'actualbudget/actual',
    'firefly-iii/firefly-iii',
    'logseq/logseq',
    'AppFlowy-IO/AppFlowy',
    'siyuan-note/siyuan',
    'anytype/anytype-ts',
    'calcom/cal.com',
    'plausible/analytics',
    'umami-software/umami',
]
# aligned subreddits: name -> score weight.
# BUGFIX: consumers (e.g. scoutd/reddit.py) lowercase subreddit names before
# looking them up here, so mixed-case keys ('PrivacyGuides', 'Seattle', ...)
# could never match - all keys are now lowercase.
ALIGNED_SUBREDDITS = {
    'intentionalcommunity': 25,
    'cohousing': 25,
    'cooperatives': 20,
    'solarpunk': 20,
    'selfhosted': 15,
    'homeassistant': 15,
    'homelab': 10,
    'privacy': 15,
    'privacyguides': 15,
    'degoogle': 15,
    'anticonsumption': 10,
    'frugal': 5,
    'simpleliving': 5,
    'seattle': 10,
    'portland': 10,
    'cascadia': 15,
    'linux': 5,
    'opensource': 10,
    'foss': 10,
}
# negative subreddits (consumers compare case-insensitively)
NEGATIVE_SUBREDDITS = [
    'conspiracy', 'conservative', 'walkaway', 'louderwithcrowder',
    'JordanPeterson', 'TimPool', 'NoNewNormal', 'LockdownSkepticism',
]
# high-signal mastodon instances: hostname -> score weight
ALIGNED_INSTANCES = {
    'tech.lgbt': 20,
    'social.coop': 25,
    'fosstodon.org': 10,
    'hackers.town': 15,
    'hachyderm.io': 10,
    'infosec.exchange': 5,
}
def analyze_text(text):
    """
    score free text against the shared positive/negative signal patterns.

    returns (score, positive_signal_names, negative_signal_names); the
    name lists are deduplicated (order not guaranteed), while the score
    accumulates the weight of every matching pattern.
    """
    if not text:
        return 0, [], []
    text = text.lower()
    score = 0
    found = set()
    flagged = set()
    for pattern, name, points in POSITIVE_PATTERNS:
        if re.search(pattern, text, re.IGNORECASE):
            score += points
            found.add(name)
    for pattern, name, points in NEGATIVE_PATTERNS:
        if re.search(pattern, text, re.IGNORECASE):
            score += points  # weights are already negative
            flagged.add(name)
    return score, list(found), list(flagged)

255
connectd/scoutd/twitter.py Normal file
View file

@ -0,0 +1,255 @@
"""
scoutd/twitter.py - twitter/x discovery via nitter instances
scrapes nitter (twitter frontend) to find users posting about aligned topics
without needing twitter API access
nitter instances rotate to avoid rate limits
"""
import requests
import json
import time
import re
from datetime import datetime
from pathlib import Path
from bs4 import BeautifulSoup
from .signals import analyze_text
# browser-like UA - nitter instances often reject obvious bot agents
HEADERS = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0'}
# on-disk cache for scraped pages (same layout as the other scrapers)
CACHE_DIR = Path(__file__).parent.parent / 'db' / 'cache' / 'twitter'
# nitter instances (rotate through these)
# NOTE(review): public nitter instances come and go - verify these still resolve
NITTER_INSTANCES = [
    'nitter.privacydev.net',
    'nitter.poast.org',
    'nitter.woodland.cafe',
    'nitter.esmailelbob.xyz',
]
# hashtags to search
ALIGNED_HASHTAGS = [
    'selfhosted', 'homelab', 'homeassistant', 'foss', 'opensource',
    'privacy', 'solarpunk', 'cooperative', 'mutualaid', 'localfirst',
    'indieweb', 'smallweb', 'permacomputing', 'degrowth', 'techworkers',
]
# index into NITTER_INSTANCES; advanced by rotate_instance() on failures
_current_instance_idx = 0
def get_nitter_instance():
    """return the nitter instance currently selected by the rotation index."""
    global _current_instance_idx
    idx = _current_instance_idx % len(NITTER_INSTANCES)
    return NITTER_INSTANCES[idx]
def rotate_instance():
    """advance the rotation so the next call uses a different nitter instance."""
    global _current_instance_idx
    _current_instance_idx = _current_instance_idx + 1
def _scrape_page(url, retries=3):
    """scrape a nitter page, rotating instances on rate limits/errors.

    `url` contains an `{instance}` placeholder. successful responses are
    cached on disk for an hour. returns html text, or None on failure.
    """
    import hashlib  # stdlib; used for stable cache filenames (see below)
    for attempt in range(retries):
        instance = get_nitter_instance()
        full_url = url.replace('{instance}', instance)
        # BUGFIX: str hash() is randomized per process (PYTHONHASHSEED), which
        # made cache filenames unstable across runs so the cache never hit
        # after a restart - use a stable content digest instead.
        digest = hashlib.sha1(full_url.encode('utf-8')).hexdigest()[:16]
        cache_file = CACHE_DIR / f"{digest}.json"
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        if cache_file.exists():
            try:
                data = json.loads(cache_file.read_text())
                if time.time() - data.get('_cached_at', 0) < 3600:
                    return data.get('_html')
            except (OSError, ValueError):
                # unreadable/corrupt cache entry - fall through and refetch
                pass
        time.sleep(2)  # rate limit
        try:
            resp = requests.get(full_url, headers=HEADERS, timeout=30)
            if resp.status_code == 200:
                cache_file.write_text(json.dumps({
                    '_cached_at': time.time(),
                    '_html': resp.text
                }))
                return resp.text
            elif resp.status_code in (429, 503):
                print(f"  nitter {instance} rate limited, rotating...")
                rotate_instance()
            else:
                print(f"  nitter error: {resp.status_code}")
                return None
        except Exception as e:
            # network-level failure: try the next instance
            print(f"  nitter {instance} error: {e}")
            rotate_instance()
    return None
def search_hashtag(hashtag):
    """search nitter for recent tweets carrying `hashtag`; list of dicts."""
    html = _scrape_page(f"https://{{instance}}/search?q=%23{hashtag}&f=tweets")
    if not html:
        return []
    soup = BeautifulSoup(html, 'html.parser')
    found = []
    for item in soup.select('.timeline-item'):
        try:
            user_el = item.select_one('.username')
            body_el = item.select_one('.tweet-content')
            name_el = item.select_one('.fullname')
            if not (user_el and body_el):
                continue
            handle = user_el.text.strip().lstrip('@')
            found.append({
                'username': handle,
                'name': name_el.text.strip() if name_el else handle,
                'content': body_el.text.strip(),
            })
        except Exception:
            # malformed timeline entry - skip it
            continue
    return found
def get_user_profile(username):
    """fetch a twitter profile (bio, location, website, recent tweets) via nitter."""
    html = _scrape_page(f"https://{{instance}}/{username}")
    if not html:
        return None
    soup = BeautifulSoup(html, 'html.parser')
    try:
        def _text_of(selector):
            el = soup.select_one(selector)
            return el.text.strip() if el else ''

        website_el = soup.select_one('.profile-website a')
        # first 10 visible tweets give extra signal text
        recent = []
        for item in soup.select('.timeline-item')[:10]:
            body_el = item.select_one('.tweet-content')
            if body_el:
                recent.append(body_el.text.strip())
        return {
            'username': username,
            'bio': _text_of('.profile-bio'),
            'location': _text_of('.profile-location'),
            'website': website_el.get('href') if website_el else '',
            'recent_tweets': recent,
        }
    except Exception as e:
        print(f"  error parsing {username}: {e}")
        return None
def analyze_twitter_user(username, profile=None):
    """score a twitter user for alignment; returns a result dict or None.

    if `profile` is not supplied it is fetched via nitter first.
    """
    profile = profile or get_user_profile(username)
    if not profile:
        return None
    # combine bio + recent tweets into one corpus for signal analysis
    corpus = ' '.join([profile.get('bio', '')] + list(profile.get('recent_tweets', [])))
    text_score, positive_signals, negative_signals = analyze_text(corpus)
    # twitter is noisy: start low and cap confidence well below 1.0
    confidence = 0.25
    if len(positive_signals) >= 3:
        confidence += 0.2
    if profile.get('website'):
        confidence += 0.1
    if len(profile.get('recent_tweets', [])) >= 5:
        confidence += 0.1
    confidence = min(confidence, 0.7)  # cap lower for twitter
    reasons = []
    if positive_signals:
        reasons.append(f"signals: {', '.join(positive_signals[:5])}")
    if negative_signals:
        reasons.append(f"WARNING: {', '.join(negative_signals)}")
    return {
        'platform': 'twitter',
        'username': username,
        'url': f"https://twitter.com/{username}",
        'name': profile.get('name', username),
        'bio': profile.get('bio'),
        'location': profile.get('location'),
        'score': text_score,
        'confidence': confidence,
        'signals': positive_signals,
        'negative_signals': negative_signals,
        'reasons': reasons,
        'contact': {
            'twitter': username,
            'website': profile.get('website'),
        },
        'scraped_at': datetime.now().isoformat(),
    }
def scrape_twitter(db, limit_per_hashtag=50):
    """full twitter scrape via nitter.

    searches aligned hashtags, keeps users seen under 2+ *distinct*
    hashtags, analyzes them, and saves aligned results to the db.
    """
    print("scoutd/twitter: starting scrape via nitter...")
    all_users = {}
    for hashtag in ALIGNED_HASHTAGS:
        print(f"  #{hashtag}...")
        tweets = search_hashtag(hashtag)
        for tweet in tweets[:limit_per_hashtag]:
            username = tweet.get('username')
            if not username:
                continue
            entry = all_users.setdefault(username, {
                'username': username,
                'name': tweet.get('name'),
                'hashtags': [],
            })
            # BUGFIX: a user tweeting the same hashtag repeatedly used to get
            # it appended once per tweet, wrongly counting as "2+ hashtags" -
            # only record each hashtag once per user
            if hashtag not in entry['hashtags']:
                entry['hashtags'].append(hashtag)
        print(f"    found {len(tweets)} tweets")
    # prioritize users active under multiple distinct aligned hashtags
    multi_hashtag = {u: d for u, d in all_users.items() if len(d.get('hashtags', [])) >= 2}
    print(f"  {len(multi_hashtag)} users in 2+ aligned hashtags")
    # analyze and persist
    results = []
    for username, data in list(multi_hashtag.items())[:100]:  # limit to prevent rate limits
        try:
            result = analyze_twitter_user(username)
            if result and result['score'] > 0:
                results.append(result)
                db.save_human(result)
                if result['score'] >= 30:
                    print(f"  ★ @{username}: {result['score']} pts")
        except Exception as e:
            print(f"  error on {username}: {e}")
    print(f"scoutd/twitter: found {len(results)} aligned humans")
    return results

143
connectd/setup_user.py Normal file
View file

@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""
setup priority user - add yourself to get matches
usage:
python setup_user.py # interactive setup
python setup_user.py --show # show your profile
python setup_user.py --matches # show your matches
"""
import argparse
import json
from db import Database
from db.users import (init_users_table, add_priority_user, get_priority_users,
get_priority_user_matches)
def interactive_setup(db):
    """interactively collect a priority-user profile and store it in the db."""
    def ask(prompt):
        return input(prompt).strip()

    def ask_optional(prompt):
        # optional answers become None when left blank
        return ask(prompt) or None

    banner = "=" * 60
    print(banner)
    print("connectd priority user setup")
    print(banner)
    print("\nlink your profiles so connectd can find matches for YOU\n")
    user_data = {
        'name': ask("name: "),
        'email': ask("email (for notifications): "),
        'github': ask_optional("github username (optional): "),
        'reddit': ask_optional("reddit username (optional): "),
        'mastodon': ask_optional("mastodon handle e.g. user@instance (optional): "),
        'lobsters': ask_optional("lobste.rs username (optional): "),
        'matrix': ask_optional("matrix id e.g. @user:matrix.org (optional): "),
        'location': ask_optional("location (e.g. seattle, remote): "),
    }
    print("\nwhat are you interested in? (comma separated)")
    print("examples: self-hosting, cooperatives, solarpunk, home automation")
    raw_interests = ask("interests: ")
    user_data['interests'] = [i.strip() for i in raw_interests.split(',')] if raw_interests else []
    print("\nwhat kind of people are you looking to connect with?")
    user_data['looking_for'] = ask_optional("looking for: ")
    user_id = add_priority_user(db.conn, user_data)
    print(f"\n✓ added as priority user #{user_id}")
    print("connectd will now find matches for you")
def show_profile(db):
    """print each configured priority user's profile to stdout."""
    users = get_priority_users(db.conn)
    if not users:
        print("no priority users configured")
        print("run: python setup_user.py")
        return
    divider = "=" * 60
    # optional single-value fields whose label equals the column name
    simple_fields = ['github', 'reddit', 'mastodon', 'lobsters', 'matrix', 'location']
    for user in users:
        print(divider)
        print(f"priority user #{user['id']}: {user['name']}")
        print(divider)
        print(f"email: {user['email']}")
        for field in simple_fields:
            if user[field]:
                print(f"{field}: {user[field]}")
        if user['interests']:
            interests = user['interests']
            # interests may be stored as a json string in the db
            if isinstance(interests, str):
                interests = json.loads(interests)
            print(f"interests: {', '.join(interests)}")
        if user['looking_for']:
            print(f"looking for: {user['looking_for']}")
def show_matches(db):
    """print the top matches for each priority user."""
    users = get_priority_users(db.conn)
    if not users:
        print("no priority users configured")
        return
    for user in users:
        print(f"\n=== matches for {user['name']} ===\n")
        matches = get_priority_user_matches(db.conn, user['id'], limit=20)
        if not matches:
            print("no matches yet - run the daemon to discover people")
            continue
        for idx, match in enumerate(matches, 1):
            print(f"{idx}. {match['username']} ({match['platform']})")
            print(f"   score: {match['overlap_score']:.0f}")
            print(f"   url: {match['url']}")
            # overlap_reasons may be stored as a json string in the db
            why = match.get('overlap_reasons', '[]')
            if isinstance(why, str):
                why = json.loads(why)
            if why:
                print(f"   why: {why[0] if why else ''}")
            print()
def main():
    """cli entry point: setup, or inspect profile/matches via flags."""
    parser = argparse.ArgumentParser(description='setup priority user')
    parser.add_argument('--show', action='store_true', help='show your profile')
    parser.add_argument('--matches', action='store_true', help='show your matches')
    args = parser.parse_args()
    db = Database()
    # BUGFIX: close the db even when a subcommand raises
    try:
        init_users_table(db.conn)
        if args.show:
            show_profile(db)
        elif args.matches:
            show_matches(db)
        else:
            interactive_setup(db)
    finally:
        db.close()


if __name__ == '__main__':
    main()

5
repository.json Normal file
View file

@ -0,0 +1,5 @@
{
"name": "connectd add-ons",
"url": "https://github.com/sudoxnym/ha-addons",
"maintainer": "sudoxnym"
}