ha-addons/connectd/db/users.py
2025-12-15 11:06:57 -06:00

510 lines
17 KiB
Python

"""
priority users - people who host connectd get direct matching
"""
import sqlite3
import json
from datetime import datetime
from pathlib import Path
# sqlite database file that sits next to this module
DB_PATH = Path(__file__).with_name('connectd.db')

# map user-friendly interests to signal terms
# keys are lowercase interest labels; values are the signal terms each one
# expands to (an empty list means the interest contributes no signal)
INTEREST_TO_SIGNALS = dict([
    ('self-hosting', ['selfhosted', 'home_automation']),
    ('home-assistant', ['home_automation']),
    ('intentional-community', ['community', 'cooperative']),
    ('cooperatives', ['cooperative', 'community']),
    ('solarpunk', ['solarpunk']),
    ('privacy', ['privacy', 'local_first']),
    ('local-first', ['local_first', 'privacy']),
    ('queer-friendly', ['queer']),
    ('anti-capitalism', ['cooperative', 'decentralized', 'community']),
    ('esports-venue', []),
    ('foss', ['foss']),
    ('decentralized', ['decentralized']),
    ('federated', ['federated_chat']),
    ('mesh', ['mesh']),
])
def init_users_table(conn):
    """
    create the priority_users and priority_matches tables if they are missing

    also migrates an older priority_users table by adding whichever of the
    lemmy / discord / bluesky columns it lacks. safe to call repeatedly.
    commits before returning.

    conn: open sqlite3 connection
    """
    c = conn.cursor()
    c.execute('''CREATE TABLE IF NOT EXISTS priority_users (
        id INTEGER PRIMARY KEY,
        name TEXT,
        email TEXT UNIQUE,
        github TEXT,
        reddit TEXT,
        mastodon TEXT,
        lobsters TEXT,
        matrix TEXT,
        lemmy TEXT,
        discord TEXT,
        bluesky TEXT,
        location TEXT,
        bio TEXT,
        interests TEXT,
        looking_for TEXT,
        created_at TEXT,
        active INTEGER DEFAULT 1,
        score REAL DEFAULT 0,
        signals TEXT,
        scraped_profile TEXT,
        last_scored_at TEXT
    )''')

    # add missing columns to existing table.
    # look at the real schema instead of attempting the ALTER and swallowing
    # every exception, so genuine failures (locked / read-only database, ...)
    # surface instead of being mistaken for "column already exists"
    c.execute('PRAGMA table_info(priority_users)')
    existing_columns = {info[1] for info in c.fetchall()}  # index 1 = column name
    for col in ('lemmy', 'discord', 'bluesky'):
        if col not in existing_columns:
            # col comes from the fixed tuple above, never from user input
            c.execute(f'ALTER TABLE priority_users ADD COLUMN {col} TEXT')

    # matches specifically for priority users
    c.execute('''CREATE TABLE IF NOT EXISTS priority_matches (
        id INTEGER PRIMARY KEY,
        priority_user_id INTEGER,
        matched_human_id INTEGER,
        overlap_score REAL,
        overlap_reasons TEXT,
        status TEXT DEFAULT 'new',
        notified_at TEXT,
        viewed_at TEXT,
        FOREIGN KEY(priority_user_id) REFERENCES priority_users(id),
        FOREIGN KEY(matched_human_id) REFERENCES humans(id)
    )''')
    conn.commit()
def add_priority_user(conn, user_data):
    """
    add a priority user (someone hosting connectd)

    conn: open sqlite3 connection
    user_data: dict of profile fields. missing keys are stored as NULL.
        'interests' is stored as a json list; a missing or None value is
        stored as an empty list so decoding it later always gives a list

    the statement is INSERT OR REPLACE: when the new row conflicts with an
    existing one (email is declared UNIQUE), sqlite removes the old row
    before inserting, so columns not listed here fall back to their defaults.

    returns the rowid reported by the cursor after the insert. commits.
    """
    text_fields = ('name', 'email', 'github', 'reddit', 'mastodon', 'lobsters',
                   'matrix', 'lemmy', 'discord', 'bluesky', 'location', 'bio')
    values = [user_data.get(field) for field in text_fields]
    # `or []` rather than a .get() default: an explicit None would otherwise be
    # serialised as json null and break code that iterates the interests
    values.append(json.dumps(user_data.get('interests') or []))
    values.append(user_data.get('looking_for'))
    values.append(datetime.now().isoformat())

    c = conn.cursor()
    c.execute('''INSERT OR REPLACE INTO priority_users
        (name, email, github, reddit, mastodon, lobsters, matrix, lemmy, discord, bluesky,
        location, bio, interests, looking_for, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
              values)
    conn.commit()
    return c.lastrowid
def get_priority_users(conn):
    """
    return every priority user whose active flag is 1, as a list of dicts

    each fetched row is passed to dict(), so the connection needs a row
    factory whose rows support that (sqlite3.Row does).
    """
    cursor = conn.cursor()
    cursor.execute('SELECT * FROM priority_users WHERE active = 1')
    users = []
    for record in cursor.fetchall():
        users.append(dict(record))
    return users
def get_priority_user(conn, user_id):
    """
    look up one priority user by id

    returns the row as a dict, or None when no row has that id. the row is
    passed to dict(), so the connection needs a row factory whose rows
    support that (sqlite3.Row does).
    """
    cursor = conn.cursor()
    cursor.execute('SELECT * FROM priority_users WHERE id = ?', (user_id,))
    record = cursor.fetchone()
    if not record:
        return None
    return dict(record)
def save_priority_match(conn, priority_user_id, human_id, overlap_data):
    """
    store one match between a priority user and a human

    overlap_data: dict; 'overlap_score' defaults to 0 and 'overlap_reasons'
        defaults to an empty list, which is stored as json text
    the new row always starts with status 'new'. commits, then returns the
    cursor's lastrowid.

    NOTE(review): the insert is OR IGNORE, but the priority_matches table as
    created in this file has no UNIQUE constraint on the user/human pair, so
    nothing would be ignored for a repeated pair - confirm whether duplicates
    are expected.
    """
    score = overlap_data.get('overlap_score', 0)
    reasons_json = json.dumps(overlap_data.get('overlap_reasons', []))
    params = (priority_user_id, human_id, score, reasons_json)

    cursor = conn.cursor()
    cursor.execute('''INSERT OR IGNORE INTO priority_matches
        (priority_user_id, matched_human_id, overlap_score, overlap_reasons, status)
        VALUES (?, ?, ?, ?, 'new')''',
                   params)
    conn.commit()
    return cursor.lastrowid
def get_priority_user_matches(conn, priority_user_id, status=None, limit=50):
    """
    list a priority user's matches joined with the matched human rows

    status: when truthy, only matches with exactly this status are returned
    limit: maximum number of rows
    results are ordered by overlap_score, highest first, and returned as
    dicts (rows are passed to dict(), so a row factory such as sqlite3.Row
    is needed).

    both pm.* and h.* are selected, so a column name present in both tables
    (e.g. id) ends up under a single dict key.
    """
    if status:
        query = '''SELECT pm.*, h.* FROM priority_matches pm
            JOIN humans h ON pm.matched_human_id = h.id
            WHERE pm.priority_user_id = ? AND pm.status = ?
            ORDER BY pm.overlap_score DESC
            LIMIT ?'''
        params = (priority_user_id, status, limit)
    else:
        query = '''SELECT pm.*, h.* FROM priority_matches pm
            JOIN humans h ON pm.matched_human_id = h.id
            WHERE pm.priority_user_id = ?
            ORDER BY pm.overlap_score DESC
            LIMIT ?'''
        params = (priority_user_id, limit)

    cursor = conn.cursor()
    cursor.execute(query, params)
    return list(map(dict, cursor.fetchall()))
def mark_match_viewed(conn, match_id):
    """
    flag one priority match as viewed

    sets status to 'viewed' and viewed_at to the current local time
    (iso format) on the row with the given id, then commits.
    """
    viewed_at = datetime.now().isoformat()
    cursor = conn.cursor()
    cursor.execute('''UPDATE priority_matches SET status = 'viewed', viewed_at = ?
        WHERE id = ?''',
                   (viewed_at, match_id))
    conn.commit()
def expand_interests_to_signals(interests):
    """
    expand user-friendly interests to signal terms

    interests: iterable of interest strings, or None (treated as empty)

    each interest is lowercased and stripped. if it is a key of
    INTEREST_TO_SIGNALS it is replaced by the mapped signals, otherwise the
    cleaned interest itself is used as a signal. entries that are blank
    after stripping are skipped so no empty-string signal is produced.

    returns a sorted list without duplicates, so the result is stable from
    one call to the next.
    """
    signals = set()
    for interest in interests or []:
        interest_lower = interest.lower().strip()
        if not interest_lower:
            continue
        # unknown interests fall through as their own signal term
        signals.update(INTEREST_TO_SIGNALS.get(interest_lower, [interest_lower]))

    # always add these aligned signals for priority users
    signals.update(['foss', 'decentralized', 'federated_chat', 'containers', 'unix', 'selfhosted'])
    return sorted(signals)
def _score_stated_interests(interests):
    """score a list of interest strings; returns (points, signals set)"""
    # (substrings to look for, points, signal added). every rule is checked
    # for every interest, so one interest can trigger several rules
    rules = (
        (('solarpunk',), 30, 'solarpunk'),
        (('queer',), 30, 'queer'),
        (('cooperative', 'intentional'), 20, 'cooperative'),
        (('privacy',), 10, 'privacy'),
        (('self-host', 'selfhost'), 15, 'selfhosted'),
        (('home-assistant',), 15, 'home_automation'),
        (('foss', 'open source'), 10, 'foss'),
    )
    points = 0
    signals = set()
    for interest in interests:
        interest_lower = interest.lower()
        for needles, value, signal in rules:
            if any(needle in interest_lower for needle in needles):
                points += value
                signals.add(signal)
    return points, signals


def _score_scraped_profile(scraped_profile):
    """score a scraped profile dict; returns (points, signals set)"""
    points = 0
    signals = set()

    # `or` fallbacks: a key that is present but None must not crash scoring
    # repos
    repo_count = len(scraped_profile.get('top_repos') or [])
    if repo_count >= 20:
        points += 20
    elif repo_count >= 10:
        points += 10
    elif repo_count >= 5:
        points += 5

    # languages
    languages = scraped_profile.get('languages') or {}
    if 'Python' in languages or 'Rust' in languages:
        points += 5
        signals.add('modern_lang')

    # topics from repos
    # NOTE(review): this yields 'self_hosted' / 'home_assistant', while the
    # interest rules use 'selfhosted' / 'home_automation' - confirm which
    # spelling the matching side expects
    for topic in scraped_profile.get('topics') or []:
        if topic in ('self-hosted', 'home-assistant', 'privacy', 'foss'):
            points += 10
            signals.add(topic.replace('-', '_'))

    # followers
    followers = scraped_profile.get('followers') or 0
    if followers >= 100:
        points += 15
    elif followers >= 50:
        points += 10
    elif followers >= 10:
        points += 5

    return points, signals


def score_priority_user(conn, user_id, scraped_profile=None):
    """
    calculate a score for a priority user based on:
    - their stated interests
    - their scraped github profile (if available): repo count, languages,
      topics and followers

    conn: open sqlite3 connection (rows are passed to dict(), so a row
        factory such as sqlite3.Row is needed)
    user_id: id of the priority user
    scraped_profile: optional dict with 'top_repos', 'languages', 'topics'
        and 'followers'; missing or None entries count as empty / zero

    writes score, signals (json), scraped_profile (json, or NULL when none
    was given) and last_scored_at back to the user row and commits.

    returns {'score': ..., 'signals': [...]} or None if the user is missing.
    """
    c = conn.cursor()
    c.execute('SELECT * FROM priority_users WHERE id = ?', (user_id,))
    row = c.fetchone()
    if not row:
        return None
    user = dict(row)

    # 1. score from stated interests
    interests = user.get('interests')
    if isinstance(interests, str):
        interests = json.loads(interests) if interests else []
    if not isinstance(interests, list):
        # NULL column or json null: treat as "no interests" instead of
        # failing when the value is iterated
        interests = []
    score, signals = _score_stated_interests(interests)

    # 2. score from scraped profile
    if scraped_profile:
        profile_points, profile_signals = _score_scraped_profile(scraped_profile)
        score += profile_points
        signals.update(profile_signals)

    # 3. add expanded signals
    signals.update(expand_interests_to_signals(interests))

    # sorted so the stored json and the returned list agree and are stable
    signal_list = sorted(signals)

    # update user
    c.execute('''UPDATE priority_users
        SET score = ?, signals = ?, scraped_profile = ?, last_scored_at = ?
        WHERE id = ?''',
              (score, json.dumps(signal_list),
               json.dumps(scraped_profile) if scraped_profile else None,
               datetime.now().isoformat(), user_id))
    conn.commit()
    return {'score': score, 'signals': signal_list}
def auto_match_priority_user(conn, user_id, min_overlap=40):
    """
    automatically find and save matches for a priority user
    uses relationship filtering to skip already-connected people

    conn: open sqlite3 connection (the user row is passed to dict(), so a
        row factory such as sqlite3.Row is needed)
    user_id: id of the priority user to match
    min_overlap: lowest overlap score that still counts as a match

    scoring per human (only humans with score >= 25 are considered):
    - 10 points per signal shared with the user
    - bonuses when the human carries 'queer', 'solarpunk' or 'cooperative',
      whether or not the user shares that signal
    - 25 points when the human's location looks like the pacific northwest
      and the user's location mentions seattle or pnw

    existing priority_matches rows for the user are deleted first; the 50
    best new matches are then saved.

    returns every match at or above min_overlap, best first, as dicts with
    'human', 'overlap_score' and 'shared'. returns [] if the user is missing.
    """
    from scoutd.deep import check_already_connected

    c = conn.cursor()

    # get user
    c.execute('SELECT * FROM priority_users WHERE id = ?', (user_id,))
    row = c.fetchone()
    if not row:
        return []
    user = dict(row)

    # get user signals (`or []` guards against a stored json null)
    user_signals = set()
    if user.get('signals'):
        signals = json.loads(user['signals']) if isinstance(user['signals'], str) else user['signals']
        user_signals.update(signals or [])

    # also expand interests
    if user.get('interests'):
        interests = json.loads(user['interests']) if isinstance(user['interests'], str) else user['interests']
        user_signals.update(expand_interests_to_signals(interests or []))

    # values that do not change per human
    own_github = (user.get('github') or '').lower()
    own_reddit = (user.get('reddit') or '').lower()
    user_location = (user.get('location') or '').lower()
    user_in_pnw = 'seattle' in user_location or 'pnw' in user_location
    pnw_terms = ('seattle', 'portland', 'pnw', 'washington', 'oregon')
    # (signal on the human, bonus points, reason text)
    signal_bonuses = (
        ('queer', 40, 'queer (rare!)'),
        ('solarpunk', 30, 'solarpunk (rare!)'),
        ('cooperative', 20, 'cooperative (values)'),
    )

    # clear old matches
    c.execute('DELETE FROM priority_matches WHERE priority_user_id = ?', (user_id,))
    conn.commit()

    # get all humans
    c.execute('SELECT * FROM humans WHERE score >= 25')
    columns = [d[0] for d in c.description]

    matches = []
    for row in c.fetchall():
        human = dict(zip(columns, row))

        # skip own profiles
        username = (human.get('username') or '').lower()
        if own_github and username == own_github:
            continue
        if own_reddit and username == own_reddit:
            continue

        # check if already connected
        user_human = {'username': user.get('github'), 'platform': 'github', 'extra': {}}
        connected, reason = check_already_connected(user_human, human)
        if connected:
            continue

        # get human signals. a NULL column or json null becomes an empty
        # list; previously that reached set(None) and raised TypeError
        human_signals = human.get('signals')
        if isinstance(human_signals, str):
            human_signals = json.loads(human_signals) if human_signals else []
        human_signal_set = set(human_signals or [])

        # calculate overlap
        shared = user_signals & human_signal_set
        overlap_score = len(shared) * 10

        # high-value bonuses
        for signal, bonus, label in signal_bonuses:
            if signal in human_signal_set:
                overlap_score += bonus
                shared.add(label)

        # location bonus
        location = (human.get('location') or '').lower()
        if user_in_pnw and any(term in location for term in pnw_terms):
            overlap_score += 25
            shared.add('PNW location!')

        if overlap_score >= min_overlap:
            matches.append({
                'human': human,
                'overlap_score': overlap_score,
                'shared': list(shared),
            })

    # sort and save top matches
    matches.sort(key=lambda x: x['overlap_score'], reverse=True)
    for m in matches[:50]:  # save top 50
        save_priority_match(conn, user_id, m['human']['id'], {
            'overlap_score': m['overlap_score'],
            'overlap_reasons': m['shared'],
        })
    return matches
def update_priority_user_profile(conn, user_id, profile_data):
    """
    update a priority user's profile with new data

    conn: open sqlite3 connection
    user_id: id of the row to update
    profile_data: dict of new values. text fields are only written when the
        value is truthy, so None / '' never overwrite stored data.
        'interests' is written as json whenever the key is present with a
        non-None value (an empty list clears the interests); None is treated
        as "not provided" instead of being stored as json null.

    commits when at least one column changed. always returns True.
    """
    text_fields = ('name', 'email', 'github', 'reddit', 'mastodon', 'lobsters',
                   'matrix', 'lemmy', 'discord', 'bluesky', 'location', 'bio',
                   'looking_for')
    updates = []
    values = []
    for field in text_fields:
        if profile_data.get(field):
            # field names come from the fixed tuple above, never from input
            updates.append(f'{field} = ?')
            values.append(profile_data[field])

    if profile_data.get('interests') is not None:
        updates.append('interests = ?')
        values.append(json.dumps(profile_data['interests']))

    if updates:
        values.append(user_id)
        assignments = ', '.join(updates)
        c = conn.cursor()
        c.execute(f'UPDATE priority_users SET {assignments} WHERE id = ?', values)
        conn.commit()
    return True
def discover_host_user(conn, alias):
    """
    auto-discover a host user by their alias (username).
    scrapes github and discovers all connected social handles.
    also merges in HOST_ env vars from config for manual overrides.

    conn: open sqlite3 connection
    alias: github username to look up

    when github has no such user, the profile is built from the HOST_
    values alone. an existing priority user is recognised by its stored
    github handle (HOST_GITHUB if set, otherwise the alias) and updated;
    otherwise a new one is created. the user is then scored. progress is
    printed along the way.

    returns the priority user id
    """
    from scoutd.github import analyze_github_user
    from config import (HOST_NAME, HOST_EMAIL, HOST_GITHUB, HOST_MASTODON,
                        HOST_REDDIT, HOST_LEMMY, HOST_LOBSTERS, HOST_MATRIX,
                        HOST_DISCORD, HOST_BLUESKY, HOST_LOCATION, HOST_INTERESTS,
                        HOST_LOOKING_FOR)

    print(f"connectd: discovering host user '{alias}'...")

    # scrape github for full profile
    profile = analyze_github_user(alias)
    if profile:
        # only report a find when the scrape actually returned something
        print(f" found: {profile.get('name')} ({alias})")
        print(f" score: {profile.get('score', 0)}, signals: {len(profile.get('signals', []))}")
    else:
        print(f" could not find github user '{alias}'")
        # still create from env vars if no github found
        profile = {'name': HOST_NAME or alias, 'bio': '', 'location': HOST_LOCATION,
                   'contact': {}, 'extra': {'handles': {}}, 'topics': [], 'signals': []}

    # extract contact info (`or {}` tolerates keys that are present but None)
    contact = profile.get('contact') or {}
    extra = profile.get('extra') or {}
    handles = dict(extra.get('handles') or {})

    # merge in HOST_ env vars (override discovered values)
    env_overrides = {
        'mastodon': HOST_MASTODON,
        'reddit': HOST_REDDIT,
        'lemmy': HOST_LEMMY,
        'lobsters': HOST_LOBSTERS,
        'matrix': HOST_MATRIX,
        'discord': HOST_DISCORD,
        'bluesky': HOST_BLUESKY,
    }
    for platform, value in env_overrides.items():
        if value:
            handles[platform] = value

    # check if user already exists.
    # the stored github handle is HOST_GITHUB when set, so look for that as
    # well as the alias; searching by alias alone misses the row on reruns
    github_handle = HOST_GITHUB or alias
    c = conn.cursor()
    c.execute('SELECT id FROM priority_users WHERE github IN (?, ?) ORDER BY id LIMIT 1',
              (github_handle, alias))
    existing = c.fetchone()

    # parse HOST_INTERESTS if provided
    interests = profile.get('topics') or []
    if HOST_INTERESTS:
        interests = [i.strip() for i in HOST_INTERESTS.split(',') if i.strip()]

    user_data = {
        'name': HOST_NAME or profile.get('name') or alias,
        'email': HOST_EMAIL or contact.get('email'),
        'github': github_handle,
        'reddit': handles.get('reddit'),
        'mastodon': handles.get('mastodon') or contact.get('mastodon'),
        'lobsters': handles.get('lobsters'),
        'matrix': handles.get('matrix') or contact.get('matrix'),
        'lemmy': handles.get('lemmy') or contact.get('lemmy'),
        'discord': handles.get('discord'),
        'bluesky': handles.get('bluesky') or contact.get('bluesky'),
        'location': HOST_LOCATION or profile.get('location'),
        'bio': profile.get('bio'),
        'interests': interests,
        'looking_for': HOST_LOOKING_FOR,
    }

    if existing:
        # update existing user (positional access works for tuple rows and
        # for sqlite3.Row alike)
        user_id = existing[0]
        update_priority_user_profile(conn, user_id, user_data)
        print(f" updated existing priority user (id={user_id})")
    else:
        # create new user
        user_id = add_priority_user(conn, user_data)
        print(f" created new priority user (id={user_id})")

    # score the user
    scraped_profile = {
        'top_repos': extra.get('top_repos', []),
        'languages': profile.get('languages', {}),
        'topics': profile.get('topics', []),
        'followers': extra.get('followers', 0),
    }
    score_result = score_priority_user(conn, user_id, scraped_profile)
    if score_result:
        # score_priority_user returns None when it cannot find the user
        print(f" scored: {score_result.get('score')}, {len(score_result.get('signals', []))} signals")

    # print discovered handles
    print(f" discovered handles:")
    for platform, handle in handles.items():
        print(f" {platform}: {handle}")
    return user_id
def get_host_user(conn):
    """
    get the host user (first priority user)

    "first" means the active priority user with the lowest id. the query
    orders explicitly and fetches a single row, instead of loading every
    active user and relying on whatever order sqlite returns them in.

    returns the row as a dict (needs a row factory such as sqlite3.Row),
    or None when there is no active priority user.
    """
    c = conn.cursor()
    c.execute('SELECT * FROM priority_users WHERE active = 1 ORDER BY id LIMIT 1')
    row = c.fetchone()
    return dict(row) if row else None