mirror of
https://github.com/sudoxnym/connectd.git
synced 2026-04-14 11:37:42 +00:00
- add HOST_USER env var for auto-discovery from github - merge HOST_* env vars with scraped profile data - fix countdown timers to use started_at when no cycles run - add lemmy, discord, bluesky fields to priority_users - expand API user endpoint with all platform handles - update HA sensor with full user profile attributes - add HAOS add-on structure for one-click install - update version to 1.1.0 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
491 lines
15 KiB
Python
491 lines
15 KiB
Python
"""
|
|
scoutd/lost.py - lost builder detection

finds people with potential who haven't found it yet, gave up, or are too beaten down to try.

these aren't failures. they're seeds that never got water.

detection signals:
- github: forked but never modified, starred many but built nothing, learning repos abandoned
- reddit/forums: "i wish i could...", stuck asking beginner questions for years, helping others but never sharing
- social: retoots builders but never posts own work, imposter syndrome language, isolation signals
- profiles: bio says what they WANT to be, "aspiring" for 2+ years, empty portfolios

the goal isn't to recruit them. it's to show them the door exists.
|
|
"""
|
|
|
|
import re
|
|
from datetime import datetime, timedelta
|
|
from collections import defaultdict
|
|
|
|
|
|
# signal definitions with weights
#
# each entry maps a signal name to a dict with:
#   'weight'      - points the signal adds to a lost-potential score
#   'category'    - grouping label: 'github', 'language', 'behavior',
#                   'community' or 'profile'
#   'description' - human-readable text returned by get_signal_descriptions()
#   'patterns'    - (optional) regexes; every entry that has this key is
#                   matched against lowercased text by
#                   analyze_text_for_lost_signals()
#   'subreddits'  - (optional, 'stuck_communities' only) list of subreddit names
LOST_SIGNALS = {
    # github signals
    'forked_never_modified': {
        'weight': 15,
        'category': 'github',
        'description': 'forked repos but never pushed changes',
    },
    # analyze_github_for_lost_signals() approximates this one with
    # following > 50 and public_repos <= 2 (no starred count is read there)
    'starred_many_built_nothing': {
        'weight': 20,
        'category': 'github',
        'description': 'starred 50+ repos but has 0-2 own repos',
    },
    'account_no_repos': {
        'weight': 10,
        'category': 'github',
        'description': 'account exists but no public repos',
    },
    # NOTE(review): no analyzer in the reviewed part of this file emits
    # 'inactivity_bursts' or 'only_issues_comments' - confirm whether
    # another module sets them
    'inactivity_bursts': {
        'weight': 15,
        'category': 'github',
        'description': 'long gaps then brief activity bursts',
    },
    'only_issues_comments': {
        'weight': 12,
        'category': 'github',
        'description': 'only activity is issues/comments on others work',
    },
    'abandoned_learning_repos': {
        'weight': 18,
        'category': 'github',
        'description': 'learning/tutorial repos that were never finished',
    },
    'readme_only_repos': {
        'weight': 10,
        'category': 'github',
        'description': 'repos with just README, no actual code',
    },

    # language signals (from posts/comments/bio)
    'wish_i_could': {
        'weight': 12,
        'category': 'language',
        'description': '"i wish i could..." language',
        'patterns': [
            r'i wish i could',
            r'i wish i knew how',
            r'wish i had the (time|energy|motivation|skills?)',
        ],
    },
    'someday_want': {
        'weight': 10,
        'category': 'language',
        'description': '"someday i want to..." language',
        'patterns': [
            r'someday i (want|hope|plan) to',
            r'one day i\'ll',
            r'eventually i\'ll',
            r'when i have time i\'ll',
        ],
    },
    'stuck_beginner': {
        'weight': 20,
        'category': 'language',
        'description': 'asking beginner questions for years',
        'patterns': [
            r'still (trying|learning|struggling) (to|with)',
            r'can\'t seem to (get|understand|figure)',
            r'been trying for (months|years)',
        ],
    },
    'self_deprecating': {
        'weight': 15,
        'category': 'language',
        'description': 'self-deprecating about abilities',
        'patterns': [
            r'i\'m (not smart|too dumb|not good) enough',
            r'i (suck|am terrible) at',
            r'i\'ll never be able to',
            r'people like me (can\'t|don\'t)',
            r'i\'m just not (a|the) (type|kind)',
        ],
    },
    'no_energy': {
        'weight': 18,
        'category': 'language',
        'description': '"how do people have energy" posts',
        'patterns': [
            r'how do (people|you|they) have (the )?(energy|time|motivation)',
            r'where do (people|you|they) find (the )?(energy|motivation)',
            r'i\'m (always|constantly) (tired|exhausted|drained)',
            r'no (energy|motivation) (left|anymore)',
        ],
    },
    'imposter_syndrome': {
        'weight': 15,
        'category': 'language',
        'description': 'imposter syndrome language',
        'patterns': [
            r'imposter syndrome',
            r'feel like (a |an )?(fraud|fake|imposter)',
            r'don\'t (belong|deserve)',
            r'everyone else (seems|is) (so much )?(better|smarter)',
            r'they\'ll (find out|realize) i\'m',
        ],
    },
    'should_really': {
        'weight': 8,
        'category': 'language',
        'description': '"i should really..." posts',
        'patterns': [
            r'i (should|need to) really',
            r'i keep (meaning|wanting) to',
            r'i\'ve been (meaning|wanting) to',
        ],
    },
    'isolation_signals': {
        'weight': 20,
        'category': 'language',
        'description': 'isolation/loneliness language',
        'patterns': [
            r'no one (understands|gets it|to talk to)',
            r'(feel|feeling) (so )?(alone|isolated|lonely)',
            r'don\'t have anyone (to|who)',
            r'wish i (had|knew) (someone|people)',
        ],
    },
    # emitted by the reddit analyzer (helps others, never shares) and the
    # social analyzer (boosts others, never posts own work)
    'enthusiasm_for_others': {
        'weight': 10,
        'category': 'behavior',
        'description': 'celebrates others but dismissive of self',
    },

    # subreddit/community signals
    # the weight below is what calculate_lost_potential_score() uses;
    # analyze_reddit_for_lost_signals() instead adds the summed
    # STUCK_SUBREDDITS weights, capped at 30
    'stuck_communities': {
        'weight': 15,
        'category': 'community',
        'description': 'active in stuck/struggling communities',
        # NOTE(review): this list is not read by the functions in the
        # reviewed part of this file; the reddit analyzer looks names up
        # in STUCK_SUBREDDITS instead - confirm it is still needed
        'subreddits': [
            'learnprogramming',
            'findapath',
            'getdisciplined',
            'getmotivated',
            'decidingtobebetter',
            'selfimprovement',
            'adhd',
            'depression',
            'anxiety',
        ],
    },

    # profile signals
    # NOTE(review): because this entry has 'patterns', it is also matched
    # against every post/comment passed to analyze_text_for_lost_signals(),
    # not only bios; bare words like 'future' match anywhere in the text
    'aspirational_bio': {
        'weight': 12,
        'category': 'profile',
        'description': 'bio says what they WANT to be',
        'patterns': [
            r'aspiring',
            r'future',
            r'want(ing)? to (be|become)',
            r'learning to',
            r'trying to (become|be|learn)',
            r'hoping to',
        ],
    },
    # NOTE(review): no analyzer in the reviewed part of this file emits
    # 'empty_portfolio' or 'long_aspiring' - confirm whether another
    # module sets them
    'empty_portfolio': {
        'weight': 15,
        'category': 'profile',
        'description': 'links to empty portfolio sites',
    },
    'long_aspiring': {
        'weight': 20,
        'category': 'profile',
        'description': '"aspiring" in bio for 2+ years',
    },
}
|
|
|
|
# subreddits that indicate someone might be stuck
#
# maps lowercase subreddit name -> weight. analyze_reddit_for_lost_signals()
# lowercases each subreddit it is given, sums the weights of those found
# here, and reports 'stuck_communities' once the sum reaches 20.
STUCK_SUBREDDITS = {
    'learnprogramming': 8,
    'findapath': 15,
    'getdisciplined': 12,
    'getmotivated': 10,
    'decidingtobebetter': 12,
    'selfimprovement': 8,
    'adhd': 10,
    'depression': 15,
    'anxiety': 12,
    'socialanxiety': 12,
    'neet': 20,
    'lostgeneration': 15,
    'antiwork': 5,  # could be aligned OR stuck
    'careerguidance': 8,
    'cscareerquestions': 5,
}
|
|
|
|
|
|
def analyze_text_for_lost_signals(text):
    """scan text for lost builder language patterns

    the text is lowercased and tested against the 'patterns' regexes of
    every LOST_SIGNALS entry that defines them. a signal is reported at
    most once, however many of its patterns match.

    returns: (signal names in LOST_SIGNALS order, sum of their weights);
    ([], 0) when text is empty or None
    """
    if not text:
        return [], 0

    haystack = text.lower()

    # any() stops at the first matching pattern, so each signal counts once
    matched = [
        name
        for name, spec in LOST_SIGNALS.items()
        if any(re.search(rx, haystack) for rx in spec.get('patterns', ()))
    ]
    score = sum(LOST_SIGNALS[name]['weight'] for name in matched)

    return matched, score
|
|
|
|
|
|
def analyze_github_for_lost_signals(profile):
    """analyze github profile for lost builder signals

    profile: dict of github profile data. keys read here:
        'repos' (falling back to 'top_repos'), 'public_repos',
        'following', 'bio'. each repo dict is read for 'name',
        'description', 'language' and 'fork'. keys that are missing or
        hold None are treated as empty / zero; a missing 'public_repos'
        falls back to the number of repos supplied.

    returns: (signals_found, total_weight); ([], 0) for an empty profile
    """
    signals_found = []
    total_weight = 0

    if not profile:
        return signals_found, total_weight

    # scraped profiles may carry explicit None values, so normalise first
    repos = profile.get('repos') or profile.get('top_repos') or []

    public_repos = profile.get('public_repos')
    if public_repos is None:
        public_repos = len(repos)
    following = profile.get('following') or 0

    # starred many but built nothing
    # (we'd need to fetch starred count separately, approximate with following ratio)
    if public_repos <= 2 and following > 50:
        signals_found.append('starred_many_built_nothing')
        total_weight += LOST_SIGNALS['starred_many_built_nothing']['weight']

    # account but no repos
    if public_repos == 0:
        signals_found.append('account_no_repos')
        total_weight += LOST_SIGNALS['account_no_repos']['weight']

    # check repos for signals
    learning_keywords = ('learning', 'tutorial', 'course', 'practice', 'exercise',
                         'bootcamp', 'udemy', 'freecodecamp', 'odin', 'codecademy')

    forked_count = 0
    learning_repos = 0
    readme_only = 0

    for repo in repos:
        name = (repo.get('name') or '').lower()
        description = (repo.get('description') or '').lower()
        is_fork = repo.get('fork', False)

        # simplified: every fork is counted; whether it was modified after
        # forking is not checked yet
        if is_fork:
            forked_count += 1

        # learning/tutorial repos
        if any(kw in name or kw in description for kw in learning_keywords):
            learning_repos += 1

        # readme only (no language detected usually means no code)
        if not repo.get('language') and not is_fork:
            readme_only += 1

    if forked_count >= 5 and public_repos - forked_count <= 2:
        signals_found.append('forked_never_modified')
        total_weight += LOST_SIGNALS['forked_never_modified']['weight']

    if learning_repos >= 3:
        signals_found.append('abandoned_learning_repos')
        total_weight += LOST_SIGNALS['abandoned_learning_repos']['weight']

    if readme_only >= 2:
        signals_found.append('readme_only_repos')
        total_weight += LOST_SIGNALS['readme_only_repos']['weight']

    # check bio for lost signals. this also covers 'aspirational_bio':
    # that entry defines 'patterns', so the text analyzer already tests
    # them and no separate aspirational check is needed
    bio = profile.get('bio') or ''
    if bio:
        bio_signals, bio_weight = analyze_text_for_lost_signals(bio)
        signals_found.extend(bio_signals)
        total_weight += bio_weight

    return signals_found, total_weight
|
|
|
|
|
|
def analyze_reddit_for_lost_signals(activity, subreddits):
    """analyze reddit activity for lost builder signals

    activity: iterable of post/comment dicts, read for 'title' and
        'body'; None is treated as no activity. it is walked once, so a
        generator is fine.
    subreddits: iterable of subreddit names (matched case-insensitively
        against STUCK_SUBREDDITS); None is treated as empty

    returns: (signals_found, total_weight)
    """
    signals_found = []
    total_weight = 0

    # check subreddit activity
    stuck_sub_activity = sum(
        STUCK_SUBREDDITS.get((sub or '').lower(), 0)
        for sub in (subreddits or ())
    )

    if stuck_sub_activity >= 20:
        signals_found.append('stuck_communities')
        total_weight += min(stuck_sub_activity, 30)  # cap at 30

    help_phrases = ('try this', 'you could', 'have you tried', 'i recommend')
    share_phrases = ('i built', 'i made', 'my project', 'check out my', 'i created')

    # single pass: gather text for pattern analysis and count
    # helping vs sharing behaviour
    all_text = []
    help_count = 0
    share_count = 0
    for item in (activity or ()):
        raw_title = item.get('title') or ''
        raw_body = item.get('body') or ''
        if raw_title:
            all_text.append(raw_title)
        if raw_body:
            all_text.append(raw_body)

        title = raw_title.lower()
        body = raw_body.lower()

        # helping patterns
        if any(p in body for p in help_phrases):
            help_count += 1

        # sharing patterns - body and title are tested separately so a
        # phrase cannot match across the join between the two fields
        if any(p in body or p in title for p in share_phrases):
            share_count += 1

    # analyze post/comment text
    combined_text = ' '.join(all_text)
    if combined_text:
        text_signals, text_weight = analyze_text_for_lost_signals(combined_text)
        signals_found.extend(text_signals)
        total_weight += text_weight

    # helping others but never sharing own work
    if help_count >= 5 and share_count == 0:
        signals_found.append('enthusiasm_for_others')
        total_weight += LOST_SIGNALS['enthusiasm_for_others']['weight']

    return signals_found, total_weight
|
|
|
|
|
|
def analyze_social_for_lost_signals(profile, posts):
    """analyze mastodon/social for lost builder signals

    profile: dict with the bio under 'bio' or 'note'; None is treated as
        an empty profile
    posts: iterable of post dicts, read for 'content', 'reblog' and
        'repost'; None is treated as no posts

    returns: (signals_found, total_weight). text signals found in several
    posts (or in both bio and posts) are only counted once.
    """
    signals_found = []
    total_weight = 0

    # check bio
    profile = profile or {}
    bio = profile.get('bio') or profile.get('note') or ''
    if bio:
        bio_signals, bio_weight = analyze_text_for_lost_signals(bio)
        signals_found.extend(bio_signals)
        total_weight += bio_weight

    own_work_phrases = ('i built', 'i made', 'my project', 'working on', 'just shipped')

    # check posts
    boost_count = 0
    own_work_count = 0

    for post in (posts or ()):
        content = (post.get('content') or '').lower()
        is_boost = post.get('reblog') is not None or post.get('repost')

        # NOTE(review): own-work phrases are only looked for on non-boost
        # posts, while text analysis runs on every post. this nesting was
        # reconstructed from a copy of the file whose indentation was
        # lost - confirm against the upstream source.
        if is_boost:
            boost_count += 1
        elif any(p in content for p in own_work_phrases):
            own_work_count += 1

        # analyze text; a signal already recorded is not counted again
        if content:
            text_signals, _ = analyze_text_for_lost_signals(content)
            for sig in text_signals:
                if sig not in signals_found:
                    signals_found.append(sig)
                    total_weight += LOST_SIGNALS[sig]['weight']

    # boosts builders but never posts own work
    if boost_count >= 10 and own_work_count == 0:
        signals_found.append('enthusiasm_for_others')
        total_weight += LOST_SIGNALS['enthusiasm_for_others']['weight']

    return signals_found, total_weight
|
|
|
|
|
|
def calculate_lost_potential_score(signals_found):
    """add up the LOST_SIGNALS weights for the given signal names

    names that are not in LOST_SIGNALS are ignored; a name that appears
    more than once is counted each time.
    """
    return sum(
        LOST_SIGNALS[name]['weight']
        for name in signals_found
        if name in LOST_SIGNALS
    )
|
|
|
|
|
|
def classify_user(lost_score, builder_score, values_score):
    """
    sort a user into one of four buckets based on their scores

    rules are tried in order and the first match wins:
      'builder' - builder_score >= 50 and lost_score < 30
      'lost'    - lost_score >= 40 and values_score >= 20
      'both'    - lost_score >= 30 and builder_score >= 30
      'none'    - anything else

    returns: 'builder' | 'lost' | 'both' | 'none'
    """
    if builder_score >= 50 and lost_score < 30:
        # strong builder signal without much lost signal = active builder
        label = 'builder'
    elif lost_score >= 40 and values_score >= 20:
        # lost and values-aligned = lost builder (priority outreach)
        label = 'lost'
    elif lost_score >= 30 and builder_score >= 30:
        # some of each = complex case, might be recovering
        label = 'both'
    else:
        label = 'none'

    return label
|
|
|
|
|
|
def get_signal_descriptions(signals_found):
    """translate signal names into their human-readable descriptions

    order follows signals_found; names that are not in LOST_SIGNALS are
    skipped.
    """
    return [
        LOST_SIGNALS[name]['description']
        for name in signals_found
        if name in LOST_SIGNALS
    ]
|
|
|
|
|
|
def should_outreach_lost(user_data, config=None):
    """
    determine if we should reach out to a lost builder

    considers:
    - lost_potential_score threshold
    - values alignment
    - cooldown period
    - manual review requirement

    user_data: dict read for 'lost_potential_score', 'score' (values
        alignment) and 'last_lost_outreach' (ISO-8601 timestamp string).
        scores that are missing or None count as 0.
    config: optional dict overriding 'min_lost_score' (default 40),
        'min_values_score' (default 20) and 'cooldown_days' (default 90)

    returns: (should_outreach, reason). a positive result always carries
    the reason 'requires_review'.

    raises: ValueError if 'last_lost_outreach' is not a valid ISO string
    """
    config = config or {}

    # `or 0` also covers keys that are present but hold None
    lost_score = user_data.get('lost_potential_score') or 0
    values_score = user_data.get('score') or 0  # regular alignment score

    # minimum thresholds
    min_lost = config.get('min_lost_score', 40)
    min_values = config.get('min_values_score', 20)

    if lost_score < min_lost:
        return False, 'lost_score too low'

    if values_score < min_values:
        return False, 'values_score too low'

    # check cooldown
    last_outreach = user_data.get('last_lost_outreach')
    if last_outreach:
        cooldown_days = config.get('cooldown_days', 90)
        last_dt = datetime.fromisoformat(last_outreach)
        # take "now" in the stored timestamp's timezone (naive stays
        # naive): mixing naive and aware datetimes raises TypeError
        now = datetime.now(last_dt.tzinfo)
        if now - last_dt < timedelta(days=cooldown_days):
            return False, f'cooldown active ({cooldown_days} days)'

    # always require manual review for lost outreach
    return True, 'requires_review'
|