# connectd/haos-addon/scoutd/lost.py
"""
scoutd/lost.py - lost builder detection
finds people with potential who haven't found it yet, gave up, or are too beaten down to try.
these aren't failures. they're seeds that never got water.
detection signals:
- github: forked but never modified, starred many but built nothing, learning repos abandoned
- reddit/forums: "i wish i could...", stuck asking beginner questions for years, helping others but never sharing
- social: retoots builders but never posts own work, imposter syndrome language, isolation signals
- profiles: bio says what they WANT to be, "aspiring" for 2+ years, empty portfolios
the goal isn't to recruit them. it's to show them the door exists.
"""
import re
from datetime import datetime, timedelta
from collections import defaultdict
# signal definitions with weights
# signal definitions with weights
# each entry carries:
#   'weight'      - points contributed to the lost-potential score
#   'category'    - signal source: 'github' | 'language' | 'behavior' | 'community' | 'profile'
#   'description' - human-readable explanation (see get_signal_descriptions)
#   'patterns'    - (language/profile signals only) regexes matched against lowercased text
#                   by analyze_text_for_lost_signals; each signal counts at most once per text
LOST_SIGNALS = {
    # github signals - detected structurally in analyze_github_for_lost_signals
    'forked_never_modified': {
        'weight': 15,
        'category': 'github',
        'description': 'forked repos but never pushed changes',
    },
    'starred_many_built_nothing': {
        'weight': 20,
        'category': 'github',
        'description': 'starred 50+ repos but has 0-2 own repos',
    },
    'account_no_repos': {
        'weight': 10,
        'category': 'github',
        'description': 'account exists but no public repos',
    },
    'inactivity_bursts': {
        'weight': 15,
        'category': 'github',
        'description': 'long gaps then brief activity bursts',
    },
    'only_issues_comments': {
        'weight': 12,
        'category': 'github',
        'description': 'only activity is issues/comments on others work',
    },
    'abandoned_learning_repos': {
        'weight': 18,
        'category': 'github',
        'description': 'learning/tutorial repos that were never finished',
    },
    'readme_only_repos': {
        'weight': 10,
        'category': 'github',
        'description': 'repos with just README, no actual code',
    },
    # language signals (from posts/comments/bio)
    'wish_i_could': {
        'weight': 12,
        'category': 'language',
        'description': '"i wish i could..." language',
        'patterns': [
            r'i wish i could',
            r'i wish i knew how',
            r'wish i had the (time|energy|motivation|skills?)',
        ],
    },
    'someday_want': {
        'weight': 10,
        'category': 'language',
        'description': '"someday i want to..." language',
        'patterns': [
            r'someday i (want|hope|plan) to',
            r'one day i\'ll',
            r'eventually i\'ll',
            r'when i have time i\'ll',
        ],
    },
    'stuck_beginner': {
        'weight': 20,
        'category': 'language',
        'description': 'asking beginner questions for years',
        'patterns': [
            r'still (trying|learning|struggling) (to|with)',
            r'can\'t seem to (get|understand|figure)',
            r'been trying for (months|years)',
        ],
    },
    'self_deprecating': {
        'weight': 15,
        'category': 'language',
        'description': 'self-deprecating about abilities',
        'patterns': [
            r'i\'m (not smart|too dumb|not good) enough',
            r'i (suck|am terrible) at',
            r'i\'ll never be able to',
            r'people like me (can\'t|don\'t)',
            r'i\'m just not (a|the) (type|kind)',
        ],
    },
    'no_energy': {
        'weight': 18,
        'category': 'language',
        'description': '"how do people have energy" posts',
        'patterns': [
            r'how do (people|you|they) have (the )?(energy|time|motivation)',
            r'where do (people|you|they) find (the )?(energy|motivation)',
            r'i\'m (always|constantly) (tired|exhausted|drained)',
            r'no (energy|motivation) (left|anymore)',
        ],
    },
    'imposter_syndrome': {
        'weight': 15,
        'category': 'language',
        'description': 'imposter syndrome language',
        'patterns': [
            r'imposter syndrome',
            r'feel like (a |an )?(fraud|fake|imposter)',
            r'don\'t (belong|deserve)',
            r'everyone else (seems|is) (so much )?(better|smarter)',
            r'they\'ll (find out|realize) i\'m',
        ],
    },
    'should_really': {
        'weight': 8,
        'category': 'language',
        'description': '"i should really..." posts',
        'patterns': [
            r'i (should|need to) really',
            r'i keep (meaning|wanting) to',
            r'i\'ve been (meaning|wanting) to',
        ],
    },
    'isolation_signals': {
        'weight': 20,
        'category': 'language',
        'description': 'isolation/loneliness language',
        'patterns': [
            r'no one (understands|gets it|to talk to)',
            r'(feel|feeling) (so )?(alone|isolated|lonely)',
            r'don\'t have anyone (to|who)',
            r'wish i (had|knew) (someone|people)',
        ],
    },
    # behavior signal - detected from help/share ratios, not text patterns
    'enthusiasm_for_others': {
        'weight': 10,
        'category': 'behavior',
        'description': 'celebrates others but dismissive of self',
    },
    # subreddit/community signals
    'stuck_communities': {
        'weight': 15,
        'category': 'community',
        'description': 'active in stuck/struggling communities',
        'subreddits': [
            'learnprogramming',
            'findapath',
            'getdisciplined',
            'getmotivated',
            'decidingtobebetter',
            'selfimprovement',
            'adhd',
            'depression',
            'anxiety',
        ],
    },
    # profile signals
    'aspirational_bio': {
        'weight': 12,
        'category': 'profile',
        'description': 'bio says what they WANT to be',
        'patterns': [
            r'aspiring',
            r'future',
            r'want(ing)? to (be|become)',
            r'learning to',
            r'trying to (become|be|learn)',
            r'hoping to',
        ],
    },
    'empty_portfolio': {
        'weight': 15,
        'category': 'profile',
        'description': 'links to empty portfolio sites',
    },
    'long_aspiring': {
        'weight': 20,
        'category': 'profile',
        'description': '"aspiring" in bio for 2+ years',
    },
}
# subreddits that indicate someone might be stuck
STUCK_SUBREDDITS = {
'learnprogramming': 8,
'findapath': 15,
'getdisciplined': 12,
'getmotivated': 10,
'decidingtobebetter': 12,
'selfimprovement': 8,
'adhd': 10,
'depression': 15,
'anxiety': 12,
'socialanxiety': 12,
'neet': 20,
'lostgeneration': 15,
'antiwork': 5, # could be aligned OR stuck
'careerguidance': 8,
'cscareerquestions': 5,
}
def analyze_text_for_lost_signals(text):
    """scan a piece of text for lost-builder language patterns.

    matching is case-insensitive (text is lowercased first). each signal
    contributes its weight at most once, no matter how many of its
    patterns match.

    returns (signal_names, total_weight); ([], 0) for empty/None input.
    """
    if not text:
        return [], 0
    lowered = text.lower()
    hits = []
    weight = 0
    for name, data in LOST_SIGNALS.items():
        patterns = data.get('patterns')
        if not patterns:
            # structural signals (github/behavior/...) carry no regexes
            continue
        if any(re.search(pattern, lowered) for pattern in patterns):
            hits.append(name)
            weight += data['weight']
    return hits, weight
def analyze_github_for_lost_signals(profile):
    """analyze a github profile for lost builder signals.

    profile: dict with optional keys 'repos'/'top_repos' (lists of repo
    dicts with 'name', 'description', 'language', 'fork'), 'public_repos',
    'following', 'bio'. falsy profile returns ([], 0).

    returns (signals_found, total_weight).

    fix: removed unused locals (`extra`, `followers`, `forked_modified`)
    that were assigned but never read.
    """
    signals_found = []
    total_weight = 0
    if not profile:
        return signals_found, total_weight
    repos = profile.get('repos', []) or profile.get('top_repos', [])
    # public_repos may exceed len(repos) when only top repos were fetched
    public_repos = profile.get('public_repos', len(repos))
    following = profile.get('following', 0)
    # starred many but built nothing
    # (we'd need to fetch starred count separately, approximate with following ratio)
    if public_repos <= 2 and following > 50:
        signals_found.append('starred_many_built_nothing')
        total_weight += LOST_SIGNALS['starred_many_built_nothing']['weight']
    # account but no repos
    if public_repos == 0:
        signals_found.append('account_no_repos')
        total_weight += LOST_SIGNALS['account_no_repos']['weight']
    # check repos for signals
    forked_count = 0
    learning_repos = 0
    readme_only = 0
    learning_keywords = ['learning', 'tutorial', 'course', 'practice', 'exercise',
                         'bootcamp', 'udemy', 'freecodecamp', 'odin', 'codecademy']
    for repo in repos:
        name = (repo.get('name') or '').lower()
        description = (repo.get('description') or '').lower()
        language = repo.get('language')
        is_fork = repo.get('fork', False)
        # forked but never modified
        # TODO: compare pushed_at vs created_at to detect real modification;
        # for now every fork counts
        if is_fork:
            forked_count += 1
        # learning/tutorial repos
        if any(kw in name or kw in description for kw in learning_keywords):
            learning_repos += 1
        # readme only (no language detected usually means no code)
        if not language and not is_fork:
            readme_only += 1
    if forked_count >= 5 and public_repos - forked_count <= 2:
        signals_found.append('forked_never_modified')
        total_weight += LOST_SIGNALS['forked_never_modified']['weight']
    if learning_repos >= 3:
        signals_found.append('abandoned_learning_repos')
        total_weight += LOST_SIGNALS['abandoned_learning_repos']['weight']
    if readme_only >= 2:
        signals_found.append('readme_only_repos')
        total_weight += LOST_SIGNALS['readme_only_repos']['weight']
    # check bio for lost language signals
    bio = profile.get('bio') or ''
    bio_signals, bio_weight = analyze_text_for_lost_signals(bio)
    signals_found.extend(bio_signals)
    total_weight += bio_weight
    # aspirational bio check (guarded: the text scan above may already have added it)
    bio_lower = bio.lower()
    if any(re.search(p, bio_lower) for p in LOST_SIGNALS['aspirational_bio']['patterns']):
        if 'aspirational_bio' not in signals_found:
            signals_found.append('aspirational_bio')
            total_weight += LOST_SIGNALS['aspirational_bio']['weight']
    return signals_found, total_weight
def analyze_reddit_for_lost_signals(activity, subreddits):
"""analyze reddit activity for lost builder signals"""
signals_found = []
total_weight = 0
# check subreddit activity
stuck_sub_activity = 0
for sub in subreddits:
if sub.lower() in STUCK_SUBREDDITS:
stuck_sub_activity += STUCK_SUBREDDITS[sub.lower()]
if stuck_sub_activity >= 20:
signals_found.append('stuck_communities')
total_weight += min(stuck_sub_activity, 30) # cap at 30
# analyze post/comment text
all_text = []
for item in activity:
if item.get('title'):
all_text.append(item['title'])
if item.get('body'):
all_text.append(item['body'])
combined_text = ' '.join(all_text)
text_signals, text_weight = analyze_text_for_lost_signals(combined_text)
signals_found.extend(text_signals)
total_weight += text_weight
# check for helping others but never sharing own work
help_count = 0
share_count = 0
for item in activity:
body = (item.get('body') or '').lower()
title = (item.get('title') or '').lower()
# helping patterns
if any(p in body for p in ['try this', 'you could', 'have you tried', 'i recommend']):
help_count += 1
# sharing patterns
if any(p in body + title for p in ['i built', 'i made', 'my project', 'check out my', 'i created']):
share_count += 1
if help_count >= 5 and share_count == 0:
signals_found.append('enthusiasm_for_others')
total_weight += LOST_SIGNALS['enthusiasm_for_others']['weight']
return signals_found, total_weight
def analyze_social_for_lost_signals(profile, posts):
"""analyze mastodon/social for lost builder signals"""
signals_found = []
total_weight = 0
# check bio
bio = profile.get('bio') or profile.get('note') or ''
bio_signals, bio_weight = analyze_text_for_lost_signals(bio)
signals_found.extend(bio_signals)
total_weight += bio_weight
# check posts
boost_count = 0
original_count = 0
own_work_count = 0
for post in posts:
content = (post.get('content') or '').lower()
is_boost = post.get('reblog') is not None or post.get('repost')
if is_boost:
boost_count += 1
else:
original_count += 1
# check if sharing own work
if any(p in content for p in ['i built', 'i made', 'my project', 'working on', 'just shipped']):
own_work_count += 1
# analyze text
text_signals, text_weight = analyze_text_for_lost_signals(content)
for sig in text_signals:
if sig not in signals_found:
signals_found.append(sig)
total_weight += LOST_SIGNALS[sig]['weight']
# boosts builders but never posts own work
if boost_count >= 10 and own_work_count == 0:
signals_found.append('enthusiasm_for_others')
total_weight += LOST_SIGNALS['enthusiasm_for_others']['weight']
return signals_found, total_weight
def calculate_lost_potential_score(signals_found):
    """sum the weights of every recognized signal name.

    unknown signal names are silently skipped.
    """
    return sum(
        LOST_SIGNALS[name]['weight']
        for name in signals_found
        if name in LOST_SIGNALS
    )
def classify_user(lost_score, builder_score, values_score):
    """
    classify user as builder, lost, or neither

    checks run in priority order: active builder, then lost builder
    (priority outreach), then the mixed/recovering case.

    returns: 'builder' | 'lost' | 'both' | 'none'
    """
    # high builder score with low lost score = active builder
    if builder_score >= 50 and lost_score < 30:
        return 'builder'
    # high lost score + values alignment = lost builder (priority outreach)
    if lost_score >= 40 and values_score >= 20:
        return 'lost'
    # both signals = complex case, might be recovering
    if min(lost_score, builder_score) >= 30:
        return 'both'
    return 'none'
def get_signal_descriptions(signals_found):
    """map detected signal names to their human-readable descriptions.

    unknown signal names are skipped; order follows the input list.
    """
    return [
        LOST_SIGNALS[name]['description']
        for name in signals_found
        if name in LOST_SIGNALS
    ]
def should_outreach_lost(user_data, config=None):
    """
    determine if we should reach out to a lost builder

    considers:
    - lost_potential_score threshold (config 'min_lost_score', default 40)
    - values alignment (config 'min_values_score', default 20)
    - cooldown period (config 'cooldown_days', default 90)
    - manual review requirement (a True result still means review first)

    user_data keys read: 'lost_potential_score', 'score',
    'last_lost_outreach' (ISO-format timestamp or absent).

    returns (bool, reason_string).

    fix: the cooldown rejection message hard-coded "90 days" even when
    config overrode cooldown_days; it now reports the value actually used.
    """
    config = config or {}
    lost_score = user_data.get('lost_potential_score', 0)
    values_score = user_data.get('score', 0)  # regular alignment score
    # minimum thresholds
    min_lost = config.get('min_lost_score', 40)
    min_values = config.get('min_values_score', 20)
    if lost_score < min_lost:
        return False, 'lost_score too low'
    if values_score < min_values:
        return False, 'values_score too low'
    # check cooldown
    last_outreach = user_data.get('last_lost_outreach')
    if last_outreach:
        cooldown_days = config.get('cooldown_days', 90)
        last_dt = datetime.fromisoformat(last_outreach)
        if datetime.now() - last_dt < timedelta(days=cooldown_days):
            return False, f'cooldown active ({cooldown_days} days)'
    # always require manual review for lost outreach
    return True, 'requires_review'