"""
scoutd/lost.py - lost builder detection

finds people with potential who haven't found it yet, gave up, or are
too beaten down to try.

these aren't failures. they're seeds that never got water.

detection signals:
- github: forked but never modified, starred many but built nothing,
  learning repos abandoned
- reddit/forums: "i wish i could...", stuck asking beginner questions
  for years, helping others but never sharing
- social: retoots builders but never posts own work, imposter syndrome
  language, isolation signals
- profiles: bio says what they WANT to be, "aspiring" for 2+ years,
  empty portfolios

the goal isn't to recruit them. it's to show them the door exists.
"""

import re
from datetime import datetime, timedelta
from collections import defaultdict

# signal definitions with weights
#
# every entry has 'weight', 'category' and 'description'. entries that also
# carry 'patterns' (regexes, matched against lowercased text) are picked up
# automatically by analyze_text_for_lost_signals(); the rest are set by the
# platform-specific analyzers below.
LOST_SIGNALS = {
    # github signals
    'forked_never_modified': {
        'weight': 15,
        'category': 'github',
        'description': 'forked repos but never pushed changes',
    },
    'starred_many_built_nothing': {
        'weight': 20,
        'category': 'github',
        'description': 'starred 50+ repos but has 0-2 own repos',
    },
    'account_no_repos': {
        'weight': 10,
        'category': 'github',
        'description': 'account exists but no public repos',
    },
    'inactivity_bursts': {
        'weight': 15,
        'category': 'github',
        'description': 'long gaps then brief activity bursts',
    },
    'only_issues_comments': {
        'weight': 12,
        'category': 'github',
        'description': 'only activity is issues/comments on others work',
    },
    'abandoned_learning_repos': {
        'weight': 18,
        'category': 'github',
        'description': 'learning/tutorial repos that were never finished',
    },
    'readme_only_repos': {
        'weight': 10,
        'category': 'github',
        'description': 'repos with just README, no actual code',
    },

    # language signals (from posts/comments/bio)
    'wish_i_could': {
        'weight': 12,
        'category': 'language',
        'description': '"i wish i could..." language',
        'patterns': [
            r'i wish i could',
            r'i wish i knew how',
            r'wish i had the (time|energy|motivation|skills?)',
        ],
    },
    'someday_want': {
        'weight': 10,
        'category': 'language',
        'description': '"someday i want to..." language',
        'patterns': [
            r'someday i (want|hope|plan) to',
            r'one day i\'ll',
            r'eventually i\'ll',
            r'when i have time i\'ll',
        ],
    },
    'stuck_beginner': {
        'weight': 20,
        'category': 'language',
        'description': 'asking beginner questions for years',
        'patterns': [
            r'still (trying|learning|struggling) (to|with)',
            r'can\'t seem to (get|understand|figure)',
            r'been trying for (months|years)',
        ],
    },
    'self_deprecating': {
        'weight': 15,
        'category': 'language',
        'description': 'self-deprecating about abilities',
        'patterns': [
            r'i\'m (not smart|too dumb|not good) enough',
            r'i (suck|am terrible) at',
            r'i\'ll never be able to',
            r'people like me (can\'t|don\'t)',
            r'i\'m just not (a|the) (type|kind)',
        ],
    },
    'no_energy': {
        'weight': 18,
        'category': 'language',
        'description': '"how do people have energy" posts',
        'patterns': [
            r'how do (people|you|they) have (the )?(energy|time|motivation)',
            r'where do (people|you|they) find (the )?(energy|motivation)',
            r'i\'m (always|constantly) (tired|exhausted|drained)',
            r'no (energy|motivation) (left|anymore)',
        ],
    },
    'imposter_syndrome': {
        'weight': 15,
        'category': 'language',
        'description': 'imposter syndrome language',
        'patterns': [
            r'imposter syndrome',
            r'feel like (a |an )?(fraud|fake|imposter)',
            r'don\'t (belong|deserve)',
            r'everyone else (seems|is) (so much )?(better|smarter)',
            r'they\'ll (find out|realize) i\'m',
        ],
    },
    'should_really': {
        'weight': 8,
        'category': 'language',
        'description': '"i should really..." posts',
        'patterns': [
            r'i (should|need to) really',
            r'i keep (meaning|wanting) to',
            r'i\'ve been (meaning|wanting) to',
        ],
    },
    'isolation_signals': {
        'weight': 20,
        'category': 'language',
        'description': 'isolation/loneliness language',
        'patterns': [
            r'no one (understands|gets it|to talk to)',
            r'(feel|feeling) (so )?(alone|isolated|lonely)',
            r'don\'t have anyone (to|who)',
            r'wish i (had|knew) (someone|people)',
        ],
    },
    'enthusiasm_for_others': {
        'weight': 10,
        'category': 'behavior',
        'description': 'celebrates others but dismissive of self',
    },

    # subreddit/community signals
    'stuck_communities': {
        'weight': 15,
        'category': 'community',
        'description': 'active in stuck/struggling communities',
        'subreddits': [
            'learnprogramming',
            'findapath',
            'getdisciplined',
            'getmotivated',
            'decidingtobebetter',
            'selfimprovement',
            'adhd',
            'depression',
            'anxiety',
        ],
    },

    # profile signals
    'aspirational_bio': {
        'weight': 12,
        'category': 'profile',
        'description': 'bio says what they WANT to be',
        'patterns': [
            r'aspiring',
            r'future',
            r'want(ing)? to (be|become)',
            r'learning to',
            r'trying to (become|be|learn)',
            r'hoping to',
        ],
    },
    'empty_portfolio': {
        'weight': 15,
        'category': 'profile',
        'description': 'links to empty portfolio sites',
    },
    'long_aspiring': {
        'weight': 20,
        'category': 'profile',
        'description': '"aspiring" in bio for 2+ years',
    },
}

# subreddits that indicate someone might be stuck (name -> activity weight)
STUCK_SUBREDDITS = {
    'learnprogramming': 8,
    'findapath': 15,
    'getdisciplined': 12,
    'getmotivated': 10,
    'decidingtobebetter': 12,
    'selfimprovement': 8,
    'adhd': 10,
    'depression': 15,
    'anxiety': 12,
    'socialanxiety': 12,
    'neet': 20,
    'lostgeneration': 15,
    'antiwork': 5,  # could be aligned OR stuck
    'careerguidance': 8,
    'cscareerquestions': 5,
}

# substrings (matched against lowercased text) used by the analyzers below
_LEARNING_KEYWORDS = (
    'learning', 'tutorial', 'course', 'practice', 'exercise',
    'bootcamp', 'udemy', 'freecodecamp', 'odin', 'codecademy',
)
_HELPING_PHRASES = ('try this', 'you could', 'have you tried', 'i recommend')
_REDDIT_SHARING_PHRASES = (
    'i built', 'i made', 'my project', 'check out my', 'i created',
)
_SOCIAL_OWN_WORK_PHRASES = (
    'i built', 'i made', 'my project', 'working on', 'just shipped',
)


def analyze_text_for_lost_signals(text):
    """analyze text for lost builder language patterns

    args:
        text: free-form text (bio, post, comment). falsy input yields no
            signals.

    returns:
        (signals_found, total_weight): names of every LOST_SIGNALS entry
        that has 'patterns' and matched the lowercased text, in
        LOST_SIGNALS order, plus the sum of their weights.
    """
    if not text:
        return [], 0

    text_lower = text.lower()
    signals_found = []
    total_weight = 0

    for signal_name, signal_data in LOST_SIGNALS.items():
        patterns = signal_data.get('patterns')
        if not patterns:
            continue
        # any() stops at the first hit, so each signal counts only once
        if any(re.search(pattern, text_lower) for pattern in patterns):
            signals_found.append(signal_name)
            total_weight += signal_data['weight']

    return signals_found, total_weight


def analyze_github_for_lost_signals(profile):
    """analyze github profile for lost builder signals

    args:
        profile: dict-like github profile. keys read: 'repos' (falling back
            to 'top_repos'), 'public_repos', 'following', 'bio'. each repo
            is a dict read for 'name', 'description', 'language', 'fork'.
            missing keys and keys stored as None are treated as empty/zero.

    returns:
        (signals_found, total_weight)
    """
    signals_found = []
    total_weight = 0

    if not profile:
        return signals_found, total_weight

    # a key that is present but None must not crash len() or the comparisons
    repos = profile.get('repos') or profile.get('top_repos') or []
    public_repos = profile.get('public_repos')
    if public_repos is None:
        public_repos = len(repos)
    following = profile.get('following') or 0

    # starred many but built nothing
    # (we'd need to fetch starred count separately, approximate with
    # following ratio)
    if public_repos <= 2 and following > 50:
        signals_found.append('starred_many_built_nothing')
        total_weight += LOST_SIGNALS['starred_many_built_nothing']['weight']

    # account but no repos
    if public_repos == 0:
        signals_found.append('account_no_repos')
        total_weight += LOST_SIGNALS['account_no_repos']['weight']

    # check repos for signals
    forked_count = 0
    learning_repos = 0
    readme_only = 0

    for repo in repos:
        name = (repo.get('name') or '').lower()
        description = (repo.get('description') or '').lower()
        is_fork = repo.get('fork', False)

        # forked but never modified
        # (simplified: just count forks for now; comparing pushed_at with
        # created_at would tell whether the fork was ever touched)
        if is_fork:
            forked_count += 1

        # learning/tutorial repos
        if any(kw in name or kw in description for kw in _LEARNING_KEYWORDS):
            learning_repos += 1

        # readme only (no language detected usually means no code)
        if not repo.get('language') and not is_fork:
            readme_only += 1

    if forked_count >= 5 and public_repos - forked_count <= 2:
        signals_found.append('forked_never_modified')
        total_weight += LOST_SIGNALS['forked_never_modified']['weight']

    if learning_repos >= 3:
        signals_found.append('abandoned_learning_repos')
        total_weight += LOST_SIGNALS['abandoned_learning_repos']['weight']

    if readme_only >= 2:
        signals_found.append('readme_only_repos')
        total_weight += LOST_SIGNALS['readme_only_repos']['weight']

    # check bio for lost signals. 'aspirational_bio' carries 'patterns', so
    # the text analyzer already covers it and no separate bio check is needed
    bio = profile.get('bio') or ''
    bio_signals, bio_weight = analyze_text_for_lost_signals(bio)
    signals_found.extend(bio_signals)
    total_weight += bio_weight

    return signals_found, total_weight


def analyze_reddit_for_lost_signals(activity, subreddits):
    """analyze reddit activity for lost builder signals

    args:
        activity: iterable of post/comment dicts read for 'title' and
            'body'. None is treated as empty.
        subreddits: iterable of subreddit names, matched case-insensitively
            against STUCK_SUBREDDITS. None is treated as empty.

    returns:
        (signals_found, total_weight). note that 'stuck_communities'
        contributes the summed subreddit weights (capped at 30) here, not
        the fixed weight from LOST_SIGNALS.
    """
    activity = activity or []
    subreddits = subreddits or []

    signals_found = []
    total_weight = 0

    # check subreddit activity
    stuck_sub_activity = sum(
        STUCK_SUBREDDITS.get(sub.lower(), 0) for sub in subreddits
    )
    if stuck_sub_activity >= 20:
        signals_found.append('stuck_communities')
        total_weight += min(stuck_sub_activity, 30)  # cap at 30

    # analyze post/comment text
    all_text = []
    for item in activity:
        if item.get('title'):
            all_text.append(item['title'])
        if item.get('body'):
            all_text.append(item['body'])

    text_signals, text_weight = analyze_text_for_lost_signals(
        ' '.join(all_text)
    )
    signals_found.extend(text_signals)
    total_weight += text_weight

    # check for helping others but never sharing own work
    help_count = 0
    share_count = 0
    for item in activity:
        body = (item.get('body') or '').lower()
        title = (item.get('title') or '').lower()

        # helping patterns
        if any(p in body for p in _HELPING_PHRASES):
            help_count += 1

        # sharing patterns
        if any(p in body + title for p in _REDDIT_SHARING_PHRASES):
            share_count += 1

    if help_count >= 5 and share_count == 0:
        signals_found.append('enthusiasm_for_others')
        total_weight += LOST_SIGNALS['enthusiasm_for_others']['weight']

    return signals_found, total_weight


def analyze_social_for_lost_signals(profile, posts):
    """analyze mastodon/social for lost builder signals

    args:
        profile: dict-like profile read for 'bio' (falling back to 'note').
            None is treated as empty.
        posts: iterable of post dicts read for 'content', 'reblog' and
            'repost'. None is treated as empty.

    returns:
        (signals_found, total_weight); each signal is counted at most once
        across the bio and all posts.
    """
    profile = profile or {}
    posts = posts or []

    signals_found = []
    total_weight = 0

    # check bio
    bio = profile.get('bio') or profile.get('note') or ''
    bio_signals, bio_weight = analyze_text_for_lost_signals(bio)
    signals_found.extend(bio_signals)
    total_weight += bio_weight

    # check posts
    boost_count = 0
    own_work_count = 0

    for post in posts:
        content = (post.get('content') or '').lower()
        is_boost = post.get('reblog') is not None or post.get('repost')

        if is_boost:
            boost_count += 1
        elif any(p in content for p in _SOCIAL_OWN_WORK_PHRASES):
            # only non-boosted posts can count as sharing own work
            own_work_count += 1

        # analyze text, adding only signals not already seen
        text_signals, _ = analyze_text_for_lost_signals(content)
        for sig in text_signals:
            if sig not in signals_found:
                signals_found.append(sig)
                total_weight += LOST_SIGNALS[sig]['weight']

    # boosts builders but never posts own work
    if boost_count >= 10 and own_work_count == 0:
        signals_found.append('enthusiasm_for_others')
        total_weight += LOST_SIGNALS['enthusiasm_for_others']['weight']

    return signals_found, total_weight


def calculate_lost_potential_score(signals_found):
    """calculate overall lost potential score from signals

    sums the LOST_SIGNALS weight of every name in signals_found; unknown
    names are ignored and repeated names count each time they appear.
    """
    return sum(
        LOST_SIGNALS[signal]['weight']
        for signal in signals_found
        if signal in LOST_SIGNALS
    )


def classify_user(lost_score, builder_score, values_score):
    """
    classify user as builder, lost, or neither

    checks run in priority order, first match wins.

    returns: 'builder' | 'lost' | 'both' | 'none'
    """
    # high builder score = active builder
    if builder_score >= 50 and lost_score < 30:
        return 'builder'

    # high lost score + values alignment = lost builder (priority outreach)
    if lost_score >= 40 and values_score >= 20:
        return 'lost'

    # both signals = complex case, might be recovering
    if lost_score >= 30 and builder_score >= 30:
        return 'both'

    return 'none'


def get_signal_descriptions(signals_found):
    """get human-readable descriptions of detected signals

    unknown signal names are skipped; order follows signals_found.
    """
    return [
        LOST_SIGNALS[signal]['description']
        for signal in signals_found
        if signal in LOST_SIGNALS
    ]


def should_outreach_lost(user_data, config=None):
    """
    determine if we should reach out to a lost builder

    considers:
    - lost_potential_score threshold
    - values alignment
    - cooldown period
    - manual review requirement

    args:
        user_data: dict read for 'lost_potential_score', 'score' and
            'last_lost_outreach' (ISO-8601 string, naive or tz-aware).
        config: optional dict with 'min_lost_score' (default 40),
            'min_values_score' (default 20), 'cooldown_days' (default 90).

    returns:
        (ok, reason): ok is False with an explanation when a threshold or
        the cooldown blocks outreach, otherwise (True, 'requires_review').

    raises:
        ValueError: if 'last_lost_outreach' is not a valid ISO-8601 string.
    """
    config = config or {}

    # a score stored as None counts as 0 instead of crashing the comparison
    lost_score = user_data.get('lost_potential_score') or 0
    values_score = user_data.get('score') or 0  # regular alignment score

    # minimum thresholds
    min_lost = config.get('min_lost_score', 40)
    min_values = config.get('min_values_score', 20)

    if lost_score < min_lost:
        return False, 'lost_score too low'

    if values_score < min_values:
        return False, 'values_score too low'

    # check cooldown
    last_outreach = user_data.get('last_lost_outreach')
    if last_outreach:
        cooldown_days = config.get('cooldown_days', 90)
        last_dt = datetime.fromisoformat(last_outreach)
        # take "now" in the timestamp's own tz: naive stays naive, aware
        # stays aware, so the subtraction never mixes the two kinds
        now = datetime.now(tz=last_dt.tzinfo)
        if now - last_dt < timedelta(days=cooldown_days):
            # report the configured window, not a hard-coded 90
            return False, f'cooldown active ({cooldown_days} days)'

    # always require manual review for lost outreach
    return True, 'requires_review'