From 120e4a07e2f79a27a7c4571fdd08c110d0087595 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 15 Dec 2025 10:02:30 -0600 Subject: [PATCH] fix: integrate handle discovery into github scraper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - call discover_all_handles() to follow blog links - scrape websites for mastodon, bluesky, matrix handles - store discovered handles in contact field - fixes contact method detection for outreach 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- scoutd/github.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/scoutd/github.py b/scoutd/github.py index 91273cc..b6c3084 100644 --- a/scoutd/github.py +++ b/scoutd/github.py @@ -19,6 +19,7 @@ from .lost import ( classify_user, get_signal_descriptions, ) +from .handles import discover_all_handles # rate limit: 60/hr unauthenticated, 5000/hr with token GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '') @@ -203,6 +204,16 @@ def analyze_github_user(login): if lost_descriptions: reasons.append(f"LOST SIGNALS: {', '.join(lost_descriptions[:3])}") + # === DEEP HANDLE DISCOVERY === + # follow blog links, scrape websites, find ALL social handles + handles, discovered_emails = discover_all_handles(user) + + # merge discovered emails with github email + all_emails = discovered_emails or [] + if user.get('email'): + all_emails.append(user['email']) + all_emails = list(set(e for e in all_emails if e and 'noreply' not in e.lower())) + return { 'platform': 'github', 'username': login, @@ -220,9 +231,22 @@ def analyze_github_user(login): 'total_stars': total_stars, 'reasons': reasons, 'contact': { - 'email': user.get('email'), + 'email': all_emails[0] if all_emails else None, + 'emails': all_emails, 'blog': user.get('blog'), - 'twitter': user.get('twitter_username'), + 'twitter': user.get('twitter_username') or handles.get('twitter'), + 'mastodon': handles.get('mastodon'), + 'bluesky': handles.get('bluesky'), + 'matrix': handles.get('matrix'), + 'lemmy': handles.get('lemmy'), + }, + 'extra': { + 'topics': list(aligned_topics), + 'languages': dict(languages), + 'repo_count': len(repos), + 'total_stars': total_stars, + 'hireable': user.get('hireable', False), + 'handles': handles, # all discovered handles }, 'hireable': user.get('hireable', False), 'scraped_at': datetime.now().isoformat(),