fix: integrate handle discovery into GitHub scraper

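- call discover_all_handles() to follow blog links
- scrape websites for mastodon, bluesky, matrix, and lemmy handles
- merge discovered emails with the github profile email, dropping noreply addresses
- store discovered handles in the contact field (full set kept under extra.handles)
- fixes contact method detection for outreach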

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Your Name 2025-12-15 10:02:30 -06:00
parent d2248282fe
commit 120e4a07e2

View file

@@ -19,6 +19,7 @@ from .lost import (
classify_user, classify_user,
get_signal_descriptions, get_signal_descriptions,
) )
from .handles import discover_all_handles
# rate limit: 60/hr unauthenticated, 5000/hr with token # rate limit: 60/hr unauthenticated, 5000/hr with token
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '') GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '')
@@ -203,6 +204,16 @@ def analyze_github_user(login):
if lost_descriptions: if lost_descriptions:
reasons.append(f"LOST SIGNALS: {', '.join(lost_descriptions[:3])}") reasons.append(f"LOST SIGNALS: {', '.join(lost_descriptions[:3])}")
# === DEEP HANDLE DISCOVERY ===
# follow blog links, scrape websites, find ALL social handles
handles, discovered_emails = discover_all_handles(user)
# merge discovered emails with github email
all_emails = discovered_emails or []
if user.get('email'):
all_emails.append(user['email'])
all_emails = list(set(e for e in all_emails if e and 'noreply' not in e.lower()))
return { return {
'platform': 'github', 'platform': 'github',
'username': login, 'username': login,
@@ -220,9 +231,22 @@ def analyze_github_user(login):
'total_stars': total_stars, 'total_stars': total_stars,
'reasons': reasons, 'reasons': reasons,
'contact': { 'contact': {
'email': user.get('email'), 'email': all_emails[0] if all_emails else None,
'emails': all_emails,
'blog': user.get('blog'), 'blog': user.get('blog'),
'twitter': user.get('twitter_username'), 'twitter': user.get('twitter_username') or handles.get('twitter'),
'mastodon': handles.get('mastodon'),
'bluesky': handles.get('bluesky'),
'matrix': handles.get('matrix'),
'lemmy': handles.get('lemmy'),
},
'extra': {
'topics': list(aligned_topics),
'languages': dict(languages),
'repo_count': len(repos),
'total_stars': total_stars,
'hireable': user.get('hireable', False),
'handles': handles, # all discovered handles
}, },
'hireable': user.get('hireable', False), 'hireable': user.get('hireable', False),
'scraped_at': datetime.now().isoformat(), 'scraped_at': datetime.now().isoformat(),