mirror of
https://github.com/sudoxnym/connectd.git
synced 2026-04-14 19:46:30 +00:00
fix: integrate handle discovery into github scraper
- call discover_all_handles() to follow blog links
- scrape websites for mastodon, bluesky, matrix handles
- store discovered handles in contact field
- fixes contact method detection for outreach

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
d2248282fe
commit
120e4a07e2
1 changed file with 26 additions and 2 deletions
|
|
@@ -19,6 +19,7 @@ from .lost import (
|
||||||
classify_user,
|
classify_user,
|
||||||
get_signal_descriptions,
|
get_signal_descriptions,
|
||||||
)
|
)
|
||||||
|
from .handles import discover_all_handles
|
||||||
|
|
||||||
# rate limit: 60/hr unauthenticated, 5000/hr with token
|
# rate limit: 60/hr unauthenticated, 5000/hr with token
|
||||||
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '')
|
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '')
|
||||||
|
|
@@ -203,6 +204,16 @@ def analyze_github_user(login):
|
||||||
if lost_descriptions:
|
if lost_descriptions:
|
||||||
reasons.append(f"LOST SIGNALS: {', '.join(lost_descriptions[:3])}")
|
reasons.append(f"LOST SIGNALS: {', '.join(lost_descriptions[:3])}")
|
||||||
|
|
||||||
|
# === DEEP HANDLE DISCOVERY ===
|
||||||
|
# follow blog links, scrape websites, find ALL social handles
|
||||||
|
handles, discovered_emails = discover_all_handles(user)
|
||||||
|
|
||||||
|
# merge discovered emails with github email
|
||||||
|
all_emails = discovered_emails or []
|
||||||
|
if user.get('email'):
|
||||||
|
all_emails.append(user['email'])
|
||||||
|
all_emails = list(set(e for e in all_emails if e and 'noreply' not in e.lower()))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'platform': 'github',
|
'platform': 'github',
|
||||||
'username': login,
|
'username': login,
|
||||||
|
|
@@ -220,9 +231,22 @@ def analyze_github_user(login):
|
||||||
'total_stars': total_stars,
|
'total_stars': total_stars,
|
||||||
'reasons': reasons,
|
'reasons': reasons,
|
||||||
'contact': {
|
'contact': {
|
||||||
'email': user.get('email'),
|
'email': all_emails[0] if all_emails else None,
|
||||||
|
'emails': all_emails,
|
||||||
'blog': user.get('blog'),
|
'blog': user.get('blog'),
|
||||||
'twitter': user.get('twitter_username'),
|
'twitter': user.get('twitter_username') or handles.get('twitter'),
|
||||||
|
'mastodon': handles.get('mastodon'),
|
||||||
|
'bluesky': handles.get('bluesky'),
|
||||||
|
'matrix': handles.get('matrix'),
|
||||||
|
'lemmy': handles.get('lemmy'),
|
||||||
|
},
|
||||||
|
'extra': {
|
||||||
|
'topics': list(aligned_topics),
|
||||||
|
'languages': dict(languages),
|
||||||
|
'repo_count': len(repos),
|
||||||
|
'total_stars': total_stars,
|
||||||
|
'hireable': user.get('hireable', False),
|
||||||
|
'handles': handles, # all discovered handles
|
||||||
},
|
},
|
||||||
'hireable': user.get('hireable', False),
|
'hireable': user.get('hireable', False),
|
||||||
'scraped_at': datetime.now().isoformat(),
|
'scraped_at': datetime.now().isoformat(),
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue