# connectd/groq_draft.py

"""
introd/groq_draft.py - groq-hosted llama model (set via GROQ_MODEL) for smart intro drafting

uses groq api to generate personalized, natural intro messages
that don't sound like ai-generated slop
"""

import os
import json
import requests
from datetime import datetime

# groq credentials and model are read from the environment once, at import time
GROQ_API_KEY = os.getenv('GROQ_API_KEY', '')  # empty string means "not configured"
# openai-compatible chat completions endpoint
GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
# model id sent with every request; override with the GROQ_MODEL env var
MODEL = os.getenv('GROQ_MODEL', 'llama-3.1-70b-versatile')


def _as_dict(value):
    """
    return value as a dict

    accepts a dict, a json-encoded string, or None; anything that does not
    end up as a dict becomes {}
    raises json.JSONDecodeError if a non-empty string is not valid json
    """
    if isinstance(value, str):
        value = json.loads(value) if value else {}
    return value if isinstance(value, dict) else {}


def _as_count(value):
    """coerce a scraped counter to a non-negative int; None or junk counts as 0"""
    try:
        return max(int(value), 0)
    except (TypeError, ValueError, OverflowError):
        return 0


def _tier_bonus(count, tiers):
    """return the bonus of the first (threshold, bonus) pair that count exceeds, else 0"""
    for threshold, bonus in tiers:
        if count > threshold:
            return bonus
    return 0


def _push_recency_points(pushed_at, now):
    """
    score how recently a repo was pushed to

    pushed_at: iso-8601 string ('Z' suffix allowed)
    now: timezone-aware utc datetime

    returns 10 (pushed < 30 days ago), 5 (< 90 days ago) or 1 (older, or the
    timestamp cannot be parsed)
    """
    from datetime import datetime, timedelta, timezone

    try:
        pushed = datetime.fromisoformat(pushed_at.replace('Z', '+00:00'))
    except (AttributeError, TypeError, ValueError):
        return 1  # repo exists but its age is unknown
    if pushed.tzinfo is None:
        # naive stamps are treated as utc so they can be compared with `now`
        pushed = pushed.replace(tzinfo=timezone.utc)

    age = now - pushed
    if age < timedelta(days=30):
        return 10  # very recent
    if age < timedelta(days=90):
        return 5   # somewhat recent
    return 1       # old but exists


def _is_usable_email(value):
    """true for a string that looks like an address and is not a noreply alias"""
    return isinstance(value, str) and '@' in value and 'noreply' not in value.lower()


def _first_usable_email(extra, contact):
    """
    return the first usable address, or None

    order: extra.email, contact.email, extra.emails, contact.emails
    every candidate is checked, so a noreply primary address no longer hides
    a real one further down the list
    """
    candidates = [extra.get('email'), contact.get('email')]
    for source in (extra, contact):
        listed = source.get('emails')
        if isinstance(listed, (list, tuple)):
            candidates.extend(listed)
    return next((c for c in candidates if _is_usable_email(c)), None)


def determine_contact_method(human):
    """
    determine best contact method based on WHERE THEY'RE MOST ACTIVE

    don't use fixed hierarchy - analyze activity per platform:
    - count posts/commits/activity
    - weight by recency (last 30 days matters more)
    - contact them where they already are
    - fall back to email only if no social activity

    human: dict describing one person. keys read: platform, username, extra,
    contact, statuses_count, posts_count, karma. extra and contact may be
    dicts, json strings or None

    returns (method, info). method is one of 'github_issue', 'mastodon',
    'bluesky', 'twitter', 'lobsters', 'matrix', 'lemmy', 'email', 'manual';
    info is the handle / "user/repo" / address to use (None for 'manual').
    on equal scores the platform listed first above wins

    raises json.JSONDecodeError if extra or contact is a malformed json string
    """
    from datetime import datetime, timezone

    human = human or {}
    extra = _as_dict(human.get('extra'))

    # handle nested extra.extra from old save format
    nested = extra.get('extra')
    if isinstance(nested, dict):
        extra = {**extra, **nested}

    contact = _as_dict(human.get('contact'))
    platform = human.get('platform')
    username = human.get('username')

    # handles discovered via rel="me" or similar verification
    handles = extra.get('handles')
    if not isinstance(handles, dict):
        handles = {}

    # insertion order matters: max() below keeps the first of equal scores
    activity_scores = {}
    # aware utc "now": push timestamps carry their own offset
    now = datetime.now(timezone.utc)

    # github activity
    github_username = username if platform == 'github' else extra.get('github')
    if github_username:
        raw_repos = extra.get('top_repos')
        top_repos = [r for r in raw_repos if isinstance(r, dict)] if isinstance(raw_repos, (list, tuple)) else []

        # commit activity from deep scrape
        github_score = min(_as_count(extra.get('commit_count')) // 10, 20)
        for repo in top_repos:
            # recent commits weight more
            pushed_at = repo.get('pushed_at')
            if pushed_at:
                github_score += _push_recency_points(pushed_at, now)
            # stars indicate engagement
            github_score += min(_as_count(repo.get('stars')) // 10, 5)

        if github_score > 0:
            first_repo = top_repos[0].get('name') if top_repos else None
            activity_scores['github_issue'] = {
                'score': github_score,
                'info': f"{github_username}/{first_repo}" if first_repo else github_username,
            }

    # mastodon activity
    mastodon_handle = username if platform == 'mastodon' else (extra.get('mastodon') or contact.get('mastodon'))
    if mastodon_handle:
        statuses_count = _as_count(extra.get('mastodon_statuses')) or _as_count(human.get('statuses_count'))
        # platform bonus for fediverse (values-aligned) + post volume
        mastodon_score = 10 + _tier_bonus(statuses_count, ((1000, 30), (500, 20), (100, 10), (0, 5)))
        # having a handle linked from their website = they want to be contacted there
        if handles.get('mastodon') == mastodon_handle:
            mastodon_score += 15  # verified handle bonus
        activity_scores['mastodon'] = {'score': mastodon_score, 'info': mastodon_handle}

    # bluesky activity
    bluesky_handle = username if platform == 'bluesky' else (extra.get('bluesky') or contact.get('bluesky'))
    if bluesky_handle:
        posts_count = _as_count(extra.get('bluesky_posts')) or _as_count(human.get('posts_count'))
        # newer platform, slightly lower base weight
        bluesky_score = 5 + _tier_bonus(posts_count, ((500, 25), (100, 15), (0, 5)))
        activity_scores['bluesky'] = {'score': bluesky_score, 'info': bluesky_handle}

    # twitter activity
    # NOTE(review): unlike the other platforms this never falls back to
    # human['username'] when platform == 'twitter' - confirm that is intended
    twitter_handle = extra.get('twitter') or contact.get('twitter')
    if twitter_handle:
        twitter_score = _tier_bonus(_as_count(extra.get('twitter_tweets')), ((1000, 20), (100, 10), (0, 5)))
        # if we found them via twitter hashtags, they're active there
        if platform == 'twitter':
            twitter_score += 15
        # twitter has no base bonus, so a handle with no activity is skipped
        if twitter_score > 0:
            activity_scores['twitter'] = {'score': twitter_score, 'info': twitter_handle}

    # NOTE: reddit is DISCOVERY ONLY, not a contact method
    # we find users on reddit but reach out via their external links (github, mastodon, etc.)
    # reddit-only users go to manual_queue for review

    # lobsters activity
    lobsters_username = extra.get('lobsters') or contact.get('lobsters')
    if lobsters_username or platform == 'lobsters':
        karma = _as_count(extra.get('lobsters_karma')) or _as_count(human.get('karma'))
        # lobsters is invite-only, high signal
        lobsters_score = 15 + _tier_bonus(karma, ((100, 15), (50, 10), (0, 5)))
        activity_scores['lobsters'] = {'score': lobsters_score, 'info': lobsters_username or username}

    # matrix activity
    matrix_id = extra.get('matrix') or contact.get('matrix')
    if matrix_id:
        # platform bonus for decentralized chat
        matrix_score = 15
        if handles.get('matrix') == matrix_id:
            matrix_score += 10  # verified handle bonus
        activity_scores['matrix'] = {'score': matrix_score, 'info': matrix_id}

    # lemmy activity (fediverse)
    lemmy_username = username if platform == 'lemmy' else extra.get('lemmy')
    if lemmy_username:
        # fediverse platform bonus + post and comment volume
        lemmy_score = 20
        lemmy_score += _tier_bonus(_as_count(extra.get('post_count')), ((100, 15), (50, 10), (10, 5)))
        lemmy_score += _tier_bonus(_as_count(extra.get('comment_count')), ((500, 10), (100, 5)))
        activity_scores['lemmy'] = {'score': lemmy_score, 'info': lemmy_username}

    # pick highest activity platform
    if activity_scores:
        method, details = max(activity_scores.items(), key=lambda item: item[1]['score'])
        return method, details['info']

    # fall back to email ONLY if no social activity detected
    email = _first_usable_email(extra, contact)
    if email:
        return 'email', email

    # last resort: manual
    return 'manual', None


def _json_field(value, fallback):
    """
    decode a stored field that may be a json string, a native value or None

    returns fallback when the value is empty or is not of fallback's type
    raises json.JSONDecodeError (a ValueError) on a malformed json string
    """
    if isinstance(value, str):
        value = json.loads(value) if value else fallback
    return value if isinstance(value, type(fallback)) else fallback


def _profile_summary(person, name, with_url=False):
    """
    render one person's profile as the plain-text block used in the prompt

    person: dict with optional platform, bio, location, signals, extra, url
    name: display name already resolved by the caller
    with_url: append a trailing "url:" line
    """
    extra = _json_field(person.get('extra'), {})
    signals = _json_field(person.get('signals'), [])

    top_repos = extra.get('top_repos')
    repo_count = len(top_repos) if isinstance(top_repos, (list, tuple)) else 0

    # languages may be a {language: amount} dict or a plain list of names;
    # iterating either one yields the names
    languages = extra.get('languages') or []
    if isinstance(languages, str):
        languages = [languages]

    lines = [
        '',
        f"name: {name}",
        f"platform: {person.get('platform', 'unknown')}",
        f"bio: {person.get('bio') or 'no bio'}",
        f"location: {person.get('location') or 'unknown'}",
        f"signals: {', '.join(str(s) for s in signals[:8])}",
        f"repos: {repo_count} public repos",
        f"languages: {', '.join(str(lang) for lang in languages)}",
    ]
    if with_url:
        lines.append(f"url: {person.get('url', '')}")
    lines.append('')
    return '\n'.join(lines)


def draft_intro_with_llm(match_data, recipient='a', dry_run=False):
    """
    use the groq chat model configured in MODEL to draft a personalized intro

    match_data should contain:
    - human_a: the first person
    - human_b: the second person
    - overlap_score: numeric score
    - overlap_reasons: list of why they match

    recipient: 'a' or 'b' - who we're writing to (anything but 'a' means b)
    dry_run: accepted for signature compatibility; this function never reads
    it, so the api is called either way

    returns (result, error), exactly one of which is None. result is a dict
    with draft, model, to, about, overlap_score, contact_method,
    contact_info and generated_at. error is a short string; bad stored json,
    http failures and unexpected responses are all reported this way
    """
    if not GROQ_API_KEY:
        return None, "GROQ_API_KEY not set"

    # determine recipient and other person
    human_a = match_data.get('human_a') or {}
    human_b = match_data.get('human_b') or {}
    to_person, other_person = (human_a, human_b) if recipient == 'a' else (human_b, human_a)

    # build context
    to_name = to_person.get('name') or to_person.get('username') or 'friend'
    other_name = other_person.get('name') or other_person.get('username') or 'someone'

    try:
        overlap_reasons = _json_field(match_data.get('overlap_reasons'), [])
        to_profile = _profile_summary(to_person, to_name)
        other_profile = _profile_summary(other_person, other_name, with_url=True)
    except ValueError as e:
        # malformed json in a stored field: report it like every other failure
        return None, f"invalid match data: {str(e)}"

    # build prompt
    system_prompt = """you are connectd, an ai that connects isolated builders who share values but don't know each other yet.

your job is to write a short, genuine intro message to one person about another person they might want to know.

rules:
- be brief (3-5 sentences max)
- be genuine, not salesy or fake
- focus on WHY they might want to connect, not just WHAT they have in common
- don't be cringe or use buzzwords
- lowercase preferred (casual tone)
- no emojis unless the person's profile suggests they'd like them
- mention specific things from their profiles, not generic "you both like open source"
- end with a simple invitation, not a hard sell
- sign off as "- connectd" (lowercase)

bad examples:
- "I noticed you're both passionate about..." (too formal)
- "You two would be PERFECT for each other!" (too salesy)
- "As a fellow privacy enthusiast..." (cringe)

good examples:
- "hey, saw you're building X. there's someone else working on similar stuff in Y who might be interesting to know."
- "you might want to check out Z's work on federated systems - similar approach to what you're doing with A."
"""

    user_prompt = f"""write an intro message to {to_name} about {other_name}.

RECIPIENT ({to_name}):
{to_profile}

INTRODUCING ({other_name}):
{other_profile}

WHY THEY MATCH (overlap score {match_data.get('overlap_score', 0)}):
{', '.join(str(reason) for reason in overlap_reasons[:5])}

write a short intro message. remember: lowercase, genuine, not salesy."""

    try:
        response = requests.post(
            GROQ_API_URL,
            headers={
                'Authorization': f'Bearer {GROQ_API_KEY}',
                'Content-Type': 'application/json',
            },
            json={
                'model': MODEL,
                'messages': [
                    {'role': 'system', 'content': system_prompt},
                    {'role': 'user', 'content': user_prompt},
                ],
                'temperature': 0.7,
                'max_tokens': 300,
            },
            timeout=30,
        )

        if response.status_code != 200:
            return None, f"groq api error: {response.status_code} - {response.text}"

        data = response.json()
        try:
            draft = data['choices'][0]['message']['content'].strip()
        except (KeyError, IndexError, TypeError, AttributeError):
            return None, "groq error: unexpected response shape"
        if not draft:
            return None, "groq error: empty completion"

        # determine contact method for recipient
        contact_method, contact_info = determine_contact_method(to_person)

        return {
            'draft': draft,
            'model': MODEL,
            'to': to_name,
            'about': other_name,
            'overlap_score': match_data.get('overlap_score', 0),
            'contact_method': contact_method,
            'contact_info': contact_info,
            'generated_at': datetime.now().isoformat(),
        }, None

    except Exception as e:
        # boundary catch: network errors, bad json bodies and contact-method
        # failures all surface as an error string instead of raising
        return None, f"groq error: {str(e)}"


def draft_intro_batch(matches, dry_run=False):
    """
    draft intros in both directions for every match

    returns a list with one dict per match, in input order, with keys:
    - match: the match as passed in
    - intro_to_a / intro_to_b: result of draft_intro_with_llm for each recipient
    - errors: [error_for_a, error_for_b]
    """
    def _draft_pair(match):
        # recipient 'a' is always drafted before recipient 'b'
        to_a, error_a = draft_intro_with_llm(match, recipient='a', dry_run=dry_run)
        to_b, error_b = draft_intro_with_llm(match, recipient='b', dry_run=dry_run)
        return {
            'match': match,
            'intro_to_a': to_a,
            'intro_to_b': to_b,
            'errors': [error_a, error_b],
        }

    return [_draft_pair(match) for match in matches]


def test_groq_connection():
    """
    check that the groq api answers a minimal chat request

    returns (ok, message): ok is True only when the api replies with http 200
    """
    if not GROQ_API_KEY:
        return False, "GROQ_API_KEY not set"

    request_headers = {
        'Authorization': f'Bearer {GROQ_API_KEY}',
        'Content-Type': 'application/json',
    }
    # smallest useful request: one user message, reply capped at 10 tokens
    payload = {
        'model': MODEL,
        'messages': [{'role': 'user', 'content': 'say "ok" and nothing else'}],
        'max_tokens': 10,
    }

    try:
        reply = requests.post(GROQ_API_URL, headers=request_headers, json=payload, timeout=10)
    except Exception as exc:
        return False, f"groq connection error: {exc!s}"

    if reply.status_code != 200:
        return False, f"groq api error: {reply.status_code}"
    return True, "groq api working"