connectd/matchd/rank.py

"""
matchd/rank.py - score and rank match quality
"""

from itertools import combinations
from .fingerprint import generate_fingerprint
from .overlap import find_overlap, is_same_person
from scoutd.deep import check_already_connected


def rank_matches(matches):
    """
    rank a list of matches by quality

    every match dict gets a 'quality_score' key written onto it (the input
    dicts are mutated in place). the score starts at the match's
    'overlap_score' and is multiplied by a bonus for each signal present:
      - x1.2 when 'geographic_match' is truthy
      - x1.3 when 'fingerprint_similarity' is above 0.7
      - x1.1 when 'complementary_skills' has 3 or more entries

    a key that is missing and a key explicitly set to None are treated the
    same way (score 0 / no bonus) instead of raising TypeError

    returns a new list holding the same dicts, highest quality_score first
    """
    ranked = []

    for match in matches:
        # base score from overlap; .get()'s default only covers a missing
        # key, so an explicit None has to be normalised separately
        score = match.get('overlap_score')
        if score is None:
            score = 0

        # bonus for geographic match
        if match.get('geographic_match'):
            score *= 1.2

        # bonus for high fingerprint similarity
        fp_sim = match.get('fingerprint_similarity')
        if fp_sim and fp_sim > 0.7:
            score *= 1.3

        # bonus for complementary skills (None counts as "no skills")
        comp_skills = match.get('complementary_skills') or []
        if len(comp_skills) >= 3:
            score *= 1.1

        match['quality_score'] = score
        ranked.append(match)

    # sort by quality score, best first; list.sort is stable so ties keep
    # their input order
    ranked.sort(key=lambda x: x['quality_score'], reverse=True)

    return ranked


def find_all_matches(db, min_score=30, min_overlap=20):
    """
    build, store and rank every candidate match in the database

    steps:
      1. load humans via db.get_all_humans(min_score=min_score)
      2. generate a fingerprint for each one and save it with
         db.save_fingerprint
      3. walk every unordered pair of humans, skipping pairs flagged by
         is_same_person or check_already_connected
      4. keep pairs whose overlap_score is at least min_overlap and save
         each of them with db.save_match

    progress is printed along the way
    returns the kept match dicts, ordered by rank_matches
    """
    print("matchd: finding all potential matches...")

    # everyone above the score threshold takes part in matching
    humans = db.get_all_humans(min_score=min_score)
    print(f"  {len(humans)} humans to match")

    # fingerprint each human once up front, keyed by id, and persist it
    fingerprints = {}
    for person in humans:
        fingerprint = generate_fingerprint(person)
        person_id = person['id']
        fingerprints[person_id] = fingerprint
        db.save_fingerprint(person_id, fingerprint)

    print(f"  generated {len(fingerprints)} fingerprints")

    found = []
    same_person_skips = 0
    connected_skips = 0
    pair_count = 0  # stays 0 when there are fewer than two humans

    for pair_count, (left, right) in enumerate(combinations(humans, 2), start=1):
        # two records that look like one person are not a match
        if is_same_person(left, right):
            same_person_skips += 1
            continue

        # already connected (same org, company, co-contributors)
        already_connected, _reason = check_already_connected(left, right)
        if already_connected:
            connected_skips += 1
            continue

        left_fp = fingerprints.get(left['id'])
        right_fp = fingerprints.get(right['id'])
        overlap = find_overlap(left, right, left_fp, right_fp)

        if overlap['overlap_score'] >= min_overlap:
            found.append({'human_a': left, 'human_b': right, **overlap})
            db.save_match(left['id'], right['id'], overlap)

        # progress report; only reached for pairs that were not skipped
        if pair_count % 1000 == 0:
            print(f"  checked {pair_count} pairs, {len(found)} matches so far...")

    print(f"  checked {pair_count} pairs")
    print(f"  skipped {same_person_skips} (same person), {connected_skips} (already connected)")
    print(f"  found {len(found)} potential matches")

    return rank_matches(found)


def get_top_matches(db, limit=50):
    """
    load stored matches from the database with both humans attached

    fetches up to `limit` rows via db.get_matches and looks up each side
    with db.get_human_by_id; rows where either lookup comes back falsy
    are left out of the result
    """
    results = []

    for record in db.get_matches(limit=limit):
        first = db.get_human_by_id(record['human_a_id'])
        second = db.get_human_by_id(record['human_b_id'])

        # drop the row unless both humans were resolved
        if not (first and second):
            continue

        entry = {
            'id': record['id'],
            'human_a': first,
            'human_b': second,
        }
        # remaining columns are copied over unchanged
        for column in ('overlap_score', 'overlap_reasons', 'geographic_match', 'status'):
            entry[column] = record[column]
        results.append(entry)

    return results
autonomous daemon with platform-native contact detection - determine_contact_method now recognizes mastodon/bluesky users by platform - username IS the handle for platform-native users - fixed orphaned matches table issue - wave 1 intros sent successfully 2025-12-16 09:22:58 +00:00			`"""`
			`matchd/rank.py - score and rank match quality`
			`"""`

			`from itertools import combinations`
			`from .fingerprint import generate_fingerprint`
			`from .overlap import find_overlap, is_same_person`
			`from scoutd.deep import check_already_connected`


			`def rank_matches(matches):`
			`"""`
			`rank a list of matches by quality`
			`returns sorted list with quality scores`
			`"""`
			`ranked = []`

			`for match in matches:`
			`# base score from overlap`
			`score = match.get('overlap_score', 0)`

			`# bonus for geographic match`
			`if match.get('geographic_match'):`
			`score *= 1.2`

			`# bonus for high fingerprint similarity`
			`fp_sim = match.get('fingerprint_similarity')`
			`if fp_sim and fp_sim > 0.7:`
			`score *= 1.3`

			`# bonus for complementary skills`
			`comp_skills = match.get('complementary_skills', [])`
			`if len(comp_skills) >= 3:`
			`score *= 1.1`

			`match['quality_score'] = score`
			`ranked.append(match)`

			`# sort by quality score`
			`ranked.sort(key=lambda x: x['quality_score'], reverse=True)`

			`return ranked`


			`def find_all_matches(db, min_score=30, min_overlap=20):`
			`"""`
			`find all potential matches from database`
			`returns list of match dicts`
			`"""`
			`print("matchd: finding all potential matches...")`

			`# get all humans above threshold`
			`humans = db.get_all_humans(min_score=min_score)`
			`print(f" {len(humans)} humans to match")`

			`# generate fingerprints`
			`fingerprints = {}`
			`for human in humans:`
			`fp = generate_fingerprint(human)`
			`fingerprints[human['id']] = fp`
			`db.save_fingerprint(human['id'], fp)`

			`print(f" generated {len(fingerprints)} fingerprints")`

			`# find all pairs`
			`matches = []`
			`checked = 0`
			`skipped_same = 0`
			`skipped_connected = 0`

			`for human_a, human_b in combinations(humans, 2):`
			`checked += 1`

			`# skip if likely same person`
			`if is_same_person(human_a, human_b):`
			`skipped_same += 1`
			`continue`

			`# skip if already connected (same org, company, co-contributors)`
			`connected, reason = check_already_connected(human_a, human_b)`
			`if connected:`
			`skipped_connected += 1`
			`continue`

			`# calculate overlap`
			`fp_a = fingerprints.get(human_a['id'])`
			`fp_b = fingerprints.get(human_b['id'])`

			`overlap = find_overlap(human_a, human_b, fp_a, fp_b)`

			`if overlap['overlap_score'] >= min_overlap:`
			`match = {`
			`'human_a': human_a,`
			`'human_b': human_b,`
			`**overlap`
			`}`
			`matches.append(match)`

			`# save to db`
			`db.save_match(human_a['id'], human_b['id'], overlap)`

			`if checked % 1000 == 0:`
			`print(f" checked {checked} pairs, {len(matches)} matches so far...")`

			`print(f" checked {checked} pairs")`
			`print(f" skipped {skipped_same} (same person), {skipped_connected} (already connected)")`
			`print(f" found {len(matches)} potential matches")`

			`# rank them`
			`ranked = rank_matches(matches)`

			`return ranked`


			`def get_top_matches(db, limit=50):`
			`"""`
			`get top matches from database`
			`"""`
			`match_rows = db.get_matches(limit=limit)`

			`matches = []`
			`for row in match_rows:`
			`human_a = db.get_human_by_id(row['human_a_id'])`
			`human_b = db.get_human_by_id(row['human_b_id'])`

			`if human_a and human_b:`
			`matches.append({`
			`'id': row['id'],`
			`'human_a': human_a,`
			`'human_b': human_b,`
			`'overlap_score': row['overlap_score'],`
			`'overlap_reasons': row['overlap_reasons'],`
			`'geographic_match': row['geographic_match'],`
			`'status': row['status'],`
			`})`

			`return matches`