mirror of
https://github.com/sudoxnym/connectd.git
synced 2026-04-14 11:37:42 +00:00
- add HOST_USER env var for auto-discovery from github - merge HOST_* env vars with scraped profile data - fix countdown timers to use started_at when no cycles run - add lemmy, discord, bluesky fields to priority_users - expand API user endpoint with all platform handles - update HA sensor with full user profile attributes - add HAOS add-on structure for one-click install - update version to 1.1.0 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
210 lines
6 KiB
Python
210 lines
6 KiB
Python
"""
matchd/fingerprint.py - generate values profiles for humans
"""
|
|
|
|
import json
|
|
from collections import defaultdict
|
|
|
|
# values dimensions we track
# (the note above each name lists the kind of signals that feed it)
VALUES_DIMENSIONS = [
    # surveillance concern, degoogle, self-hosted
    'privacy',
    # p2p, fediverse, local-first
    'decentralization',
    # coops, mutual aid, community
    'cooperation',
    # lgbtq+, pronouns
    'queer_friendly',
    # solarpunk, degrowth, sustainability
    'environmental',
    # post-capitalism, worker ownership
    'anticapitalist',
    # creates vs consumes
    'builder',
    # pacific northwest connection
    'pnw_oriented',
]
|
|
|
|
# skill categories
# (the note above each name gives examples of what belongs in it)
SKILL_CATEGORIES = [
    # python, go, rust, databases
    'backend',
    # js, react, css
    'frontend',
    # docker, k8s, linux admin
    'devops',
    # electronics, embedded, iot
    'hardware',
    # ui/ux, graphics
    'design',
    # organizing, facilitation
    'community',
    # documentation, content
    'writing',
]
|
|
|
|
# signal to dimension mapping
# written as (dimension, signals) groups and flattened into a plain
# {signal: dimension} dict, so each dimension's inputs sit on one line
SIGNAL_TO_DIMENSION = {
    signal: dimension
    for dimension, signals in (
        ('privacy', ('privacy', 'selfhosted', 'degoogle')),
        ('decentralization', (
            'decentralized', 'local_first', 'p2p', 'federated_chat', 'foss',
        )),
        ('cooperation', (
            'cooperative', 'community', 'mutual_aid', 'intentional_community',
        )),
        ('queer_friendly', ('queer', 'pronouns', 'blm', 'acab')),
        ('environmental', ('solarpunk',)),
        ('anticapitalist', ('anticapitalist',)),
        ('pnw_oriented', ('pnw', 'pnw_state', 'remote')),
        ('builder', ('home_automation', 'modern_lang', 'unix', 'containers')),
    )
    for signal in signals
}
|
|
|
|
# language to skill mapping
# keys are lower-case language names; written as (skill, languages)
# groups and flattened into a plain {language: skill} dict
LANGUAGE_TO_SKILL = {
    language: skill
    for skill, languages in (
        ('backend', ('python', 'go', 'rust', 'java', 'ruby', 'php')),
        ('frontend', ('javascript', 'typescript', 'html', 'css', 'vue')),
        ('devops', ('shell', 'dockerfile', 'nix', 'hcl')),
        ('hardware', ('c', 'c++', 'arduino', 'verilog')),
    )
    for language in languages
}
|
|
|
|
|
|
def generate_fingerprint(human_data):
    """
    generate a values fingerprint for a human

    input: human dict from database (has signals, languages, etc).
           'signals' and 'extra' may be python objects or json strings;
           missing, None, empty or json-null fields are treated as empty
    output: fingerprint dict with human_id, values_vector, skills,
            interests, location_pref, availability

    raises json.JSONDecodeError if 'signals' or 'extra' is a non-empty
    string that is not valid json
    """
    # parse stored json fields. `or` covers NULL columns and json "null",
    # either of which would otherwise break the lookups further down
    signals = human_data.get('signals') or []
    if isinstance(signals, str):
        signals = json.loads(signals) or []

    extra = human_data.get('extra') or {}
    if isinstance(extra, str):
        extra = json.loads(extra) or {}

    languages = extra.get('languages') or {}
    topics = extra.get('topics') or []

    # build values vector: one point per signal that maps to a dimension
    hits = defaultdict(float)
    for signal in signals:
        dimension = SIGNAL_TO_DIMENSION.get(signal)
        if dimension:
            hits[dimension] += 1.0

    # normalize values vector (0-1 scale) against the strongest dimension
    max_val = max(hits.values(), default=1.0)
    values_vector = {dim: min(n / max_val, 1.0) for dim, n in hits.items()}

    # fill in missing dimensions with 0
    for dim in VALUES_DIMENSIONS:
        values_vector.setdefault(dim, 0.0)

    # determine skills from languages. counts are summed per skill as-is:
    # dividing by the overall total first is cancelled out by the
    # max-normalization below, and it raised ZeroDivisionError whenever
    # every count was 0
    weights = defaultdict(float)
    for lang, count in languages.items():
        skill = LANGUAGE_TO_SKILL.get(lang.lower())
        if skill and count:
            weights[skill] += count

    # normalize skills so the strongest one scores 1.0
    max_skill = max(weights.values(), default=0)
    if max_skill > 0:
        skills = {name: min(w / max_skill, 1.0) for name, w in weights.items()}
    else:
        skills = {}

    # interests from topics and signals, de-duplicated in first-seen order
    # so the same input always yields the same list
    interests = list(dict.fromkeys([*topics, *signals]))

    # location preference: explicit signals win over the free-text location
    location_pref = None
    if 'pnw' in signals or 'pnw_state' in signals:
        location_pref = 'pnw'
    elif 'remote' in signals:
        location_pref = 'remote'
    elif human_data.get('location'):
        loc = human_data['location'].lower()
        if any(x in loc for x in ['seattle', 'portland', 'washington', 'oregon', 'pnw', 'cascadia']):
            location_pref = 'pnw'

    # availability (based on hireable flag if present)
    availability = None
    if extra.get('hireable'):
        availability = 'open'

    return {
        'human_id': human_data.get('id'),
        'values_vector': values_vector,
        'skills': skills,
        'interests': interests,
        'location_pref': location_pref,
        'availability': availability,
    }
|
|
|
|
|
|
def fingerprint_similarity(fp_a, fp_b):
    """
    calculate similarity between two fingerprints

    the score is a weighted blend of three parts:
      - values similarity (cosine of the two values vectors), weight 0.5
      - interest overlap (jaccard index of the interest sets), weight 0.3
      - location compatibility, weight 0.2

    input: two fingerprint dicts; the keys read are 'values_vector',
           'interests' and 'location_pref'. a missing key or a None
           value is treated as empty
    output: float, returns 0-1 score. 0.0 is returned straight away when
            neither fingerprint has any values dimensions
    """
    # `or` guards against keys that are present but hold None
    va = fp_a.get('values_vector') or {}
    vb = fp_b.get('values_vector') or {}

    # nothing to compare on values at all: score is 0 regardless of
    # interests or location
    if not va and not vb:
        return 0.0

    # values similarity (cosine); dimensions missing on one side count as 0
    dot_product = sum(v * vb.get(d, 0) for d, v in va.items())
    mag_a = sum(v ** 2 for v in va.values()) ** 0.5
    mag_b = sum(v ** 2 for v in vb.values()) ** 0.5

    if mag_a == 0 or mag_b == 0:
        values_sim = 0.0
    else:
        # clamp: float rounding can land a hair above 1.0, and negative
        # vector entries would otherwise drag the score below 0
        values_sim = max(0.0, min(dot_product / (mag_a * mag_b), 1.0))

    # interest overlap (jaccard)
    ia = set(fp_a.get('interests') or [])
    ib = set(fp_b.get('interests') or [])

    if ia or ib:
        interest_sim = len(ia & ib) / len(ia | ib)
    else:
        interest_sim = 0.0

    # location compatibility: exact match scores highest, then any pair
    # involving 'remote', then any pair involving 'pnw'
    loc_a = fp_a.get('location_pref')
    loc_b = fp_b.get('location_pref')

    loc_sim = 0.0
    if loc_a == loc_b and loc_a is not None:
        loc_sim = 1.0
    elif loc_a == 'remote' or loc_b == 'remote':
        loc_sim = 0.5
    elif loc_a == 'pnw' or loc_b == 'pnw':
        loc_sim = 0.3

    # weighted combination, kept inside the documented 0-1 range
    similarity = (values_sim * 0.5) + (interest_sim * 0.3) + (loc_sim * 0.2)

    return min(similarity, 1.0)
|