""" matchd/fingerprint.py - generate values profiles for humans """ import json from collections import defaultdict # values dimensions we track VALUES_DIMENSIONS = [ 'privacy', # surveillance concern, degoogle, self-hosted 'decentralization', # p2p, fediverse, local-first 'cooperation', # coops, mutual aid, community 'queer_friendly', # lgbtq+, pronouns 'environmental', # solarpunk, degrowth, sustainability 'anticapitalist', # post-capitalism, worker ownership 'builder', # creates vs consumes 'pnw_oriented', # pacific northwest connection ] # skill categories SKILL_CATEGORIES = [ 'backend', # python, go, rust, databases 'frontend', # js, react, css 'devops', # docker, k8s, linux admin 'hardware', # electronics, embedded, iot 'design', # ui/ux, graphics 'community', # organizing, facilitation 'writing', # documentation, content ] # signal to dimension mapping SIGNAL_TO_DIMENSION = { 'privacy': 'privacy', 'selfhosted': 'privacy', 'degoogle': 'privacy', 'decentralized': 'decentralization', 'local_first': 'decentralization', 'p2p': 'decentralization', 'federated_chat': 'decentralization', 'foss': 'decentralization', 'cooperative': 'cooperation', 'community': 'cooperation', 'mutual_aid': 'cooperation', 'intentional_community': 'cooperation', 'queer': 'queer_friendly', 'pronouns': 'queer_friendly', 'blm': 'queer_friendly', 'acab': 'queer_friendly', 'solarpunk': 'environmental', 'anticapitalist': 'anticapitalist', 'pnw': 'pnw_oriented', 'pnw_state': 'pnw_oriented', 'remote': 'pnw_oriented', 'home_automation': 'builder', 'modern_lang': 'builder', 'unix': 'builder', 'containers': 'builder', } # language to skill mapping LANGUAGE_TO_SKILL = { 'python': 'backend', 'go': 'backend', 'rust': 'backend', 'java': 'backend', 'ruby': 'backend', 'php': 'backend', 'javascript': 'frontend', 'typescript': 'frontend', 'html': 'frontend', 'css': 'frontend', 'vue': 'frontend', 'shell': 'devops', 'dockerfile': 'devops', 'nix': 'devops', 'hcl': 'devops', 'c': 'hardware', 'c++': 'hardware', 'arduino': 'hardware', 'verilog': 'hardware', } def generate_fingerprint(human_data): """ generate a values fingerprint for a human input: human dict from database (has signals, languages, etc) output: fingerprint dict with values_vector, skills, interests """ # parse stored json fields signals = human_data.get('signals', []) if isinstance(signals, str): signals = json.loads(signals) extra = human_data.get('extra', {}) if isinstance(extra, str): extra = json.loads(extra) languages = extra.get('languages', {}) topics = extra.get('topics', []) # build values vector values_vector = defaultdict(float) # from signals for signal in signals: dimension = SIGNAL_TO_DIMENSION.get(signal) if dimension: values_vector[dimension] += 1.0 # normalize values vector (0-1 scale) max_val = max(values_vector.values()) if values_vector else 1 values_vector = {k: min(v / max_val, 1.0) for k, v in values_vector.items()} # fill in missing dimensions with 0 for dim in VALUES_DIMENSIONS: if dim not in values_vector: values_vector[dim] = 0.0 # determine skills from languages skills = defaultdict(float) total_repos = sum(languages.values()) if languages else 1 for lang, count in languages.items(): skill = LANGUAGE_TO_SKILL.get(lang.lower()) if skill: skills[skill] += count / total_repos # normalize skills if skills: max_skill = max(skills.values()) skills = {k: min(v / max_skill, 1.0) for k, v in skills.items()} # interests from topics and signals interests = list(set(topics + signals)) # location preference location_pref = None if 'pnw' in signals or 'pnw_state' in signals: location_pref = 'pnw' elif 'remote' in signals: location_pref = 'remote' elif human_data.get('location'): loc = human_data['location'].lower() if any(x in loc for x in ['seattle', 'portland', 'washington', 'oregon', 'pnw', 'cascadia']): location_pref = 'pnw' # availability (based on hireable flag if present) availability = None if extra.get('hireable'): availability = 'open' return { 'human_id': human_data.get('id'), 'values_vector': dict(values_vector), 'skills': dict(skills), 'interests': interests, 'location_pref': location_pref, 'availability': availability, } def fingerprint_similarity(fp_a, fp_b): """ calculate similarity between two fingerprints returns 0-1 score """ # values similarity (cosine-ish) va = fp_a.get('values_vector', {}) vb = fp_b.get('values_vector', {}) all_dims = set(va.keys()) | set(vb.keys()) if not all_dims: return 0.0 dot_product = sum(va.get(d, 0) * vb.get(d, 0) for d in all_dims) mag_a = sum(v**2 for v in va.values()) ** 0.5 mag_b = sum(v**2 for v in vb.values()) ** 0.5 if mag_a == 0 or mag_b == 0: values_sim = 0.0 else: values_sim = dot_product / (mag_a * mag_b) # interest overlap (jaccard) ia = set(fp_a.get('interests', [])) ib = set(fp_b.get('interests', [])) if ia or ib: interest_sim = len(ia & ib) / len(ia | ib) else: interest_sim = 0.0 # location compatibility loc_a = fp_a.get('location_pref') loc_b = fp_b.get('location_pref') loc_sim = 0.0 if loc_a == loc_b and loc_a is not None: loc_sim = 1.0 elif loc_a == 'remote' or loc_b == 'remote': loc_sim = 0.5 elif loc_a == 'pnw' or loc_b == 'pnw': loc_sim = 0.3 # weighted combination similarity = (values_sim * 0.5) + (interest_sim * 0.3) + (loc_sim * 0.2) return similarity