mirror of
https://github.com/sudoxnym/connectd.git
synced 2026-04-14 19:46:30 +00:00
- determine_contact_method now recognizes mastodon/bluesky users by platform - username IS the handle for platform-native users - fixed orphaned matches table issue - wave 1 intros sent successfully
158 lines
4.9 KiB
Python
158 lines
4.9 KiB
Python
"""
shared signal patterns for all scrapers
"""
|
|
|
|
import re
|
|
|
|
# positive signals - what we're looking for
|
|
# each entry is (regex, signal name, points). analyze_text() searches the
# regex case-insensitively and adds the points once per matching entry.
POSITIVE_PATTERNS = [
    # shared values
    (r'\b(solarpunk|cyberpunk)\b', 'solarpunk', 10),
    (r'\b(anarchis[tm]|mutual.?aid)\b', 'mutual_aid', 10),
    (
        r'\b(cooperative|collective|worker.?owned?|coop|co.?op)\b',
        'cooperative',
        15,
    ),
    (r'\b(community|commons)\b', 'community', 5),
    (
        r'\b(intentional.?community|cohousing|commune)\b',
        'intentional_community',
        20,
    ),

    # queer-friendly markers
    (r'\b(queer|lgbtq?|trans|nonbinary|enby|genderqueer)\b', 'queer', 15),
    (
        r'\b(they/them|she/her|he/him|xe/xem|any.?pronouns)\b',
        'pronouns',
        10,
    ),
    (r'\bblm\b', 'blm', 5),
    (r'\b(acab|1312)\b', 'acab', 5),

    # tech values
    (r'\b(privacy|surveillance|anti.?surveillance)\b', 'privacy', 10),
    (
        r'\b(self.?host(?:ed|ing)?|homelab|home.?server)\b',
        'selfhosted',
        15,
    ),
    (r'\b(local.?first|offline.?first)\b', 'local_first', 15),
    (
        r'\b(decentralized?|federation|federated|fediverse)\b',
        'decentralized',
        10,
    ),
    (r'\b(foss|libre|open.?source|copyleft)\b', 'foss', 10),
    (r'\b(home.?assistant|home.?automation)\b', 'home_automation', 10),
    (r'\b(mesh|p2p|peer.?to.?peer)\b', 'p2p', 10),
    (r'\b(matrix|xmpp|irc)\b', 'federated_chat', 5),
    (r'\b(degoogle|de.?google)\b', 'degoogle', 10),

    # location / availability
    (
        r'\b(seattle|portland|pnw|cascadia|pacific.?northwest)\b',
        'pnw',
        20,
    ),
    (r'\b(washington|oregon)\b', 'pnw_state', 10),
    (r'\b(remote|anywhere|relocate|looking.?to.?move)\b', 'remote', 10),

    # anti-capitalism
    (
        r'\b(anti.?capitalis[tm]|post.?capitalis[tm]|degrowth)\b',
        'anticapitalist',
        10,
    ),

    # neurodivergent (often overlaps with our values)
    (r'\b(neurodivergent|adhd|autistic|autism)\b', 'neurodivergent', 5),

    # technical skills (small bonus for builders)
    (r'\b(rust|go|python|typescript)\b', 'modern_lang', 3),
    (r'\b(linux|bsd|nixos)\b', 'unix', 3),
    (r'\b(kubernetes|docker|podman)\b', 'containers', 3),
]
|
|
|
|
# negative signals - red flags
|
|
# same (regex, signal name, points) shape as the positive list, but every
# points value is negative so analyze_text() can simply add it to the score.
# two entries share the 'conspiracy' name and are scored independently.
NEGATIVE_PATTERNS = [
    (r'\b(qanon|maga|trump|wwg1wga)\b', 'maga', -50),
    (r'\b(covid.?hoax|plandemic|5g.?conspiracy)\b', 'conspiracy', -50),
    (r'\b(nwo|illuminati|deep.?state)\b', 'conspiracy', -30),
    (r'\b(anti.?vax|antivax)\b', 'antivax', -30),
    (r'\b(sovereign.?citizen)\b', 'sovcit', -40),
    (
        r'\b(crypto.?bro|web3|nft|blockchain|bitcoin|ethereum)\b',
        'crypto',
        -15,
    ),
    (r'\b(conservative|republican)\b', 'conservative', -20),
    (r'\b(free.?speech.?absolutist)\b', 'freeze_peach', -20),
]
|
|
|
|
# target topics for repo discovery
|
|
# topic slugs, one per line; the code that consumes this list is not in
# this module
TARGET_TOPICS = [
    'local-first',
    'self-hosted',
    'privacy',
    'mesh-network',
    'cooperative',
    'solarpunk',
    'decentralized',
    'p2p',
    'fediverse',
    'activitypub',
    'matrix-org',
    'homeassistant',
    'esphome',
    'open-source-hardware',
    'right-to-repair',
    'mutual-aid',
    'commons',
    'degoogle',
    'privacy-tools',
]
|
|
|
|
# ecosystem repos - high signal contributors
|
|
# repositories written as 'owner/name'; order is kept as originally listed
ECOSYSTEM_REPOS = [
    'home-assistant/core', 'esphome/esphome',
    'matrix-org/synapse', 'LemmyNet/lemmy',
    'mastodon/mastodon', 'owncast/owncast',
    'nextcloud/server', 'immich-app/immich',
    'jellyfin/jellyfin', 'navidrome/navidrome',
    'paperless-ngx/paperless-ngx', 'actualbudget/actual',
    'firefly-iii/firefly-iii', 'logseq/logseq',
    'AppFlowy-IO/AppFlowy', 'siyuan-note/siyuan',
    'anytype/anytype-ts', 'calcom/cal.com',
    'plausible/analytics', 'umami-software/umami',
]
|
|
|
|
# aligned subreddits
|
|
# subreddit name -> integer weight. keys keep their original capitalisation.
# NOTE(review): presumably the weight is added to a user's score by the
# reddit scraper - the consumer is not in this module, confirm there.
ALIGNED_SUBREDDITS = {
    # community living / cooperatives
    'intentionalcommunity': 25,
    'cohousing': 25,
    'cooperatives': 20,
    'solarpunk': 20,

    # self-hosting and privacy
    'selfhosted': 15,
    'homeassistant': 15,
    'homelab': 10,
    'privacy': 15,
    'PrivacyGuides': 15,
    'degoogle': 15,

    # consumption / lifestyle
    'anticonsumption': 10,
    'Frugal': 5,
    'simpleliving': 5,

    # pacific northwest
    'Seattle': 10,
    'Portland': 10,
    'cascadia': 15,

    # open source
    'linux': 5,
    'opensource': 10,
    'FOSS': 10,
}
|
|
|
|
# negative subreddits
|
|
# plain list of subreddit names (no weights attached here); capitalisation
# is kept exactly as originally written
NEGATIVE_SUBREDDITS = [
    'conspiracy',
    'conservative',
    'walkaway',
    'louderwithcrowder',
    'JordanPeterson',
    'TimPool',
    'NoNewNormal',
    'LockdownSkepticism',
]
|
|
|
|
# high-signal mastodon instances
|
|
# instance domain -> integer weight.
# NOTE(review): presumably a score bonus for accounts hosted on that
# instance - the consumer is not in this module, confirm there.
ALIGNED_INSTANCES = {
    'tech.lgbt': 20,
    'social.coop': 25,
    'fosstodon.org': 10,
    'hackers.town': 15,
    'hachyderm.io': 10,
    'infosec.exchange': 5,
}
|
|
|
|
|
|
def analyze_text(text):
    """
    analyze text for signals

    searches `text` case-insensitively against every entry of
    POSITIVE_PATTERNS and NEGATIVE_PATTERNS. an entry whose regex matches
    anywhere in the text contributes its points exactly once, no matter how
    many times it occurs.

    returns: (score, signals_found, negative_signals)
        score            - sum of the points of every matching entry
                           (negative entries carry negative points)
        signals_found    - unique positive signal names, in pattern order
        negative_signals - unique negative signal names, in pattern order

    empty or None text returns (0, [], []).
    """
    if not text:
        return 0, [], []

    text = text.lower()
    score = 0
    signals = []
    negatives = []

    # both tables share the (regex, name, points) shape, so one loop serves
    # both; positives are scanned first, then negatives.
    for patterns, found in (
        (POSITIVE_PATTERNS, signals),
        (NEGATIVE_PATTERNS, negatives),
    ):
        for pattern, signal_name, points in patterns:
            if re.search(pattern, text, re.IGNORECASE):
                score += points  # negative entries are already negative
                found.append(signal_name)

    # several entries can share one name (e.g. 'conspiracy'), so the names
    # are de-duplicated. dict.fromkeys keeps first-seen order, whereas set()
    # ordering changes between runs under string hash randomization.
    return score, list(dict.fromkeys(signals)), list(dict.fromkeys(negatives))
|