"""
shared signal patterns for all scrapers
"""

import re

# positive signals - what we're looking for
#
# each entry is (regex, signal_name, points). analyze_text() lowercases the
# text, runs every regex with re.search + re.IGNORECASE, and adds `points`
# once for each entry that matches anywhere in the text; several hits of the
# same entry in one text still count once.
#
# `.?` in these patterns is "any single character, optional", so it accepts
# a space, hyphen, underscore or nothing ('mutual aid', 'mutual-aid',
# 'mutualaid') - but also any other character in that position.
#
# NOTE(review): a few alternatives are everyday english words ('go',
# 'remote', 'anywhere', 'community', 'matrix', 'mesh') and will match in
# unrelated prose - confirm that is acceptable for the points assigned.
POSITIVE_PATTERNS = [
    # values
    (r'\b(solarpunk|cyberpunk)\b', 'solarpunk', 10),
    (r'\b(anarchis[tm]|mutual.?aid)\b', 'mutual_aid', 10),
    # NOTE(review): `owned?` only makes the trailing 'd' optional; presumably
    # 'worker-owned' is the target and `own(?:ed)?` was meant - confirm.
    (r'\b(cooperative|collective|worker.?owned?|coop|co.?op)\b', 'cooperative', 15),
    (r'\b(community|commons)\b', 'community', 5),
    (r'\b(intentional.?community|cohousing|commune)\b', 'intentional_community', 20),

    # queer-friendly
    (r'\b(queer|lgbtq?|trans|nonbinary|enby|genderqueer)\b', 'queer', 15),
    (r'\b(they/them|she/her|he/him|xe/xem|any.?pronouns)\b', 'pronouns', 10),
    (r'\bblm\b', 'blm', 5),
    (r'\b(acab|1312)\b', 'acab', 5),

    # tech values
    (r'\b(privacy|surveillance|anti.?surveillance)\b', 'privacy', 10),
    (r'\b(self.?host(?:ed|ing)?|homelab|home.?server)\b', 'selfhosted', 15),
    (r'\b(local.?first|offline.?first)\b', 'local_first', 15),
    (r'\b(decentralized?|federation|federated|fediverse)\b', 'decentralized', 10),
    (r'\b(foss|libre|open.?source|copyleft)\b', 'foss', 10),
    (r'\b(home.?assistant|home.?automation)\b', 'home_automation', 10),
    (r'\b(mesh|p2p|peer.?to.?peer)\b', 'p2p', 10),
    (r'\b(matrix|xmpp|irc)\b', 'federated_chat', 5),
    (r'\b(degoogle|de.?google)\b', 'degoogle', 10),

    # location/availability
    (r'\b(seattle|portland|pnw|cascadia|pacific.?northwest)\b', 'pnw', 20),
    (r'\b(washington|oregon)\b', 'pnw_state', 10),
    (r'\b(remote|anywhere|relocate|looking.?to.?move)\b', 'remote', 10),

    # anti-capitalism
    (r'\b(anti.?capitalis[tm]|post.?capitalis[tm]|degrowth)\b', 'anticapitalist', 10),

    # neurodivergent (often overlaps with our values)
    (r'\b(neurodivergent|adhd|autistic|autism)\b', 'neurodivergent', 5),

    # technical skills (bonus for builders)
    (r'\b(rust|go|python|typescript)\b', 'modern_lang', 3),
    (r'\b(linux|bsd|nixos)\b', 'unix', 3),
    (r'\b(kubernetes|docker|podman)\b', 'containers', 3),
]

# negative signals - red flags
#
# same (regex, signal_name, points) shape as POSITIVE_PATTERNS, but points
# are negative; analyze_text() adds them to the same running score.
# two entries share the name 'conspiracy': each one that matches applies its
# own penalty, while the name is reported only once in the result.
NEGATIVE_PATTERNS = [
    (r'\b(qanon|maga|trump|wwg1wga)\b', 'maga', -50),
    (r'\b(covid.?hoax|plandemic|5g.?conspiracy)\b', 'conspiracy', -50),
    (r'\b(nwo|illuminati|deep.?state)\b', 'conspiracy', -30),
    (r'\b(anti.?vax|antivax)\b', 'antivax', -30),
    (r'\b(sovereign.?citizen)\b', 'sovcit', -40),
    (r'\b(crypto.?bro|web3|nft|blockchain|bitcoin|ethereum)\b', 'crypto', -15),
    (r'\b(conservative|republican)\b', 'conservative', -20),
    (r'\b(free.?speech.?absolutist)\b', 'freeze_peach', -20),
]

# topic tags to search when discovering repos (one per line for clean diffs)
TARGET_TOPICS = [
    'local-first',
    'self-hosted',
    'privacy',
    'mesh-network',
    'cooperative',
    'solarpunk',
    'decentralized',
    'p2p',
    'fediverse',
    'activitypub',
    'matrix-org',
    'homeassistant',
    'esphome',
    'open-source-hardware',
    'right-to-repair',
    'mutual-aid',
    'commons',
    'degoogle',
    'privacy-tools',
]

# ecosystem repos - high signal contributors
# each entry is an 'owner/name' repository slug, case preserved as written.
# NOTE(review): nothing visible in this module reads the list - presumably a
# scraper walks each repo's contributors; confirm against the caller.
ECOSYSTEM_REPOS = [
    'home-assistant/core',
    'esphome/esphome',
    'matrix-org/synapse',
    'LemmyNet/lemmy',
    'mastodon/mastodon',
    'owncast/owncast',
    'nextcloud/server',
    'immich-app/immich',
    'jellyfin/jellyfin',
    'navidrome/navidrome',
    'paperless-ngx/paperless-ngx',
    'actualbudget/actual',
    'firefly-iii/firefly-iii',
    'logseq/logseq',
    'AppFlowy-IO/AppFlowy',
    'siyuan-note/siyuan',
    'anytype/anytype-ts',
    'calcom/cal.com',
    'plausible/analytics',
    'umami-software/umami',
]

# aligned subreddits
# maps subreddit name -> integer weight (presumably a score bonus on the same
# scale as the pattern points; the consumer is not visible here - confirm).
# NOTE(review): keys mix cases ('PrivacyGuides', 'Seattle' vs 'selfhosted'),
# so a plain dict lookup is case-sensitive - confirm callers pass names in
# exactly this casing or normalise both sides.
ALIGNED_SUBREDDITS = {
    'intentionalcommunity': 25,
    'cohousing': 25,
    'cooperatives': 20,
    'solarpunk': 20,
    'selfhosted': 15,
    'homeassistant': 15,
    'homelab': 10,
    'privacy': 15,
    'PrivacyGuides': 15,
    'degoogle': 15,
    'anticonsumption': 10,
    'Frugal': 5,
    'simpleliving': 5,
    'Seattle': 10,
    'Portland': 10,
    'cascadia': 15,
    'linux': 5,
    'opensource': 10,
    'FOSS': 10,
}

# subreddits treated as red flags (names only, no per-entry weight)
NEGATIVE_SUBREDDITS = [
    'conspiracy',
    'conservative',
    'walkaway',
    'louderwithcrowder',
    'JordanPeterson',
    'TimPool',
    'NoNewNormal',
    'LockdownSkepticism',
]

# high-signal mastodon instances
# maps instance domain -> integer weight (presumably a score bonus for
# accounts hosted there; the consumer is not visible here - confirm).
ALIGNED_INSTANCES = {
    'tech.lgbt': 20,
    'social.coop': 25,
    'fosstodon.org': 10,
    'hackers.town': 15,
    'hachyderm.io': 10,
    'infosec.exchange': 5,
}


def _match_signals(text, patterns):
    """
    run every (regex, signal_name, points) entry in patterns against text
    returns: (points_total, signal_names)

    points are added once per matching entry; signal_names holds each matched
    name once, in the order the entries appear in patterns.
    """
    total = 0
    # dict as an ordered set: keeps first-match order and drops repeated
    # names, so the result is identical from run to run (a plain set() orders
    # strings by their per-process randomised hash)
    names = {}
    for pattern, signal_name, points in patterns:
        if re.search(pattern, text, re.IGNORECASE):
            total += points
            names[signal_name] = None
    return total, list(names)


def analyze_text(text):
    """
    analyze text for signals

    text: string to scan; None or '' yields an empty result
    returns: (score, signals_found, negative_signals)
        score            - sum of points of every matching entry in
                           POSITIVE_PATTERNS and NEGATIVE_PATTERNS
        signals_found    - matched positive signal names, deduplicated,
                           in POSITIVE_PATTERNS order
        negative_signals - matched negative signal names, deduplicated,
                           in NEGATIVE_PATTERNS order
    """
    if not text:
        return 0, [], []

    # lowercase once up front; re.IGNORECASE stays on in the helper so table
    # entries written with uppercase letters still match
    text = text.lower()

    positive_score, signals = _match_signals(text, POSITIVE_PATTERNS)
    # negative entries carry negative points, so plain addition penalises
    negative_score, negatives = _match_signals(text, NEGATIVE_PATTERNS)

    return positive_score + negative_score, signals, negatives