# Reconstructed from a flattened patch against
# custom_components/groqd/conversation.py that adds a `fetch_url` tool.
#
# HomeAssistant, async_get_clientsession, FunctionDefinition,
# ChatCompletionToolParam and Any are not defined in this excerpt; presumably
# the target module already imports them -- verify when merging.

import re
from html import unescape
from urllib.parse import urlsplit

# Character budget advertised to the model when it omits `max_chars`.
_FETCH_DEFAULT_MAX_CHARS = 4000
# Total request timeout, in seconds, handed to the HTTP client.
_FETCH_TIMEOUT_SECONDS = 20
# Hard cap on how many response bytes are buffered before decoding.
_FETCH_MAX_BYTES = 2 * 1024 * 1024
_FETCH_CHUNK_BYTES = 64 * 1024

# Whole <script>/<style> elements, including their bodies.
_SCRIPT_OR_STYLE_RE = re.compile(r"(?is)<(script|style)\b[^>]*>.*?</\1\s*>")
_COMMENT_RE = re.compile(r"(?s)<!--.*?-->")
_TAG_RE = re.compile(r"<[^>]+>")
_WHITESPACE_RE = re.compile(r"\s+")


def _fetch_tool() -> ChatCompletionToolParam:
    """Build the tool definition that exposes `fetch_url` to the model.

    The tool takes a required string `url` and an optional integer
    `max_chars` whose advertised default is 4000.
    """
    tool_spec = FunctionDefinition(
        name="fetch_url",
        parameters={
            "type": "object",
            "properties": {
                "url": {"type": "string"},
                "max_chars": {
                    "type": "integer",
                    "default": _FETCH_DEFAULT_MAX_CHARS,
                },
            },
            "required": ["url"],
        },
        description="Fetch a URL and return cleaned text content.",
    )
    return ChatCompletionToolParam(type="function", function=tool_spec)


def _strip_html(text: str) -> str:
    """Reduce an HTML document to plain, whitespace-normalised text.

    Cheap HTML-to-text for summarization; avoids extra deps. This is a
    regex-based approximation, not a real HTML parser.

    Args:
        text: Raw markup (or plain text, which passes through unchanged apart
            from entity decoding and whitespace normalisation).

    Returns:
        The text with script/style elements, comments and tags removed,
        character references decoded, and whitespace runs collapsed to a
        single space, with no leading or trailing whitespace.
    """
    # Drop script/style elements together with their bodies first; stripping
    # only the tags would leave JavaScript/CSS source in the output.
    text = _SCRIPT_OR_STYLE_RE.sub(" ", text)
    text = _COMMENT_RE.sub(" ", text)
    text = _TAG_RE.sub(" ", text)
    # Decode entities only after tag removal so escaped markup such as
    # "&lt;b&gt;" survives as literal text instead of being stripped.
    text = unescape(text)
    # Collapse last: decoding can introduce whitespace (e.g. "&nbsp;").
    return _WHITESPACE_RE.sub(" ", text).strip()


async def _run_fetch(hass: HomeAssistant, url: str, max_chars: int) -> dict[str, Any]:
    """Download `url` and return its content as cleaned text.

    Args:
        hass: Home Assistant instance used to obtain the shared HTTP session.
        url: Absolute http(s) URL to fetch. This value comes from the model.
        max_chars: Maximum number of characters of cleaned text to return.
            Zero or a negative value disables truncation.

    Returns:
        A dict with the keys "url", "content_type" and "text".

    Raises:
        ValueError: If `url` is not a non-empty http(s) URL with a host.
        Exception: Whatever the HTTP client raises for connection problems,
            timeouts or non-success status codes.
    """
    # Validate before touching the network so the model gets a clear error.
    if not isinstance(url, str) or not url.strip():
        raise ValueError("url must be a non-empty string")
    parts = urlsplit(url)
    if parts.scheme not in ("http", "https") or not parts.netloc:
        raise ValueError(f"unsupported url (http/https only): {url!r}")

    # NOTE(review): the URL is model-controlled and the request originates
    # from the Home Assistant host, so internal addresses are reachable
    # (SSRF). Consider an allow-list or blocking private ranges.
    session = async_get_clientsession(hass)
    # NOTE(review): a bare number is kept as the timeout, as in the original;
    # check whether the installed HTTP client expects a timeout object.
    async with session.get(url, timeout=_FETCH_TIMEOUT_SECONDS) as resp:
        resp.raise_for_status()
        content_type = resp.headers.get("content-type", "")
        # Stream with a byte cap instead of buffering a body of unknown size.
        raw = bytearray()
        async for chunk in resp.content.iter_chunked(_FETCH_CHUNK_BYTES):
            raw.extend(chunk)
            if len(raw) >= _FETCH_MAX_BYTES:
                break
        encoding = resp.charset or "utf-8"

    payload = bytes(raw[:_FETCH_MAX_BYTES])
    try:
        text = payload.decode(encoding, errors="replace")
    except LookupError:
        # The server declared a charset Python does not know.
        text = payload.decode("utf-8", errors="replace")

    cleaned = _strip_html(text)
    if max_chars > 0:
        cleaned = cleaned[:max_chars]
    return {"url": url, "content_type": content_type, "text": cleaned}


# ---------------------------------------------------------------------------
# Integration points from the same patch, inside GroqdConversationEntity.
# The enclosing method definitions are not visible and the original
# indentation was lost, so the statements are recorded for reference only.
#
# Tool registration, directly after `tools.append(_searxng_tool())`:
#
#     if tools is None:
#         tools = []
#     tools.append(_fetch_tool())
#
# Tool dispatch, after the branch that calls `_run_searxng` and before
# `elif llm_api:`:
#
#     elif tool_name == "fetch_url":
#         url = tool_args.get("url", "")
#         max_chars = tool_args.get("max_chars", 4000)
#         try:
#             max_chars = int(max_chars)
#         except (TypeError, ValueError):
#             max_chars = 4000
#         try:
#             tool_response = await _run_fetch(self.hass, url, max_chars)
#         except Exception as err:
#             tool_response = {"error": type(err).__name__, "error_text": str(err)}