Add fetch_url tool for link ingestion

This commit is contained in:
Your Name 2025-12-20 15:01:26 -06:00
parent e95bb18e29
commit 923131b681

View file

@ -4,6 +4,8 @@ from __future__ import annotations
from collections.abc import Callable from collections.abc import Callable
import json import json
import re
from html import unescape
from typing import Any, Literal from typing import Any, Literal
from groq._types import NOT_GIVEN from groq._types import NOT_GIVEN
@ -142,6 +144,42 @@ def _searxng_tool() -> ChatCompletionToolParam:
return ChatCompletionToolParam(type="function", function=tool_spec) return ChatCompletionToolParam(type="function", function=tool_spec)
def _fetch_tool() -> ChatCompletionToolParam:
    """Build the function-tool definition that exposes ``fetch_url`` to the model.

    The tool takes a required ``url`` string and an optional integer
    ``max_chars`` whose advertised default is 4000.
    """
    # JSON-schema description of the arguments the model may supply.
    argument_schema = {
        "type": "object",
        "properties": {
            "url": {"type": "string"},
            "max_chars": {"type": "integer", "default": 4000},
        },
        "required": ["url"],
    }
    return ChatCompletionToolParam(
        type="function",
        function=FunctionDefinition(
            name="fetch_url",
            parameters=argument_schema,
            description="Fetch a URL and return cleaned text content.",
        ),
    )
def _strip_html(text: str) -> str:
# Cheap HTML-to-text for summarization; avoids extra deps.
text = re.sub(r"(?s)<script.*?>.*?</script>", " ", text)
text = re.sub(r"(?s)<style.*?>.*?</style>", " ", text)
text = re.sub(r"<[^>]+>", " ", text)
text = re.sub(r"\\s+", " ", text)
return unescape(text).strip()
async def _run_fetch(hass: HomeAssistant, url: str, max_chars: int) -> dict[str, Any]:
    """Download ``url`` and return its text content with HTML markup removed.

    Args:
        hass: Home Assistant instance, used to obtain the client session via
            ``async_get_clientsession``.
        url: Address to request with a GET.
        max_chars: Upper bound on the length of the returned text; values of
            zero or below leave the text untruncated.

    Returns:
        A dict with the keys ``url`` (as passed in), ``content_type`` (the
        response's ``content-type`` header, or ``""`` when absent) and
        ``text`` (the cleaned body).

    Any exception raised by the request, by ``raise_for_status()`` or while
    reading the body propagates to the caller.
    """
    http = async_get_clientsession(hass)
    async with http.get(url, timeout=20) as response:
        response.raise_for_status()
        mime_type = response.headers.get("content-type", "")
        body = await response.text()

    plain = _strip_html(body)
    # Only a positive limit truncates the result.
    if max_chars > 0:
        plain = plain[:max_chars]
    return {"url": url, "content_type": mime_type, "text": plain}
async def _run_searxng( async def _run_searxng(
hass: HomeAssistant, hass: HomeAssistant,
options: dict[str, Any], options: dict[str, Any],
@ -258,6 +296,9 @@ class GroqdConversationEntity(
if tools is None: if tools is None:
tools = [] tools = []
tools.append(_searxng_tool()) tools.append(_searxng_tool())
if tools is None:
tools = []
tools.append(_fetch_tool())
memory_scope = options.get(CONF_MEMORY_SCOPE, DEFAULT_MEMORY_SCOPE) memory_scope = options.get(CONF_MEMORY_SCOPE, DEFAULT_MEMORY_SCOPE)
memory_key = None memory_key = None
@ -448,6 +489,17 @@ class GroqdConversationEntity(
tool_response = await _run_searxng(self.hass, options, tool_args) tool_response = await _run_searxng(self.hass, options, tool_args)
except Exception as err: except Exception as err:
tool_response = {"error": type(err).__name__, "error_text": str(err)} tool_response = {"error": type(err).__name__, "error_text": str(err)}
elif tool_name == "fetch_url":
url = tool_args.get("url", "")
max_chars = tool_args.get("max_chars", 4000)
try:
max_chars = int(max_chars)
except (TypeError, ValueError):
max_chars = 4000
try:
tool_response = await _run_fetch(self.hass, url, max_chars)
except Exception as err:
tool_response = {"error": type(err).__name__, "error_text": str(err)}
elif llm_api: elif llm_api:
tool_input = llm.ToolInput( tool_input = llm.ToolInput(
tool_name=tool_name, tool_name=tool_name,