mirror of
https://github.com/sudoxreboot/groqd
synced 2026-04-14 03:26:35 +00:00
Add fetch_url tool for link ingestion
This commit is contained in:
parent
e95bb18e29
commit
923131b681
1 changed file with 52 additions and 0 deletions
|
|
@ -4,6 +4,8 @@ from __future__ import annotations
|
|||
|
||||
from collections.abc import Callable
|
||||
import json
|
||||
import re
|
||||
from html import unescape
|
||||
from typing import Any, Literal
|
||||
|
||||
from groq._types import NOT_GIVEN
|
||||
|
|
@ -142,6 +144,42 @@ def _searxng_tool() -> ChatCompletionToolParam:
|
|||
return ChatCompletionToolParam(type="function", function=tool_spec)
|
||||
|
||||
|
||||
def _fetch_tool() -> ChatCompletionToolParam:
    """Build the ``fetch_url`` function-tool definition.

    The JSON schema requires a string ``url`` and accepts an optional
    integer ``max_chars`` whose advertised default is 4000.
    """
    url_schema = {"type": "string"}
    max_chars_schema = {"type": "integer", "default": 4000}
    schema = {
        "type": "object",
        "properties": {
            "url": url_schema,
            "max_chars": max_chars_schema,
        },
        "required": ["url"],
    }
    fetch_spec = FunctionDefinition(
        name="fetch_url",
        parameters=schema,
        description="Fetch a URL and return cleaned text content.",
    )
    return ChatCompletionToolParam(type="function", function=fetch_spec)
|
||||
|
||||
|
||||
def _strip_html(text: str) -> str:
|
||||
# Cheap HTML-to-text for summarization; avoids extra deps.
|
||||
text = re.sub(r"(?s)<script.*?>.*?</script>", " ", text)
|
||||
text = re.sub(r"(?s)<style.*?>.*?</style>", " ", text)
|
||||
text = re.sub(r"<[^>]+>", " ", text)
|
||||
text = re.sub(r"\\s+", " ", text)
|
||||
return unescape(text).strip()
|
||||
|
||||
|
||||
async def _run_fetch(hass: HomeAssistant, url: str, max_chars: int) -> dict[str, Any]:
    """GET ``url`` and return its body as tag-stripped text.

    Args:
        hass: Passed to ``async_get_clientsession`` to obtain the HTTP session.
        url: Address to request.
        max_chars: Maximum length of the returned text; values of zero or
            below leave the text untruncated.

    Returns:
        A dict with the requested ``url``, the response's ``content_type``
        header (empty string when absent) and the cleaned ``text``.

    Raises:
        Whatever ``raise_for_status`` or the session raises for a failed
        request; nothing is caught here.
    """
    http = async_get_clientsession(hass)
    async with http.get(url, timeout=20) as response:
        response.raise_for_status()
        mime = response.headers.get("content-type", "")
        body = await response.text()

    plain = _strip_html(body)
    result: dict[str, Any] = {"url": url, "content_type": mime}
    result["text"] = plain[:max_chars] if max_chars > 0 else plain
    return result
|
||||
|
||||
async def _run_searxng(
|
||||
hass: HomeAssistant,
|
||||
options: dict[str, Any],
|
||||
|
|
@ -258,6 +296,9 @@ class GroqdConversationEntity(
|
|||
if tools is None:
|
||||
tools = []
|
||||
tools.append(_searxng_tool())
|
||||
if tools is None:
|
||||
tools = []
|
||||
tools.append(_fetch_tool())
|
||||
|
||||
memory_scope = options.get(CONF_MEMORY_SCOPE, DEFAULT_MEMORY_SCOPE)
|
||||
memory_key = None
|
||||
|
|
@ -448,6 +489,17 @@ class GroqdConversationEntity(
|
|||
tool_response = await _run_searxng(self.hass, options, tool_args)
|
||||
except Exception as err:
|
||||
tool_response = {"error": type(err).__name__, "error_text": str(err)}
|
||||
elif tool_name == "fetch_url":
|
||||
url = tool_args.get("url", "")
|
||||
max_chars = tool_args.get("max_chars", 4000)
|
||||
try:
|
||||
max_chars = int(max_chars)
|
||||
except (TypeError, ValueError):
|
||||
max_chars = 4000
|
||||
try:
|
||||
tool_response = await _run_fetch(self.hass, url, max_chars)
|
||||
except Exception as err:
|
||||
tool_response = {"error": type(err).__name__, "error_text": str(err)}
|
||||
elif llm_api:
|
||||
tool_input = llm.ToolInput(
|
||||
tool_name=tool_name,
|
||||
|
|
|
|||
Loading…
Reference in a new issue