mirror of
https://github.com/sudoxreboot/groqd
synced 2026-04-14 11:36:49 +00:00
Add fetch_url tool for link ingestion
This commit is contained in:
parent
e95bb18e29
commit
923131b681
1 changed files with 52 additions and 0 deletions
|
|
@ -4,6 +4,8 @@ from __future__ import annotations
|
||||||
|
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
|
from html import unescape
|
||||||
from typing import Any, Literal
|
from typing import Any, Literal
|
||||||
|
|
||||||
from groq._types import NOT_GIVEN
|
from groq._types import NOT_GIVEN
|
||||||
|
|
@ -142,6 +144,42 @@ def _searxng_tool() -> ChatCompletionToolParam:
|
||||||
return ChatCompletionToolParam(type="function", function=tool_spec)
|
return ChatCompletionToolParam(type="function", function=tool_spec)
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_tool() -> ChatCompletionToolParam:
    """Build the function-tool definition that exposes ``fetch_url``.

    The tool takes a required string ``url`` and an optional integer
    ``max_chars`` whose advertised default is 4000.
    """
    # JSON-schema description of the arguments accepted by the tool.
    argument_schema = {
        "type": "object",
        "properties": {
            "url": {"type": "string"},
            "max_chars": {"type": "integer", "default": 4000},
        },
        "required": ["url"],
    }
    return ChatCompletionToolParam(
        type="function",
        function=FunctionDefinition(
            name="fetch_url",
            parameters=argument_schema,
            description="Fetch a URL and return cleaned text content.",
        ),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_html(text: str) -> str:
|
||||||
|
# Cheap HTML-to-text for summarization; avoids extra deps.
|
||||||
|
text = re.sub(r"(?s)<script.*?>.*?</script>", " ", text)
|
||||||
|
text = re.sub(r"(?s)<style.*?>.*?</style>", " ", text)
|
||||||
|
text = re.sub(r"<[^>]+>", " ", text)
|
||||||
|
text = re.sub(r"\\s+", " ", text)
|
||||||
|
return unescape(text).strip()
|
||||||
|
|
||||||
|
|
||||||
|
async def _run_fetch(hass: HomeAssistant, url: str, max_chars: int) -> dict[str, Any]:
    """Download ``url`` and return its body as cleaned, optionally truncated text.

    Args:
        hass: Instance passed to ``async_get_clientsession`` to obtain the
            HTTP session.
        url: Address to request with GET.
        max_chars: Maximum length of the returned text; a value of zero or
            less disables truncation.

    Returns:
        A dict with the requested ``url``, the response's ``content_type``
        header (empty string when absent) and the cleaned ``text``.

    Raises:
        Whatever the session, ``raise_for_status()`` or body decoding raise;
        nothing is caught here.
    """
    http = async_get_clientsession(hass)
    # timeout=20 is handed straight to the session; presumably seconds,
    # confirm against the HTTP client's documentation.
    async with http.get(url, timeout=20) as response:
        response.raise_for_status()
        mime = response.headers.get("content-type", "")
        body = await response.text()
    plain = _strip_html(body)
    return {
        "url": url,
        "content_type": mime,
        "text": plain[:max_chars] if max_chars > 0 else plain,
    }
|
||||||
|
|
||||||
async def _run_searxng(
|
async def _run_searxng(
|
||||||
hass: HomeAssistant,
|
hass: HomeAssistant,
|
||||||
options: dict[str, Any],
|
options: dict[str, Any],
|
||||||
|
|
@ -258,6 +296,9 @@ class GroqdConversationEntity(
|
||||||
if tools is None:
|
if tools is None:
|
||||||
tools = []
|
tools = []
|
||||||
tools.append(_searxng_tool())
|
tools.append(_searxng_tool())
|
||||||
|
if tools is None:
|
||||||
|
tools = []
|
||||||
|
tools.append(_fetch_tool())
|
||||||
|
|
||||||
memory_scope = options.get(CONF_MEMORY_SCOPE, DEFAULT_MEMORY_SCOPE)
|
memory_scope = options.get(CONF_MEMORY_SCOPE, DEFAULT_MEMORY_SCOPE)
|
||||||
memory_key = None
|
memory_key = None
|
||||||
|
|
@ -448,6 +489,17 @@ class GroqdConversationEntity(
|
||||||
tool_response = await _run_searxng(self.hass, options, tool_args)
|
tool_response = await _run_searxng(self.hass, options, tool_args)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
tool_response = {"error": type(err).__name__, "error_text": str(err)}
|
tool_response = {"error": type(err).__name__, "error_text": str(err)}
|
||||||
|
elif tool_name == "fetch_url":
|
||||||
|
url = tool_args.get("url", "")
|
||||||
|
max_chars = tool_args.get("max_chars", 4000)
|
||||||
|
try:
|
||||||
|
max_chars = int(max_chars)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
max_chars = 4000
|
||||||
|
try:
|
||||||
|
tool_response = await _run_fetch(self.hass, url, max_chars)
|
||||||
|
except Exception as err:
|
||||||
|
tool_response = {"error": type(err).__name__, "error_text": str(err)}
|
||||||
elif llm_api:
|
elif llm_api:
|
||||||
tool_input = llm.ToolInput(
|
tool_input = llm.ToolInput(
|
||||||
tool_name=tool_name,
|
tool_name=tool_name,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue