feat: add summary and search commands (#54)

* feat: add summary and search commands

Signed-off-by: Frost Ming <me@frostming.com>

* fix formats

Signed-off-by: Frost Ming <me@frostming.com>

* fix: clean up

Signed-off-by: Frost Ming <me@frostming.com>
This commit is contained in:
Frost Ming
2025-07-08 11:41:57 +08:00
committed by GitHub
parent e91862a548
commit 0b60ae2fbe
31 changed files with 1279 additions and 2588 deletions

View File

@ -1,8 +1,8 @@
Google_Gemini_API_Key="your_gemini_api_key"
Telegram_Bot_Token="your_telegram_bot_token"
Anthropic_API_Key="your_anthropic_api_key"
Openai_API_Key="your_openai_api_key"
Yi_API_Key="your_yi_api_key"
Yi_Base_Url="your_yi_base_url"
Python_Bin_Path=""
Python_Venv_Path="venv"
GOOGLE_GEMINI_API_KEY="your_gemini_api_key"
TELEGRAM_BOT_TOKEN="your_telegram_bot_token"
ANTHROPIC_API_KEY="your_anthropic_api_key"
OPENAI_API_KEY="your_openai_api_key"
YI_API_KEY="your_yi_api_key"
YI_BASE_URL="your_yi_base_url"
PYTHON_BIN_PATH=""
PYTHON_VENV_PATH="venv"

2
.gitignore vendored
View File

@ -170,3 +170,5 @@ nohup.out
.pdm-python
*.wav
token_key.json
messages.db
*.session

35
config.py Normal file
View File

@ -0,0 +1,35 @@
from functools import cached_property
import openai
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Bot configuration read from the environment and the ``.env`` file."""

    model_config = SettingsConfigDict(env_file=".env")

    # Telegram credentials: the only field declared without a default.
    telegram_bot_token: str
    timezone: str = "Asia/Shanghai"

    # OpenAI-compatible endpoint used by the GPT handlers.
    openai_api_key: str | None = None
    openai_model: str = "gpt-4o-mini"
    openai_base_url: str = "https://api.openai.com/v1"

    # Optional provider keys; they stay None when not configured.
    google_gemini_api_key: str | None = None
    anthropic_api_key: str | None = None
    telegra_ph_token: str | None = None

    @cached_property
    def openai_client(self) -> openai.OpenAI:
        """Return an OpenAI client built once from the configured key and base URL."""
        client_options = {
            "api_key": self.openai_api_key,
            "base_url": self.openai_base_url,
        }
        return openai.OpenAI(**client_options)

    @cached_property
    def telegraph_client(self):
        """Return a ``TelegraphAPI`` client built once from ``telegra_ph_token``."""
        # Imported here rather than at module level, so the handlers package
        # is only loaded when the client is first requested.
        from handlers._telegraph import TelegraphAPI

        token = self.telegra_ph_token
        return TelegraphAPI(token)
settings = Settings() # type: ignore

View File

@ -1,173 +1,22 @@
from __future__ import annotations
import base64
import importlib
import re
import traceback
from functools import update_wrapper
from mimetypes import guess_type
from pathlib import Path
from typing import Any, Callable, TypeVar
import requests
from telebot import TeleBot
from telebot.types import BotCommand, Message
from telebot.util import smart_split
import telegramify_markdown
from telegramify_markdown.customize import markdown_symbol
from urlextract import URLExtract
from expiringdict import ExpiringDict
from telebot.types import BotCommand
markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
T = TypeVar("T", bound=Callable)
from ._utils import logger, wrap_handler
DEFAULT_LOAD_PRIORITY = 10
BOT_MESSAGE_LENGTH = 4000
REPLY_MESSAGE_CACHE = ExpiringDict(max_len=1000, max_age_seconds=600)
def bot_reply_first(message: Message, who: str, bot: TeleBot) -> Message:
"""Create the first reply message which make user feel the bot is working."""
return bot.reply_to(
message, f"*{who}* is _thinking_ \.\.\.", parse_mode="MarkdownV2"
)
def bot_reply_markdown(
reply_id: Message,
who: str,
text: str,
bot: TeleBot,
split_text: bool = True,
disable_web_page_preview: bool = False,
) -> bool:
"""
reply the Markdown by take care of the message length.
it will fallback to plain text in case of any failure
"""
try:
cache_key = f"{reply_id.chat.id}_{reply_id.message_id}"
if cache_key in REPLY_MESSAGE_CACHE and REPLY_MESSAGE_CACHE[cache_key] == text:
print(f"Skipping duplicate message for {cache_key}")
return True
REPLY_MESSAGE_CACHE[cache_key] = text
if len(text.encode("utf-8")) <= BOT_MESSAGE_LENGTH or not split_text:
bot.edit_message_text(
f"*{who}*:\n{telegramify_markdown.convert(text)}",
chat_id=reply_id.chat.id,
message_id=reply_id.message_id,
parse_mode="MarkdownV2",
disable_web_page_preview=disable_web_page_preview,
)
return True
# Need a split of message
msgs = smart_split(text, BOT_MESSAGE_LENGTH)
bot.edit_message_text(
f"*{who}* \[1/{len(msgs)}\]:\n{telegramify_markdown.convert(msgs[0])}",
chat_id=reply_id.chat.id,
message_id=reply_id.message_id,
parse_mode="MarkdownV2",
disable_web_page_preview=disable_web_page_preview,
)
for i in range(1, len(msgs)):
bot.reply_to(
reply_id.reply_to_message,
f"*{who}* \[{i+1}/{len(msgs)}\]:\n{telegramify_markdown.convert(msgs[i])}",
parse_mode="MarkdownV2",
)
return True
except Exception as e:
print(traceback.format_exc())
# print(f"wrong markdown format: {text}")
bot.edit_message_text(
f"*{who}*:\n{text}",
chat_id=reply_id.chat.id,
message_id=reply_id.message_id,
disable_web_page_preview=disable_web_page_preview,
)
return False
def extract_prompt(message: str, bot_name: str) -> str:
"""
This function filters messages for prompts.
Returns:
str: If it is not a prompt, return None. Otherwise, return the trimmed prefix of the actual prompt.
"""
# remove '@bot_name' as it is considered part of the command when in a group chat.
message = re.sub(re.escape(f"@{bot_name}"), "", message).strip()
# add a whitespace after the first colon as we separate the prompt from the command by the first whitespace.
message = re.sub(":", ": ", message, count=1).strip()
try:
left, message = message.split(maxsplit=1)
except ValueError:
return ""
if ":" not in left:
# the replacement happens in the right part, restore it.
message = message.replace(": ", ":", 1)
return message.strip()
def remove_prompt_prefix(message: str) -> str:
"""
Remove "/cmd" or "/cmd@bot_name" or "cmd:"
"""
message += " "
# Explanation of the regex pattern:
# ^ - Match the start of the string
# ( - Start of the group
# / - Literal forward slash
# [a-zA-Z] - Any letter (start of the command)
# [a-zA-Z0-9_]* - Any number of letters, digits, or underscores
# (@\w+)? - Optionally match @ followed by one or more word characters (for bot name)
# \s - A single whitespace character (space or newline)
# | - OR
# [a-zA-Z] - Any letter (start of the command)
# [a-zA-Z0-9_]* - Any number of letters, digits, or underscores
# :\s - Colon followed by a single whitespace character
# ) - End of the group
pattern = r"^(/[a-zA-Z][a-zA-Z0-9_]*(@\w+)?\s|[a-zA-Z][a-zA-Z0-9_]*:\s)"
return re.sub(pattern, "", message).strip()
def wrap_handler(handler: T, bot: TeleBot) -> T:
def wrapper(message: Message, *args: Any, **kwargs: Any) -> None:
try:
m = ""
if message.text and message.text.find("answer_it") != -1:
# for answer_it no args
return handler(message, *args, **kwargs)
elif message.text is not None:
m = message.text = extract_prompt(message.text, bot.get_me().username)
elif message.caption is not None:
m = message.caption = extract_prompt(
message.caption, bot.get_me().username
)
elif message.location and message.location.latitude is not None:
# for location map handler just return
return handler(message, *args, **kwargs)
if not m:
bot.reply_to(message, "Please provide info after start words.")
return
return handler(message, *args, **kwargs)
except Exception as e:
traceback.print_exc()
# handle more here
if str(e).find("RECITATION") > 0:
bot.reply_to(message, "Your prompt `RECITATION` please check the log")
else:
bot.reply_to(message, "Something wrong, please check the log")
return update_wrapper(wrapper, handler)
def list_available_commands() -> list[str]:
    """Return the stem of every entry beside this module whose name does not start with ``_``."""
    handlers_dir = Path(__file__).parent
    return [
        entry.stem
        for entry in handlers_dir.iterdir()
        if not entry.name.startswith("_")
    ]
def load_handlers(bot: TeleBot, disable_commands: list[str]) -> None:
@ -183,16 +32,13 @@ def load_handlers(bot: TeleBot, disable_commands: list[str]) -> None:
modules_with_priority.sort(key=lambda x: x[-1])
for module, name, priority in modules_with_priority:
if hasattr(module, "register"):
print(f"Loading {name} handlers with priority {priority}.")
logger.debug(f"Loading {name} handlers with priority {priority}.")
module.register(bot)
print("Loading handlers done.")
logger.info("Loading handlers done.")
all_commands: list[BotCommand] = []
for handler in bot.message_handlers:
help_text = getattr(handler["function"], "__doc__", "")
# tricky ignore the latest_handle_messages
if help_text and help_text == "ignore":
continue
# Add pre-processing and error handling to all callbacks
handler["function"] = wrap_handler(handler["function"], bot)
for command in handler["filters"].get("commands", []):
@ -200,309 +46,4 @@ def load_handlers(bot: TeleBot, disable_commands: list[str]) -> None:
if all_commands:
bot.set_my_commands(all_commands)
print("Setting commands done.")
def list_available_commands() -> list[str]:
commands = []
this_path = Path(__file__).parent
for child in this_path.iterdir():
if child.name.startswith("_"):
continue
commands.append(child.stem)
return commands
def extract_url_from_text(text: str) -> list[str]:
extractor = URLExtract()
urls = extractor.find_urls(text)
return urls
def get_text_from_jina_reader(url: str):
try:
r = requests.get(f"https://r.jina.ai/{url}")
return r.text
except Exception as e:
print(e)
return None
def enrich_text_with_urls(text: str) -> str:
urls = extract_url_from_text(text)
for u in urls:
try:
url_text = get_text_from_jina_reader(u)
url_text = f"\n```markdown\n{url_text}\n```\n"
text = text.replace(u, url_text)
except Exception as e:
# just ignore the error
pass
return text
def image_to_data_uri(file_path):
content_type = guess_type(file_path)[0]
with open(file_path, "rb") as image_file:
encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
return f"data:{content_type};base64,{encoded_image}"
import json
import requests
import os
from bs4 import BeautifulSoup
import markdown
class TelegraphAPI:
def __init__(
self,
access_token=None,
short_name="tg_bot_collections",
author_name="Telegram Bot Collections",
author_url=None,
):
self.access_token = (
access_token
if access_token
else self._create_ph_account(short_name, author_name, author_url)
)
self.base_url = "https://api.telegra.ph"
# Get account info on initialization
account_info = self.get_account_info()
self.short_name = account_info.get("short_name")
self.author_name = account_info.get("author_name")
self.author_url = account_info.get("author_url")
def _create_ph_account(self, short_name, author_name, author_url):
Store_Token = False
TELEGRAPH_API_URL = "https://api.telegra.ph/createAccount"
TOKEN_FILE = "token_key.json"
# Try to load existing token information
try:
with open(TOKEN_FILE, "r") as f:
tokens = json.load(f)
if "TELEGRA_PH_TOKEN" in tokens and tokens["TELEGRA_PH_TOKEN"] != "example":
return tokens["TELEGRA_PH_TOKEN"]
except FileNotFoundError:
tokens = {}
# If no existing valid token in TOKEN_FILE, create a new account
data = {
"short_name": short_name,
"author_name": author_name,
"author_url": author_url,
}
# Make API request
response = requests.post(TELEGRAPH_API_URL, data=data)
response.raise_for_status()
account = response.json()
access_token = account["result"]["access_token"]
# Update the token in the dictionary
tokens["TELEGRA_PH_TOKEN"] = access_token
# Store the updated tokens
if Store_Token:
with open(TOKEN_FILE, "w") as f:
json.dump(tokens, f, indent=4)
else:
print(f"Token not stored to file, but here is your token:\n{access_token}")
# Store it to the environment variable
os.environ["TELEGRA_PH_TOKEN"] = access_token
return access_token
def create_page(
self, title, content, author_name=None, author_url=None, return_content=False
):
url = f"{self.base_url}/createPage"
data = {
"access_token": self.access_token,
"title": title,
"content": json.dumps(content),
"return_content": return_content,
"author_name": author_name if author_name else self.author_name,
"author_url": author_url if author_url else self.author_url,
}
# Max 65,536 characters/64KB.
if len(json.dumps(content)) > 65536:
content = content[:64000]
data["content"] = json.dumps(content)
try:
response = requests.post(url, data=data)
response.raise_for_status()
response = response.json()
page_url = response["result"]["url"]
return page_url
except:
return "https://telegra.ph/api"
def get_account_info(self):
url = f'{self.base_url}/getAccountInfo?access_token={self.access_token}&fields=["short_name","author_name","author_url","auth_url"]'
response = requests.get(url)
if response.status_code == 200:
return response.json()["result"]
else:
print(f"Fail getting telegra.ph token info: {response.status_code}")
return None
def edit_page(
self,
path,
title,
content,
author_name=None,
author_url=None,
return_content=False,
):
url = f"{self.base_url}/editPage"
data = {
"access_token": self.access_token,
"path": path,
"title": title,
"content": json.dumps(content),
"return_content": return_content,
"author_name": author_name if author_name else self.author_name,
"author_url": author_url if author_url else self.author_url,
}
response = requests.post(url, data=data)
response.raise_for_status()
response = response.json()
page_url = response["result"]["url"]
return page_url
def get_page(self, path):
url = f"{self.base_url}/getPage/{path}?return_content=true"
response = requests.get(url)
response.raise_for_status()
return response.json()["result"]["content"]
def create_page_md(
self,
title,
markdown_text,
author_name=None,
author_url=None,
return_content=False,
):
content = self._md_to_dom(markdown_text)
return self.create_page(title, content, author_name, author_url, return_content)
def edit_page_md(
self,
path,
title,
markdown_text,
author_name=None,
author_url=None,
return_content=False,
):
content = self._md_to_dom(markdown_text)
return self.edit_page(
path, title, content, author_name, author_url, return_content
)
def authorize_browser(self):
url = f'{self.base_url}/getAccountInfo?access_token={self.access_token}&fields=["auth_url"]'
response = requests.get(url)
response.raise_for_status()
return response.json()["result"]["auth_url"]
def _md_to_dom(self, markdown_text):
html = markdown.markdown(
markdown_text,
extensions=["markdown.extensions.extra", "markdown.extensions.sane_lists"],
)
soup = BeautifulSoup(html, "html.parser")
def parse_element(element):
tag_dict = {"tag": element.name}
if element.name in ["h1", "h2", "h3", "h4", "h5", "h6"]:
if element.name == "h1":
tag_dict["tag"] = "h3"
elif element.name == "h2":
tag_dict["tag"] = "h4"
else:
tag_dict["tag"] = "p"
tag_dict["children"] = [
{"tag": "strong", "children": element.contents}
]
if element.attrs:
tag_dict["attrs"] = element.attrs
if element.contents:
children = []
for child in element.contents:
if isinstance(child, str):
children.append(child.strip())
else:
children.append(parse_element(child))
tag_dict["children"] = children
else:
if element.attrs:
tag_dict["attrs"] = element.attrs
if element.contents:
children = []
for child in element.contents:
if isinstance(child, str):
children.append(child.strip())
else:
children.append(parse_element(child))
if children:
tag_dict["children"] = children
return tag_dict
new_dom = []
for element in soup.contents:
if isinstance(element, str) and not element.strip():
continue
elif isinstance(element, str):
new_dom.append({"tag": "text", "content": element.strip()})
else:
new_dom.append(parse_element(element))
return new_dom
def upload_image(self, file_name: str) -> str:
base_url = "https://telegra.ph"
upload_url = f"{base_url}/upload"
try:
content_type = guess_type(file_name)[0]
with open(file_name, "rb") as f:
response = requests.post(
upload_url, files={"file": ("blob", f, content_type)}
)
response.raise_for_status()
# [{'src': '/file/xx.jpg'}]
response = response.json()
image_url = f"{base_url}{response[0]['src']}"
return image_url
except Exception as e:
print(f"upload image: {e}")
return "https://telegra.ph/api"
# `import *` will give you these
__all__ = [
"bot_reply_first",
"bot_reply_markdown",
"remove_prompt_prefix",
"enrich_text_with_urls",
"image_to_data_uri",
"TelegraphAPI",
]
logger.info("Setting commands done.")

252
handlers/_telegraph.py Normal file
View File

@ -0,0 +1,252 @@
import json
import os
from mimetypes import guess_type
import markdown
import requests
from bs4 import BeautifulSoup
from ._utils import logger
class TelegraphAPI:
    """Small client for the telegra.ph HTTP API.

    An account is created on the fly when no access token is supplied. The
    account's short name, author name and author URL are fetched once at
    construction time and used as defaults when publishing pages.
    """

    # Upper bound on the JSON-serialized page content (64 KB).
    MAX_CONTENT_SIZE = 65536
    # Seconds to wait on a single HTTP call, so a stalled connection cannot
    # block the calling handler forever.
    REQUEST_TIMEOUT = 30
    # Returned in place of a real link when publishing or uploading fails.
    FALLBACK_URL = "https://telegra.ph/api"

    def __init__(
        self,
        access_token=None,
        short_name="tg_bot_collections",
        author_name="Telegram Bot Collections",
        author_url=None,
    ):
        """Store the token (creating an account if needed) and load account defaults."""
        self.access_token = (
            access_token
            if access_token
            else self._create_ph_account(short_name, author_name, author_url)
        )
        self.base_url = "https://api.telegra.ph"

        # get_account_info() returns None on failure; fall back to an empty
        # mapping so construction does not die with AttributeError.
        account_info = self.get_account_info() or {}
        self.short_name = account_info.get("short_name")
        self.author_name = account_info.get("author_name")
        self.author_url = account_info.get("author_url")

    def _create_ph_account(self, short_name, author_name, author_url):
        """Return a usable access token, creating a new account when none is cached.

        A token found in ``token_key.json`` is reused unless it is the
        placeholder value ``"example"``. Otherwise a new account is requested
        and its token is exported through the ``TELEGRA_PH_TOKEN``
        environment variable.

        Raises:
            requests.exceptions.RequestException: if account creation fails.
        """
        store_token = False
        create_account_url = "https://api.telegra.ph/createAccount"
        token_file = "token_key.json"

        # Try to load existing token information; a missing or unreadable
        # file simply means there is nothing cached.
        try:
            with open(token_file, "r") as f:
                tokens = json.load(f)
            if "TELEGRA_PH_TOKEN" in tokens and tokens["TELEGRA_PH_TOKEN"] != "example":
                return tokens["TELEGRA_PH_TOKEN"]
        except (FileNotFoundError, json.JSONDecodeError):
            tokens = {}

        # No valid cached token: create a new account.
        data = {
            "short_name": short_name,
            "author_name": author_name,
            "author_url": author_url,
        }
        response = requests.post(
            create_account_url, data=data, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        access_token = response.json()["result"]["access_token"]

        tokens["TELEGRA_PH_TOKEN"] = access_token

        if store_token:
            with open(token_file, "w") as f:
                json.dump(tokens, f, indent=4)
        else:
            logger.info(
                f"Token not stored to file, but here is your token:\n{access_token}"
            )

        # Store it to the environment variable
        os.environ["TELEGRA_PH_TOKEN"] = access_token

        return access_token

    @staticmethod
    def _fit_content(content, limit):
        """Return the longest prefix of ``content`` whose JSON form fits in ``limit``.

        ``content`` is returned unchanged when it already fits. The serialized
        size never shrinks as the prefix grows, so a binary search finds the
        cut-off without re-serializing every candidate.
        """
        if len(json.dumps(content)) <= limit:
            return content
        low, high = 0, len(content)
        while low < high:
            middle = (low + high + 1) // 2
            if len(json.dumps(content[:middle])) <= limit:
                low = middle
            else:
                high = middle - 1
        return content[:low]

    def create_page(
        self, title, content, author_name=None, author_url=None, return_content=False
    ):
        """Publish a page and return its URL, or ``FALLBACK_URL`` on any failure.

        Content larger than ``MAX_CONTENT_SIZE`` once serialized is cut down
        by dropping trailing nodes.
        """
        url = f"{self.base_url}/createPage"
        content = self._fit_content(content, self.MAX_CONTENT_SIZE)
        data = {
            "access_token": self.access_token,
            "title": title,
            "content": json.dumps(content),
            "return_content": return_content,
            "author_name": author_name if author_name else self.author_name,
            "author_url": author_url if author_url else self.author_url,
        }

        try:
            response = requests.post(url, data=data, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            return response.json()["result"]["url"]
        except (requests.exceptions.RequestException, KeyError, ValueError):
            # KeyError covers replies that carry no "result" (API-level errors).
            logger.exception("Failed to create telegra.ph page")
            return self.FALLBACK_URL

    def get_account_info(self):
        """Return the account info mapping, or ``None`` when it cannot be fetched."""
        response = requests.get(
            f"{self.base_url}/getAccountInfo",
            # Let requests encode the query so the token and the JSON list
            # are always escaped correctly.
            params={
                "access_token": self.access_token,
                "fields": json.dumps(
                    ["short_name", "author_name", "author_url", "auth_url"],
                    separators=(",", ":"),
                ),
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code == 200:
            result = response.json().get("result")
            if result is not None:
                return result
        logger.info(f"Fail getting telegra.ph token info: {response.status_code}")
        return None

    def edit_page(
        self,
        path,
        title,
        content,
        author_name=None,
        author_url=None,
        return_content=False,
    ):
        """Replace the page at ``path`` and return its URL.

        Raises:
            requests.exceptions.RequestException: on transport or HTTP errors.
            KeyError: if the reply carries no ``result``.
        """
        url = f"{self.base_url}/editPage"
        data = {
            "access_token": self.access_token,
            "path": path,
            "title": title,
            "content": json.dumps(content),
            "return_content": return_content,
            "author_name": author_name if author_name else self.author_name,
            "author_url": author_url if author_url else self.author_url,
        }

        response = requests.post(url, data=data, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()["result"]["url"]

    def get_page(self, path):
        """Return the content nodes of the page at ``path``."""
        response = requests.get(
            f"{self.base_url}/getPage/{path}",
            params={"return_content": "true"},
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()["result"]["content"]

    def create_page_md(
        self,
        title,
        markdown_text,
        author_name=None,
        author_url=None,
        return_content=False,
    ):
        """Convert Markdown to nodes and publish it via ``create_page``."""
        content = self._md_to_dom(markdown_text)
        return self.create_page(title, content, author_name, author_url, return_content)

    def edit_page_md(
        self,
        path,
        title,
        markdown_text,
        author_name=None,
        author_url=None,
        return_content=False,
    ):
        """Convert Markdown to nodes and update the page via ``edit_page``."""
        content = self._md_to_dom(markdown_text)
        return self.edit_page(
            path, title, content, author_name, author_url, return_content
        )

    def authorize_browser(self):
        """Return the ``auth_url`` reported for this account."""
        response = requests.get(
            f"{self.base_url}/getAccountInfo",
            params={
                "access_token": self.access_token,
                "fields": json.dumps(["auth_url"]),
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()["result"]["auth_url"]

    def _md_to_dom(self, markdown_text):
        """Render Markdown to HTML and convert it into a list of page nodes.

        ``h1`` and ``h2`` become ``h3`` and ``h4``; deeper headings become a
        paragraph whose children are wrapped in ``strong``.
        """
        html = markdown.markdown(
            markdown_text,
            extensions=["markdown.extensions.extra", "markdown.extensions.sane_lists"],
        )
        soup = BeautifulSoup(html, "html.parser")

        heading_tags = {"h1": "h3", "h2": "h4"}
        demoted_headings = ("h3", "h4", "h5", "h6")

        def parse_children(element):
            # NOTE(review): stripping every text child also drops the spaces
            # around inline elements; kept as-is to preserve current output.
            return [
                child.strip() if isinstance(child, str) else parse_element(child)
                for child in element.contents
            ]

        def parse_element(element):
            tag_dict = {"tag": heading_tags.get(element.name, element.name)}
            if element.attrs:
                tag_dict["attrs"] = element.attrs

            children = parse_children(element)
            if element.name in demoted_headings:
                tag_dict["tag"] = "p"
                # Wrap the converted children, never the raw soup objects,
                # which cannot be JSON-serialized.
                children = [{"tag": "strong", "children": children}]
            if children:
                tag_dict["children"] = children
            return tag_dict

        new_dom = []
        for element in soup.contents:
            if isinstance(element, str):
                stripped = element.strip()
                if stripped:
                    # A text node is a plain string, not a tagged element.
                    new_dom.append(stripped)
            else:
                new_dom.append(parse_element(element))

        return new_dom

    def upload_image(self, file_name: str) -> str:
        """Upload an image file and return its URL, or ``FALLBACK_URL`` on failure."""
        base_url = "https://telegra.ph"
        upload_url = f"{base_url}/upload"

        try:
            content_type = guess_type(file_name)[0]
            with open(file_name, "rb") as f:
                response = requests.post(
                    upload_url,
                    files={"file": ("blob", f, content_type)},
                    timeout=self.REQUEST_TIMEOUT,
                )
            response.raise_for_status()
            # [{'src': '/file/xx.jpg'}]
            payload = response.json()
            return f"{base_url}{payload[0]['src']}"
        except Exception as e:
            logger.info(f"upload image: {e}")
            return self.FALLBACK_URL

209
handlers/_utils.py Normal file
View File

@ -0,0 +1,209 @@
from __future__ import annotations
import base64
import logging
import re
from functools import update_wrapper
from mimetypes import guess_type
from typing import Any, Callable, TypeVar
import requests
import telegramify_markdown
from expiringdict import ExpiringDict
from telebot import TeleBot
from telebot.types import Message
from telebot.util import smart_split
from telegramify_markdown.customize import markdown_symbol
from urlextract import URLExtract
markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
T = TypeVar("T", bound=Callable)
logger = logging.getLogger("bot")
BOT_MESSAGE_LENGTH = 4000
REPLY_MESSAGE_CACHE = ExpiringDict(max_len=1000, max_age_seconds=600)
def bot_reply_first(message: Message, who: str, bot: TeleBot) -> Message:
    """Send the placeholder reply that shows the user the bot is working.

    Returns whatever ``bot.reply_to`` returns, so later calls can edit it.
    """
    # The dots are escaped for MarkdownV2. The backslashes are doubled so the
    # literal holds no invalid escape sequences; the text sent is unchanged.
    return bot.reply_to(
        message, f"*{who}* is _thinking_ \\.\\.\\.", parse_mode="MarkdownV2"
    )
def bot_reply_markdown(
    reply_id: Message,
    who: str,
    text: str,
    bot: TeleBot,
    split_text: bool = True,
    disable_web_page_preview: bool = False,
) -> bool:
    """
    Write ``text`` into the placeholder message as MarkdownV2.

    Text over ``BOT_MESSAGE_LENGTH`` is split (unless ``split_text`` is
    false): the first chunk replaces the placeholder and the rest are sent as
    extra replies. On any failure the placeholder is overwritten with the raw
    text and no parse mode.

    Returns:
        bool: True when the Markdown was delivered (or skipped as an exact
        duplicate of the last text for this message), False after the
        plain-text fallback.
    """
    try:
        cache_key = f"{reply_id.chat.id}_{reply_id.message_id}"
        if cache_key in REPLY_MESSAGE_CACHE and REPLY_MESSAGE_CACHE[cache_key] == text:
            logger.info(f"Skipping duplicate message for {cache_key}")
            return True
        REPLY_MESSAGE_CACHE[cache_key] = text

        if len(text.encode("utf-8")) <= BOT_MESSAGE_LENGTH or not split_text:
            bot.edit_message_text(
                f"*{who}*:\n{telegramify_markdown.convert(text)}",
                chat_id=reply_id.chat.id,
                message_id=reply_id.message_id,
                parse_mode="MarkdownV2",
                disable_web_page_preview=disable_web_page_preview,
            )
            return True

        # Need a split of message
        msgs = smart_split(text, BOT_MESSAGE_LENGTH)
        total = len(msgs)
        bot.edit_message_text(
            f"*{who}* \\[1/{total}\\]:\n{telegramify_markdown.convert(msgs[0])}",
            chat_id=reply_id.chat.id,
            message_id=reply_id.message_id,
            parse_mode="MarkdownV2",
            disable_web_page_preview=disable_web_page_preview,
        )
        # The placeholder may carry no reply_to_message; answer the
        # placeholder itself in that case rather than failing on None.
        reply_target = reply_id.reply_to_message or reply_id
        for index, chunk in enumerate(msgs[1:], start=2):
            bot.reply_to(
                reply_target,
                f"*{who}* \\[{index}/{total}\\]:\n{telegramify_markdown.convert(chunk)}",
                parse_mode="MarkdownV2",
            )
        return True
    except Exception:
        logger.exception("Error in bot_reply_markdown")
        # Fall back to the unconverted text, sent without a parse mode.
        bot.edit_message_text(
            f"*{who}*:\n{text}",
            chat_id=reply_id.chat.id,
            message_id=reply_id.message_id,
            disable_web_page_preview=disable_web_page_preview,
        )
        return False
def extract_prompt(message: str, bot_name: str) -> str:
    """
    Strip the leading command word from a message and return the prompt.

    Both ``/cmd prompt`` and ``cmd:prompt`` forms are accepted, and any
    ``@bot_name`` mention is dropped first.

    Returns:
        str: The trimmed prompt, or an empty string when nothing follows the
        command word.
    """
    # '@bot_name' counts as part of the command in group chats, so drop it.
    text = message.replace(f"@{bot_name}", "").strip()
    # Put a space after the first colon so "cmd:prompt" splits on whitespace.
    text = text.replace(":", ": ", 1).strip()

    pieces = text.split(maxsplit=1)
    if len(pieces) < 2:
        return ""

    command, prompt = pieces
    if ":" not in command:
        # The inserted space landed inside the prompt itself; undo it.
        prompt = prompt.replace(": ", ":", 1)
    return prompt.strip()
def remove_prompt_prefix(message: str) -> str:
    """
    Remove a leading "/cmd", "/cmd@bot_name" or "cmd:" and return the rest, trimmed.
    """
    # A trailing space lets a bare command (nothing after it) still match,
    # because the pattern requires one whitespace character after the prefix.
    padded = message + " "
    # Pattern, anchored at the start of the string:
    #   /[a-zA-Z][a-zA-Z0-9_]*   slash command name
    #   (@\w+)?                  optional @bot_name
    #   \s                       one whitespace character
    # or
    #   [a-zA-Z][a-zA-Z0-9_]*:   command name followed by a colon
    #   \s                       one whitespace character
    pattern = r"^(/[a-zA-Z][a-zA-Z0-9_]*(@\w+)?\s|[a-zA-Z][a-zA-Z0-9_]*:\s)"
    prefix = re.match(pattern, padded)
    remainder = padded[prefix.end():] if prefix else padded
    return remainder.strip()
def non_llm_handler(handler: T) -> T:
    """Flag ``handler`` so ``wrap_handler`` skips prompt extraction for it.

    The same callable is returned, so this can be used as a decorator.
    """
    setattr(handler, "__is_llm_handler__", False)
    return handler
def wrap_handler(handler: T, bot: TeleBot) -> T:
    """Wrap ``handler`` with prompt extraction and error reporting.

    For handlers not marked by ``non_llm_handler``, the command prefix is
    removed from ``message.text`` (or ``message.caption``) before the call.
    If nothing remains, the user is asked for input and the handler is not
    called. Location messages are passed through untouched. Any exception
    from the handler is logged and reported to the user, not propagated.
    """

    def wrapper(message: Message, *args: Any, **kwargs: Any) -> None:
        try:
            if getattr(handler, "__is_llm_handler__", True):
                prompt = ""

                if message.text is not None:
                    prompt = message.text = extract_prompt(
                        message.text, bot.get_me().username
                    )
                elif message.caption is not None:
                    prompt = message.caption = extract_prompt(
                        message.caption, bot.get_me().username
                    )
                elif message.location and message.location.latitude is not None:
                    # for location map handler just return
                    return handler(message, *args, **kwargs)
                if not prompt:
                    bot.reply_to(message, "Please provide info after start words.")
                    return
            return handler(message, *args, **kwargs)
        except Exception as e:
            logger.exception(
                "Error in handler %s: %s", getattr(handler, "__name__", handler), e
            )
            # Membership test, not find() > 0, so a message that starts with
            # "RECITATION" is recognised as well.
            if "RECITATION" in str(e):
                bot.reply_to(message, "Your prompt `RECITATION` please check the log")
            else:
                bot.reply_to(message, "Something wrong, please check the log")

    return update_wrapper(wrapper, handler)
def extract_url_from_text(text: str) -> list[str]:
    """Return the URLs that ``URLExtract`` finds in ``text``."""
    return URLExtract().find_urls(text)
def get_text_from_jina_reader(url: str, timeout: float = 30) -> str | None:
    """Fetch the text of ``url`` through the r.jina.ai reader endpoint.

    Args:
        url: The page to read.
        timeout: Seconds to wait for the reader before giving up. Without a
            timeout a stalled connection would block the handler forever.

    Returns:
        The response body, or None when the request fails.
    """
    try:
        r = requests.get(f"https://r.jina.ai/{url}", timeout=timeout)
        return r.text
    except Exception as e:
        logger.exception("Error fetching text from Jina reader: %s", e)
        return None
def enrich_text_with_urls(text: str) -> str:
    """Replace each URL in ``text`` with a Markdown block holding its page text.

    Enrichment is best effort: a URL whose content cannot be fetched is left
    in place, not replaced by a block containing "None".
    """
    # dict.fromkeys drops repeated URLs but keeps first-seen order, so each
    # page is fetched only once.
    for u in dict.fromkeys(extract_url_from_text(text)):
        try:
            url_text = get_text_from_jina_reader(u)
            if url_text is None:
                continue
            text = text.replace(u, f"\n```markdown\n{url_text}\n```\n")
        except Exception:
            # Keep going with the remaining URLs, but leave a trace.
            logger.exception("Error enriching text with %s", u)
    return text
def image_to_data_uri(file_path):
    """Return the file at ``file_path`` encoded as a base64 ``data:`` URI.

    The MIME type is guessed from the file name. When it cannot be guessed,
    ``application/octet-stream`` is used so the URI never contains "None".

    Raises:
        OSError: if the file cannot be opened or read.
    """
    content_type = guess_type(file_path)[0] or "application/octet-stream"
    with open(file_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
    return f"data:{content_type};base64,{encoded_image}"

View File

@ -1,12 +1,12 @@
from os import environ
import time
from os import environ
from openai import OpenAI
import requests
from expiringdict import ExpiringDict
from openai import OpenAI
from telebot import TeleBot
from telebot.types import Message
from telegramify_markdown import convert
from expiringdict import ExpiringDict
from . import *
@ -197,7 +197,7 @@ def yi_photo_handler(message: Message, bot: TeleBot) -> None:
}
response = requests.post(
f"https://api.lingyiwanwu.com/v1/chat/completions",
"https://api.lingyiwanwu.com/v1/chat/completions",
headers=headers,
json=payload,
).json()

View File

@ -1,27 +1,29 @@
from os import environ
import time
from openai import OpenAI
from expiringdict import ExpiringDict
from telebot import TeleBot
from telebot.types import Message
from expiringdict import ExpiringDict
from rich import print
from . import *
from telegramify_markdown import convert
from telegramify_markdown.customize import markdown_symbol
from config import settings
from ._utils import (
bot_reply_first,
bot_reply_markdown,
enrich_text_with_urls,
image_to_data_uri,
logger,
)
markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
CHATGPT_API_KEY = environ.get("OPENAI_API_KEY")
CHATGPT_BASE_URL = environ.get("OPENAI_API_BASE") or "https://api.openai.com/v1"
CHATGPT_MODEL = "gpt-4o-mini-2024-07-18"
CHATGPT_PRO_MODEL = "gpt-4o-mini-2024-07-18"
CHATGPT_MODEL = settings.openai_model
CHATGPT_PRO_MODEL = settings.openai_model
client = OpenAI(api_key=CHATGPT_API_KEY, base_url=CHATGPT_BASE_URL)
client = settings.openai_client
# Global history cache
@ -31,7 +33,7 @@ chatgpt_pro_player_dict = ExpiringDict(max_len=1000, max_age_seconds=600)
def chatgpt_handler(message: Message, bot: TeleBot) -> None:
"""gpt : /gpt <question>"""
print(message)
logger.debug(message)
m = message.text.strip()
player_message = []
@ -81,8 +83,8 @@ def chatgpt_handler(message: Message, bot: TeleBot) -> None:
}
)
except Exception as e:
print(e)
except Exception:
logger.exception("ChatGPT handler error")
bot.reply_to(message, "answer wrong maybe up to the max token")
# pop my user
player_message.pop()
@ -134,7 +136,7 @@ def chatgpt_pro_handler(message: Message, bot: TeleBot) -> None:
s = ""
start = time.time()
for chunk in r:
print(chunk)
logger.debug(chunk)
if chunk.choices:
if chunk.choices[0].delta.content is None:
break
@ -145,7 +147,7 @@ def chatgpt_pro_handler(message: Message, bot: TeleBot) -> None:
# maybe not complete
try:
bot_reply_markdown(reply_id, who, s, bot, split_text=True)
except:
except Exception:
pass
player_message.append(
@ -155,8 +157,8 @@ def chatgpt_pro_handler(message: Message, bot: TeleBot) -> None:
}
)
except Exception as e:
print(e)
except Exception:
logger.exception("ChatGPT handler error")
# bot.reply_to(message, "answer wrong maybe up to the max token")
player_message.clear()
return
@ -205,15 +207,15 @@ def chatgpt_photo_handler(message: Message, bot: TeleBot) -> None:
# maybe not complete
try:
bot_reply_markdown(reply_id, who, s, bot)
except:
except Exception:
pass
except Exception as e:
print(e)
except Exception:
logger.exception("ChatGPT handler error")
bot.reply_to(message, "answer wrong maybe up to the max token")
if CHATGPT_API_KEY:
if settings.openai_api_key:
def register(bot: TeleBot) -> None:
bot.register_message_handler(chatgpt_handler, commands=["gpt"], pass_bot=True)

View File

@ -1,17 +1,16 @@
import time
from os import environ
from pathlib import Path
import time
from anthropic import Anthropic, APITimeoutError
from expiringdict import ExpiringDict
from telebot import TeleBot
from telebot.types import Message
from expiringdict import ExpiringDict
from . import *
from telegramify_markdown import convert
from telegramify_markdown.customize import markdown_symbol
from ._utils import bot_reply_first, bot_reply_markdown, enrich_text_with_urls
markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol

View File

@ -1,18 +1,19 @@
from os import environ
import time
import datetime
import re
from telebot import TeleBot
from telebot.types import Message
from expiringdict import ExpiringDict
from . import *
import time
from os import environ
import cohere
from expiringdict import ExpiringDict
from telebot import TeleBot
from telebot.types import Message
from telegramify_markdown import convert
from telegramify_markdown.customize import markdown_symbol
from config import settings
from ._utils import bot_reply_first, bot_reply_markdown, enrich_text_with_urls
markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
@ -21,8 +22,6 @@ COHERE_MODEL = "command-r-plus" # command-r may cause Chinese garbled code, and
if COHERE_API_KEY:
co = cohere.Client(api_key=COHERE_API_KEY)
TELEGRA_PH_TOKEN = environ.get("TELEGRA_PH_TOKEN")
ph = TelegraphAPI(TELEGRA_PH_TOKEN)
# Global history cache
cohere_player_dict = ExpiringDict(max_len=1000, max_age_seconds=600)
@ -140,7 +139,7 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
+ source
+ f"\nLast Update{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} at UTC+8\n"
)
ph_s = ph.create_page_md(
ph_s = settings.telegraph_client.create_page_md(
title="Cohere", markdown_text=content
) # or edit_page with get_page so not producing massive pages
s += f"\n\n[View]({ph_s})"
@ -149,7 +148,7 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
bot_reply_markdown(
reply_id, who, s, bot, split_text=True, disable_web_page_preview=True
)
except:
except Exception:
pass
player_message.append(

View File

@ -1,18 +1,16 @@
import json
import time
import re
from telebot import TeleBot
from telebot.types import Message
from . import *
import time
# TODO: update requirements.txt and setup tools
# pip install dify-client
from dify_client import ChatClient
from telebot import TeleBot
from telebot.types import Message
from telegramify_markdown.customize import markdown_symbol
from ._utils import bot_reply_first, bot_reply_markdown, enrich_text_with_urls
# If you want, Customizing the head level 1 symbol
markdown_symbol.head_level_1 = "📌"
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol

View File

@ -1,11 +1,10 @@
import random
from PIL import Image, ImageDraw, ImageFont
import re
from os import listdir
from PIL import Image, ImageDraw, ImageFont
from telebot import TeleBot
from telebot.types import Message
import re
from . import *
def split_lines(text, max_length=30):
@ -157,7 +156,7 @@ def fake_photo_handler(message: Message, bot: TeleBot) -> None:
s = s.replace("/fake", "").strip()
s = s.replace("fake:", "").strip()
prompt = s.strip()
bot.reply_to(message, f"Generating LiuNeng's fake image")
bot.reply_to(message, "Generating LiuNeng's fake image")
# get the highest-quality picture.
max_size_photo = max(message.photo, key=lambda p: p.file_size)
file_path = bot.get_file(max_size_photo.file_id).file_path

View File

@ -1,17 +1,16 @@
from os import environ
import re
import time
from os import environ
import google.generativeai as genai
from expiringdict import ExpiringDict
from google.generativeai import ChatSession
from google.generativeai.types.generation_types import StopCandidateException
from telebot import TeleBot
from telebot.types import Message
from expiringdict import ExpiringDict
from telegramify_markdown.customize import markdown_symbol
from . import *
from ._utils import bot_reply_first, bot_reply_markdown, enrich_text_with_urls, logger
markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
@ -166,11 +165,11 @@ def gemini_pro_handler(message: Message, bot: TeleBot) -> None:
player.history.clear()
return
except Exception as e:
print(e)
logger.exception("Gemini audio handler error")
bot.reply_to(message, "answer wrong maybe up to the max token")
try:
player.history.clear()
except:
except Exception:
print(f"\n------\n{who} history.clear() Error / Unstoppable\n------\n")
return
@ -207,10 +206,10 @@ def gemini_photo_handler(message: Message, bot: TeleBot) -> None:
# maybe not complete
try:
bot_reply_markdown(reply_id, who, s, bot)
except:
except Exception:
pass
except Exception as e:
print(e)
logger.exception("Gemini photo handler error")
bot.reply_to(message, "answer wrong maybe up to the max token")
@ -248,11 +247,11 @@ def gemini_audio_handler(message: Message, bot: TeleBot) -> None:
player.history.clear()
return
except Exception as e:
print(e)
logger.exception("Gemini audio handler error")
bot.reply_to(message, "answer wrong maybe up to the max token")
try:
player.history.clear()
except:
except Exception:
print(f"\n------\n{who} history.clear() Error / Unstoppable\n------\n")
return

View File

@ -15,7 +15,6 @@ def github_poster_handler(message: Message, bot: TeleBot):
cmd_list.append("--year")
cmd_list.append(years.strip())
r = subprocess.check_output(cmd_list).decode("utf-8")
try:
if "done" in r:
# TODO windows path
r = subprocess.check_output(
@ -25,8 +24,6 @@ def github_poster_handler(message: Message, bot: TeleBot):
bot.send_photo(
message.chat.id, photo, reply_to_message_id=message.message_id
)
except:
bot.reply_to(message, "github poster error")
def register(bot: TeleBot) -> None:

View File

@ -1,13 +1,13 @@
import re
from telebot import TeleBot
from telebot.types import Message
from telebot.types import InputMediaPhoto
from os import environ
import requests
from expiringdict import ExpiringDict
from kling import ImageGen, VideoGen
import requests
from telebot import TeleBot
from telebot.types import InputMediaPhoto, Message
from . import *
from ._utils import logger
KLING_COOKIE = environ.get("KLING_COOKIE")
pngs_link_dict = ExpiringDict(max_len=100, max_age_seconds=60 * 10)
@ -17,7 +17,7 @@ def kling_handler(message: Message, bot: TeleBot):
"""kling: /kling <address>"""
bot.reply_to(
message,
f"Generating pretty kling image may take some time please wait",
"Generating pretty kling image may take some time please wait",
)
m = message.text.strip()
prompt = m.strip()
@ -47,7 +47,7 @@ def kling_pro_handler(message: Message, bot: TeleBot):
"""kling: /kling <address>"""
bot.reply_to(
message,
f"Generating pretty kling video may take a long time about 2mins to 5mins please wait",
"Generating pretty kling video may take a long time about 2mins to 5mins please wait",
)
m = message.text.strip()
prompt = m.strip()
@ -98,7 +98,7 @@ def kling_photo_handler(message: Message, bot: TeleBot) -> None:
downloaded_file = bot.download_file(file_path)
bot.reply_to(
message,
f"Generating pretty kling image using your photo may take some time please wait",
"Generating pretty kling image using your photo may take some time please wait",
)
with open("kling.jpg", "wb") as temp_file:
temp_file.write(downloaded_file)
@ -109,10 +109,10 @@ def kling_photo_handler(message: Message, bot: TeleBot) -> None:
# set the dict
try:
pngs_link_dict[str(message.from_user.id)] = links
except Exception as e:
print(str(e))
except Exception as e:
print(str(e))
except Exception:
logger.exception("Kling photo handler error")
except Exception:
logger.exception("Kling photo handler error")
bot.reply_to(message, "kling error maybe block the prompt")
return
photos_list = [InputMediaPhoto(i) for i in links]

View File

@ -1,16 +1,15 @@
from os import environ
import time
from os import environ
from expiringdict import ExpiringDict
from groq import Groq
from telebot import TeleBot
from telebot.types import Message
from expiringdict import ExpiringDict
from . import *
from groq import Groq
from telegramify_markdown import convert
from telegramify_markdown.customize import markdown_symbol
from ._utils import bot_reply_first, bot_reply_markdown, enrich_text_with_urls, logger
markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
@ -75,8 +74,8 @@ def llama_handler(message: Message, bot: TeleBot) -> None:
}
)
except Exception as e:
print(e)
except Exception:
logger.exception("Llama handler error")
bot.reply_to(message, "answer wrong maybe up to the max token")
# pop my user
player_message.pop()
@ -148,8 +147,8 @@ def llama_pro_handler(message: Message, bot: TeleBot) -> None:
}
)
except Exception as e:
print(e)
except Exception:
logger.exception("Llama Pro handler error")
bot.reply_to(message, "answer wrong maybe up to the max token")
player_message.clear()
return

View File

@ -1,12 +1,11 @@
import gc
import shutil
import random
import shutil
from tempfile import SpooledTemporaryFile
import numpy as np
import PIL
import PIL.Image
from matplotlib import figure
from PIL import Image
from prettymapp.geo import get_aoi
from prettymapp.osm import get_osm_geometries
from prettymapp.plotting import Plot as PrettyPlot
@ -58,7 +57,7 @@ def sizeof_image(image):
def compress_image(input_image, output_image, target_size):
quality = 95
factor = 1.0
with Image.open(input_image) as img:
with PIL.Image.open(input_image) as img:
while sizeof_image(img) > target_size:
factor -= 0.05
width, height = img.size

View File

@ -1,16 +1,15 @@
# qwen use https://api.together.xyz
from os import environ
import time
from os import environ
from expiringdict import ExpiringDict
from telebot import TeleBot
from telebot.types import Message
from expiringdict import ExpiringDict
from . import *
from together import Together
from telegramify_markdown import convert
from telegramify_markdown.customize import markdown_symbol
from together import Together
from ._utils import bot_reply_first, bot_reply_markdown, enrich_text_with_urls, logger
markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
@ -77,8 +76,8 @@ def qwen_handler(message: Message, bot: TeleBot) -> None:
}
)
except Exception as e:
print(e)
except Exception:
logger.exception("Qwen handler error")
bot.reply_to(message, "answer wrong maybe up to the max token")
# pop my user
player_message.pop()
@ -150,8 +149,8 @@ def qwen_pro_handler(message: Message, bot: TeleBot) -> None:
}
)
except Exception as e:
print(e)
except Exception:
logger.exception("Qwen Pro handler error")
bot.reply_to(message, "answer wrong maybe up to the max token")
player_message.clear()
return

View File

@ -1,20 +1,15 @@
from telebot import TeleBot
from telebot.types import Message
import requests
from openai import OpenAI
from os import environ
from . import *
import requests
from telebot import TeleBot
from telebot.types import Message
from config import settings
SD_API_KEY = environ.get("SD3_KEY")
# TODO refactor this shit to __init__
CHATGPT_API_KEY = environ.get("OPENAI_API_KEY")
CHATGPT_BASE_URL = environ.get("OPENAI_API_BASE") or "https://api.openai.com/v1"
CHATGPT_PRO_MODEL = "gpt-4o-2024-05-13"
client = OpenAI(api_key=CHATGPT_API_KEY, base_url=CHATGPT_BASE_URL)
CHATGPT_PRO_MODEL = settings.openai_model
def get_user_balance():
@ -33,7 +28,7 @@ def get_user_balance():
def generate_sd3_image(prompt):
response = requests.post(
f"https://api.stability.ai/v2beta/stable-image/generate/sd3",
"https://api.stability.ai/v2beta/stable-image/generate/sd3",
headers={"authorization": f"Bearer {SD_API_KEY}", "accept": "image/*"},
files={"none": ""},
data={
@ -61,18 +56,14 @@ def sd_handler(message: Message, bot: TeleBot):
)
m = message.text.strip()
prompt = m.strip()
try:
r = generate_sd3_image(prompt)
if r:
with open(f"sd3.jpeg", "rb") as photo:
with open("sd3.jpeg", "rb") as photo:
bot.send_photo(
message.chat.id, photo, reply_to_message_id=message.message_id
)
else:
bot.reply_to(message, "prompt error")
except Exception as e:
print(e)
bot.reply_to(message, "sd3 error")
def sd_pro_handler(message: Message, bot: TeleBot):
@ -83,7 +74,7 @@ def sd_pro_handler(message: Message, bot: TeleBot):
rewrite_prompt = (
f"revise `{prompt}` to a DALL-E prompt only return the prompt in English."
)
completion = client.chat.completions.create(
completion = settings.openai_client.chat.completions.create(
messages=[{"role": "user", "content": rewrite_prompt}],
max_tokens=2048,
model=CHATGPT_PRO_MODEL,
@ -95,21 +86,17 @@ def sd_pro_handler(message: Message, bot: TeleBot):
message,
f"Generating pretty sd3-turbo image may take some time please left credits {credits} every try will cost 4 criedits wait:\n the real prompt is: {sd_prompt}",
)
try:
r = generate_sd3_image(sd_prompt)
if r:
with open(f"sd3.jpeg", "rb") as photo:
with open("sd3.jpeg", "rb") as photo:
bot.send_photo(
message.chat.id, photo, reply_to_message_id=message.message_id
)
else:
bot.reply_to(message, "prompt error")
except Exception as e:
print(e)
bot.reply_to(message, "sd3 error")
if SD_API_KEY and CHATGPT_API_KEY:
if SD_API_KEY and settings.openai_api_key:
def register(bot: TeleBot) -> None:
bot.register_message_handler(sd_handler, commands=["sd3"], pass_bot=True)

View File

@ -0,0 +1,139 @@
from __future__ import annotations
import logging
from datetime import datetime, timezone
from functools import partial
import telegramify_markdown
from telebot import TeleBot
from telebot.types import Message
from config import settings
from handlers._utils import non_llm_handler
from .messages import ChatMessage, MessageStore
from .utils import PROMPT, filter_message, parse_date
# Shared "bot" logger used by this package's handlers.
logger = logging.getLogger("bot")
# Message history that backs /summary, /stats and /search, kept in a SQLite file.
store = MessageStore("data/messages.db")
@non_llm_handler
def handle_message(message: Message):
    """Record one incoming chat message in the message store."""
    sender = message.from_user
    chat = message.chat
    logger.debug(
        "Received message: %s, chat_id=%d, from=%s",
        message.text,
        chat.id,
        sender.id,
    )
    # Hook point: additional per-message processing can be added here.
    record = ChatMessage(
        chat_id=chat.id,
        message_id=message.id,
        content=message.text or "",
        user_id=sender.id,
        user_name=sender.full_name,
        # message.date is a Unix timestamp; store it as an aware UTC datetime.
        timestamp=datetime.fromtimestamp(message.date, tz=timezone.utc),
    )
    store.add_message(record)
@non_llm_handler
def summary_command(message: Message, bot: TeleBot):
    """Summarize recent chat messages. Examples: /summary today; /summary 2d"""
    # NOTE(review): the docstring is deliberately one short line, like the other
    # command handlers' docstrings — presumably it doubles as the command
    # description shown to users; confirm before expanding it.
    text_parts = message.text.split(maxsplit=1)
    # No argument means "since local midnight".
    date = text_parts[1].strip() if len(text_parts) > 1 else "today"
    try:
        since, now = parse_date(date, settings.timezone)
    except (ValueError, OverflowError):
        # parse_date raises ValueError for anything but "today" / "<N>d|h|m";
        # an absurdly large N overflows the datetime arithmetic instead.
        bot.reply_to(
            message, "无法识别的时间范围,请使用 today 或 2d、12h、30m 这样的格式。"
        )
        return

    messages = store.get_messages_since(message.chat.id, since)
    messages_text = "\n".join(
        f"{msg.timestamp.isoformat()} - @{msg.user_name}: {msg.content}"
        for msg in messages
    )
    if not messages_text:
        bot.reply_to(message, "没有找到指定时间范围内的历史消息。")
        return

    # Placeholder reply; it is edited in place once the summary is ready.
    new_message = bot.reply_to(message, "正在生成摘要,请稍候...")
    try:
        response = settings.openai_client.chat.completions.create(
            model=settings.openai_model,
            messages=[
                {"role": "user", "content": PROMPT.format(messages=messages_text)},
            ],
        )
        summary = response.choices[0].message.content
    except Exception:
        # Same log-and-tell-the-user policy as the other LLM handlers.
        logger.exception("Summary generation failed")
        summary = None
    if not summary:
        # Never leave the "generating..." placeholder dangling.
        bot.edit_message_text(
            chat_id=new_message.chat.id,
            message_id=new_message.message_id,
            text="生成摘要失败,请稍后再试。",
        )
        return

    # The header is hand-written MarkdownV2, hence the escaped "(", "-" and ")".
    window = f"{since.strftime('%Y/%m/%d %H:%M')} \\- {now.strftime('%Y/%m/%d %H:%M')}"
    reply_text = (
        f"*👇 前情提要 👇 \\({window}\\)*\n"
        f"{telegramify_markdown.convert(summary)}\n"
    )
    logger.debug("Generated summary:\n%s", reply_text)
    bot.edit_message_text(
        chat_id=new_message.chat.id,
        message_id=new_message.message_id,
        text=reply_text,
        parse_mode="MarkdownV2",
    )
@non_llm_handler
def stats_command(message: Message, bot: TeleBot):
    """Show message statistics for this group."""
    entries = store.get_stats(message.chat.id)
    if not entries:
        bot.reply_to(message, "没有找到任何统计信息。")
        return

    # One "<date>: <count> messages" line per stats entry, shown as a code block.
    lines = [f"{entry.date}: {entry.message_count} messages" for entry in entries]
    stats_text = "\n".join(lines)
    reply = f"📊 群组消息统计信息:\n```\n{stats_text}\n```"
    bot.reply_to(message, reply, parse_mode="MarkdownV2")
# Upper bound for the user-supplied result count, so a single /search cannot
# pull the whole stored history into one reply.
_MAX_SEARCH_RESULTS = 50


@non_llm_handler
def search_command(message: Message, bot: TeleBot):
    """Search chat messages. Example: /search keyword [N]"""
    # NOTE(review): docstring kept to one short line — presumably it doubles as
    # the command description shown to users; confirm before expanding it.
    text_parts = message.text.split(maxsplit=2)
    if len(text_parts) < 2:
        bot.reply_to(message, "请提供要搜索的关键词。")
        return
    keyword = text_parts[1].strip()

    limit = 10
    # isdecimal(), not isdigit(): isdigit() also accepts characters such as "²"
    # that int() cannot parse.
    if len(text_parts) > 2 and text_parts[2].isdecimal():
        # Clamp to 1.._MAX_SEARCH_RESULTS; 0 would always look like "no match".
        limit = min(max(int(text_parts[2]), 1), _MAX_SEARCH_RESULTS)

    messages = store.search_messages(message.chat.id, keyword, limit=limit)
    if not messages:
        bot.reply_to(message, "没有找到匹配的消息。")
        return

    # t.me/c/ links use the chat id without the "-100" prefix.
    chat_id = str(message.chat.id)
    if chat_id.startswith("-100"):
        chat_id = chat_id[4:]
    items = [
        f"https://t.me/c/{chat_id}/{msg.message_id}\n```\n{msg.content}\n```"
        for msg in messages
    ]
    message_text = telegramify_markdown.convert("\n".join(items))
    bot.reply_to(
        message,
        f"🔍 *搜索结果(只显示前 {limit} 个):*\n{message_text}",
        parse_mode="MarkdownV2",
    )
load_priority = 5
# The handlers are only registered when an OpenAI API key is configured.
if settings.openai_api_key:

    def register(bot: TeleBot):
        """Register the summary/stats/search commands and the message recorder."""
        command_handlers = (
            (summary_command, "summary"),
            (stats_command, "stats"),
            (search_command, "search"),
        )
        for callback, command in command_handlers:
            bot.register_message_handler(callback, commands=[command], pass_bot=True)
        # Registered last, with a filter: records plain messages for later use.
        recordable = partial(filter_message, bot=bot)
        bot.register_message_handler(handle_message, func=recordable)

View File

@ -0,0 +1,49 @@
from __future__ import annotations
import asyncio
import os
import sys
from .messages import ChatMessage, MessageStore
async def fetch_messages(chat_id: int) -> None:
    """Copy the text messages of one chat into the local message store.

    Reads ``TELEGRAM_API_ID`` and ``TELEGRAM_API_HASH`` from the environment,
    iterates the chat history oldest-first through a Telethon client and
    stores every text message that was sent by a user account.

    Note that ``MessageStore.add_message`` prunes rows older than its
    retention window on every insert, so older history does not persist.

    Raises:
        RuntimeError: if either environment variable is missing.
        ValueError: if ``TELEGRAM_API_ID`` is not an integer.
    """
    # Imported here rather than at module level, so telethon is only loaded
    # when a backfill actually runs.
    from telethon import TelegramClient
    from telethon.tl.types import Message

    api_id = os.getenv("TELEGRAM_API_ID")
    api_hash = os.getenv("TELEGRAM_API_HASH")
    if not api_id or not api_hash:
        # int(None) would otherwise fail with an unhelpful TypeError.
        raise RuntimeError(
            "TELEGRAM_API_ID and TELEGRAM_API_HASH must be set in the environment"
        )

    store = MessageStore("data/messages.db")
    # One connection for the whole run; sqlite3's own context manager does not
    # close the connection, so close it explicitly.
    conn = store.connect()
    try:
        async with TelegramClient("test", int(api_id), api_hash) as client:
            assert isinstance(client, TelegramClient)
            async for message in client.iter_messages(chat_id, reverse=True):
                if not isinstance(message, Message) or not message.message:
                    continue
                # from_id may be missing or refer to a channel/chat rather than
                # a user; only user peers carry ``user_id``.
                sender_id = getattr(message.from_id, "user_id", None)
                if sender_id is None:
                    continue
                print(message.pretty_format(message))
                user = await client.get_entity(message.from_id)
                # Either name part may be absent.
                name_parts = (
                    getattr(user, "first_name", None),
                    getattr(user, "last_name", None),
                )
                fullname = " ".join(part for part in name_parts if part)
                store.add_message(
                    ChatMessage(
                        chat_id=chat_id,
                        message_id=message.id,
                        content=message.message,
                        user_id=sender_id,
                        user_name=fullname,
                        timestamp=message.date,
                    ),
                    conn=conn,
                )
    finally:
        conn.close()
if __name__ == "__main__":
    # Script entry point: expects exactly one argument, the numeric chat id.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python -m handlers.summary <chat_id>")
        sys.exit(1)
    chat_id = int(cli_args[0])
    asyncio.run(fetch_messages(chat_id))

View File

@ -0,0 +1,164 @@
import os
import sqlite3
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
@dataclass(frozen=True)
class ChatMessage:
chat_id: int
message_id: int
content: str
user_id: int
user_name: str
timestamp: datetime
@dataclass(frozen=True)
class StatsEntry:
date: str
message_count: int
class MessageStore:
def __init__(self, db_file: str):
parent_folder = os.path.dirname(db_file)
if not os.path.exists(parent_folder):
os.makedirs(parent_folder)
self._db_file = db_file
self._init_db()
def connect(self) -> sqlite3.Connection:
"""Create a new database connection."""
return sqlite3.connect(self._db_file)
def _init_db(self):
with self.connect() as conn:
conn.execute(
"""
CREATE TABLE IF NOT EXISTS messages (
chat_id INTEGER,
message_id INTEGER,
content TEXT,
user_id INTEGER,
user_name TEXT,
timestamp TEXT,
PRIMARY KEY (chat_id, message_id)
);
"""
)
conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_chat_timestamp ON messages (chat_id, timestamp);
"""
)
conn.commit()
def add_message(
self, message: ChatMessage, conn: sqlite3.Connection | None = None
) -> None:
need_close = False
if conn is None:
conn = self.connect()
need_close = True
try:
conn.execute(
"""
INSERT OR REPLACE INTO messages (chat_id, message_id, content, user_id, user_name, timestamp)
VALUES (?, ?, ?, ?, ?, ?);
""",
(
message.chat_id,
message.message_id,
message.content,
message.user_id,
message.user_name,
message.timestamp.isoformat(),
),
)
self._clean_old_messages(message.chat_id, conn)
conn.commit()
finally:
if need_close:
conn.close()
def get_messages_since(self, chat_id: int, since: datetime) -> list[ChatMessage]:
with self.connect() as conn:
cursor = conn.cursor()
cursor.execute(
"""
SELECT chat_id, message_id, content, user_id, user_name, timestamp
FROM messages
WHERE chat_id = ? AND timestamp >= ?
ORDER BY timestamp ASC;
""",
(chat_id, since.isoformat()),
)
rows = cursor.fetchall()
return [
ChatMessage(
chat_id=row[0],
message_id=row[1],
content=row[2],
user_id=row[3],
user_name=row[4],
timestamp=datetime.fromisoformat(row[5]),
)
for row in rows
]
def get_stats(self, chat_id: int) -> list[StatsEntry]:
with self.connect() as conn:
self._clean_old_messages(chat_id, conn)
cursor = conn.cursor()
cursor.execute(
"""
SELECT DATE(timestamp), COUNT(*)
FROM messages
WHERE chat_id = ?
GROUP BY DATE(timestamp)
ORDER BY DATE(timestamp) ASC;
""",
(chat_id,),
)
rows = cursor.fetchall()
return [StatsEntry(date=row[0], message_count=row[1]) for row in rows]
def search_messages(
self, chat_id: int, keyword: str, limit: int = 10
) -> list[ChatMessage]:
# TODO: Fuzzy search with full-text search or similar
with self.connect() as conn:
cursor = conn.cursor()
cursor.execute(
"""
SELECT chat_id, message_id, content, user_id, user_name, timestamp
FROM messages
WHERE chat_id = ? AND content LIKE ?
ORDER BY timestamp DESC
LIMIT ?;
""",
(chat_id, f"%{keyword}%", limit),
)
rows = cursor.fetchall()
return [
ChatMessage(
chat_id=row[0],
message_id=row[1],
content=row[2],
user_id=row[3],
user_name=row[4],
timestamp=datetime.fromisoformat(row[5]),
)
for row in rows
]
def _clean_old_messages(
self, chat_id: int, conn: sqlite3.Connection, days: int = 7
) -> None:
cursor = conn.cursor()
threshold_date = datetime.now(tz=timezone.utc) - timedelta(days=days)
cursor.execute(
"DELETE FROM messages WHERE chat_id = ? AND timestamp < ?;",
(chat_id, threshold_date.isoformat()),
)

48
handlers/summary/utils.py Normal file
View File

@ -0,0 +1,48 @@
import re
import zoneinfo
from datetime import datetime, timedelta
from telebot import TeleBot
from telebot.types import Message
# Prompt template for the summary request; ``{messages}`` is the placeholder
# for the chat transcript. The (Chinese) prompt asks for a summary covering the
# topics discussed, highlight remarks and main viewpoints, with quoted user
# names in bold and no lead-in words or title.
PROMPT = """\
请将下面的聊天记录进行总结,包含讨论了哪些话题,有哪些亮点发言和主要观点。
引用用户名请加粗。直接返回内容即可,不要包含引导词和标题。
--- Messages Start ---
{messages}
--- Messages End ---
"""
# Bot object -> its own Telegram user id, filled on first use.
_BOT_ID_CACHE: dict[TeleBot, int] = {}


def _cached_bot_id(bot: TeleBot) -> int:
    """Return the bot's own user id, calling ``bot.get_me()`` only once per bot."""
    try:
        return _BOT_ID_CACHE[bot]
    except KeyError:
        bot_id = _BOT_ID_CACHE[bot] = bot.get_me().id
        return bot_id


def filter_message(message: Message, bot: TeleBot) -> bool:
    """Return True for messages to record: text, with a sender, not from the bot, not a command."""
    if not message.text:
        return False
    if not message.from_user:
        return False
    if message.text.startswith("/"):
        return False
    # This filter runs for every incoming message, so the bot's id is looked
    # up once and cached instead of calling get_me() each time.
    return message.from_user.id != _cached_bot_id(bot)
# "<number><unit>" where unit is d (days), h (hours) or m (minutes).
date_regex = re.compile(r"^(\d+)([dhm])$")
# Unit letter -> timedelta keyword argument.
_UNIT_KEYWORDS = {"d": "days", "h": "hours", "m": "minutes"}


def parse_date(date_str: str, locale: str) -> tuple[datetime, datetime]:
    """Turn a range such as ``today`` or ``2d`` into a ``(since, now)`` pair.

    Args:
        date_str: ``today`` or ``<N>d`` / ``<N>h`` / ``<N>m``; surrounding
            whitespace and letter case are ignored.
        locale: time zone key passed to ``zoneinfo.ZoneInfo``.

    Returns:
        Two aware datetimes in that zone: the start of the range and the
        current time. For ``today`` the start is midnight of the current day.

    Raises:
        ValueError: if *date_str* has any other form.
    """
    spec = date_str.strip().lower()
    now = datetime.now(tz=zoneinfo.ZoneInfo(locale))
    if spec == "today":
        midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
        return midnight, now

    parsed = date_regex.match(spec)
    if parsed is None:
        raise ValueError(f"Unsupported date format: {spec}")
    amount, unit = parsed.groups()
    span = timedelta(**{_UNIT_KEYWORDS[unit]: int(amount)})
    return now - span, now

View File

@ -1,8 +1,8 @@
from urlextract import URLExtract
from telebot import TeleBot
from telebot.types import Message
from urlextract import URLExtract
from . import *
from ._utils import bot_reply_first, bot_reply_markdown
def tweet_handler(message: Message, bot: TeleBot):

File diff suppressed because it is too large Load Diff

928
pdm.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,6 @@
[project]
name = "tg_bot_collections"
# PEP 621 project metadata
# See https://www.python.org/dev/peps/pep-0621/
dependencies = [
@ -17,11 +18,20 @@ dependencies = [
"groq",
"together>=1.1.5",
"dify-client>=0.1.10",
"chattts-fork>=0.0.1",
"expiringdict>=1.2.2",
"beautifulsoup4>=4.12.3",
"Markdown>=3.6",
"cohere>=5.5.8",
"kling-creator>=0.0.3",
"pydantic-settings>=2.10.1",
"pydantic>=2.11.7",
"telethon>=1.40.0",
"pysocks>=1.7.1",
]
requires-python = ">=3.10"
[tool.pdm]
distribution = false
[tool.pdm.scripts]
dev = "python tg.py --debug"

View File

@ -5,7 +5,6 @@ aiohttp==3.9.5
aiosignal==1.3.1
annotated-types==0.6.0
anthropic==0.32.0
antlr4-python3-runtime==4.9.3
anyio==4.3.0
async-timeout==4.0.3; python_version < "3.11"
attrs==23.2.0
@ -18,7 +17,6 @@ cairosvg==2.7.1
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
chattts-fork==0.0.8
click==8.1.7
click-plugins==1.1.1
cligj==0.7.2
@ -31,10 +29,7 @@ cycler==0.12.1
defusedxml==0.7.1
dify-client==0.1.10
distro==1.9.0
einops==0.8.0
einx==0.2.2
emoji==2.11.1
encodec==0.1.1
eval-type-backport==0.2.0
exceptiongroup==1.2.1; python_version < "3.11"
expiringdict==1.2.2
@ -43,13 +38,12 @@ fastavro==1.9.4
filelock==3.14.0
fiona==1.9.6
fonttools==4.51.0
frozendict==2.4.4
frozenlist==1.4.1
fsspec==2024.3.1
geopandas==0.14.4
github-poster==2.7.4
google-ai-generativelanguage==0.6.6
google-api-core==2.19.0
google-api-core[grpc]==2.19.0
google-api-python-client==2.128.0
google-auth==2.29.0
google-auth-httplib2==0.2.0
@ -65,36 +59,18 @@ httpx==0.27.0
httpx-sse==0.4.0
huggingface-hub==0.23.0
idna==3.7
intel-openmp==2021.4.0; platform_system == "Windows"
jinja2==3.1.4
jiter==0.5.0
jmespath==1.0.1
kiwisolver==1.4.5
kling-creator==0.3.0
markdown==3.6
markdown-it-py==3.0.0
markupsafe==2.1.5
matplotlib==3.8.4
mdurl==0.1.2
mistletoe==1.4.0
mkl==2021.4.0; platform_system == "Windows"
mpmath==1.3.0
multidict==6.0.5
networkx==3.3
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1; platform_system == "Linux" and platform_machine == "x86_64"
nvidia-cuda-cupti-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64"
nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64"
nvidia-cuda-runtime-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64"
nvidia-cudnn-cu12==8.9.2.26; platform_system == "Linux" and platform_machine == "x86_64"
nvidia-cufft-cu12==11.0.2.54; platform_system == "Linux" and platform_machine == "x86_64"
nvidia-curand-cu12==10.3.2.106; platform_system == "Linux" and platform_machine == "x86_64"
nvidia-cusolver-cu12==11.4.5.107; platform_system == "Linux" and platform_machine == "x86_64"
nvidia-cusparse-cu12==12.1.0.106; platform_system == "Linux" and platform_machine == "x86_64"
nvidia-nccl-cu12==2.20.5; platform_system == "Linux" and platform_machine == "x86_64"
nvidia-nvjitlink-cu12==12.5.40; platform_system == "Linux" and platform_machine == "x86_64"
nvidia-nvtx-cu12==12.1.105; platform_system == "Linux" and platform_machine == "x86_64"
omegaconf==2.3.0
openai==1.37.2
osmnx==1.9.2
packaging==24.0
@ -106,55 +82,50 @@ platformdirs==4.2.1
prettymapp==0.3.0
proto-plus==1.23.0
protobuf==4.25.3
pyaes==1.6.1
pyarrow==16.0.0
pyasn1==0.6.0
pyasn1-modules==0.4.0
pycparser==2.22
pydantic==2.7.1
pydantic-core==2.18.2
pydantic==2.11.7
pydantic-core==2.33.2
pydantic-settings==2.10.1
pygments==2.18.0
pyogrio==0.7.2
pyparsing==3.1.2
pyproj==3.6.1
pysocks==1.7.1
pytelegrambotapi==4.21.0
python-dateutil==2.9.0.post0
python-dotenv==1.1.1
pytz==2024.1
pyyaml==6.0.1
regex==2024.5.15
requests==2.32.3
rich==13.7.1
rsa==4.9
s3transfer==0.10.2
safetensors==0.4.3
scipy==1.13.1
shapely==2.0.4
shellingham==1.5.4
six==1.16.0
sniffio==1.3.1
soupsieve==2.5
svgwrite==1.4.3
sympy==1.12
tabulate==0.9.0
tbb==2021.12.0; platform_system == "Windows"
telegramify-markdown==0.1.9
telethon==1.40.0
time-machine==2.14.1; implementation_name != "pypy"
tinycss2==1.3.0
together==1.2.5
tokenizers==0.19.1
torch==2.3.0
torchaudio==2.3.0
tqdm==4.66.4
transformers==4.41.1
triton==2.3.0; platform_system == "Linux" and platform_machine == "x86_64" and python_version < "3.12"
typer==0.12.3
types-requests==2.32.0.20240622
typing-extensions==4.11.0
typing-extensions==4.14.1
typing-inspection==0.4.1
tzdata==2024.1
uritemplate==4.1.1
uritools==4.0.2
urlextract==1.9.0
urllib3==2.2.1
vector-quantize-pytorch==1.14.24
vocos==0.1.0
webencodings==0.5.1
yarl==1.9.4

View File

@ -7,20 +7,20 @@ service_name="tgbotyh"
source .env
google_gemini_api_key="${Google_Gemini_API_Key}"
telegram_bot_token="${Telegram_Bot_Token}"
anthropic_api_key="${Anthropic_API_Key}"
openai_api_key="${Openai_API_Key}"
yi_api_key="${Yi_API_Key}"
yi_base_url="${Yi_Base_Url}"
google_gemini_api_key="${GOOGLE_GEMINI_API_KEY}"
telegram_bot_token="${TELEGRAM_BOT_TOKEN}"
anthropic_api_key="${ANTHROPIC_API_KEY}"
openai_api_key="${OPENAI_API_KEY}"
yi_api_key="${YI_API_KEY}"
yi_base_url="${YI_BASE_URL}"
if [ -n "$Python_Bin_Path" ]; then
python_bin_path="$Python_Bin_Path"
if [ -n "$PYTHON_BIN_PATH" ]; then
python_bin_path="$PYTHON_BIN_PATH"
fi
if [ -n "$Python_Venv_Path" ]; then
venv_dir="${Python_Venv_Path}"
if [ -n "$PYTHON_VENV_PATH" ]; then
venv_dir="${PYTHON_VENV_PATH}"
fi
sudoCmd=""

29
tg.py
View File

@ -1,13 +1,34 @@
import argparse
import logging
from telebot import TeleBot
from config import settings
from handlers import list_available_commands, load_handlers
logger = logging.getLogger("bot")
def setup_logging(debug: bool):
    """Attach a stream handler to the bot logger; DEBUG level if *debug*, else INFO."""
    formatter = logging.Formatter(
        "%(asctime)s - [%(levelname)s] - %(filename)s:%(lineno)d - %(message)s"
    )
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)
    level = logging.DEBUG if debug else logging.INFO
    logger.setLevel(level)
def main():
# Init args
parser = argparse.ArgumentParser()
parser.add_argument("tg_token", help="tg token")
parser.add_argument(
"tg_token", help="tg token", default=settings.telegram_bot_token, nargs="?"
)
parser.add_argument(
"--debug", "--verbose", "-v", action="store_true", help="Enable debug mode"
)
# 'disable-command' option
# The action 'append' will allow multiple entries to be saved into a list
@ -22,15 +43,15 @@ def main():
)
options = parser.parse_args()
print("Arg parse done.")
setup_logging(options.debug)
# Init bot
bot = TeleBot(options.tg_token)
load_handlers(bot, options.disable_commands)
print("Bot init done.")
logger.info("Bot init done.")
# Start bot
print("Starting tg collections bot.")
logger.info("Starting tg collections bot.")
bot.infinity_polling(timeout=10, long_polling_timeout=5)