mirror of
https://github.com/cdryzun/tg_bot_collections.git
synced 2025-04-29 00:27:09 +08:00
fix: cohere long answer and garbled code
This commit is contained in:
parent
4cf8c8f8d9
commit
5fb3fc49d0
@ -1,6 +1,7 @@
|
|||||||
from os import environ
|
from os import environ
|
||||||
import time
|
import time
|
||||||
import datetime
|
import datetime
|
||||||
|
import re
|
||||||
|
|
||||||
from telebot import TeleBot
|
from telebot import TeleBot
|
||||||
from telebot.types import Message
|
from telebot.types import Message
|
||||||
@ -16,7 +17,7 @@ markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level
|
|||||||
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
|
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
|
||||||
|
|
||||||
COHERE_API_KEY = environ.get("COHERE_API_KEY")
|
COHERE_API_KEY = environ.get("COHERE_API_KEY")
|
||||||
COHERE_MODEL = "command-r-plus"
|
COHERE_MODEL = "command-r-plus" # command-r may garble Chinese output, and non-streaming mode may also produce garbled text.
|
||||||
if COHERE_API_KEY:
|
if COHERE_API_KEY:
|
||||||
co = cohere.Client(api_key=COHERE_API_KEY)
|
co = cohere.Client(api_key=COHERE_API_KEY)
|
||||||
|
|
||||||
@ -27,98 +28,27 @@ ph = TelegraphAPI(TELEGRA_PH_TOKEN)
|
|||||||
cohere_player_dict = ExpiringDict(max_len=1000, max_age_seconds=300)
|
cohere_player_dict = ExpiringDict(max_len=1000, max_age_seconds=300)
|
||||||
|
|
||||||
|
|
||||||
def cohere_handler_direct(message: Message, bot: TeleBot) -> None:
|
def clean_text(text):
|
||||||
"""cohere : /cohere <question>"""
|
"""Clean up the garbled code in the UTF-8 encoded Chinese string.
|
||||||
m = message.text.strip()
|
|
||||||
|
|
||||||
player_message = []
|
Args:
|
||||||
if str(message.from_user.id) not in cohere_player_dict:
|
text: String that needs to be cleaned.
|
||||||
cohere_player_dict[str(message.from_user.id)] = player_message
|
|
||||||
|
Returns:
|
||||||
|
The cleaned string. If garbled text was detected, a warning note is appended at the end.
|
||||||
|
"""
|
||||||
|
if "�" in text:
|
||||||
|
# Use re.sub to clean up garbled code
|
||||||
|
cleaned_text = re.sub(r"�.*?([,。!?;:]|$)", r"\1", text)
|
||||||
|
cleaned_text = re.sub(r"\s+", " ", cleaned_text).strip()
|
||||||
|
print(f"\n---------\nOriginal text:\n{text}\n---------")
|
||||||
|
return cleaned_text + "\n\n~~(乱码已去除,可能存在错误,请注意)~~"
|
||||||
else:
|
else:
|
||||||
player_message = cohere_player_dict[str(message.from_user.id)]
|
return text
|
||||||
|
|
||||||
if m.strip() == "clear":
|
|
||||||
bot.reply_to(
|
|
||||||
message,
|
|
||||||
"Just cleared your Cohere messages history",
|
|
||||||
)
|
|
||||||
player_message.clear()
|
|
||||||
return
|
|
||||||
|
|
||||||
if m[:4].lower() == "new ":
|
|
||||||
m = m[4:].strip()
|
|
||||||
player_message.clear()
|
|
||||||
|
|
||||||
m = enrich_text_with_urls(m)
|
|
||||||
|
|
||||||
who = "Command R Plus"
|
|
||||||
reply_id = bot_reply_first(message, who, bot)
|
|
||||||
|
|
||||||
player_message.append({"role": "User", "message": m})
|
|
||||||
# keep the last 5, every has two ask and answer.
|
|
||||||
if len(player_message) > 10:
|
|
||||||
player_message = player_message[2:]
|
|
||||||
|
|
||||||
try:
|
|
||||||
stream = co.chat_stream(
|
|
||||||
model=COHERE_MODEL,
|
|
||||||
message=m,
|
|
||||||
temperature=0.8,
|
|
||||||
chat_history=player_message,
|
|
||||||
prompt_truncation="AUTO",
|
|
||||||
connectors=[{"id": "web-search"}],
|
|
||||||
citation_quality="accurate",
|
|
||||||
preamble=f"You are Command R+, a large language model trained to have polite, helpful, inclusive conversations with people. The current time in Tornoto is {datetime.datetime.now(datetime.timezone.utc).astimezone().strftime('%Y-%m-%d %H:%M:%S')}, in Los Angeles is {datetime.datetime.now(datetime.timezone.utc).astimezone().astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, and in China is {datetime.datetime.now(datetime.timezone.utc).astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}.",
|
|
||||||
)
|
|
||||||
|
|
||||||
s = ""
|
|
||||||
source = ""
|
|
||||||
start = time.time()
|
|
||||||
for event in stream:
|
|
||||||
if event.event_type == "stream-start":
|
|
||||||
bot_reply_markdown(reply_id, who, "Thinking...", bot)
|
|
||||||
elif event.event_type == "search-queries-generation":
|
|
||||||
bot_reply_markdown(reply_id, who, "Searching online...", bot)
|
|
||||||
elif event.event_type == "search-results":
|
|
||||||
bot_reply_markdown(reply_id, who, "Reading...", bot)
|
|
||||||
for doc in event.documents:
|
|
||||||
source += f"\n[{doc['title']}]({doc['url']})"
|
|
||||||
elif event.event_type == "text-generation":
|
|
||||||
s += event.text.encode("utf-8").decode("utf-8")
|
|
||||||
if time.time() - start > 0.4:
|
|
||||||
start = time.time()
|
|
||||||
bot_reply_markdown(
|
|
||||||
reply_id,
|
|
||||||
who,
|
|
||||||
f"\nStill thinking{len(s)}...",
|
|
||||||
bot,
|
|
||||||
split_text=True,
|
|
||||||
)
|
|
||||||
elif event.event_type == "stream-end":
|
|
||||||
break
|
|
||||||
s += "\n" + source + "\n"
|
|
||||||
|
|
||||||
try:
|
|
||||||
bot_reply_markdown(reply_id, who, s, bot, split_text=True)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
player_message.append(
|
|
||||||
{
|
|
||||||
"role": "Chatbot",
|
|
||||||
"message": convert(s),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
bot_reply_markdown(reply_id, who, "Answer wrong", bot)
|
|
||||||
player_message.clear()
|
|
||||||
return
|
|
||||||
|
|
||||||
|
|
||||||
def cohere_handler(message: Message, bot: TeleBot) -> None:
|
def cohere_handler(message: Message, bot: TeleBot) -> None:
|
||||||
"""cohere : /cohere <question> This will return a telegraph link"""
|
"""cohere : /cohere_pro <question> Come with a telegraph link"""
|
||||||
m = message.text.strip()
|
m = message.text.strip()
|
||||||
|
|
||||||
player_message = []
|
player_message = []
|
||||||
@ -150,6 +80,14 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
|
|||||||
player_message = player_message[2:]
|
player_message = player_message[2:]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
current_time = datetime.datetime.now(datetime.timezone.utc)
|
||||||
|
preamble = (
|
||||||
|
f"You are Command, a large language model trained to have polite, helpful, and inclusive conversations with people. Your responses should be accurate and graceful in user's original language."
|
||||||
|
f"The current UTC time is {current_time.strftime('%Y-%m-%d %H:%M:%S')}, "
|
||||||
|
f"UTC-4 (e.g. New York) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=-4))).strftime('%Y-%m-%d %H:%M:%S')}, "
|
||||||
|
f"UTC-7 (e.g. Los Angeles) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, "
|
||||||
|
f"and UTC+8 (e.g. Beijing) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}."
|
||||||
|
)
|
||||||
stream = co.chat_stream(
|
stream = co.chat_stream(
|
||||||
model=COHERE_MODEL,
|
model=COHERE_MODEL,
|
||||||
message=m,
|
message=m,
|
||||||
@ -158,7 +96,7 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
|
|||||||
prompt_truncation="AUTO",
|
prompt_truncation="AUTO",
|
||||||
connectors=[{"id": "web-search"}],
|
connectors=[{"id": "web-search"}],
|
||||||
citation_quality="accurate",
|
citation_quality="accurate",
|
||||||
preamble=f"You are Command R+, a large language model trained to have polite, helpful, inclusive conversations with people. The current time in Tornoto is {datetime.datetime.now(datetime.timezone.utc).astimezone().strftime('%Y-%m-%d %H:%M:%S')}, in Los Angeles is {datetime.datetime.now(datetime.timezone.utc).astimezone().astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, and in China is {datetime.datetime.now(datetime.timezone.utc).astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}.",
|
preamble=preamble,
|
||||||
)
|
)
|
||||||
|
|
||||||
s = ""
|
s = ""
|
||||||
@ -175,22 +113,32 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
|
|||||||
source += f"\n{doc['title']}\n{doc['url']}\n"
|
source += f"\n{doc['title']}\n{doc['url']}\n"
|
||||||
elif event.event_type == "text-generation":
|
elif event.event_type == "text-generation":
|
||||||
s += event.text.encode("utf-8").decode("utf-8")
|
s += event.text.encode("utf-8").decode("utf-8")
|
||||||
if time.time() - start > 0.4:
|
if time.time() - start > 1.4:
|
||||||
start = time.time()
|
start = time.time()
|
||||||
bot_reply_markdown(
|
s = clean_text(s)
|
||||||
reply_id,
|
if len(s) > 3900:
|
||||||
who,
|
bot_reply_markdown(
|
||||||
f"\nStill thinking{len(s)}...",
|
reply_id,
|
||||||
bot,
|
who,
|
||||||
split_text=True,
|
f"\nStill thinking{len(s)}...\n",
|
||||||
)
|
bot,
|
||||||
|
split_text=True,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
bot_reply_markdown(
|
||||||
|
reply_id,
|
||||||
|
who,
|
||||||
|
f"\nStill thinking{len(s)}...\n{s}",
|
||||||
|
bot,
|
||||||
|
split_text=True,
|
||||||
|
)
|
||||||
elif event.event_type == "stream-end":
|
elif event.event_type == "stream-end":
|
||||||
break
|
break
|
||||||
content = (
|
content = (
|
||||||
s
|
s
|
||||||
+ "\n------\n------\n"
|
+ "\n------\n------\n"
|
||||||
+ source
|
+ source
|
||||||
+ f"\n------\n------\nLast Update{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
|
+ f"\nLast Update{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} at UTC+8\n"
|
||||||
)
|
)
|
||||||
ph_s = ph.create_page_md(
|
ph_s = ph.create_page_md(
|
||||||
title="Cohere", markdown_text=content
|
title="Cohere", markdown_text=content
|
||||||
@ -218,14 +166,6 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
|
|||||||
|
|
||||||
if COHERE_API_KEY:
|
if COHERE_API_KEY:
|
||||||
|
|
||||||
def register(bot: TeleBot) -> None:
|
|
||||||
bot.register_message_handler(
|
|
||||||
cohere_handler_direct, commands=["cohere_no_ph"], pass_bot=True
|
|
||||||
)
|
|
||||||
bot.register_message_handler(
|
|
||||||
cohere_handler_direct, regexp="^cohere_no_ph:", pass_bot=True
|
|
||||||
)
|
|
||||||
|
|
||||||
def register(bot: TeleBot) -> None:
|
def register(bot: TeleBot) -> None:
|
||||||
bot.register_message_handler(cohere_handler, commands=["cohere"], pass_bot=True)
|
bot.register_message_handler(cohere_handler, commands=["cohere"], pass_bot=True)
|
||||||
bot.register_message_handler(cohere_handler, regexp="^cohere:", pass_bot=True)
|
bot.register_message_handler(cohere_handler, regexp="^cohere:", pass_bot=True)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user