fix: cohere long answer and garbled code

This commit is contained in:
Alter-xyz 2024-06-28 05:28:44 -04:00
parent 4cf8c8f8d9
commit 5fb3fc49d0

View File

@ -1,6 +1,7 @@
from os import environ from os import environ
import time import time
import datetime import datetime
import re
from telebot import TeleBot from telebot import TeleBot
from telebot.types import Message from telebot.types import Message
@ -16,7 +17,7 @@ markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
COHERE_API_KEY = environ.get("COHERE_API_KEY") COHERE_API_KEY = environ.get("COHERE_API_KEY")
COHERE_MODEL = "command-r-plus" COHERE_MODEL = "command-r-plus" # command-r may produce garbled Chinese text, and non-streaming mode may also produce garbled text.
if COHERE_API_KEY: if COHERE_API_KEY:
co = cohere.Client(api_key=COHERE_API_KEY) co = cohere.Client(api_key=COHERE_API_KEY)
@ -27,98 +28,27 @@ ph = TelegraphAPI(TELEGRA_PH_TOKEN)
cohere_player_dict = ExpiringDict(max_len=1000, max_age_seconds=300) cohere_player_dict = ExpiringDict(max_len=1000, max_age_seconds=300)
def cohere_handler_direct(message: Message, bot: TeleBot) -> None: def clean_text(text):
"""cohere : /cohere <question>""" """Clean up garbled characters in a UTF-8 encoded Chinese string.
m = message.text.strip()
player_message = [] Args:
if str(message.from_user.id) not in cohere_player_dict: text: String that needs to be cleaned.
cohere_player_dict[str(message.from_user.id)] = player_message
Returns:
The cleaned string; if garbled characters were detected, a notice is appended at the end.
"""
if "�" in text:
# Use re.sub to clean up garbled code
cleaned_text = re.sub(r"�.*?([,。!?;:]|$)", r"\1", text)
cleaned_text = re.sub(r"\s+", " ", cleaned_text).strip()
print(f"\n---------\nOriginal text:\n{text}\n---------")
return cleaned_text + "\n\n~~(乱码已去除,可能存在错误,请注意)~~"
else: else:
player_message = cohere_player_dict[str(message.from_user.id)] return text
if m.strip() == "clear":
bot.reply_to(
message,
"Just cleared your Cohere messages history",
)
player_message.clear()
return
if m[:4].lower() == "new ":
m = m[4:].strip()
player_message.clear()
m = enrich_text_with_urls(m)
who = "Command R Plus"
reply_id = bot_reply_first(message, who, bot)
player_message.append({"role": "User", "message": m})
# keep the last 5, every has two ask and answer.
if len(player_message) > 10:
player_message = player_message[2:]
try:
stream = co.chat_stream(
model=COHERE_MODEL,
message=m,
temperature=0.8,
chat_history=player_message,
prompt_truncation="AUTO",
connectors=[{"id": "web-search"}],
citation_quality="accurate",
preamble=f"You are Command R+, a large language model trained to have polite, helpful, inclusive conversations with people. The current time in Tornoto is {datetime.datetime.now(datetime.timezone.utc).astimezone().strftime('%Y-%m-%d %H:%M:%S')}, in Los Angeles is {datetime.datetime.now(datetime.timezone.utc).astimezone().astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, and in China is {datetime.datetime.now(datetime.timezone.utc).astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}.",
)
s = ""
source = ""
start = time.time()
for event in stream:
if event.event_type == "stream-start":
bot_reply_markdown(reply_id, who, "Thinking...", bot)
elif event.event_type == "search-queries-generation":
bot_reply_markdown(reply_id, who, "Searching online...", bot)
elif event.event_type == "search-results":
bot_reply_markdown(reply_id, who, "Reading...", bot)
for doc in event.documents:
source += f"\n[{doc['title']}]({doc['url']})"
elif event.event_type == "text-generation":
s += event.text.encode("utf-8").decode("utf-8")
if time.time() - start > 0.4:
start = time.time()
bot_reply_markdown(
reply_id,
who,
f"\nStill thinking{len(s)}...",
bot,
split_text=True,
)
elif event.event_type == "stream-end":
break
s += "\n" + source + "\n"
try:
bot_reply_markdown(reply_id, who, s, bot, split_text=True)
except:
pass
player_message.append(
{
"role": "Chatbot",
"message": convert(s),
}
)
except Exception as e:
print(e)
bot_reply_markdown(reply_id, who, "Answer wrong", bot)
player_message.clear()
return
def cohere_handler(message: Message, bot: TeleBot) -> None: def cohere_handler(message: Message, bot: TeleBot) -> None:
"""cohere : /cohere <question> This will return a telegraph link""" """cohere : /cohere_pro <question> Come with a telegraph link"""
m = message.text.strip() m = message.text.strip()
player_message = [] player_message = []
@ -150,6 +80,14 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
player_message = player_message[2:] player_message = player_message[2:]
try: try:
current_time = datetime.datetime.now(datetime.timezone.utc)
preamble = (
f"You are Command, a large language model trained to have polite, helpful, and inclusive conversations with people. Your responses should be accurate and graceful in user's original language."
f"The current UTC time is {current_time.strftime('%Y-%m-%d %H:%M:%S')}, "
f"UTC-4 (e.g. New York) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=-4))).strftime('%Y-%m-%d %H:%M:%S')}, "
f"UTC-7 (e.g. Los Angeles) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, "
f"and UTC+8 (e.g. Beijing) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}."
)
stream = co.chat_stream( stream = co.chat_stream(
model=COHERE_MODEL, model=COHERE_MODEL,
message=m, message=m,
@ -158,7 +96,7 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
prompt_truncation="AUTO", prompt_truncation="AUTO",
connectors=[{"id": "web-search"}], connectors=[{"id": "web-search"}],
citation_quality="accurate", citation_quality="accurate",
preamble=f"You are Command R+, a large language model trained to have polite, helpful, inclusive conversations with people. The current time in Tornoto is {datetime.datetime.now(datetime.timezone.utc).astimezone().strftime('%Y-%m-%d %H:%M:%S')}, in Los Angeles is {datetime.datetime.now(datetime.timezone.utc).astimezone().astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, and in China is {datetime.datetime.now(datetime.timezone.utc).astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}.", preamble=preamble,
) )
s = "" s = ""
@ -175,22 +113,32 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
source += f"\n{doc['title']}\n{doc['url']}\n" source += f"\n{doc['title']}\n{doc['url']}\n"
elif event.event_type == "text-generation": elif event.event_type == "text-generation":
s += event.text.encode("utf-8").decode("utf-8") s += event.text.encode("utf-8").decode("utf-8")
if time.time() - start > 0.4: if time.time() - start > 1.4:
start = time.time() start = time.time()
bot_reply_markdown( s = clean_text(s)
reply_id, if len(s) > 3900:
who, bot_reply_markdown(
f"\nStill thinking{len(s)}...", reply_id,
bot, who,
split_text=True, f"\nStill thinking{len(s)}...\n",
) bot,
split_text=True,
)
else:
bot_reply_markdown(
reply_id,
who,
f"\nStill thinking{len(s)}...\n{s}",
bot,
split_text=True,
)
elif event.event_type == "stream-end": elif event.event_type == "stream-end":
break break
content = ( content = (
s s
+ "\n------\n------\n" + "\n------\n------\n"
+ source + source
+ f"\n------\n------\nLast Update{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + f"\nLast Update{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} at UTC+8\n"
) )
ph_s = ph.create_page_md( ph_s = ph.create_page_md(
title="Cohere", markdown_text=content title="Cohere", markdown_text=content
@ -218,14 +166,6 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
if COHERE_API_KEY: if COHERE_API_KEY:
def register(bot: TeleBot) -> None:
bot.register_message_handler(
cohere_handler_direct, commands=["cohere_no_ph"], pass_bot=True
)
bot.register_message_handler(
cohere_handler_direct, regexp="^cohere_no_ph:", pass_bot=True
)
def register(bot: TeleBot) -> None: def register(bot: TeleBot) -> None:
bot.register_message_handler(cohere_handler, commands=["cohere"], pass_bot=True) bot.register_message_handler(cohere_handler, commands=["cohere"], pass_bot=True)
bot.register_message_handler(cohere_handler, regexp="^cohere:", pass_bot=True) bot.register_message_handler(cohere_handler, regexp="^cohere:", pass_bot=True)