mirror of
				https://github.com/cdryzun/tg_bot_collections.git
				synced 2025-11-04 16:56:43 +08:00 
			
		
		
		
	fix: cohere long answer and garbled code
This commit is contained in:
		@ -1,6 +1,7 @@
 | 
				
			|||||||
from os import environ
 | 
					from os import environ
 | 
				
			||||||
import time
 | 
					import time
 | 
				
			||||||
import datetime
 | 
					import datetime
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from telebot import TeleBot
 | 
					from telebot import TeleBot
 | 
				
			||||||
from telebot.types import Message
 | 
					from telebot.types import Message
 | 
				
			||||||
@ -16,7 +17,7 @@ markdown_symbol.head_level_1 = "📌"  # If you want, Customizing the head level
 | 
				
			|||||||
markdown_symbol.link = "🔗"  # Optional: customize the link symbol
 | 
					markdown_symbol.link = "🔗"  # Optional: customize the link symbol
 | 
				
			||||||
 | 
					
 | 
				
			||||||
COHERE_API_KEY = environ.get("COHERE_API_KEY")
 | 
					COHERE_API_KEY = environ.get("COHERE_API_KEY")
 | 
				
			||||||
COHERE_MODEL = "command-r-plus"
 | 
					COHERE_MODEL = "command-r-plus"  # command-r may produce garbled Chinese output; non-streaming mode may also produce garbled output.
 | 
				
			||||||
if COHERE_API_KEY:
 | 
					if COHERE_API_KEY:
 | 
				
			||||||
    co = cohere.Client(api_key=COHERE_API_KEY)
 | 
					    co = cohere.Client(api_key=COHERE_API_KEY)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -27,98 +28,27 @@ ph = TelegraphAPI(TELEGRA_PH_TOKEN)
 | 
				
			|||||||
cohere_player_dict = ExpiringDict(max_len=1000, max_age_seconds=300)
 | 
					cohere_player_dict = ExpiringDict(max_len=1000, max_age_seconds=300)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def cohere_handler_direct(message: Message, bot: TeleBot) -> None:
 | 
					def clean_text(text):
 | 
				
			||||||
    """cohere : /cohere <question>"""
 | 
					    """Clean up the garbled code in the UTF-8 encoded Chinese string.
 | 
				
			||||||
    m = message.text.strip()
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    player_message = []
 | 
					    Args:
 | 
				
			||||||
    if str(message.from_user.id) not in cohere_player_dict:
 | 
					      text: String that needs to be cleaned.
 | 
				
			||||||
        cohere_player_dict[str(message.from_user.id)] = player_message
 | 
					
 | 
				
			||||||
 | 
					    Returns:
 | 
				
			||||||
 | 
					      The cleaned string, if garbled code is detected, a prompt message is added at the end.
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    if "<EFBFBD>" in text:
 | 
				
			||||||
 | 
					        # Use re.sub to clean up garbled code
 | 
				
			||||||
 | 
					        cleaned_text = re.sub(r"<EFBFBD>.*?([,。!?;:]|$)", r"\1", text)
 | 
				
			||||||
 | 
					        cleaned_text = re.sub(r"\s+", " ", cleaned_text).strip()
 | 
				
			||||||
 | 
					        print(f"\n---------\nOriginal text:\n{text}\n---------")
 | 
				
			||||||
 | 
					        return cleaned_text + "\n\n~~(乱码已去除,可能存在错误,请注意)~~"
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        player_message = cohere_player_dict[str(message.from_user.id)]
 | 
					        return text
 | 
				
			||||||
 | 
					 | 
				
			||||||
    if m.strip() == "clear":
 | 
					 | 
				
			||||||
        bot.reply_to(
 | 
					 | 
				
			||||||
            message,
 | 
					 | 
				
			||||||
            "Just cleared your Cohere messages history",
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
        player_message.clear()
 | 
					 | 
				
			||||||
        return
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if m[:4].lower() == "new ":
 | 
					 | 
				
			||||||
        m = m[4:].strip()
 | 
					 | 
				
			||||||
        player_message.clear()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    m = enrich_text_with_urls(m)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    who = "Command R Plus"
 | 
					 | 
				
			||||||
    reply_id = bot_reply_first(message, who, bot)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    player_message.append({"role": "User", "message": m})
 | 
					 | 
				
			||||||
    # keep the last 5, every has two ask and answer.
 | 
					 | 
				
			||||||
    if len(player_message) > 10:
 | 
					 | 
				
			||||||
        player_message = player_message[2:]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        stream = co.chat_stream(
 | 
					 | 
				
			||||||
            model=COHERE_MODEL,
 | 
					 | 
				
			||||||
            message=m,
 | 
					 | 
				
			||||||
            temperature=0.8,
 | 
					 | 
				
			||||||
            chat_history=player_message,
 | 
					 | 
				
			||||||
            prompt_truncation="AUTO",
 | 
					 | 
				
			||||||
            connectors=[{"id": "web-search"}],
 | 
					 | 
				
			||||||
            citation_quality="accurate",
 | 
					 | 
				
			||||||
            preamble=f"You are Command R+, a large language model trained to have polite, helpful, inclusive conversations with people. The current time in Tornoto is {datetime.datetime.now(datetime.timezone.utc).astimezone().strftime('%Y-%m-%d %H:%M:%S')}, in Los Angeles is {datetime.datetime.now(datetime.timezone.utc).astimezone().astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, and in China is {datetime.datetime.now(datetime.timezone.utc).astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}.",
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        s = ""
 | 
					 | 
				
			||||||
        source = ""
 | 
					 | 
				
			||||||
        start = time.time()
 | 
					 | 
				
			||||||
        for event in stream:
 | 
					 | 
				
			||||||
            if event.event_type == "stream-start":
 | 
					 | 
				
			||||||
                bot_reply_markdown(reply_id, who, "Thinking...", bot)
 | 
					 | 
				
			||||||
            elif event.event_type == "search-queries-generation":
 | 
					 | 
				
			||||||
                bot_reply_markdown(reply_id, who, "Searching online...", bot)
 | 
					 | 
				
			||||||
            elif event.event_type == "search-results":
 | 
					 | 
				
			||||||
                bot_reply_markdown(reply_id, who, "Reading...", bot)
 | 
					 | 
				
			||||||
                for doc in event.documents:
 | 
					 | 
				
			||||||
                    source += f"\n[{doc['title']}]({doc['url']})"
 | 
					 | 
				
			||||||
            elif event.event_type == "text-generation":
 | 
					 | 
				
			||||||
                s += event.text.encode("utf-8").decode("utf-8")
 | 
					 | 
				
			||||||
                if time.time() - start > 0.4:
 | 
					 | 
				
			||||||
                    start = time.time()
 | 
					 | 
				
			||||||
                    bot_reply_markdown(
 | 
					 | 
				
			||||||
                        reply_id,
 | 
					 | 
				
			||||||
                        who,
 | 
					 | 
				
			||||||
                        f"\nStill thinking{len(s)}...",
 | 
					 | 
				
			||||||
                        bot,
 | 
					 | 
				
			||||||
                        split_text=True,
 | 
					 | 
				
			||||||
                    )
 | 
					 | 
				
			||||||
            elif event.event_type == "stream-end":
 | 
					 | 
				
			||||||
                break
 | 
					 | 
				
			||||||
        s += "\n" + source + "\n"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        try:
 | 
					 | 
				
			||||||
            bot_reply_markdown(reply_id, who, s, bot, split_text=True)
 | 
					 | 
				
			||||||
        except:
 | 
					 | 
				
			||||||
            pass
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        player_message.append(
 | 
					 | 
				
			||||||
            {
 | 
					 | 
				
			||||||
                "role": "Chatbot",
 | 
					 | 
				
			||||||
                "message": convert(s),
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    except Exception as e:
 | 
					 | 
				
			||||||
        print(e)
 | 
					 | 
				
			||||||
        bot_reply_markdown(reply_id, who, "Answer wrong", bot)
 | 
					 | 
				
			||||||
        player_message.clear()
 | 
					 | 
				
			||||||
        return
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def cohere_handler(message: Message, bot: TeleBot) -> None:
 | 
					def cohere_handler(message: Message, bot: TeleBot) -> None:
 | 
				
			||||||
    """cohere : /cohere <question> This will return a telegraph link"""
 | 
					    """cohere : /cohere_pro <question> Come with a telegraph link"""
 | 
				
			||||||
    m = message.text.strip()
 | 
					    m = message.text.strip()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    player_message = []
 | 
					    player_message = []
 | 
				
			||||||
@ -150,6 +80,14 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
 | 
				
			|||||||
        player_message = player_message[2:]
 | 
					        player_message = player_message[2:]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
 | 
					        current_time = datetime.datetime.now(datetime.timezone.utc)
 | 
				
			||||||
 | 
					        preamble = (
 | 
				
			||||||
 | 
					            f"You are Command, a large language model trained to have polite, helpful, and inclusive conversations with people. Your responses should be accurate and graceful in user's original language."
 | 
				
			||||||
 | 
					            f"The current UTC time is {current_time.strftime('%Y-%m-%d %H:%M:%S')}, "
 | 
				
			||||||
 | 
					            f"UTC-4 (e.g. New York) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=-4))).strftime('%Y-%m-%d %H:%M:%S')}, "
 | 
				
			||||||
 | 
					            f"UTC-7 (e.g. Los Angeles) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, "
 | 
				
			||||||
 | 
					            f"and UTC+8 (e.g. Beijing) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}."
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
        stream = co.chat_stream(
 | 
					        stream = co.chat_stream(
 | 
				
			||||||
            model=COHERE_MODEL,
 | 
					            model=COHERE_MODEL,
 | 
				
			||||||
            message=m,
 | 
					            message=m,
 | 
				
			||||||
@ -158,7 +96,7 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
 | 
				
			|||||||
            prompt_truncation="AUTO",
 | 
					            prompt_truncation="AUTO",
 | 
				
			||||||
            connectors=[{"id": "web-search"}],
 | 
					            connectors=[{"id": "web-search"}],
 | 
				
			||||||
            citation_quality="accurate",
 | 
					            citation_quality="accurate",
 | 
				
			||||||
            preamble=f"You are Command R+, a large language model trained to have polite, helpful, inclusive conversations with people. The current time in Tornoto is {datetime.datetime.now(datetime.timezone.utc).astimezone().strftime('%Y-%m-%d %H:%M:%S')}, in Los Angeles is {datetime.datetime.now(datetime.timezone.utc).astimezone().astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, and in China is {datetime.datetime.now(datetime.timezone.utc).astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}.",
 | 
					            preamble=preamble,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        s = ""
 | 
					        s = ""
 | 
				
			||||||
@ -175,12 +113,22 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
 | 
				
			|||||||
                    source += f"\n{doc['title']}\n{doc['url']}\n"
 | 
					                    source += f"\n{doc['title']}\n{doc['url']}\n"
 | 
				
			||||||
            elif event.event_type == "text-generation":
 | 
					            elif event.event_type == "text-generation":
 | 
				
			||||||
                s += event.text.encode("utf-8").decode("utf-8")
 | 
					                s += event.text.encode("utf-8").decode("utf-8")
 | 
				
			||||||
                if time.time() - start > 0.4:
 | 
					                if time.time() - start > 1.4:
 | 
				
			||||||
                    start = time.time()
 | 
					                    start = time.time()
 | 
				
			||||||
 | 
					                    s = clean_text(s)
 | 
				
			||||||
 | 
					                    if len(s) > 3900:
 | 
				
			||||||
                        bot_reply_markdown(
 | 
					                        bot_reply_markdown(
 | 
				
			||||||
                            reply_id,
 | 
					                            reply_id,
 | 
				
			||||||
                            who,
 | 
					                            who,
 | 
				
			||||||
                        f"\nStill thinking{len(s)}...",
 | 
					                            f"\nStill thinking{len(s)}...\n",
 | 
				
			||||||
 | 
					                            bot,
 | 
				
			||||||
 | 
					                            split_text=True,
 | 
				
			||||||
 | 
					                        )
 | 
				
			||||||
 | 
					                    else:
 | 
				
			||||||
 | 
					                        bot_reply_markdown(
 | 
				
			||||||
 | 
					                            reply_id,
 | 
				
			||||||
 | 
					                            who,
 | 
				
			||||||
 | 
					                            f"\nStill thinking{len(s)}...\n{s}",
 | 
				
			||||||
                            bot,
 | 
					                            bot,
 | 
				
			||||||
                            split_text=True,
 | 
					                            split_text=True,
 | 
				
			||||||
                        )
 | 
					                        )
 | 
				
			||||||
@ -190,7 +138,7 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
 | 
				
			|||||||
            s
 | 
					            s
 | 
				
			||||||
            + "\n------\n------\n"
 | 
					            + "\n------\n------\n"
 | 
				
			||||||
            + source
 | 
					            + source
 | 
				
			||||||
            + f"\n------\n------\nLast Update{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
 | 
					            + f"\nLast Update{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} at UTC+8\n"
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        ph_s = ph.create_page_md(
 | 
					        ph_s = ph.create_page_md(
 | 
				
			||||||
            title="Cohere", markdown_text=content
 | 
					            title="Cohere", markdown_text=content
 | 
				
			||||||
@ -218,14 +166,6 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
if COHERE_API_KEY:
 | 
					if COHERE_API_KEY:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def register(bot: TeleBot) -> None:
 | 
					 | 
				
			||||||
        bot.register_message_handler(
 | 
					 | 
				
			||||||
            cohere_handler_direct, commands=["cohere_no_ph"], pass_bot=True
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
        bot.register_message_handler(
 | 
					 | 
				
			||||||
            cohere_handler_direct, regexp="^cohere_no_ph:", pass_bot=True
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def register(bot: TeleBot) -> None:
 | 
					    def register(bot: TeleBot) -> None:
 | 
				
			||||||
        bot.register_message_handler(cohere_handler, commands=["cohere"], pass_bot=True)
 | 
					        bot.register_message_handler(cohere_handler, commands=["cohere"], pass_bot=True)
 | 
				
			||||||
        bot.register_message_handler(cohere_handler, regexp="^cohere:", pass_bot=True)
 | 
					        bot.register_message_handler(cohere_handler, regexp="^cohere:", pass_bot=True)
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user