From 5fb3fc49d0786ecb68d7816621989c5a8d67d7e4 Mon Sep 17 00:00:00 2001
From: Alter-xyz <88554920+alterxyz@users.noreply.github.com>
Date: Fri, 28 Jun 2024 05:28:44 -0400
Subject: [PATCH 1/6] fix: cohere long answers and garbled text

---
 handlers/cohere.py | 152 ++++++++++++++-------------------------------
 1 file changed, 46 insertions(+), 106 deletions(-)

diff --git a/handlers/cohere.py b/handlers/cohere.py
index ddc0082..20ee644 100644
--- a/handlers/cohere.py
+++ b/handlers/cohere.py
@@ -1,6 +1,7 @@
 from os import environ
 import time
 import datetime
+import re
 
 from telebot import TeleBot
 from telebot.types import Message
@@ -16,7 +17,7 @@ markdown_symbol.head_level_1 = "πŸ“Œ"  # If you want, Customizing the head level
 markdown_symbol.link = "πŸ”—"  # If you want, Customizing the link symbol
 
 COHERE_API_KEY = environ.get("COHERE_API_KEY")
-COHERE_MODEL = "command-r-plus"
+COHERE_MODEL = "command-r-plus"  # command-r may garble Chinese output, and non-stream mode may also produce garbled text.
 if COHERE_API_KEY:
     co = cohere.Client(api_key=COHERE_API_KEY)
 
@@ -27,98 +28,27 @@ ph = TelegraphAPI(TELEGRA_PH_TOKEN)
 cohere_player_dict = ExpiringDict(max_len=1000, max_age_seconds=300)
 
 
-def cohere_handler_direct(message: Message, bot: TeleBot) -> None:
-    """cohere : /cohere """
-    m = message.text.strip()
+def clean_text(text):
+    """Clean up garbled characters in a UTF-8 encoded Chinese string.
 
-    player_message = []
-    if str(message.from_user.id) not in cohere_player_dict:
-        cohere_player_dict[str(message.from_user.id)] = player_message
+    Args:
+        text: String that needs to be cleaned.
+
+    Returns:
+        The cleaned string; if garbled text was detected, a notice is appended at the end.
+    """
+    if "οΏ½" in text:
+        # Drop everything from a replacement character up to the next punctuation mark (or end of string)
+        cleaned_text = re.sub(r"οΏ½.*?([οΌŒγ€‚οΌοΌŸοΌ›οΌš]|$)", r"\1", text)
+        cleaned_text = re.sub(r"\s+", " ", cleaned_text).strip()
+        print(f"\n---------\nOriginal text:\n{text}\n---------")
+        return cleaned_text + "\n\n~~(δΉ±η ε·²εŽ»ι™€οΌŒε―θƒ½ε­˜εœ¨ι”™θ――οΌŒθ―·ζ³¨ζ„)~~"
+    else:
-        player_message = cohere_player_dict[str(message.from_user.id)]
-
-    if m.strip() == "clear":
-        bot.reply_to(
-            message,
-            "Just cleared your Cohere messages history",
-        )
-        player_message.clear()
-        return
-
-    if m[:4].lower() == "new ":
-        m = m[4:].strip()
-        player_message.clear()
-
-    m = enrich_text_with_urls(m)
-
-    who = "Command R Plus"
-    reply_id = bot_reply_first(message, who, bot)
-
-    player_message.append({"role": "User", "message": m})
-    # keep only the last 5 rounds; each round has one question and one answer.
-    if len(player_message) > 10:
-        player_message = player_message[2:]
-
-    try:
-        stream = co.chat_stream(
-            model=COHERE_MODEL,
-            message=m,
-            temperature=0.8,
-            chat_history=player_message,
-            prompt_truncation="AUTO",
-            connectors=[{"id": "web-search"}],
-            citation_quality="accurate",
-            preamble=f"You are Command R+, a large language model trained to have polite, helpful, inclusive conversations with people. The current time in Toronto is {datetime.datetime.now(datetime.timezone.utc).astimezone().strftime('%Y-%m-%d %H:%M:%S')}, in Los Angeles is {datetime.datetime.now(datetime.timezone.utc).astimezone().astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, and in China is {datetime.datetime.now(datetime.timezone.utc).astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}.",
-        )
-
-        s = ""
-        source = ""
-        start = time.time()
-        for event in stream:
-            if event.event_type == "stream-start":
-                bot_reply_markdown(reply_id, who, "Thinking...", bot)
-            elif event.event_type == "search-queries-generation":
-                bot_reply_markdown(reply_id, who, "Searching online...", bot)
-            elif event.event_type == "search-results":
-                bot_reply_markdown(reply_id, who, "Reading...", bot)
-                for doc in event.documents:
-                    source += f"\n[{doc['title']}]({doc['url']})"
-            elif event.event_type == "text-generation":
-                s += event.text.encode("utf-8").decode("utf-8")
-                if time.time() - start > 0.4:
-                    start = time.time()
-                    bot_reply_markdown(
-                        reply_id,
-                        who,
-                        f"\nStill thinking{len(s)}...",
-                        bot,
-                        split_text=True,
-                    )
-            elif event.event_type == "stream-end":
-                break
-        s += "\n" + source + "\n"
-
-        try:
-            bot_reply_markdown(reply_id, who, s, bot, split_text=True)
-        except:
-            pass
-
-        player_message.append(
-            {
-                "role": "Chatbot",
-                "message": convert(s),
-            }
-        )
-
-    except Exception as e:
-        print(e)
-        bot_reply_markdown(reply_id, who, "Answer wrong", bot)
-        player_message.clear()
-        return
+        return text
 
 
 def cohere_handler(message: Message, bot: TeleBot) -> None:
-    """cohere : /cohere This will return a telegraph link"""
+    """cohere : /cohere Replies with a telegra.ph link"""
     m = message.text.strip()
 
     player_message = []
@@ -150,6 +80,14 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
             player_message = player_message[2:]
 
     try:
+        current_time = datetime.datetime.now(datetime.timezone.utc)
+        preamble = (
+            f"You are Command, a large language model trained to have polite, helpful, and inclusive conversations with people. Your responses should be accurate and graceful, in the user's original language. "
+            f"The current UTC time is {current_time.strftime('%Y-%m-%d %H:%M:%S')}, "
+            f"UTC-4 (e.g. New York) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=-4))).strftime('%Y-%m-%d %H:%M:%S')}, "
+            f"UTC-7 (e.g. Los Angeles) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, "
+            f"and UTC+8 (e.g. Beijing) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}."
+        )
         stream = co.chat_stream(
             model=COHERE_MODEL,
             message=m,
@@ -158,7 +96,7 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
             prompt_truncation="AUTO",
             connectors=[{"id": "web-search"}],
             citation_quality="accurate",
-            preamble=f"You are Command R+, a large language model trained to have polite, helpful, inclusive conversations with people. The current time in Toronto is {datetime.datetime.now(datetime.timezone.utc).astimezone().strftime('%Y-%m-%d %H:%M:%S')}, in Los Angeles is {datetime.datetime.now(datetime.timezone.utc).astimezone().astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, and in China is {datetime.datetime.now(datetime.timezone.utc).astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}.",
+            preamble=preamble,
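+            # the preamble is now built once above, with explicit UTC offsets, instead of inline here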
         )
 
         s = ""
         source = ""
         start = time.time()
         for event in stream:
             if event.event_type == "stream-start":
                 bot_reply_markdown(reply_id, who, "Thinking...", bot)
             elif event.event_type == "search-queries-generation":
                 bot_reply_markdown(reply_id, who, "Searching online...", bot)
             elif event.event_type == "search-results":
                 bot_reply_markdown(reply_id, who, "Reading...", bot)
                 for doc in event.documents:
                     source += f"\n{doc['title']}\n{doc['url']}\n"
             elif event.event_type == "text-generation":
                 s += event.text.encode("utf-8").decode("utf-8")
-                if time.time() - start > 0.4:
+                if time.time() - start > 1.4:
                     start = time.time()
-                    bot_reply_markdown(
-                        reply_id,
-                        who,
-                        f"\nStill thinking{len(s)}...",
-                        bot,
-                        split_text=True,
-                    )
+                    s = clean_text(s)
+                    if len(s) > 3900:
+                        bot_reply_markdown(
+                            reply_id,
+                            who,
+                            f"\nStill thinking {len(s)}...\n",
+                            bot,
+                            split_text=True,
+                        )
+                    else:
+                        bot_reply_markdown(
+                            reply_id,
+                            who,
+                            f"\nStill thinking {len(s)}...\n{s}",
+                            bot,
+                            split_text=True,
+                        )
             elif event.event_type == "stream-end":
                 break
         content = (
             s
             + "\n------\n------\n"
             + source
-            + f"\n------\n------\nLast Update{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+            + f"\nLast Update: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} at UTC+8\n"
         )
         ph_s = ph.create_page_md(
             title="Cohere", markdown_text=content
@@ -218,14 +166,6 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
 
 if COHERE_API_KEY:
 
-    def register(bot: TeleBot) -> None:
-        bot.register_message_handler(
-            cohere_handler_direct, commands=["cohere_no_ph"], pass_bot=True
-        )
-        bot.register_message_handler(
-            cohere_handler_direct, regexp="^cohere_no_ph:", pass_bot=True
-        )
-
     def register(bot: TeleBot) -> None:
         bot.register_message_handler(cohere_handler, commands=["cohere"], pass_bot=True)
         bot.register_message_handler(cohere_handler, regexp="^cohere:", pass_bot=True)

From f2b73750a8e62ced2c2475aec3b3f56ae51d14a9 Mon Sep 17 00:00:00 2001
From: Alter-xyz <88554920+alterxyz@users.noreply.github.com>
Date: Fri, 28 Jun 2024 05:29:09 -0400
Subject: [PATCH 2/6] fix: skip updating with the same message

---
 handlers/__init__.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/handlers/__init__.py b/handlers/__init__.py
index f13f1f5..79ca3ca 100644
--- a/handlers/__init__.py
+++ b/handlers/__init__.py
@@ -7,6 +7,7 @@ import traceback
 from functools import update_wrapper
 from pathlib import Path
 from typing import Any, Callable, TypeVar
+from expiringdict import ExpiringDict
 
 import requests
 from telebot import TeleBot
@@ -23,6 +24,8 @@ T = TypeVar("T", bound=Callable)
 
 BOT_MESSAGE_LENGTH = 4000
 
+REPLY_MESSAGE_CACHE = ExpiringDict(max_len=1000, max_age_seconds=300)
+
 
 def bot_reply_first(message: Message, who: str, bot: TeleBot) -> Message:
     """Create the first reply message which make user feel the bot is working."""
@@ -39,6 +42,11 @@ def bot_reply_markdown(
     it will fallback to plain text in case of any failure
     """
     try:
+        cache_key = f"{reply_id.chat.id}_{reply_id.message_id}"
+        if cache_key in REPLY_MESSAGE_CACHE and REPLY_MESSAGE_CACHE[cache_key] == text:
+            print(f"Skipping duplicate message for {cache_key}")
+            return True
+        REPLY_MESSAGE_CACHE[cache_key] = text
         if len(text.encode("utf-8")) <= BOT_MESSAGE_LENGTH or not split_text:
             bot.edit_message_text(
                 f"*{who}*:\n{telegramify_markdown.convert(text)}",

From d1d84aa0e333c327e784dd6c3e788a69caf2e6c8 Mon Sep 17 00:00:00 2001
From: Alter-xyz <88554920+alterxyz@users.noreply.github.com>
Date: Fri, 28 Jun 2024 11:46:06 -0400
Subject: [PATCH 3/6] refactor

- refactor: modularize the handlers
- chore: move the switches to the top as "Customization"
- chore: prompts for tasks
- feat: support more LLMs as optional
- feat: summarization of the final answer
- feat: asynchronous threads for faster responses
---
 handlers/useful.py | 633 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 487 insertions(+), 146 deletions(-)

diff --git a/handlers/useful.py b/handlers/useful.py
index 59f2897..2acf40f 100644
--- a/handlers/useful.py
+++ b/handlers/useful.py
@@ -8,80 +8,130 @@ import time
 import datetime
 from concurrent.futures import ThreadPoolExecutor
 
-from openai import OpenAI
-import google.generativeai as genai
-from telebot import TeleBot
-from together import Together
-from telebot.types import Message
-
 from . import *
 
 from telegramify_markdown.customize import markdown_symbol
 
-#### Cohere init ####
-import cohere
-
-COHERE_API_KEY = environ.get("COHERE_API_KEY")
-COHERE_MODEL = "command-r-plus"
-# if you want to use cohere for answer it, set it to True
-USE_CHHERE = False
-USE_CLAUDE = True
-if COHERE_API_KEY:
-    co = cohere.Client(api_key=COHERE_API_KEY)
-
-#### Telegraph init ####
-TELEGRA_PH_TOKEN = environ.get("TELEGRA_PH_TOKEN")
-ph = TelegraphAPI(TELEGRA_PH_TOKEN)
-#### Telegraph done ####
-
+# If you want, Customizing the head level 1 symbol
+markdown_symbol.head_level_1 = "πŸ“Œ"
+markdown_symbol.link = "πŸ”—"  # If you want, Customizing the link symbol
 
 chat_message_dict = ExpiringDict(max_len=100, max_age_seconds=120)
 chat_user_dict = ExpiringDict(max_len=100, max_age_seconds=20)
 
-markdown_symbol.head_level_1 = "πŸ“Œ"  # If you want, Customizing the head level 1 symbol
-markdown_symbol.link = "πŸ”—"  # If you want, Customizing the link symbol
 
-GOOGLE_GEMINI_KEY = environ.get("GOOGLE_GEMINI_KEY")
+#### Customization ####
+Language = "zh-cn"  # "en" or "zh-cn".
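+# Note: a *_USE switch below only turns a model on; the matching API key must
+# also be present in the environment (see the per-model init blocks further down).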
+SUMMARY = "gemini" # "cohere" or "gemini" or None +Extra_clean = True # Will Delete command message +GEMINI_USE = True +CHATGPT_USE = True +COHERE_USE = True +QWEN_USE = True +CLADUE_USE = True +LLAMA_USE = True -genai.configure(api_key=GOOGLE_GEMINI_KEY) +#### Telegra.ph init #### +# Will auto generate a token if not provided, restart will lose all TODO +TELEGRA_PH_TOKEN = environ.get("TELEGRA_PH_TOKEN") +# Edit "Store_Token = False" in "__init__.py" to True to store it +ph = TelegraphAPI(TELEGRA_PH_TOKEN) -generation_config = { - "temperature": 0.7, - "top_p": 1, - "top_k": 1, - "max_output_tokens": 8192, -} -safety_settings = [ - {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"}, - {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"}, - {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"}, - {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}, -] - -model = genai.GenerativeModel( - model_name="gemini-1.5-flash-latest", - generation_config=generation_config, - safety_settings=safety_settings, -) - -#### ChatGPT init #### +#### LLMs init #### +#### OpenAI init #### CHATGPT_API_KEY = environ.get("OPENAI_API_KEY") CHATGPT_BASE_URL = environ.get("OPENAI_API_BASE") or "https://api.openai.com/v1" +if CHATGPT_USE and CHATGPT_API_KEY: + from openai import OpenAI + + CHATGPT_PRO_MODEL = "gpt-4o-2024-05-13" + client = OpenAI(api_key=CHATGPT_API_KEY, base_url=CHATGPT_BASE_URL, timeout=300) + + +#### Gemini init #### +GOOGLE_GEMINI_KEY = environ.get("GOOGLE_GEMINI_KEY") +if GEMINI_USE and GOOGLE_GEMINI_KEY: + import google.generativeai as genai + from google.generativeai import ChatSession + from google.generativeai.types.generation_types import StopCandidateException + + genai.configure(api_key=GOOGLE_GEMINI_KEY) + + generation_config = { + "temperature": 0.7, + "top_p": 1, + "top_k": 1, + "max_output_tokens": 8192, + } + + safety_settings = [ + {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"}, + {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"}, + {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"}, + {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}, + ] + + model = genai.GenerativeModel( + model_name="gemini-1.5-flash-latest", + generation_config=generation_config, + safety_settings=safety_settings, + ) + model_flash = genai.GenerativeModel( + model_name="gemini-1.5-flash-latest", + generation_config=generation_config, + safety_settings=safety_settings, + system_instruction=f""" +The user asked a question, and multiple AI have given answers to the same question. +Your task is to summarize the responses from them in a concise and clear manner. +The summary should: +In one to two short sentences, as less as possible, and should not exceed 150 characters. +Your must use language of {Language} to respond. 
+Start with "Summary:" or "ζ€»η»“:" +""", + ) + convo = model.start_chat() + convo_summary = model_flash.start_chat() + + +#### Cohere init #### +COHERE_API_KEY = environ.get("COHERE_API_KEY") + +if COHERE_USE and COHERE_API_KEY: + import cohere + + COHERE_MODEL = "command-r-plus" + co = cohere.Client(api_key=COHERE_API_KEY) + + +#### Qwen init #### QWEN_API_KEY = environ.get("TOGETHER_API_KEY") -QWEN_MODEL = "Qwen/Qwen2-72B-Instruct" -CHATGPT_PRO_MODEL = "gpt-4o-2024-05-13" -#### CLAUDE #### +if QWEN_USE and QWEN_API_KEY: + from together import Together + + QWEN_MODEL = "Qwen/Qwen2-72B-Instruct" + qwen_client = Together(api_key=QWEN_API_KEY) + +#### Claude init #### ANTHROPIC_API_KEY = environ.get("ANTHROPIC_API_KEY") -ANTHROPIC_BASE_URL = environ.get("ANTHROPIC_BASE_URL") -ANTHROPIC_MODEL = "claude-3-5-sonnet-20240620" # use openai for claude -claude_client = OpenAI( - api_key=ANTHROPIC_API_KEY, base_url=ANTHROPIC_BASE_URL, timeout=20 -) +if CLADUE_USE and ANTHROPIC_API_KEY: + ANTHROPIC_BASE_URL = environ.get("ANTHROPIC_BASE_URL") + ANTHROPIC_MODEL = "claude-3-5-sonnet-20240620" + claude_client = OpenAI( + api_key=ANTHROPIC_API_KEY, base_url=ANTHROPIC_BASE_URL, timeout=20 + ) -client = OpenAI(api_key=CHATGPT_API_KEY, base_url=CHATGPT_BASE_URL, timeout=300) -qwen_client = Together(api_key=QWEN_API_KEY, timeout=300) +#### llama init #### +LLAMA_API_KEY = environ.get("GROQ_API_KEY") +if LLAMA_USE and LLAMA_API_KEY: + from groq import Groq + + llama_client = Groq(api_key=LLAMA_API_KEY) + LLAMA_MODEL = "llama3-8b-8192" + + +#### init end #### def md_handler(message: Message, bot: TeleBot): @@ -134,35 +184,7 @@ def latest_handle_messages(message: Message, bot: TeleBot): print(chat_message_dict[chat_id].text) -def get_gpt_answer(message): - chatgpt_reply_text = "" - player_message = [{"role": "user", "content": message}] - try: - r = client.chat.completions.create( - messages=player_message, max_tokens=4096, model=CHATGPT_PRO_MODEL - ) - chatgpt_reply_text = r.choices[0].message.content.encode("utf8").decode() - except Exception as e: - print(e) - chatgpt_reply_text = "answer wrong" - return chatgpt_reply_text - - -def get_claude_answer(message): - chatgpt_reply_text = "" - player_message = [{"role": "user", "content": message}] - try: - r = claude_client.chat.completions.create( - messages=player_message, max_tokens=4096, model=ANTHROPIC_MODEL - ) - chatgpt_reply_text = r.choices[0].message.content.encode("utf8").decode() - except Exception as e: - print(e) - chatgpt_reply_text = "answer wrong" - return chatgpt_reply_text - - -def answer_it_handler(message: Message, bot: TeleBot): +def answer_it_handler(message: Message, bot: TeleBot) -> None: """answer_it: /answer_it""" # answer the last message in the chat group who = "answer_it" @@ -172,85 +194,226 @@ def answer_it_handler(message: Message, bot: TeleBot): latest_message = chat_message_dict.get(chat_id) m = latest_message.text.strip() m = enrich_text_with_urls(m) - full = "Question:\n" + m + "\n---\n" - ##### Gemini ##### + full_answer = f"Question:\n{m}\n---\n" + + #### Answers Thread #### + executor = ThreadPoolExecutor(max_workers=5) + if GEMINI_USE and GOOGLE_GEMINI_KEY: + gemini_future = executor.submit(gemini_answer, latest_message, bot, m) + if CHATGPT_USE and CHATGPT_API_KEY: + chatgpt_future = executor.submit(chatgpt_answer, latest_message, bot, m) + if COHERE_USE and COHERE_API_KEY: + cohere_future = executor.submit(cohere_answer, latest_message, bot, m) + if QWEN_USE and QWEN_API_KEY: + qwen_future = executor.submit(qwen_answer, 
latest_message, bot, m)
+    if CLADUE_USE and ANTHROPIC_API_KEY:
+        claude_future = executor.submit(claude_answer, latest_message, bot, m)
+    if LLAMA_USE and LLAMA_API_KEY:
+        llama_future = executor.submit(llama_answer, latest_message, bot, m)
+
+    #### Answers List ####
+    full_chat_id_list = []
+    if GEMINI_USE and GOOGLE_GEMINI_KEY:
+        answer_gemini, gemini_chat_id = gemini_future.result()
+        full_chat_id_list.append(gemini_chat_id)
+        full_answer += answer_gemini
+    if CHATGPT_USE and CHATGPT_API_KEY:
+        answer_chatgpt, chatgpt_chat_id = chatgpt_future.result()
+        full_chat_id_list.append(chatgpt_chat_id)
+        full_answer += answer_chatgpt
+    if COHERE_USE and COHERE_API_KEY:
+        answer_cohere, cohere_chat_id = cohere_future.result()
+        full_chat_id_list.append(cohere_chat_id)
+        full_answer += answer_cohere
+    if QWEN_USE and QWEN_API_KEY:
+        answer_qwen, qwen_chat_id = qwen_future.result()
+        full_chat_id_list.append(qwen_chat_id)
+        full_answer += answer_qwen
+    if CLADUE_USE and ANTHROPIC_API_KEY:
+        answer_claude, claude_chat_id = claude_future.result()
+        full_chat_id_list.append(claude_chat_id)
+        full_answer += answer_claude
+    if LLAMA_USE and LLAMA_API_KEY:
+        answer_llama, llama_chat_id = llama_future.result()
+        full_chat_id_list.append(llama_chat_id)
+        full_answer += answer_llama
+
+    print(full_chat_id_list)
+
+    ##### Telegraph #####
+    final_answer(latest_message, bot, full_answer, full_chat_id_list)
+    if Extra_clean:
+        bot.delete_message(chat_id, message.message_id)
+
+
+# def thread_answers(latest_message: Message, bot: TeleBot, m: str):
+#     #### answers function init ####
+#     USE = {
+#         "gemini_answer": GEMINI_USE and GOOGLE_GEMINI_KEY,
+#         "chatgpt_answer": CHATGPT_USE and CHATGPT_API_KEY,
+#         "cohere_answer": COHERE_USE and COHERE_API_KEY,
+#         "qwen_answer": QWEN_USE and QWEN_API_KEY,
+#         # More LLMs
+#     }
+
+
+#     results = []
+#     full_chat_id_list = []
+
+#     with ThreadPoolExecutor(max_workers=5) as executor:
+#         futures = {
+#             executor.submit(func, latest_message, bot, m): func
+#             for func, use in USE.items()
+#             if use
+#         }
+
+#         for future in as_completed(futures):
+#             try:
+#                 answer, message_id = future.result()
+#                 # Store the answer and message_id
+#                 results.append((message_id, answer))
+#                 full_chat_id_list.append(message_id)
+#             except Exception as e:
+#                 print(f"\n------\nthread_answers Error:\n{e}\n------\n")
+#                 continue
+
+#     # rank the results by message_id
+#     sorted_results = sorted(results)
+#     full_chat_id_list.sort()
+
+#     # final answer
+#     full_answer = f"Question:\n{m}\n---\n"
+#     for _, answer in sorted_results:
+#         full_answer += answer
+
+#     return full_answer, full_chat_id_list
+
+
+def gemini_answer(latest_message: Message, bot: TeleBot, m):
+    """gemini answer"""
     who = "Gemini Pro"
+    # show something, to make the bot feel more responsive
     reply_id = bot_reply_first(latest_message, who, bot)
 
-    #### executor thread ####
-    executor = ThreadPoolExecutor(max_workers=5)
-    chatgpt_thread = executor.submit(get_gpt_answer, m)
-    claude_thread = None
-
-    claude_answer = ""
-    if ANTHROPIC_API_KEY:
-        claude_thread = executor.submit(get_claude_answer, m)
     try:
-        r = model.generate_content(m, stream=True)
+        r = convo.send_message(m, stream=True)
         s = ""
         start = time.time()
         for e in r:
             s += e.text
-            if time.time() - start > 1.5:
+            if time.time() - start > 1.7:
                 start = time.time()
                 bot_reply_markdown(reply_id, who, s, bot, split_text=False)
         bot_reply_markdown(reply_id, who, s, bot)
+        convo.history.clear()
     except Exception as e:
-        print(e)
+        print(f"\n------\n{who} function inner Error:\n{e}\n------\n")
+        convo.history.clear()
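+        # (history is cleared on failure too, so stale context cannot leak into the next question)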
bot_reply_markdown(reply_id, who, "Error", bot) + return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id - full += f"{who}:\n{s}" - chat_id_list = [reply_id.message_id] + answer = f"\n---\n{who}:\n{s}" + return answer, reply_id.message_id - ##### ChatGPT ##### + +def chatgpt_answer(latest_message: Message, bot: TeleBot, m): + """chatgpt answer""" who = "ChatGPT Pro" reply_id = bot_reply_first(latest_message, who, bot) - # get gpt answer using thread - chatgpt_answer = chatgpt_thread.result() - bot_reply_markdown(reply_id, who, chatgpt_answer, bot) + player_message = [{"role": "user", "content": m}] - full += f"\n---\n{who}:\n{chatgpt_answer}" - chat_id_list.append(reply_id.message_id) + try: + r = client.chat.completions.create( + messages=player_message, + max_tokens=4096, + model=CHATGPT_PRO_MODEL, + stream=True, + ) + s = "" + start = time.time() + for chunk in r: + if chunk.choices[0].delta.content is None: + break + s += chunk.choices[0].delta.content + if time.time() - start > 1.5: + start = time.time() + bot_reply_markdown(reply_id, who, s, bot, split_text=False) + # maybe not complete + try: + bot_reply_markdown(reply_id, who, s, bot) + except: + pass - ##### Claude ##### - if USE_CLAUDE and ANTHROPIC_API_KEY: - who = "Claude Pro" - claude_answer = claude_thread.result() - reply_id = bot_reply_first(latest_message, who, bot) - bot_reply_markdown(reply_id, who, claude_answer, bot) + except Exception as e: + print(f"\n------\n{who} function inner Error:\n{e}\n------\n") + bot_reply_markdown(reply_id, who, "answer wrong", bot) + return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id - full += f"\n---\n{who}:\n{claude_answer}" - chat_id_list.append(reply_id.message_id) - - ##### Cohere ##### - if USE_CHHERE and COHERE_API_KEY: - full, chat_id = cohere_answer(latest_message, bot, full, m) - chat_id_list.append(chat_id) - else: - pass - - ##### Telegraph ##### - final_answer(latest_message, bot, full, chat_id_list) + answer = f"\n---\n{who}:\n{s}" + return answer, reply_id.message_id -def cohere_answer(latest_message: Message, bot: TeleBot, full, m): +def claude_answer(latest_message: Message, bot: TeleBot, m): + """claude answer""" + who = "Claude Pro" + reply_id = bot_reply_first(latest_message, who, bot) + + try: + r = claude_client.chat.completions.create( + messages=[{"role": "user", "content": m}], + max_tokens=4096, + model=ANTHROPIC_MODEL, + stream=True, + ) + s = "" + start = time.time() + for chunk in r: + if chunk.choices[0].delta.content is None: + break + s += chunk.choices[0].delta.content + if time.time() - start > 1.5: + start = time.time() + bot_reply_markdown(reply_id, who, s, bot, split_text=False) + # maybe not complete + try: + bot_reply_markdown(reply_id, who, s, bot) + except: + pass + + except Exception as e: + print(f"\n------\n{who} function inner Error:\n{e}\n------\n") + bot_reply_markdown(reply_id, who, "answer wrong", bot) + return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id + + answer = f"\n---\n{who}:\n{s}" + return answer, reply_id.message_id + + +def cohere_answer(latest_message: Message, bot: TeleBot, m): """cohere answer""" who = "Command R Plus" reply_id = bot_reply_first(latest_message, who, bot) - player_message = [{"role": "User", "message": m}] - try: + current_time = datetime.datetime.now(datetime.timezone.utc) + preamble = ( + f"You are Command R Plus, a large language model trained to have polite, helpful, inclusive conversations with people. People are looking for information that may need you to search online. 
Make an accurate and fast response. If there are no search results, then provide a response based on your general knowledge (it's fine if it's not fully accurate; it might still inspire the user)."
+            f"The current UTC time is {current_time.strftime('%Y-%m-%d %H:%M:%S')}, "
+            f"UTC-4 (e.g. New York) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=-4))).strftime('%Y-%m-%d %H:%M:%S')}, "
+            f"UTC-7 (e.g. Los Angeles) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, "
+            f"and UTC+8 (e.g. Beijing) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}."
+        )
+
         stream = co.chat_stream(
             model=COHERE_MODEL,
             message=m,
-            temperature=0.3,
-            chat_history=player_message,
+            temperature=0.8,
+            chat_history=[],  # One time, so no need for chat history
             prompt_truncation="AUTO",
             connectors=[{"id": "web-search"}],
             citation_quality="accurate",
-            preamble=f"You are Command R+, a large language model trained to have polite, helpful, inclusive conversations with people. The current time in Toronto is {datetime.datetime.now(datetime.timezone.utc).astimezone().strftime('%Y-%m-%d %H:%M:%S')}, in Los Angeles is {datetime.datetime.now(datetime.timezone.utc).astimezone().astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, and in China is {datetime.datetime.now(datetime.timezone.utc).astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}.",
+            preamble=preamble,
         )
 
         s = ""
@@ -266,13 +429,13 @@
             for doc in event.documents:
                 source += f"\n{doc['title']}\n{doc['url']}\n"
         elif event.event_type == "text-generation":
-            s += event.text.encode("utf-8").decode("utf-8")
-            if time.time() - start > 0.4:
+            s += event.text.encode("utf-8").decode("utf-8", "ignore")
+            if time.time() - start > 0.8:
                 start = time.time()
                 bot_reply_markdown(
                     reply_id,
                     who,
-                    f"\nStill thinking{len(s)}...",
+                    f"\nStill thinking {len(s)}...\n{s}",
                     bot,
                     split_text=True,
                 )
@@ -280,34 +443,212 @@
             break
         content = (
             s
-            + "\n------\n------\n"
+            + "\n---\n---\n"
             + source
-            + f"\n------\n------\nLast Update{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+            + f"\nLast Update: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} at UTC+8\n"
         )
-
+        # maybe not complete
         try:
             bot_reply_markdown(reply_id, who, s, bot, split_text=True)
         except:
             pass
 
     except Exception as e:
-        print(e)
+        print(f"\n------\n{who} function inner Error:\n{e}\n------\n")
         bot_reply_markdown(reply_id, who, "Answer wrong", bot)
-        player_message.clear()
-        return full, reply_id.message_id
-    full += f"\n---\n{who}:\n{content}"
-    return full, reply_id.message_id
+        return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
+    answer = f"\n---\n{who}:\n{content}"
+    return answer, reply_id.message_id
 
 
-def final_answer(latest_message: Message, bot: TeleBot, full, answers_list):
-    """final answer"""
-    who = "Answer"
+def qwen_answer(latest_message: Message, bot: TeleBot, m):
+    """qwen answer"""
+    who = "Qwen Pro"
     reply_id = bot_reply_first(latest_message, who, bot)
-    ph_s = ph.create_page_md(title="Answer it", markdown_text=full)
-    bot_reply_markdown(reply_id, who, f"[View]({ph_s})", bot)
+    try:
+        r = qwen_client.chat.completions.create(
+            messages=[{"role": "user", "content": m}],
+            max_tokens=8192,
+            model=QWEN_MODEL,
+            stream=True,
+        )
+        s = ""
+        start = time.time()
+        for chunk in r:
+            if chunk.choices[0].delta.content is None:
+                break
+            s += chunk.choices[0].delta.content
+            if time.time() - start > 1.5:
+                start = time.time()
+                bot_reply_markdown(reply_id, who, s, bot, split_text=False)
+        # maybe not complete
+        try:
+            bot_reply_markdown(reply_id, who, s, bot)
+        except:
+            pass
+
+    except Exception as e:
+        print(f"\n------\n{who} function inner Error:\n{e}\n------\n")
+        bot_reply_markdown(reply_id, who, "answer wrong", bot)
+        return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
+
+    answer = f"\n---\n{who}:\n{s}"
+    return answer, reply_id.message_id
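+# NOTE: chatgpt/claude/qwen/llama all share this shape: stream chunks, throttle the Telegram edits, then send the final text once.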
+def llama_answer(latest_message: Message, bot: TeleBot, m):
+    """llama answer"""
+    who = "llama"
+    reply_id = bot_reply_first(latest_message, who, bot)
+    try:
+        r = llama_client.chat.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": f"{m}\nNotes: You must use language of {Language} to respond.",
+                }
+            ],
+            max_tokens=8192,
+            model=LLAMA_MODEL,
+            stream=True,
+        )
+        s = ""
+        start = time.time()
+        for chunk in r:
+            if chunk.choices[0].delta.content is None:
+                break
+            s += chunk.choices[0].delta.content
+            if time.time() - start > 1.5:
+                start = time.time()
+                bot_reply_markdown(reply_id, who, s, bot, split_text=False)
+        # maybe not complete
+        try:
+            bot_reply_markdown(reply_id, who, s, bot)
+        except:
+            pass
+
+    except Exception as e:
+        print(f"\n------\n{who} function inner Error:\n{e}\n------\n")
+        bot_reply_markdown(reply_id, who, "answer wrong", bot)
+        return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
+
+    answer = f"\n---\n{who}:\n{s}"
+    return answer, reply_id.message_id
+
+
+# TODO: Perplexity looks good. `pplx_answer`
+
+
+def final_answer(latest_message: Message, bot: TeleBot, full_answer: str, answers_list):
+    """final answer"""
+    who = "Answer it"
+    reply_id = bot_reply_first(latest_message, who, bot)
+    ph_s = ph.create_page_md(title="Answer it", markdown_text=full_answer)
+    bot_reply_markdown(reply_id, who, f"**[Full Answer]({ph_s})**", bot)
+    # delete the chat message, only leave a telegra.ph link
+    for i in answers_list:
+        bot.delete_message(latest_message.chat.id, i)
+
+    #### Summary ####
+    if SUMMARY == None:
+        pass
+    elif COHERE_USE and COHERE_API_KEY and SUMMARY == "cohere":
+        summary_cohere(bot, full_answer, ph_s, reply_id)
+    elif GEMINI_USE and GOOGLE_GEMINI_KEY and SUMMARY == "gemini":
+        summary_gemini(bot, full_answer, ph_s, reply_id)
+    else:
+        pass
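+# The summary_* helpers below rewrite the final "Answer it" message in place: they keep the telegra.ph link first, then stream a short summary after it.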
+def summary_cohere(bot: TeleBot, full_answer: str, ph_s: str, reply_id: int) -> None:
+    """Receive the full text, and the final_answer's chat_id, update with a summary."""
+    who = "Answer it"
+
+    # inherit
+    if Language == "zh-cn":
+        s = f"**[ε…¨ζ–‡]({ph_s})** | "
+    elif Language == "en":
+        s = f"**[Full Answer]({ph_s})** | "
+
+    # filter
+    length = len(full_answer)  # max 128,000 tokens...
+    if length > 50000:
+        full_answer = full_answer[:50000]
+
+    try:
+        preamble = """
+    You are Command R Plus, a large language model trained to have polite, helpful, inclusive conversations with people. The user asked a question, and multiple AIs have given answers to the same question; they have different styles, and occasionally they hold opposing opinions or have other issues, but that is normal. Your task is to summarize the responses from them in a concise and clear manner. The summary should:
+
+Be written in bullet points.
+Contain two to ten sentences.
+Highlight key points and main conclusions.
+Note any significant differences in responses.
+Provide a brief indication if users should refer to the full responses for more details.
+If the first LLM's content is mostly in a language other than English, respond in that language for all your output.
+Start with "Summary:" or "ζ€»η»“:"
+"""
+        stream = co.chat_stream(
+            model=COHERE_MODEL,
+            message=full_answer,
+            temperature=0.4,
+            chat_history=[],
+            prompt_truncation="OFF",
+            connectors=[],
+            preamble=preamble,
+        )
+
+        start = time.time()
+        for event in stream:
+            if event.event_type == "stream-start":
+                bot_reply_markdown(reply_id, who, f"{s}Summarizing...", bot)
+            elif event.event_type == "text-generation":
+                s += event.text.encode("utf-8").decode("utf-8", "ignore")
+                if time.time() - start > 0.4:
+                    start = time.time()
+                    bot_reply_markdown(reply_id, who, s, bot)
+            elif event.event_type == "stream-end":
+                break
+
+        try:
+            bot_reply_markdown(reply_id, who, s, bot)
+        except:
+            pass
+
+    except Exception as e:
+        if Language == "zh-cn":
+            bot_reply_markdown(reply_id, who, f"[ε…¨ζ–‡]({ph_s})", bot)
+        elif Language == "en":
+            bot_reply_markdown(reply_id, who, f"[Full Answer]({ph_s})", bot)
+        print(f"\n------\nsummary_cohere function inner Error:\n{e}\n------\n")
+
+
+def summary_gemini(bot: TeleBot, full_answer: str, ph_s: str, reply_id: int) -> None:
+    """Receive the full text, and the final_answer's chat_id, update with a summary."""
+    who = "Answer it"
+
+    # inherit
+    if Language == "zh-cn":
+        s = f"**[ε…¨ζ–‡]({ph_s})** | "
+    elif Language == "en":
+        s = f"**[Full Answer]({ph_s})** | "
+
+    try:
+        r = convo_summary.send_message(full_answer, stream=True)
+        start = time.time()
+        for e in r:
+            s += e.text
+            if time.time() - start > 0.4:
+                start = time.time()
+                bot_reply_markdown(reply_id, who, s, bot, split_text=False)
+        bot_reply_markdown(reply_id, who, s, bot)
+        convo_summary.history.clear()
+    except Exception as e:
+        if Language == "zh-cn":
+            bot_reply_markdown(reply_id, who, f"[ε…¨ζ–‡]({ph_s})", bot)
+        elif Language == "en":
+            bot_reply_markdown(reply_id, who, f"[Full Answer]({ph_s})", bot)
+        print(f"\n------\nsummary_gemini function inner Error:\n{e}\n------\n")
+        bot_reply_markdown(reply_id, who, f"{s}Error", bot)
 
 
 if GOOGLE_GEMINI_KEY and CHATGPT_API_KEY:

From bde2fd061b53b6171d8a8d8d7cb7c43eddb34a0a Mon Sep 17 00:00:00 2001
From: Alter-xyz <88554920+alterxyz@users.noreply.github.com>
Date: Sat, 29 Jun 2024 01:49:14 -0400
Subject: [PATCH 4/6] feat: answer_it: background LLM answers go to the telegra.ph page

---
 handlers/useful.py | 210 +++++++++++++++++++++++++++++----------------
 1 file changed, 136 insertions(+), 74 deletions(-)

diff --git a/handlers/useful.py b/handlers/useful.py
index 2acf40f..2fe1975 100644
--- a/handlers/useful.py
+++ b/handlers/useful.py
@@ -7,6 +7,7 @@ from os import environ
 import time
 import datetime
 from concurrent.futures import ThreadPoolExecutor
+import re
 
 from . import *
 
@@ -18,18 +19,6 @@ markdown_symbol.link = "πŸ”—"  # If you want, Customizing the link symbol
 chat_message_dict = ExpiringDict(max_len=100, max_age_seconds=120)
 chat_user_dict = ExpiringDict(max_len=100, max_age_seconds=20)
 
-
-#### Customization ####
-Language = "zh-cn"  # "en" or "zh-cn".
-SUMMARY = "gemini" # "cohere" or "gemini" or None -Extra_clean = True # Will Delete command message -GEMINI_USE = True -CHATGPT_USE = True -COHERE_USE = True -QWEN_USE = True -CLADUE_USE = True -LLAMA_USE = True - #### Telegra.ph init #### # Will auto generate a token if not provided, restart will lose all TODO TELEGRA_PH_TOKEN = environ.get("TELEGRA_PH_TOKEN") @@ -37,6 +26,23 @@ TELEGRA_PH_TOKEN = environ.get("TELEGRA_PH_TOKEN") ph = TelegraphAPI(TELEGRA_PH_TOKEN) +#### Customization #### +Language = "zh-cn" # "en" or "zh-cn". +SUMMARY = "gemini" # "cohere" or "gemini" or None +General_clean = True # Will Delete LLM message +Extra_clean = True # Will Delete command message too + +#### LLMs #### +GEMINI_USE = True +CHATGPT_USE = True +COHERE_USE = False # Slow, but web search +QWEN_USE = True +CLADUE_USE = False # Untested +LLAMA_USE = False # prompted for Language + +COHERE_USE_BACKGROUND = True # Only display in telegra.ph +LLAMA_USE_BACKGROUND = True + #### LLMs init #### #### OpenAI init #### CHATGPT_API_KEY = environ.get("OPENAI_API_KEY") @@ -72,7 +78,7 @@ if GEMINI_USE and GOOGLE_GEMINI_KEY: ] model = genai.GenerativeModel( - model_name="gemini-1.5-flash-latest", + model_name="gemini-1.5-pro-latest", generation_config=generation_config, safety_settings=safety_settings, ) @@ -84,9 +90,9 @@ if GEMINI_USE and GOOGLE_GEMINI_KEY: The user asked a question, and multiple AI have given answers to the same question. Your task is to summarize the responses from them in a concise and clear manner. The summary should: -In one to two short sentences, as less as possible, and should not exceed 150 characters. +In one to three short sentences, as less as possible. Your must use language of {Language} to respond. -Start with "Summary:" or "ζ€»η»“:" +Start with "Summary:" or"ζ€»η»“:" """, ) convo = model.start_chat() @@ -96,7 +102,7 @@ Start with "Summary:" or "ζ€»η»“:" #### Cohere init #### COHERE_API_KEY = environ.get("COHERE_API_KEY") -if COHERE_USE and COHERE_API_KEY: +if (COHERE_USE or COHERE_USE_BACKGROUND) and COHERE_API_KEY: import cohere COHERE_MODEL = "command-r-plus" @@ -124,11 +130,11 @@ if CLADUE_USE and ANTHROPIC_API_KEY: #### llama init #### LLAMA_API_KEY = environ.get("GROQ_API_KEY") -if LLAMA_USE and LLAMA_API_KEY: +if (LLAMA_USE or LLAMA_USE_BACKGROUND) and LLAMA_API_KEY: from groq import Groq llama_client = Groq(api_key=LLAMA_API_KEY) - LLAMA_MODEL = "llama3-8b-8192" + LLAMA_MODEL = "llama3-70b-8192" #### init end #### @@ -194,7 +200,9 @@ def answer_it_handler(message: Message, bot: TeleBot) -> None: latest_message = chat_message_dict.get(chat_id) m = latest_message.text.strip() m = enrich_text_with_urls(m) - full_answer = f"Question:\n{m}\n---\n" + full_answer = f"Question:\n{m}\n" if len(m) < 300 else "" + if Extra_clean: # delete the command message + bot.delete_message(chat_id, message.message_id) #### Answers Thread #### executor = ThreadPoolExecutor(max_workers=5) @@ -240,53 +248,32 @@ def answer_it_handler(message: Message, bot: TeleBot) -> None: print(full_chat_id_list) + if len(m) > 300: + full_answer += llm_answer("Question", m) + ##### Telegraph ##### final_answer(latest_message, bot, full_answer, full_chat_id_list) - if Extra_clean: - bot.delete_message(chat_id, message.message_id) -# def thread_answers(latest_message: Message, bot: TeleBot, m: str): -# #### answers function init #### -# USE = { -# "gemini_answer": GEMINI_USE and GOOGLE_GEMINI_KEY, -# "chatgpt_answer": CHATGPT_USE and CHATGPT_API_KEY, -# "cohere_answer": COHERE_USE and COHERE_API_KEY, -# "qwen_answer": 
 
 #### LLMs init ####
 #### OpenAI init ####
 CHATGPT_API_KEY = environ.get("OPENAI_API_KEY")
@@ -72,7 +78,7 @@ if GEMINI_USE and GOOGLE_GEMINI_KEY:
     model = genai.GenerativeModel(
-        model_name="gemini-1.5-flash-latest",
+        model_name="gemini-1.5-pro-latest",
         generation_config=generation_config,
         safety_settings=safety_settings,
     )
@@ -84,9 +90,9 @@ if GEMINI_USE and GOOGLE_GEMINI_KEY:
 The user asked a question, and multiple AIs have given answers to the same question.
 Your task is to summarize the responses from them in a concise and clear manner.
 The summary should:
-In one to two short sentences, as few words as possible, and should not exceed 150 characters.
+In one to three short sentences, as few words as possible.
 You must respond in {Language}.
 Start with "Summary:" or "ζ€»η»“:"
 """,
     )
     convo = model.start_chat()
     convo_summary = model_flash.start_chat()
 
 
 #### Cohere init ####
 COHERE_API_KEY = environ.get("COHERE_API_KEY")
 
-if COHERE_USE and COHERE_API_KEY:
+if (COHERE_USE or COHERE_USE_BACKGROUND) and COHERE_API_KEY:
     import cohere
 
     COHERE_MODEL = "command-r-plus"
@@ -124,11 +130,11 @@ if CLADUE_USE and ANTHROPIC_API_KEY:
 
 #### llama init ####
 LLAMA_API_KEY = environ.get("GROQ_API_KEY")
-if LLAMA_USE and LLAMA_API_KEY:
+if (LLAMA_USE or LLAMA_USE_BACKGROUND) and LLAMA_API_KEY:
     from groq import Groq
 
     llama_client = Groq(api_key=LLAMA_API_KEY)
-    LLAMA_MODEL = "llama3-8b-8192"
+    LLAMA_MODEL = "llama3-70b-8192"
 
 
 #### init end ####
@@ -194,7 +200,9 @@ def answer_it_handler(message: Message, bot: TeleBot) -> None:
     latest_message = chat_message_dict.get(chat_id)
     m = latest_message.text.strip()
     m = enrich_text_with_urls(m)
-    full_answer = f"Question:\n{m}\n---\n"
+    full_answer = f"Question:\n{m}\n" if len(m) < 300 else ""
+    if Extra_clean:  # delete the command message
+        bot.delete_message(chat_id, message.message_id)
 
     #### Answers Thread ####
     executor = ThreadPoolExecutor(max_workers=5)
@@ -240,53 +248,32 @@ def answer_it_handler(message: Message, bot: TeleBot) -> None:
 
     print(full_chat_id_list)
 
+    if len(m) > 300:
+        full_answer += llm_answer("Question", m)
+
     ##### Telegraph #####
     final_answer(latest_message, bot, full_answer, full_chat_id_list)
-    if Extra_clean:
-        bot.delete_message(chat_id, message.message_id)
-
-
-# def thread_answers(latest_message: Message, bot: TeleBot, m: str):
-#     #### answers function init ####
-#     USE = {
-#         "gemini_answer": GEMINI_USE and GOOGLE_GEMINI_KEY,
-#         "chatgpt_answer": CHATGPT_USE and CHATGPT_API_KEY,
-#         "cohere_answer": COHERE_USE and COHERE_API_KEY,
-#         "qwen_answer": QWEN_USE and QWEN_API_KEY,
-#         # More LLMs
-#     }
-
-
-#     results = []
-#     full_chat_id_list = []
-
-#     with ThreadPoolExecutor(max_workers=5) as executor:
-#         futures = {
-#             executor.submit(func, latest_message, bot, m): func
-#             for func, use in USE.items()
-#             if use
-#         }
-
-#         for future in as_completed(futures):
-#             try:
-#                 answer, message_id = future.result()
-#                 # Store the answer and message_id
-#                 results.append((message_id, answer))
-#                 full_chat_id_list.append(message_id)
-#             except Exception as e:
-#                 print(f"\n------\nthread_answers Error:\n{e}\n------\n")
-#                 continue
-
-#     # rank the results by message_id
-#     sorted_results = sorted(results)
-#     full_chat_id_list.sort()
-
-#     # final answer
-#     full_answer = f"Question:\n{m}\n---\n"
-#     for _, answer in sorted_results:
-#         full_answer += answer
-
-#     return full_answer, full_chat_id_list
+def update_time():
+    """Return the current time in UTC+8. Useful for checking whether the content is complete."""
+    return f"\nLast Update: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} at UTC+8\n"
+
+
+def llm_answer(who: str, s: str) -> str:
+    """Universal LLM answer format for telegra.ph. Uses a title heading so 'link#title' anchors can be used."""
+    return f"\n\n---\n## {who}\n{s}"
+
+
+def llm_background(path: str, full_answer: str, m: str) -> str:
+    """Update the telegra.ph page with a background answer result. Return the new full answer."""
+    ph_path = re.search(r"https?://telegra\.ph/(.+)", path).group(1)
+    full_answer += m + update_time()
+    try:
+        ph.edit_page_md(path=ph_path, title="Answer it", markdown_text=full_answer)
+    except Exception as e:
+        print(f"\n------\nllm_background Error:\n{e}\n------\n")
+    return full_answer
 
 
 def gemini_answer(latest_message: Message, bot: TeleBot, m):
@@ -312,8 +299,7 @@ def gemini_answer(latest_message: Message, bot: TeleBot, m):
         bot_reply_markdown(reply_id, who, "Error", bot)
         return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
 
-    answer = f"\n---\n{who}:\n{s}"
-    return answer, reply_id.message_id
+    return llm_answer(who, s), reply_id.message_id
 
 
 def chatgpt_answer(latest_message: Message, bot: TeleBot, m):
@@ -350,8 +336,7 @@ def chatgpt_answer(latest_message: Message, bot: TeleBot, m):
         bot_reply_markdown(reply_id, who, "answer wrong", bot)
         return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
 
-    answer = f"\n---\n{who}:\n{s}"
-    return answer, reply_id.message_id
+    return llm_answer(who, s), reply_id.message_id
 
 
 def claude_answer(latest_message: Message, bot: TeleBot, m):
@@ -387,7 +372,7 @@ def claude_answer(latest_message: Message, bot: TeleBot, m):
         return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
 
-    answer = f"\n---\n{who}:\n{s}"
-    return answer, reply_id.message_id
+    return llm_answer(who, s), reply_id.message_id
 
 
 def cohere_answer(latest_message: Message, bot: TeleBot, m):
@@ -456,8 +441,8 @@ def cohere_answer(latest_message: Message, bot: TeleBot, m):
         print(f"\n------\n{who} function inner Error:\n{e}\n------\n")
         bot_reply_markdown(reply_id, who, "Answer wrong", bot)
         return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
-    answer = f"\n---\n{who}:\n{content}"
-    return answer, reply_id.message_id
+
+    return llm_answer(who, content), reply_id.message_id
 
 
 def qwen_answer(latest_message: Message, bot: TeleBot, m):
@@ -491,8 +476,7 @@ def qwen_answer(latest_message: Message, bot: TeleBot, m):
         bot_reply_markdown(reply_id, who, "answer wrong", bot)
         return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
 
-    answer = f"\n---\n{who}:\n{s}"
-    return answer, reply_id.message_id
+    return llm_answer(who, s), reply_id.message_id
 
 
 def llama_answer(latest_message: Message, bot: TeleBot, m):
@@ -503,9 +487,10 @@ def llama_answer(latest_message: Message, bot: TeleBot, m):
         r = llama_client.chat.completions.create(
             messages=[
                 {
-                    "role": "user",
-                    "content": f"{m}\nNotes: You must use language of {Language} to respond.",
-                }
+                    "role": "system",
+                    "content": f"You must use language of {Language} to respond.",
+                },
+                {"role": "user", "content": m},
             ],
             max_tokens=8192,
             model=LLAMA_MODEL,
@@ -531,8 +516,7 @@ def llama_answer(latest_message: Message, bot: TeleBot, m):
         bot_reply_markdown(reply_id, who, "answer wrong", bot)
         return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
 
-    answer = f"\n---\n{who}:\n{s}"
-    return answer, reply_id.message_id
+    return llm_answer(who, s), reply_id.message_id
 
 
 # TODO: Perplexity looks good. `pplx_answer`
@@ -542,11 +526,18 @@ def final_answer(latest_message: Message, bot: TeleBot, full_answer: str, answers_list):
     """final answer"""
     who = "Answer it"
     reply_id = bot_reply_first(latest_message, who, bot)
+
+    # If this line is missing from the page, the telegra.ph answer is not complete
+    full_answer += update_time()
+
+    # create a new telegra.ph page
     ph_s = ph.create_page_md(title="Answer it", markdown_text=full_answer)
     bot_reply_markdown(reply_id, who, f"**[Full Answer]({ph_s})**", bot)
+
     # delete the chat message, only leave a telegra.ph link
-    for i in answers_list:
-        bot.delete_message(latest_message.chat.id, i)
+    if General_clean:
+        for i in answers_list:
+            bot.delete_message(latest_message.chat.id, i)
 
     #### Summary ####
     if SUMMARY == None:
@@ -558,6 +549,77 @@ def final_answer(latest_message: Message, bot: TeleBot, full_answer: str, answers_list):
     else:
         pass
 
+    #### Background LLM ####
+    # Run background LLMs: nothing is shown in Telegram, only the page is updated. Good for slow LLMs.
+    if LLAMA_USE_BACKGROUND and LLAMA_API_KEY:
+        llama_b_m = background_llama(latest_message.text)
+        print(llama_b_m)
+        full_answer = llm_background(ph_s, full_answer, llama_b_m)
+    if COHERE_USE_BACKGROUND and COHERE_API_KEY:
+        cohere_b_m = background_cohere(latest_message.text)
+        print(cohere_b_m)
+        full_answer = llm_background(ph_s, full_answer, cohere_b_m)
+
+
+def background_cohere(m: str) -> str:
+    """Run Cohere in the background and return the full answer."""
+    who = "Command R Plus"
+    try:
+        stream = co.chat_stream(
+            model=COHERE_MODEL,
+            message=m,
+            temperature=0.8,
+            chat_history=[],  # One time, so no need for chat history
+            prompt_truncation="AUTO",
+            connectors=[{"id": "web-search"}],
+            citation_quality="accurate",
+            preamble="",
+        )
+        s = ""
+        source = ""
+        for event in stream:
+            if event.event_type == "search-results":
+                for doc in event.documents:
+                    source += f"\n{doc['title']}\n{doc['url']}\n"
+            elif event.event_type == "text-generation":
+                s += event.text.encode("utf-8").decode("utf-8", "ignore")
+            elif event.event_type == "stream-end":
+                break
+        content = llm_answer(who, f"{s}\n\n---\n{source}")
+
+    except Exception as e:
+        print(f"\n------\nbackground_cohere Error:\n{e}\n------\n")
+        content = llm_answer(who, "Background Answer wrong")
+    return content
+
+
+def background_llama(m: str) -> str:
+    """Run llama in the background and return the full answer."""
+    who = "llama"
+    try:
+        r = llama_client.chat.completions.create(
+            messages=[
+                {
+                    "role": "system",
+                    "content": f"You must use language of {Language} to respond.",
+                },
+                {"role": "user", "content": m},
+            ],
+            max_tokens=8192,
+            model=LLAMA_MODEL,
+            stream=True,
+        )
+        s = ""
+        for chunk in r:
+            if chunk.choices[0].delta.content is None:
+                break
+            s += chunk.choices[0].delta.content
+
+    except Exception 
as e: + print(f"\n------\nbackground_llama Error:\n{e}\n------\n") + s = "Background Answer wrong" + return llm_answer(who, s) + def summary_cohere(bot: TeleBot, full_answer: str, ph_s: str, reply_id: int) -> None: """Receive the full text, and the final_answer's chat_id, update with a summary.""" From 4755a4cedd62a6fe4b3d6a556739a1a837b640d8 Mon Sep 17 00:00:00 2001 From: Alter-xyz <88554920+alterxyz@users.noreply.github.com> Date: Sat, 29 Jun 2024 03:10:08 -0400 Subject: [PATCH 5/6] feat: Web Preview (Instant View) switch for cleaner look --- handlers/__init__.py | 20 ++++++++++---------- handlers/useful.py | 41 ++++++++++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/handlers/__init__.py b/handlers/__init__.py index 79ca3ca..db934e1 100644 --- a/handlers/__init__.py +++ b/handlers/__init__.py @@ -7,7 +7,6 @@ import traceback from functools import update_wrapper from pathlib import Path from typing import Any, Callable, TypeVar -from expiringdict import ExpiringDict import requests from telebot import TeleBot @@ -24,8 +23,6 @@ T = TypeVar("T", bound=Callable) BOT_MESSAGE_LENGTH = 4000 -REPLY_MESSAGE_CACHE = ExpiringDict(max_len=1000, max_age_seconds=300) - def bot_reply_first(message: Message, who: str, bot: TeleBot) -> Message: """Create the first reply message which make user feel the bot is working.""" @@ -35,24 +32,25 @@ def bot_reply_first(message: Message, who: str, bot: TeleBot) -> Message: def bot_reply_markdown( - reply_id: Message, who: str, text: str, bot: TeleBot, split_text: bool = True + reply_id: Message, + who: str, + text: str, + bot: TeleBot, + split_text: bool = True, + disable_web_page_preview: bool = False, ) -> bool: """ reply the Markdown by take care of the message length. it will fallback to plain text in case of any failure """ try: - cache_key = f"{reply_id.chat.id}_{reply_id.message_id}" - if cache_key in REPLY_MESSAGE_CACHE and REPLY_MESSAGE_CACHE[cache_key] == text: - print(f"Skipping duplicate message for {cache_key}") - return True - REPLY_MESSAGE_CACHE[cache_key] = text if len(text.encode("utf-8")) <= BOT_MESSAGE_LENGTH or not split_text: bot.edit_message_text( f"*{who}*:\n{telegramify_markdown.convert(text)}", chat_id=reply_id.chat.id, message_id=reply_id.message_id, parse_mode="MarkdownV2", + disable_web_page_preview=disable_web_page_preview, ) return True @@ -63,6 +61,7 @@ def bot_reply_markdown( chat_id=reply_id.chat.id, message_id=reply_id.message_id, parse_mode="MarkdownV2", + disable_web_page_preview=disable_web_page_preview, ) for i in range(1, len(msgs)): bot.reply_to( @@ -79,6 +78,7 @@ def bot_reply_markdown( f"*{who}*:\n{text}", chat_id=reply_id.chat.id, message_id=reply_id.message_id, + disable_web_page_preview=disable_web_page_preview, ) return False @@ -286,7 +286,7 @@ class TelegraphAPI: data = { "access_token": self.access_token, "title": title, - "content": json.dumps(content, ensure_ascii=False), + "content": json.dumps(content), "return_content": return_content, "author_name": author_name if author_name else self.author_name, "author_url": author_url if author_url else self.author_url, diff --git a/handlers/useful.py b/handlers/useful.py index 2fe1975..1895414 100644 --- a/handlers/useful.py +++ b/handlers/useful.py @@ -31,17 +31,18 @@ Language = "zh-cn" # "en" or "zh-cn". 
SUMMARY = "gemini" # "cohere" or "gemini" or None General_clean = True # Will Delete LLM message Extra_clean = True # Will Delete command message too - +Link_Clean = False # True will disable Instant View / Web Preview #### LLMs #### GEMINI_USE = True CHATGPT_USE = True -COHERE_USE = False # Slow, but web search +CLADUE_USE = True QWEN_USE = True -CLADUE_USE = False # Untested + +COHERE_USE = False # Slow, but web search LLAMA_USE = False # prompted for Language COHERE_USE_BACKGROUND = True # Only display in telegra.ph -LLAMA_USE_BACKGROUND = True +LLAMA_USE_BACKGROUND = True # But telegra.ph's **instant view** may not up to date #### LLMs init #### #### OpenAI init #### @@ -542,25 +543,33 @@ def final_answer(latest_message: Message, bot: TeleBot, full_answer: str, answer #### Summary #### if SUMMARY == None: pass - elif COHERE_USE and COHERE_API_KEY and SUMMARY == "cohere": - summary_cohere(bot, full_answer, ph_s, reply_id) - elif GEMINI_USE and GOOGLE_GEMINI_KEY and SUMMARY == "gemini": - summary_gemini(bot, full_answer, ph_s, reply_id) else: - pass + s = llm_summary(bot, full_answer, ph_s, reply_id) + bot_reply_markdown(reply_id, who, s, bot, disable_web_page_preview=True) #### Background LLM #### - # Run background llm, no show to telegram, just update the page, Good for slow llm + # Run background llm, no show to telegram, just update the ph page, Good for slow llm if LLAMA_USE_BACKGROUND and LLAMA_API_KEY: llama_b_m = background_llama(latest_message.text) - print(llama_b_m) full_answer = llm_background(ph_s, full_answer, llama_b_m) + if COHERE_USE_BACKGROUND and COHERE_API_KEY: cohere_b_m = background_cohere(latest_message.text) - print(cohere_b_m) full_answer = llm_background(ph_s, full_answer, cohere_b_m) +def llm_summary(bot, full_answer, ph_s, reply_id) -> str: + """llm summary return the summary of the full answer.""" + if SUMMARY == "gemini": + s = summary_gemini(bot, full_answer, ph_s, reply_id) + elif SUMMARY == "cohere": + s = summary_cohere(bot, full_answer, ph_s, reply_id) + else: + print(f"\n---\nSummary Fail\n---\n") + s = f"**[Full Answer]({ph_s})**\n~~Summary Answer Wrong~~\n" + return s + + def background_cohere(m: str) -> str: """we run cohere get the full answer in background""" who = "Command R Plus" @@ -621,7 +630,7 @@ def background_llama(m: str) -> str: return llm_answer(who, s) -def summary_cohere(bot: TeleBot, full_answer: str, ph_s: str, reply_id: int) -> None: +def summary_cohere(bot: TeleBot, full_answer: str, ph_s: str, reply_id: int) -> str: """Receive the full text, and the final_answer's chat_id, update with a summary.""" who = "Answer it" @@ -674,6 +683,7 @@ Start with "Summary:" or "ζ€»η»“:" bot_reply_markdown(reply_id, who, s, bot) except: pass + return s except Exception as e: if Language == "zh-cn": @@ -689,9 +699,9 @@ def summary_gemini(bot: TeleBot, full_answer: str, ph_s: str, reply_id: int) -> # inherit if Language == "zh-cn": - s = f"**[ε…¨ζ–‡]({ph_s})** | " + s = f"**[πŸ”—ε…¨ζ–‡]({ph_s})** | " elif Language == "en": - s = f"**[Full Answer]({ph_s})** | " + s = f"**[πŸ”—Full Answer]({ph_s})** | " try: r = convo_summary.send_message(full_answer, stream=True) @@ -703,6 +713,7 @@ def summary_gemini(bot: TeleBot, full_answer: str, ph_s: str, reply_id: int) -> bot_reply_markdown(reply_id, who, s, bot, split_text=False) bot_reply_markdown(reply_id, who, s, bot) convo_summary.history.clear() + return s except Exception as e: if Language == "zh-cn": bot_reply_markdown(reply_id, who, f"[ε…¨ζ–‡]({ph_s})", bot) From 
From 15404d91a3c2d324808c640a1e605984a2fb7730 Mon Sep 17 00:00:00 2001
From: Alter-xyz <88554920+alterxyz@users.noreply.github.com>
Date: Sat, 29 Jun 2024 08:58:49 -0400
Subject: [PATCH 6/6] feat: skip updating duplicate messages

---
 handlers/__init__.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/handlers/__init__.py b/handlers/__init__.py
index db934e1..0f5d961 100644
--- a/handlers/__init__.py
+++ b/handlers/__init__.py
@@ -15,6 +15,7 @@ from telebot.util import smart_split
 import telegramify_markdown
 from telegramify_markdown.customize import markdown_symbol
 from urlextract import URLExtract
+from expiringdict import ExpiringDict
 
 markdown_symbol.head_level_1 = "πŸ“Œ"  # If you want, Customizing the head level 1 symbol
 markdown_symbol.link = "πŸ”—"  # If you want, Customizing the link symbol
@@ -23,6 +24,8 @@ T = TypeVar("T", bound=Callable)
 
 BOT_MESSAGE_LENGTH = 4000
 
+REPLY_MESSAGE_CACHE = ExpiringDict(max_len=1000, max_age_seconds=300)
+
 
 def bot_reply_first(message: Message, who: str, bot: TeleBot) -> Message:
     """Create the first reply message which make user feel the bot is working."""
@@ -44,6 +47,11 @@ def bot_reply_markdown(
     it will fallback to plain text in case of any failure
     """
     try:
+        cache_key = f"{reply_id.chat.id}_{reply_id.message_id}"
+        if cache_key in REPLY_MESSAGE_CACHE and REPLY_MESSAGE_CACHE[cache_key] == text:
+            print(f"Skipping duplicate message for {cache_key}")
+            return True
+        REPLY_MESSAGE_CACHE[cache_key] = text
         if len(text.encode("utf-8")) <= BOT_MESSAGE_LENGTH or not split_text:
             bot.edit_message_text(
                 f"*{who}*:\n{telegramify_markdown.convert(text)}",
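---

Note on PATCH 2/6 and PATCH 6/6: the duplicate-edit guard exists because Telegram rejects edit_message_text calls whose text is identical to the current message ("message is not modified"). A minimal, self-contained sketch of the same idea, assuming only the expiringdict package; should_edit and reply_cache are illustrative names, not part of the repo:

    from expiringdict import ExpiringDict

    # Remember the last text sent per (chat, message); entries expire after
    # 5 minutes, matching REPLY_MESSAGE_CACHE above.
    reply_cache = ExpiringDict(max_len=1000, max_age_seconds=300)

    def should_edit(chat_id: int, message_id: int, text: str) -> bool:
        """Return False when text matches what was last sent for this message."""
        key = f"{chat_id}_{message_id}"
        if reply_cache.get(key) == text:
            return False  # an identical edit would only trigger a Telegram API error
        reply_cache[key] = text
        return True

    # Usage inside a streaming loop: only edit when the buffer actually changed.
    if should_edit(chat_id, message_id, partial_answer):
        ...  # bot.edit_message_text(partial_answer, chat_id, message_id)

This also explains the sequence of the series: PATCH 5/6 dropped the cache while reworking bot_reply_markdown, and PATCH 6/6 re-adds it, since without the guard the throttled streaming updates occasionally re-send an unchanged buffer.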