Merge pull request #39 from alterxyz/main

Answer_it refine
This commit is contained in:
yihong 2024-07-02 14:41:49 +08:00 committed by GitHub
commit 4d0b9ee799
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 625 additions and 255 deletions

View File

@ -15,6 +15,7 @@ from telebot.util import smart_split
import telegramify_markdown
from telegramify_markdown.customize import markdown_symbol
from urlextract import URLExtract
from expiringdict import ExpiringDict
markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
@ -23,6 +24,8 @@ T = TypeVar("T", bound=Callable)
BOT_MESSAGE_LENGTH = 4000
REPLY_MESSAGE_CACHE = ExpiringDict(max_len=1000, max_age_seconds=300)
def bot_reply_first(message: Message, who: str, bot: TeleBot) -> Message:
"""Create the first reply message which make user feel the bot is working."""
@ -32,19 +35,30 @@ def bot_reply_first(message: Message, who: str, bot: TeleBot) -> Message:
def bot_reply_markdown(
reply_id: Message, who: str, text: str, bot: TeleBot, split_text: bool = True
reply_id: Message,
who: str,
text: str,
bot: TeleBot,
split_text: bool = True,
disable_web_page_preview: bool = False,
) -> bool:
"""
Reply in Markdown while taking care of the message length limit.
Falls back to plain text in case of any failure.
"""
try:
cache_key = f"{reply_id.chat.id}_{reply_id.message_id}"
if cache_key in REPLY_MESSAGE_CACHE and REPLY_MESSAGE_CACHE[cache_key] == text:
print(f"Skipping duplicate message for {cache_key}")
return True
REPLY_MESSAGE_CACHE[cache_key] = text
if len(text.encode("utf-8")) <= BOT_MESSAGE_LENGTH or not split_text:
bot.edit_message_text(
f"*{who}*:\n{telegramify_markdown.convert(text)}",
chat_id=reply_id.chat.id,
message_id=reply_id.message_id,
parse_mode="MarkdownV2",
disable_web_page_preview=disable_web_page_preview,
)
return True
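The ExpiringDict-backed REPLY_MESSAGE_CACHE above is a cheap idempotency guard: an edit whose text is identical to the previous one for the same message is skipped instead of hitting Telegram's editMessageText again. A minimal standalone sketch of the idea (names are illustrative, not part of this diff):

    from expiringdict import ExpiringDict

    # Entries evict themselves after 300 s, so the cache cannot grow without bound.
    _cache = ExpiringDict(max_len=1000, max_age_seconds=300)

    def should_edit(chat_id: int, message_id: int, text: str) -> bool:
        """Return False if this exact text was already sent for this message recently."""
        key = f"{chat_id}_{message_id}"
        if _cache.get(key) == text:
            return False  # duplicate; Telegram would reply "message is not modified"
        _cache[key] = text
        return True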
@ -55,6 +69,7 @@ def bot_reply_markdown(
chat_id=reply_id.chat.id,
message_id=reply_id.message_id,
parse_mode="MarkdownV2",
disable_web_page_preview=disable_web_page_preview,
)
for i in range(1, len(msgs)):
bot.reply_to(
@ -71,6 +86,7 @@ def bot_reply_markdown(
f"*{who}*:\n{text}",
chat_id=reply_id.chat.id,
message_id=reply_id.message_id,
disable_web_page_preview=disable_web_page_preview,
)
return False
@ -278,7 +294,7 @@ class TelegraphAPI:
data = {
"access_token": self.access_token,
"title": title,
"content": json.dumps(content, ensure_ascii=False),
"content": json.dumps(content),
"return_content": return_content,
"author_name": author_name if author_name else self.author_name,
"author_url": author_url if author_url else self.author_url,

View File

@ -1,6 +1,7 @@
from os import environ
import time
import datetime
import re
from telebot import TeleBot
from telebot.types import Message
@ -16,7 +17,7 @@ markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
COHERE_API_KEY = environ.get("COHERE_API_KEY")
COHERE_MODEL = "command-r-plus"
COHERE_MODEL = "command-r-plus" # command-r may cause Chinese garbled code, and non stream mode also may cause garbled code.
if COHERE_API_KEY:
co = cohere.Client(api_key=COHERE_API_KEY)
@ -27,98 +28,27 @@ ph = TelegraphAPI(TELEGRA_PH_TOKEN)
cohere_player_dict = ExpiringDict(max_len=1000, max_age_seconds=300)
def cohere_handler_direct(message: Message, bot: TeleBot) -> None:
"""cohere : /cohere <question>"""
m = message.text.strip()
def clean_text(text):
"""Clean up the garbled code in the UTF-8 encoded Chinese string.
player_message = []
if str(message.from_user.id) not in cohere_player_dict:
cohere_player_dict[str(message.from_user.id)] = player_message
Args:
text: String that needs to be cleaned.
Returns:
The cleaned string; if garbled text is detected, a notice is appended at the end.
"""
if "<EFBFBD>" in text:
# Use re.sub to clean up garbled code
cleaned_text = re.sub(r"<EFBFBD>.*?([,。!?;:]|$)", r"\1", text)
cleaned_text = re.sub(r"\s+", " ", cleaned_text).strip()
print(f"\n---------\nOriginal text:\n{text}\n---------")
return cleaned_text + "\n\n~~(乱码已去除,可能存在错误,请注意)~~"
else:
player_message = cohere_player_dict[str(message.from_user.id)]
if m.strip() == "clear":
bot.reply_to(
message,
"Just cleared your Cohere messages history",
)
player_message.clear()
return
if m[:4].lower() == "new ":
m = m[4:].strip()
player_message.clear()
m = enrich_text_with_urls(m)
who = "Command R Plus"
reply_id = bot_reply_first(message, who, bot)
player_message.append({"role": "User", "message": m})
# keep the last 5 rounds; each round has an ask and an answer.
if len(player_message) > 10:
player_message = player_message[2:]
try:
stream = co.chat_stream(
model=COHERE_MODEL,
message=m,
temperature=0.8,
chat_history=player_message,
prompt_truncation="AUTO",
connectors=[{"id": "web-search"}],
citation_quality="accurate",
preamble=f"You are Command R+, a large language model trained to have polite, helpful, inclusive conversations with people. The current time in Tornoto is {datetime.datetime.now(datetime.timezone.utc).astimezone().strftime('%Y-%m-%d %H:%M:%S')}, in Los Angeles is {datetime.datetime.now(datetime.timezone.utc).astimezone().astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, and in China is {datetime.datetime.now(datetime.timezone.utc).astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}.",
)
s = ""
source = ""
start = time.time()
for event in stream:
if event.event_type == "stream-start":
bot_reply_markdown(reply_id, who, "Thinking...", bot)
elif event.event_type == "search-queries-generation":
bot_reply_markdown(reply_id, who, "Searching online...", bot)
elif event.event_type == "search-results":
bot_reply_markdown(reply_id, who, "Reading...", bot)
for doc in event.documents:
source += f"\n[{doc['title']}]({doc['url']})"
elif event.event_type == "text-generation":
s += event.text.encode("utf-8").decode("utf-8")
if time.time() - start > 0.4:
start = time.time()
bot_reply_markdown(
reply_id,
who,
f"\nStill thinking{len(s)}...",
bot,
split_text=True,
)
elif event.event_type == "stream-end":
break
s += "\n" + source + "\n"
try:
bot_reply_markdown(reply_id, who, s, bot, split_text=True)
except:
pass
player_message.append(
{
"role": "Chatbot",
"message": convert(s),
}
)
except Exception as e:
print(e)
bot_reply_markdown(reply_id, who, "Answer wrong", bot)
player_message.clear()
return
return text
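A quick check of clean_text's behaviour; '�' is U+FFFD, the Unicode replacement character that marks mojibake, and the sample string is purely illustrative:

    broken = "第一句被�坏掉了,第二句完整。"
    print(clean_text(broken))  # (clean_text also prints the original for debugging)
    # 第一句被,第二句完整。
    #
    # ~~(乱码已去除,可能存在错误,请注意)~~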
def cohere_handler(message: Message, bot: TeleBot) -> None:
"""cohere : /cohere <question> This will return a telegraph link"""
"""cohere : /cohere_pro <question> Come with a telegraph link"""
m = message.text.strip()
player_message = []
@ -150,6 +80,14 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
player_message = player_message[2:]
try:
current_time = datetime.datetime.now(datetime.timezone.utc)
preamble = (
f"You are Command, a large language model trained to have polite, helpful, and inclusive conversations with people. Your responses should be accurate and graceful in user's original language."
f"The current UTC time is {current_time.strftime('%Y-%m-%d %H:%M:%S')}, "
f"UTC-4 (e.g. New York) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=-4))).strftime('%Y-%m-%d %H:%M:%S')}, "
f"UTC-7 (e.g. Los Angeles) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, "
f"and UTC+8 (e.g. Beijing) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}."
)
stream = co.chat_stream(
model=COHERE_MODEL,
message=m,
@ -158,7 +96,7 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
prompt_truncation="AUTO",
connectors=[{"id": "web-search"}],
citation_quality="accurate",
preamble=f"You are Command R+, a large language model trained to have polite, helpful, inclusive conversations with people. The current time in Tornoto is {datetime.datetime.now(datetime.timezone.utc).astimezone().strftime('%Y-%m-%d %H:%M:%S')}, in Los Angeles is {datetime.datetime.now(datetime.timezone.utc).astimezone().astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, and in China is {datetime.datetime.now(datetime.timezone.utc).astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}.",
preamble=preamble,
)
s = ""
@ -175,22 +113,32 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
source += f"\n{doc['title']}\n{doc['url']}\n"
elif event.event_type == "text-generation":
s += event.text.encode("utf-8").decode("utf-8")
if time.time() - start > 0.4:
if time.time() - start > 1.4:
start = time.time()
bot_reply_markdown(
reply_id,
who,
f"\nStill thinking{len(s)}...",
bot,
split_text=True,
)
s = clean_text(s)
if len(s) > 3900:
bot_reply_markdown(
reply_id,
who,
f"\nStill thinking{len(s)}...\n",
bot,
split_text=True,
)
else:
bot_reply_markdown(
reply_id,
who,
f"\nStill thinking{len(s)}...\n{s}",
bot,
split_text=True,
)
elif event.event_type == "stream-end":
break
content = (
s
+ "\n------\n------\n"
+ source
+ f"\n------\n------\nLast Update{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+ f"\nLast Update{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} at UTC+8\n"
)
ph_s = ph.create_page_md(
title="Cohere", markdown_text=content
@ -218,14 +166,6 @@ def cohere_handler(message: Message, bot: TeleBot) -> None:
if COHERE_API_KEY:
def register(bot: TeleBot) -> None:
bot.register_message_handler(
cohere_handler_direct, commands=["cohere_no_ph"], pass_bot=True
)
bot.register_message_handler(
cohere_handler_direct, regexp="^cohere_no_ph:", pass_bot=True
)
def register(bot: TeleBot) -> None:
bot.register_message_handler(cohere_handler, commands=["cohere"], pass_bot=True)
bot.register_message_handler(cohere_handler, regexp="^cohere:", pass_bot=True)

View File

@ -7,81 +7,138 @@ from os import environ
import time
import datetime
from concurrent.futures import ThreadPoolExecutor
from openai import OpenAI
import google.generativeai as genai
from telebot import TeleBot
from together import Together
from telebot.types import Message
import re
from . import *
from telegramify_markdown.customize import markdown_symbol
#### Cohere init ####
import cohere
COHERE_API_KEY = environ.get("COHERE_API_KEY")
COHERE_MODEL = "command-r-plus"
# if you want to use cohere for answer it, set it to True
USE_COHERE = False
USE_CLAUDE = True
if COHERE_API_KEY:
co = cohere.Client(api_key=COHERE_API_KEY)
#### Telegraph init ####
TELEGRA_PH_TOKEN = environ.get("TELEGRA_PH_TOKEN")
ph = TelegraphAPI(TELEGRA_PH_TOKEN)
#### Telegraph done ####
# Customize the level-1 heading symbol if you want
markdown_symbol.head_level_1 = "📌"
markdown_symbol.link = "🔗"  # Customize the link symbol if you want
chat_message_dict = ExpiringDict(max_len=100, max_age_seconds=120)
chat_user_dict = ExpiringDict(max_len=100, max_age_seconds=20)
markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
#### Telegra.ph init ####
# Will auto-generate a token if not provided; a restart loses it (TODO: persist the token)
TELEGRA_PH_TOKEN = environ.get("TELEGRA_PH_TOKEN")
# Edit "Store_Token = False" in "__init__.py" to True to store it
ph = TelegraphAPI(TELEGRA_PH_TOKEN)
GOOGLE_GEMINI_KEY = environ.get("GOOGLE_GEMINI_KEY")
genai.configure(api_key=GOOGLE_GEMINI_KEY)
#### Customization ####
Language = "zh-cn" # "en" or "zh-cn".
SUMMARY = "gemini" # "cohere" or "gemini" or None
General_clean = True  # Delete the per-LLM chat messages after posting the Telegraph link
Extra_clean = True  # Also delete the command message
Link_Clean = False  # True disables Instant View / Web Preview
#### LLMs ####
GEMINI_USE = True
CHATGPT_USE = True
CLAUDE_USE = True
QWEN_USE = True
generation_config = {
"temperature": 0.7,
"top_p": 1,
"top_k": 1,
"max_output_tokens": 8192,
}
COHERE_USE = False  # Slow, but has web search
LLAMA_USE = False  # Prompted to respond in Language
safety_settings = [
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
]
COHERE_USE_BACKGROUND = True  # Only displayed in telegra.ph
LLAMA_USE_BACKGROUND = True  # But telegra.ph's **instant view** may not be up to date
model = genai.GenerativeModel(
model_name="gemini-1.5-flash-latest",
generation_config=generation_config,
safety_settings=safety_settings,
)
#### ChatGPT init ####
#### LLMs init ####
#### OpenAI init ####
CHATGPT_API_KEY = environ.get("OPENAI_API_KEY")
CHATGPT_BASE_URL = environ.get("OPENAI_API_BASE") or "https://api.openai.com/v1"
if CHATGPT_USE and CHATGPT_API_KEY:
from openai import OpenAI
CHATGPT_PRO_MODEL = "gpt-4o-2024-05-13"
client = OpenAI(api_key=CHATGPT_API_KEY, base_url=CHATGPT_BASE_URL, timeout=300)
#### Gemini init ####
GOOGLE_GEMINI_KEY = environ.get("GOOGLE_GEMINI_KEY")
if GEMINI_USE and GOOGLE_GEMINI_KEY:
import google.generativeai as genai
from google.generativeai import ChatSession
from google.generativeai.types.generation_types import StopCandidateException
genai.configure(api_key=GOOGLE_GEMINI_KEY)
generation_config = {
"temperature": 0.7,
"top_p": 1,
"top_k": 1,
"max_output_tokens": 8192,
}
safety_settings = [
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
]
model = genai.GenerativeModel(
model_name="gemini-1.5-pro-latest",
generation_config=generation_config,
safety_settings=safety_settings,
)
model_flash = genai.GenerativeModel(
model_name="gemini-1.5-flash-latest",
generation_config=generation_config,
safety_settings=safety_settings,
system_instruction=f"""
The user asked a question, and multiple AIs have given answers to the same question.
Your task is to summarize the responses from them in a concise and clear manner.
The summary should:
Use one to three short sentences, as few as possible.
You must respond in {Language}.
Start with "Summary:" or "总结:"
""",
)
convo = model.start_chat()
convo_summary = model_flash.start_chat()
#### Cohere init ####
COHERE_API_KEY = environ.get("COHERE_API_KEY")
if (COHERE_USE or COHERE_USE_BACKGROUND) and COHERE_API_KEY:
import cohere
COHERE_MODEL = "command-r-plus"
co = cohere.Client(api_key=COHERE_API_KEY)
#### Qwen init ####
QWEN_API_KEY = environ.get("TOGETHER_API_KEY")
QWEN_MODEL = "Qwen/Qwen2-72B-Instruct"
CHATGPT_PRO_MODEL = "gpt-4o-2024-05-13"
#### CLAUDE ####
if QWEN_USE and QWEN_API_KEY:
from together import Together
QWEN_MODEL = "Qwen/Qwen2-72B-Instruct"
qwen_client = Together(api_key=QWEN_API_KEY)
#### Claude init ####
ANTHROPIC_API_KEY = environ.get("ANTHROPIC_API_KEY")
ANTHROPIC_BASE_URL = environ.get("ANTHROPIC_BASE_URL")
ANTHROPIC_MODEL = "claude-3-5-sonnet-20240620"
# use the OpenAI-compatible client for Claude
claude_client = OpenAI(
api_key=ANTHROPIC_API_KEY, base_url=ANTHROPIC_BASE_URL, timeout=20
)
if CLAUDE_USE and ANTHROPIC_API_KEY:
ANTHROPIC_BASE_URL = environ.get("ANTHROPIC_BASE_URL")
ANTHROPIC_MODEL = "claude-3-5-sonnet-20240620"
claude_client = OpenAI(
api_key=ANTHROPIC_API_KEY, base_url=ANTHROPIC_BASE_URL, timeout=20
)
client = OpenAI(api_key=CHATGPT_API_KEY, base_url=CHATGPT_BASE_URL, timeout=300)
qwen_client = Together(api_key=QWEN_API_KEY, timeout=300)
#### llama init ####
LLAMA_API_KEY = environ.get("GROQ_API_KEY")
if (LLAMA_USE or LLAMA_USE_BACKGROUND) and LLAMA_API_KEY:
from groq import Groq
llama_client = Groq(api_key=LLAMA_API_KEY)
LLAMA_MODEL = "llama3-70b-8192"
#### init end ####
def md_handler(message: Message, bot: TeleBot):
@ -134,35 +191,7 @@ def latest_handle_messages(message: Message, bot: TeleBot):
print(chat_message_dict[chat_id].text)
def get_gpt_answer(message):
chatgpt_reply_text = ""
player_message = [{"role": "user", "content": message}]
try:
r = client.chat.completions.create(
messages=player_message, max_tokens=4096, model=CHATGPT_PRO_MODEL
)
chatgpt_reply_text = r.choices[0].message.content.encode("utf8").decode()
except Exception as e:
print(e)
chatgpt_reply_text = "answer wrong"
return chatgpt_reply_text
def get_claude_answer(message):
chatgpt_reply_text = ""
player_message = [{"role": "user", "content": message}]
try:
r = claude_client.chat.completions.create(
messages=player_message, max_tokens=4096, model=ANTHROPIC_MODEL
)
chatgpt_reply_text = r.choices[0].message.content.encode("utf8").decode()
except Exception as e:
print(e)
chatgpt_reply_text = "answer wrong"
return chatgpt_reply_text
def answer_it_handler(message: Message, bot: TeleBot):
def answer_it_handler(message: Message, bot: TeleBot) -> None:
"""answer_it: /answer_it"""
# answer the last message in the chat group
who = "answer_it"
@ -172,85 +201,205 @@ def answer_it_handler(message: Message, bot: TeleBot):
latest_message = chat_message_dict.get(chat_id)
m = latest_message.text.strip()
m = enrich_text_with_urls(m)
full = "Question:\n" + m + "\n---\n"
##### Gemini #####
full_answer = f"Question:\n{m}\n" if len(m) < 300 else ""
if Extra_clean: # delete the command message
bot.delete_message(chat_id, message.message_id)
#### Answers Thread ####
executor = ThreadPoolExecutor(max_workers=5)
if GEMINI_USE and GOOGLE_GEMINI_KEY:
gemini_future = executor.submit(gemini_answer, latest_message, bot, m)
if CHATGPT_USE and CHATGPT_API_KEY:
chatgpt_future = executor.submit(chatgpt_answer, latest_message, bot, m)
if COHERE_USE and COHERE_API_KEY:
cohere_future = executor.submit(cohere_answer, latest_message, bot, m)
if QWEN_USE and QWEN_API_KEY:
qwen_future = executor.submit(qwen_answer, latest_message, bot, m)
if CLAUDE_USE and ANTHROPIC_API_KEY:
claude_future = executor.submit(claude_answer, latest_message, bot, m)
if LLAMA_USE and LLAMA_API_KEY:
llama_future = executor.submit(llama_answer, latest_message, bot, m)
#### Answers List ####
full_chat_id_list = []
if GEMINI_USE and GOOGLE_GEMINI_KEY:
answer_gemini, gemini_chat_id = gemini_future.result()
full_chat_id_list.append(gemini_chat_id)
full_answer += answer_gemini
if CHATGPT_USE and CHATGPT_API_KEY:
answer_chatgpt, chatgpt_chat_id = chatgpt_future.result()
full_chat_id_list.append(chatgpt_chat_id)
full_answer += answer_chatgpt
if COHERE_USE and COHERE_API_KEY:
answer_cohere, cohere_chat_id = cohere_future.result()
full_chat_id_list.append(cohere_chat_id)
full_answer += answer_cohere
if QWEN_USE and QWEN_API_KEY:
answer_qwen, qwen_chat_id = qwen_future.result()
full_chat_id_list.append(qwen_chat_id)
full_answer += answer_qwen
if CLAUDE_USE and ANTHROPIC_API_KEY:
answer_claude, claude_chat_id = claude_future.result()
full_chat_id_list.append(claude_chat_id)
full_answer += answer_claude
if LLAMA_USE and LLAMA_API_KEY:
answer_llama, llama_chat_id = llama_future.result()
full_chat_id_list.append(llama_chat_id)
full_answer += answer_llama
print(full_chat_id_list)
if len(m) > 300:
full_answer += llm_answer("Question", m)
##### Telegraph #####
final_answer(latest_message, bot, full_answer, full_chat_id_list)
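The handler fans one question out to every enabled model on a shared thread pool, then joins the futures in a fixed order so the combined answer is deterministic regardless of which model finishes first. A skeleton of the pattern, with the provider callables as stand-ins:

    from concurrent.futures import ThreadPoolExecutor

    def fan_out(question: str, providers: dict) -> str:
        """providers maps a display name to a callable(question) -> answer string."""
        combined = ""
        with ThreadPoolExecutor(max_workers=5) as pool:
            futures = {name: pool.submit(fn, question) for name, fn in providers.items()}
            for name, fut in futures.items():  # .result() blocks, preserving order
                combined += f"\n\n---\n## {name}\n{fut.result()}"
        return combined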
def update_time():
"""Return the current time in UTC+8. Good for testing completion of content."""
return f"\nLast Update{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} at UTC+8\n"
def llm_answer(who: str, s: str) -> str:
"""Universal llm answer format for telegra.ph. Use title so 'link#title' can be used."""
return f"\n\n---\n## {who}\n{s}"
def llm_background(path: str, full_answer: str, m: str) -> str:
"""Update the telegra.ph page with background answer result. Return new full answer."""
ph_path = re.search(r"https?://telegra\.ph/(.+)", path).group(1)
full_answer += m + update_time()
try:
ph.edit_page_md(path=ph_path, title="Answer it", markdown_text=full_answer)
except Exception as e:
print(f"\n------\nllm_background Error:\n{e}\n------\n")
return full_answer
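llm_background re-derives the page path from the full URL before calling edit_page_md. A quick check of that extraction (the URL is illustrative):

    import re

    url = "https://telegra.ph/Answer-it-07-02"
    print(re.search(r"https?://telegra\.ph/(.+)", url).group(1))  # Answer-it-07-02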
def gemini_answer(latest_message: Message, bot: TeleBot, m):
"""gemini answer"""
who = "Gemini Pro"
# show something right away, to make the bot feel more responsive
reply_id = bot_reply_first(latest_message, who, bot)
#### executor thread ####
executor = ThreadPoolExecutor(max_workers=5)
chatgpt_thread = executor.submit(get_gpt_answer, m)
claude_thread = None
claude_answer = ""
if ANTHROPIC_API_KEY:
claude_thread = executor.submit(get_claude_answer, m)
try:
r = model.generate_content(m, stream=True)
r = convo.send_message(m, stream=True)
s = ""
start = time.time()
for e in r:
s += e.text
if time.time() - start > 1.5:
if time.time() - start > 1.7:
start = time.time()
bot_reply_markdown(reply_id, who, s, bot, split_text=False)
bot_reply_markdown(reply_id, who, s, bot)
convo.history.clear()
except Exception as e:
print(e)
print(f"\n------\n{who} function inner Error:\n{e}\n------\n")
convo.history.clear()
bot_reply_markdown(reply_id, who, "Error", bot)
return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
full += f"{who}:\n{s}"
chat_id_list = [reply_id.message_id]
return llm_answer(who, s), reply_id.message_id
##### ChatGPT #####
def chatgpt_answer(latest_message: Message, bot: TeleBot, m):
"""chatgpt answer"""
who = "ChatGPT Pro"
reply_id = bot_reply_first(latest_message, who, bot)
# get gpt answer using thread
chatgpt_answer = chatgpt_thread.result()
bot_reply_markdown(reply_id, who, chatgpt_answer, bot)
player_message = [{"role": "user", "content": m}]
full += f"\n---\n{who}:\n{chatgpt_answer}"
chat_id_list.append(reply_id.message_id)
try:
r = client.chat.completions.create(
messages=player_message,
max_tokens=4096,
model=CHATGPT_PRO_MODEL,
stream=True,
)
s = ""
start = time.time()
for chunk in r:
if chunk.choices[0].delta.content is None:
break
s += chunk.choices[0].delta.content
if time.time() - start > 1.5:
start = time.time()
bot_reply_markdown(reply_id, who, s, bot, split_text=False)
# the last throttled edit may not include the final text
try:
bot_reply_markdown(reply_id, who, s, bot)
except:
pass
##### Claude #####
if USE_CLAUDE and ANTHROPIC_API_KEY:
who = "Claude Pro"
claude_answer = claude_thread.result()
reply_id = bot_reply_first(latest_message, who, bot)
bot_reply_markdown(reply_id, who, claude_answer, bot)
except Exception as e:
print(f"\n------\n{who} function inner Error:\n{e}\n------\n")
bot_reply_markdown(reply_id, who, "answer wrong", bot)
return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
full += f"\n---\n{who}:\n{claude_answer}"
chat_id_list.append(reply_id.message_id)
##### Cohere #####
if USE_COHERE and COHERE_API_KEY:
full, chat_id = cohere_answer(latest_message, bot, full, m)
chat_id_list.append(chat_id)
else:
pass
##### Telegraph #####
final_answer(latest_message, bot, full, chat_id_list)
return llm_answer(who, s), reply_id.message_id
def cohere_answer(latest_message: Message, bot: TeleBot, full, m):
def claude_answer(latest_message: Message, bot: TeleBot, m):
"""claude answer"""
who = "Claude Pro"
reply_id = bot_reply_first(latest_message, who, bot)
try:
r = claude_client.chat.completions.create(
messages=[{"role": "user", "content": m}],
max_tokens=4096,
model=ANTHROPIC_MODEL,
stream=True,
)
s = ""
start = time.time()
for chunk in r:
if chunk.choices[0].delta.content is None:
break
s += chunk.choices[0].delta.content
if time.time() - start > 1.5:
start = time.time()
bot_reply_markdown(reply_id, who, s, bot, split_text=False)
# the last throttled edit may not include the final text
try:
bot_reply_markdown(reply_id, who, s, bot)
except:
pass
except Exception as e:
print(f"\n------\n{who} function inner Error:\n{e}\n------\n")
bot_reply_markdown(reply_id, who, "answer wrong", bot)
return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
answer = f"\n---\n{who}:\n{s}"
return llm_answer(who, s), reply_id.message_id
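Every streaming handler repeats the same throttle: accumulate chunks and edit the Telegram message at most once per interval, since editMessageText is rate-limited. The pattern in isolation (the chunk iterable and edit callback are stand-ins):

    import time

    def stream_with_throttle(chunks, edit, interval: float = 1.5) -> str:
        """Accumulate streamed text, calling edit(text) at most once per interval."""
        s = ""
        last = time.time()
        for chunk in chunks:
            s += chunk
            if time.time() - last > interval:
                last = time.time()
                edit(s)  # intermediate update with the partial answer
        edit(s)  # final flush; the last throttled edit may predate the last chunk
        return s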
def cohere_answer(latest_message: Message, bot: TeleBot, m):
"""cohere answer"""
who = "Command R Plus"
reply_id = bot_reply_first(latest_message, who, bot)
player_message = [{"role": "User", "message": m}]
try:
current_time = datetime.datetime.now(datetime.timezone.utc)
preamble = (
f"You are Command R Plus, a large language model trained to have polite, helpful, inclusive conversations with people. People are looking for information that may need you to search online. Make an accurate and fast response. If there are no search results, then provide responses based on your general knowledge(It's fine if it's not accurate, it might still inspire the user."
f"The current UTC time is {current_time.strftime('%Y-%m-%d %H:%M:%S')}, "
f"UTC-4 (e.g. New York) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=-4))).strftime('%Y-%m-%d %H:%M:%S')}, "
f"UTC-7 (e.g. Los Angeles) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, "
f"and UTC+8 (e.g. Beijing) is {current_time.astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}."
)
stream = co.chat_stream(
model=COHERE_MODEL,
message=m,
temperature=0.3,
chat_history=player_message,
temperature=0.8,
chat_history=[], # One time, so no need for chat history
prompt_truncation="AUTO",
connectors=[{"id": "web-search"}],
citation_quality="accurate",
preamble=f"You are Command R+, a large language model trained to have polite, helpful, inclusive conversations with people. The current time in Tornoto is {datetime.datetime.now(datetime.timezone.utc).astimezone().strftime('%Y-%m-%d %H:%M:%S')}, in Los Angeles is {datetime.datetime.now(datetime.timezone.utc).astimezone().astimezone(datetime.timezone(datetime.timedelta(hours=-7))).strftime('%Y-%m-%d %H:%M:%S')}, and in China is {datetime.datetime.now(datetime.timezone.utc).astimezone(datetime.timezone(datetime.timedelta(hours=8))).strftime('%Y-%m-%d %H:%M:%S')}.",
preamble=preamble,
)
s = ""
@ -266,13 +415,13 @@ def cohere_answer(latest_message: Message, bot: TeleBot, full, m):
for doc in event.documents:
source += f"\n{doc['title']}\n{doc['url']}\n"
elif event.event_type == "text-generation":
s += event.text.encode("utf-8").decode("utf-8")
if time.time() - start > 0.4:
s += event.text.encode("utf-8").decode("utf-8", "ignore")
if time.time() - start > 0.8:
start = time.time()
bot_reply_markdown(
reply_id,
who,
f"\nStill thinking{len(s)}...",
f"\nStill thinking{len(s)}...\n{s}",
bot,
split_text=True,
)
@ -280,33 +429,298 @@ def cohere_answer(latest_message: Message, bot: TeleBot, full, m):
break
content = (
s
+ "\n------\n------\n"
+ "\n---\n---\n"
+ source
+ f"\n------\n------\nLast Update{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+ f"\nLast Update{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} at UTC+8\n"
)
# the last throttled edit may not include the final text
try:
bot_reply_markdown(reply_id, who, s, bot, split_text=True)
except:
pass
except Exception as e:
print(e)
print(f"\n------\n{who} function inner Error:\n{e}\n------\n")
bot_reply_markdown(reply_id, who, "Answer wrong", bot)
player_message.clear()
return full, reply_id.message_id
full += f"\n---\n{who}:\n{content}"
return full, reply_id.message_id
return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
return llm_answer(who, content), reply_id.message_id
def final_answer(latest_message: Message, bot: TeleBot, full, answers_list):
"""final answer"""
who = "Answer"
def qwen_answer(latest_message: Message, bot: TeleBot, m):
"""qwen answer"""
who = "qwen Pro"
reply_id = bot_reply_first(latest_message, who, bot)
ph_s = ph.create_page_md(title="Answer it", markdown_text=full)
bot_reply_markdown(reply_id, who, f"[View]({ph_s})", bot)
try:
r = qwen_client.chat.completions.create(
messages=[{"role": "user", "content": m}],
max_tokens=8192,
model=QWEN_MODEL,
stream=True,
)
s = ""
start = time.time()
for chunk in r:
if chunk.choices[0].delta.content is None:
break
s += chunk.choices[0].delta.content
if time.time() - start > 1.5:
start = time.time()
bot_reply_markdown(reply_id, who, s, bot, split_text=False)
# the last throttled edit may not include the final text
try:
bot_reply_markdown(reply_id, who, s, bot)
except:
pass
except Exception as e:
print(f"\n------\n{who} function inner Error:\n{e}\n------\n")
bot_reply_markdown(reply_id, who, "answer wrong", bot)
return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
return llm_answer(who, s), reply_id.message_id
def llama_answer(latest_message: Message, bot: TeleBot, m):
"""llama answer"""
who = "llama"
reply_id = bot_reply_first(latest_message, who, bot)
try:
r = llama_client.chat.completions.create(
messages=[
{
"role": "system",
"content": f"You must use language of {Language} to respond.",
},
{"role": "user", "content": m},
],
max_tokens=8192,
model=LLAMA_MODEL,
stream=True,
)
s = ""
start = time.time()
for chunk in r:
if chunk.choices[0].delta.content is None:
break
s += chunk.choices[0].delta.content
if time.time() - start > 1.5:
start = time.time()
bot_reply_markdown(reply_id, who, s, bot, split_text=False)
# the last throttled edit may not include the final text
try:
bot_reply_markdown(reply_id, who, s, bot)
except:
pass
except Exception as e:
print(f"\n------\n{who} function inner Error:\n{e}\n------\n")
bot_reply_markdown(reply_id, who, "answer wrong", bot)
return f"\n---\n{who}:\nAnswer wrong", reply_id.message_id
return llm_answer(who, s), reply_id.message_id
# TODO: Perplexity looks good. `pplx_answer`
def final_answer(latest_message: Message, bot: TeleBot, full_answer: str, answers_list):
"""final answer"""
who = "Answer it"
reply_id = bot_reply_first(latest_message, who, bot)
# If this timestamp is missing, the telegra.ph answer is incomplete
full_answer += update_time()
# create a new telegra.ph page
ph_s = ph.create_page_md(title="Answer it", markdown_text=full_answer)
bot_reply_markdown(reply_id, who, f"**[Full Answer]({ph_s})**", bot)
# delete the per-model chat messages, leaving only the telegra.ph link
for i in answers_list:
bot.delete_message(latest_message.chat.id, i)
if General_clean:
for i in answers_list:
bot.delete_message(latest_message.chat.id, i)
#### Summary ####
if SUMMARY is None:
pass
else:
s = llm_summary(bot, full_answer, ph_s, reply_id)
bot_reply_markdown(reply_id, who, s, bot, disable_web_page_preview=True)
#### Background LLM ####
# Run background LLMs: not shown in Telegram, they only update the telegra.ph page. Good for slow LLMs.
if LLAMA_USE_BACKGROUND and LLAMA_API_KEY:
llama_b_m = background_llama(latest_message.text)
full_answer = llm_background(ph_s, full_answer, llama_b_m)
if COHERE_USE_BACKGROUND and COHERE_API_KEY:
cohere_b_m = background_cohere(latest_message.text)
full_answer = llm_background(ph_s, full_answer, cohere_b_m)
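final_answer publishes early and refines later: create the telegra.ph page, post the link, then let the slow background models extend the page through edit_page_md. A compressed sketch of that flow, reusing the ph helper defined above (the job callables are stand-ins):

    def publish_then_refine(first_sections: str, slow_jobs) -> str:
        """Create the page first, then append each slow result by editing it."""
        full = first_sections
        url = ph.create_page_md(title="Answer it", markdown_text=full)
        path = url.rsplit("/", 1)[-1]  # edit_page_md expects the path, not the URL
        for job in slow_jobs:  # each job() returns an already-formatted section
            full += job()
            ph.edit_page_md(path=path, title="Answer it", markdown_text=full)
        return url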
def llm_summary(bot, full_answer, ph_s, reply_id) -> str:
"""llm summary return the summary of the full answer."""
if SUMMARY == "gemini":
s = summary_gemini(bot, full_answer, ph_s, reply_id)
elif SUMMARY == "cohere":
s = summary_cohere(bot, full_answer, ph_s, reply_id)
else:
print(f"\n---\nSummary Fail\n---\n")
s = f"**[Full Answer]({ph_s})**\n~~Summary Answer Wrong~~\n"
return s
def background_cohere(m: str) -> str:
"""we run cohere get the full answer in background"""
who = "Command R Plus"
try:
stream = co.chat_stream(
model=COHERE_MODEL,
message=m,
temperature=0.8,
chat_history=[], # One time, so no need for chat history
prompt_truncation="AUTO",
connectors=[{"id": "web-search"}],
citation_quality="accurate",
preamble="",
)
s = ""
source = ""
for event in stream:
if event.event_type == "search-results":
for doc in event.documents:
source += f"\n{doc['title']}\n{doc['url']}\n"
elif event.event_type == "text-generation":
s += event.text.encode("utf-8").decode("utf-8", "ignore")
elif event.event_type == "stream-end":
break
content = llm_answer(who, f"{s}\n\n---\n{source}")
except Exception as e:
print(f"\n------\nbackground_cohere Error:\n{e}\n------\n")
content = llm_answer(who, "Background Answer wrong")
return content
def background_llama(m: str) -> str:
"""we run llama get the full answer in background"""
who = "llama"
try:
r = llama_client.chat.completions.create(
messages=[
{
"role": "system",
"content": f"You must use language of {Language} to respond.",
},
{"role": "user", "content": m},
],
max_tokens=8192,
model=LLAMA_MODEL,
stream=True,
)
s = ""
for chunk in r:
if chunk.choices[0].delta.content is None:
break
s += chunk.choices[0].delta.content
except Exception as e:
print(f"\n------\nbackground_llama Error:\n{e}\n------\n")
s = "Background Answer wrong"
return llm_answer(who, s)
def summary_cohere(bot: TeleBot, full_answer: str, ph_s: str, reply_id: Message) -> str:
"""Receive the full text and the final_answer reply id, then update that message with a summary."""
who = "Answer it"
# keep the full-answer link as the message prefix
if Language == "zh-cn":
s = f"**[全文]({ph_s})** | "
elif Language == "en":
s = f"**[Full Answer]({ph_s})** | "
# filter
length = len(full_answer) # max 128,000 tokens...
if length > 50000:
full_answer = full_answer[:50000]
try:
preamble = """
You are Command R Plus, a large language model trained to have polite, helpful, inclusive conversations with people. The user asked a question, and multiple AIs have given answers to the same question; they differ in style and occasionally hold opposite opinions or have other issues, which is normal. Your task is to summarize their responses in a concise and clear manner. The summary should:
Be written in bullet points.
Contain between two and ten sentences.
Highlight key points and main conclusions.
Note any significant differences in responses.
Provide a brief indication if users should refer to the full responses for more details.
For the first LLM's content, if it is mostly in any language other than English, respond in that language for all your output.
Start with "Summary:" or "总结:"
"""
stream = co.chat_stream(
model=COHERE_MODEL,
message=full_answer,
temperature=0.4,
chat_history=[],
prompt_truncation="OFF",
connectors=[],
preamble=preamble,
)
start = time.time()
for event in stream:
if event.event_type == "stream-start":
bot_reply_markdown(reply_id, who, f"{s}Summarizing...", bot)
elif event.event_type == "text-generation":
s += event.text.encode("utf-8").decode("utf-8", "ignore")
if time.time() - start > 0.4:
start = time.time()
bot_reply_markdown(reply_id, who, s, bot)
elif event.event_type == "stream-end":
break
try:
bot_reply_markdown(reply_id, who, s, bot)
except:
pass
return s
except Exception as e:
if Language == "zh-cn":
bot_reply_markdown(reply_id, who, f"[全文]({ph_s})", bot)
elif Language == "en":
bot_reply_markdown(reply_id, who, f"[Full Answer]({ph_s})", bot)
print(f"\n------\nsummary_cohere function inner Error:\n{e}\n------\n")
def summary_gemini(bot: TeleBot, full_answer: str, ph_s: str, reply_id: Message) -> str:
"""Receive the full text and the final_answer reply id, then update that message with a summary."""
who = "Answer it"
# keep the full-answer link as the message prefix
if Language == "zh-cn":
s = f"**[🔗全文]({ph_s})** | "
elif Language == "en":
s = f"**[🔗Full Answer]({ph_s})** | "
try:
r = convo_summary.send_message(full_answer, stream=True)
start = time.time()
for e in r:
s += e.text
if time.time() - start > 0.4:
start = time.time()
bot_reply_markdown(reply_id, who, s, bot, split_text=False)
bot_reply_markdown(reply_id, who, s, bot)
convo_summary.history.clear()
return s
except Exception as e:
if Language == "zh-cn":
bot_reply_markdown(reply_id, who, f"[全文]({ph_s})", bot)
elif Language == "en":
bot_reply_markdown(reply_id, who, f"[Full Answer]({ph_s})", bot)
print(f"\n------\nsummary_gemini function inner Error:\n{e}\n------\n")
bot_reply_markdown(reply_id, who, f"{s}Error", bot)
if GOOGLE_GEMINI_KEY and CHATGPT_API_KEY: