Merge pull request #29 from F4ria/url-reader

support Claude and Yi handlers to extract and enrich text with URLs
This commit is contained in:
yihong 2024-04-16 11:43:13 +08:00 committed by GitHub
commit f1515791ea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 39 additions and 29 deletions

View File

@ -7,11 +7,13 @@ from functools import update_wrapper
from pathlib import Path from pathlib import Path
from typing import Any, Callable, TypeVar from typing import Any, Callable, TypeVar
import requests
from telebot import TeleBot from telebot import TeleBot
from telebot.types import BotCommand, Message from telebot.types import BotCommand, Message
from telebot.util import smart_split from telebot.util import smart_split
import telegramify_markdown import telegramify_markdown
from telegramify_markdown.customize import markdown_symbol from telegramify_markdown.customize import markdown_symbol
from urlextract import URLExtract
markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
@ -155,5 +157,34 @@ def list_available_commands() -> list[str]:
return commands return commands
def extract_url_from_text(text: str) -> list[str]:
    """Return the URLs that ``URLExtract`` finds in *text*.

    A fresh ``URLExtract`` instance is created on every call and its
    ``find_urls`` result is handed back unchanged.
    """
    return URLExtract().find_urls(text)
def get_text_from_jina_reader(url: str, timeout: float = 30):
    """Fetch the content of *url* through the Jina reader endpoint.

    The request is sent to ``https://r.jina.ai/<url>`` and the response body
    is returned as text. The HTTP status code is not inspected.

    Args:
        url: Address of the page to read.
        timeout: Seconds to wait for the reader before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            request would block the calling handler.

    Returns:
        The response body as ``str``, or ``None`` if the request raised
        (the exception is printed, not propagated).
    """
    try:
        r = requests.get(f"https://r.jina.ai/{url}", timeout=timeout)
        return r.text
    except Exception as e:
        # Best-effort helper: report the failure and let the caller decide
        # what to do with a missing result.
        print(e)
        return None
def enrich_text_with_urls(text: str) -> str:
    """Replace each URL in *text* with the page content fetched for it.

    Every distinct URL found by ``extract_url_from_text`` is fetched with
    ``get_text_from_jina_reader`` and all of its occurrences are replaced by
    the fetched content wrapped in a fenced ``markdown`` block. Enrichment is
    best effort: a URL whose content cannot be fetched is left untouched.

    Args:
        text: Message text that may contain URLs.

    Returns:
        The text with every successfully fetched URL expanded in place.
    """
    # dict.fromkeys drops duplicates while keeping first-seen order. A
    # repeated URL would otherwise be fetched again, and its second replace
    # would also rewrite any copy of the URL inside content already inserted.
    for u in dict.fromkeys(extract_url_from_text(text)):
        try:
            url_text = get_text_from_jina_reader(u)
        except Exception:
            # A failed fetch must never break the chat handler.
            continue
        if url_text is None:
            # The reader reports failure by returning None; keep the original
            # URL instead of embedding the literal string "None".
            continue
        text = text.replace(u, f"\n```markdown\n{url_text}\n```\n")
    return text
# `import *` will give you these # `import *` will give you these
__all__ = ["bot_reply_first", "bot_reply_markdown"] __all__ = ["bot_reply_first", "bot_reply_markdown", "enrich_text_with_urls"]

View File

@ -48,6 +48,7 @@ def claude_handler(message: Message, bot: TeleBot) -> None:
if m[:4].lower() == "new ": if m[:4].lower() == "new ":
m = m[4:].strip() m = m[4:].strip()
player_message.clear() player_message.clear()
m = enrich_text_with_urls(m)
who = "Claude" who = "Claude"
# show something, make it more responsive # show something, make it more responsive
@ -109,6 +110,7 @@ def claude_pro_handler(message: Message, bot: TeleBot) -> None:
if m[:4].lower() == "new ": if m[:4].lower() == "new ":
m = m[4:].strip() m = m[4:].strip()
player_message.clear() player_message.clear()
m = enrich_text_with_urls(m)
who = "Claude Pro" who = "Claude Pro"
# show something, make it more responsive # show something, make it more responsive

View File

@ -7,10 +7,8 @@ from google.generativeai.types.generation_types import StopCandidateException
from telebot import TeleBot from telebot import TeleBot
from telebot.types import Message from telebot.types import Message
import requests
from telegramify_markdown import convert from telegramify_markdown import convert
from telegramify_markdown.customize import markdown_symbol from telegramify_markdown.customize import markdown_symbol
from urlextract import URLExtract
from . import * from . import *
@ -40,21 +38,6 @@ gemini_pro_player_dict = {}
gemini_file_player_dict = {} gemini_file_player_dict = {}
def extract_url_from_text(text: str) -> list[str]:
    """Return the URLs that ``URLExtract`` finds in *text*.

    A fresh ``URLExtract`` instance is created on every call and its
    ``find_urls`` result is handed back unchanged.
    """
    return URLExtract().find_urls(text)
def get_text_from_jina_reader(url: str, timeout: float = 30):
    """Fetch the content of *url* through the Jina reader endpoint.

    The request is sent to ``https://r.jina.ai/<url>`` and the response body
    is returned as text. The HTTP status code is not inspected.

    Args:
        url: Address of the page to read.
        timeout: Seconds to wait for the reader before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            request would block the calling handler.

    Returns:
        The response body as ``str``, or ``None`` if the request raised
        (the exception is printed, not propagated).
    """
    try:
        r = requests.get(f"https://r.jina.ai/{url}", timeout=timeout)
        return r.text
    except Exception as e:
        # Best-effort helper: report the failure and let the caller decide
        # what to do with a missing result.
        print(e)
        return None
def make_new_gemini_convo(is_pro=False): def make_new_gemini_convo(is_pro=False):
model_name = "models/gemini-1.0-pro-latest" model_name = "models/gemini-1.0-pro-latest"
if is_pro: if is_pro:
@ -89,6 +72,7 @@ def gemini_handler(message: Message, bot: TeleBot) -> None:
if m[:4].lower() == "new ": if m[:4].lower() == "new ":
m = m[4:].strip() m = m[4:].strip()
player.history.clear() player.history.clear()
m = enrich_text_with_urls(m)
who = "Gemini" who = "Gemini"
# show something, make it more responsive # show something, make it more responsive
@ -141,17 +125,9 @@ def gemini_pro_handler(message: Message, bot: TeleBot) -> None:
if m[:4].lower() == "new ": if m[:4].lower() == "new ":
m = m[4:].strip() m = m[4:].strip()
player.history.clear() player.history.clear()
urls = extract_url_from_text(m) if gemini_file_player_dict.get(str(message.from_user.id)):
if urls: del gemini_file_player_dict[str(message.from_user.id)]
m = m + "\n" + "Content: \n" m = enrich_text_with_urls(m)
for u in urls:
# remove the url from the text tricky to lie to the model
m = m.replace(u, "")
try:
m += get_text_from_jina_reader(u)
except Exception as e:
# just ignore the error
pass
who = "Gemini Pro" who = "Gemini Pro"
# show something, make it more responsive # show something, make it more responsive

View File

@ -45,6 +45,7 @@ def yi_handler(message: Message, bot: TeleBot) -> None:
if m[:4].lower() == "new ": if m[:4].lower() == "new ":
m = m[4:].strip() m = m[4:].strip()
player_message.clear() player_message.clear()
m = enrich_text_with_urls(m)
who = "Yi" who = "Yi"
# show something, make it more responsive # show something, make it more responsive