Merge pull request #29 from F4ria/url-reader

support Claude and Yi handlers to extract and enrich text with URLs
This commit is contained in:
yihong 2024-04-16 11:43:13 +08:00 committed by GitHub
commit f1515791ea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 39 additions and 29 deletions

View File

@ -7,11 +7,13 @@ from functools import update_wrapper
from pathlib import Path
from typing import Any, Callable, TypeVar
import requests
from telebot import TeleBot
from telebot.types import BotCommand, Message
from telebot.util import smart_split
import telegramify_markdown
from telegramify_markdown.customize import markdown_symbol
from urlextract import URLExtract
markdown_symbol.head_level_1 = "📌" # Optional: customize the symbol used for level-1 headings
markdown_symbol.link = "🔗" # Optional: customize the symbol used for links
@ -155,5 +157,34 @@ def list_available_commands() -> list[str]:
return commands
def extract_url_from_text(text: str) -> list[str]:
    """Return the URLs that ``URLExtract`` finds in *text*.

    A fresh ``URLExtract`` instance is created on every call.
    """
    return URLExtract().find_urls(text)
def get_text_from_jina_reader(url: str, timeout: float = 30):
    """Fetch *url* through the Jina reader endpoint and return the body text.

    The request is sent to ``https://r.jina.ai/<url>``.

    Args:
        url: The URL whose content should be fetched.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response text, or ``None`` if the request raised an exception
        (the exception is printed).
    """
    try:
        r = requests.get(f"https://r.jina.ai/{url}", timeout=timeout)
        return r.text
    except Exception as e:
        # Deliberately best-effort: callers treat None as "no content".
        print(e)
        return None
def enrich_text_with_urls(text: str) -> str:
    """Replace each URL in *text* with the content fetched for that URL.

    Every URL found by ``extract_url_from_text`` is fetched with
    ``get_text_from_jina_reader`` and replaced by the fetched text wrapped in
    a fenced ``markdown`` code block. URLs whose fetch fails (``None`` result)
    are left in the text unchanged.

    Args:
        text: The user message that may contain URLs.

    Returns:
        The message with successfully fetched URLs replaced by their content.
    """
    import re

    replacements: dict[str, str] = {}
    # dict.fromkeys de-duplicates while keeping order, so a URL that appears
    # several times is fetched only once.
    for u in dict.fromkeys(extract_url_from_text(text)):
        url_text = get_text_from_jina_reader(u)
        if url_text is None:
            # Fetch failed: keep the original URL instead of injecting "None".
            continue
        replacements[u] = f"\n```markdown\n{url_text}\n```\n"

    if not replacements:
        return text

    # Substitute in a single pass so already-injected page content is never
    # rescanned for other URLs. Longest URLs go first so that a URL which is
    # a prefix of another one cannot shadow it.
    pattern = re.compile(
        "|".join(
            re.escape(u) for u in sorted(replacements, key=len, reverse=True)
        )
    )
    return pattern.sub(lambda m: replacements[m.group(0)], text)
# `import *` will give you these
__all__ = ["bot_reply_first", "bot_reply_markdown"]
__all__ = ["bot_reply_first", "bot_reply_markdown", "enrich_text_with_urls"]

View File

@ -48,6 +48,7 @@ def claude_handler(message: Message, bot: TeleBot) -> None:
if m[:4].lower() == "new ":
m = m[4:].strip()
player_message.clear()
m = enrich_text_with_urls(m)
who = "Claude"
# show something, make it more responsive
@ -109,6 +110,7 @@ def claude_pro_handler(message: Message, bot: TeleBot) -> None:
if m[:4].lower() == "new ":
m = m[4:].strip()
player_message.clear()
m = enrich_text_with_urls(m)
who = "Claude Pro"
# show something, make it more responsive

View File

@ -7,10 +7,8 @@ from google.generativeai.types.generation_types import StopCandidateException
from telebot import TeleBot
from telebot.types import Message
import requests
from telegramify_markdown import convert
from telegramify_markdown.customize import markdown_symbol
from urlextract import URLExtract
from . import *
@ -40,21 +38,6 @@ gemini_pro_player_dict = {}
gemini_file_player_dict = {}
def extract_url_from_text(text: str) -> list[str]:
    """Return the URLs that ``URLExtract`` finds in *text*.

    A fresh ``URLExtract`` instance is created on every call.
    """
    return URLExtract().find_urls(text)
def get_text_from_jina_reader(url: str, timeout: float = 30):
    """Fetch *url* through the Jina reader endpoint and return the body text.

    The request is sent to ``https://r.jina.ai/<url>``.

    Args:
        url: The URL whose content should be fetched.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response text, or ``None`` if the request raised an exception
        (the exception is printed).
    """
    try:
        r = requests.get(f"https://r.jina.ai/{url}", timeout=timeout)
        return r.text
    except Exception as e:
        # Deliberately best-effort: callers treat None as "no content".
        print(e)
        return None
def make_new_gemini_convo(is_pro=False):
model_name = "models/gemini-1.0-pro-latest"
if is_pro:
@ -89,6 +72,7 @@ def gemini_handler(message: Message, bot: TeleBot) -> None:
if m[:4].lower() == "new ":
m = m[4:].strip()
player.history.clear()
m = enrich_text_with_urls(m)
who = "Gemini"
# show something, make it more responsive
@ -141,17 +125,9 @@ def gemini_pro_handler(message: Message, bot: TeleBot) -> None:
if m[:4].lower() == "new ":
m = m[4:].strip()
player.history.clear()
urls = extract_url_from_text(m)
if urls:
m = m + "\n" + "Content: \n"
for u in urls:
# remove the url from the text tricky to lie to the model
m = m.replace(u, "")
try:
m += get_text_from_jina_reader(u)
except Exception as e:
# just ignore the error
pass
if gemini_file_player_dict.get(str(message.from_user.id)):
del gemini_file_player_dict[str(message.from_user.id)]
m = enrich_text_with_urls(m)
who = "Gemini Pro"
# show something, make it more responsive

View File

@ -45,6 +45,7 @@ def yi_handler(message: Message, bot: TeleBot) -> None:
if m[:4].lower() == "new ":
m = m[4:].strip()
player_message.clear()
m = enrich_text_with_urls(m)
who = "Yi"
# show something, make it more responsive