tg_bot_collections/handlers/__init__.py

from __future__ import annotations

import base64
import importlib
import re
import traceback
from functools import update_wrapper
from pathlib import Path
from typing import Any, Callable, TypeVar

import requests
from telebot import TeleBot
from telebot.types import BotCommand, Message
from telebot.util import smart_split
import telegramify_markdown
from telegramify_markdown.customize import markdown_symbol
from urlextract import URLExtract

# Optional customisation of the symbols telegramify_markdown emits.
markdown_symbol.head_level_1 = "📌"  # symbol used for level-1 headings
markdown_symbol.link = "🔗"  # symbol used for links

# Type variable for handler callables, so wrap_handler is typed as returning
# the same kind of callable it receives.
T = TypeVar("T", bound=Callable)

# Threshold used by bot_reply_markdown: text whose UTF-8 encoding is longer
# than this is split into chunks of at most this many characters.
BOT_MESSAGE_LENGTH = 4000


def bot_reply_first(message: Message, who: str, bot: TeleBot) -> Message:
    """Send the first placeholder reply so the user can see the bot is working.

    Args:
        message: The incoming message to reply to.
        who: Name shown in bold in the placeholder; it is interpolated as-is,
            so it must already be safe for MarkdownV2.
        bot: Bot used to send the reply.

    Returns:
        Whatever ``bot.reply_to`` returns for the placeholder message.
    """
    # The dots are backslash-escaped for MarkdownV2. The backslashes are
    # doubled so that Python sees a valid escape sequence ("\." is an invalid
    # one and triggers a SyntaxWarning on recent interpreters); the text sent
    # is unchanged.
    return bot.reply_to(
        message, f"*{who}* is _thinking_ \\.\\.\\.", parse_mode="MarkdownV2"
    )


def bot_reply_markdown(
    reply_id: Message, who: str, text: str, bot: TeleBot, split_text: bool = True
) -> bool:
    """Replace the placeholder reply with ``text`` rendered as MarkdownV2.

    When the UTF-8 encoding of ``text`` fits within ``BOT_MESSAGE_LENGTH`` (or
    ``split_text`` is false) the placeholder is edited in one go. Otherwise the
    text is split with ``smart_split``: the first chunk replaces the
    placeholder and each remaining chunk is sent as a new reply to the message
    the placeholder was answering, each with an ``[i/total]`` header.

    If anything fails, the traceback is printed and the placeholder is edited
    with the raw, unformatted text instead.

    Args:
        reply_id: The placeholder message to edit.
        who: Name shown in bold before the text.
        text: Markdown text to send.
        bot: Bot used to edit and send messages.
        split_text: Whether long text may be split across several messages.

    Returns:
        True if the Markdown version was sent, False if the plain-text
        fallback was used.
    """
    try:
        if len(text.encode("utf-8")) <= BOT_MESSAGE_LENGTH or not split_text:
            bot.edit_message_text(
                f"*{who}*:\n{telegramify_markdown.convert(text)}",
                chat_id=reply_id.chat.id,
                message_id=reply_id.message_id,
                parse_mode="MarkdownV2",
            )
            return True

        # Need a split of message
        chunks = smart_split(text, BOT_MESSAGE_LENGTH)
        total = len(chunks)
        # Brackets are backslash-escaped for MarkdownV2; the backslashes are
        # doubled so Python does not see an invalid escape sequence.
        bot.edit_message_text(
            f"*{who}* \\[1/{total}\\]:\n{telegramify_markdown.convert(chunks[0])}",
            chat_id=reply_id.chat.id,
            message_id=reply_id.message_id,
            parse_mode="MarkdownV2",
        )
        for index, chunk in enumerate(chunks[1:], start=2):
            bot.reply_to(
                reply_id.reply_to_message,
                f"*{who}* \\[{index}/{total}\\]:\n{telegramify_markdown.convert(chunk)}",
                parse_mode="MarkdownV2",
            )

        return True
    except Exception:
        print(traceback.format_exc())
        # Fall back to plain text (no parse_mode) so the user still gets an answer.
        bot.edit_message_text(
            f"*{who}*:\n{text}",
            chat_id=reply_id.chat.id,
            message_id=reply_id.message_id,
        )
        return False


def extract_prompt(message: str, bot_name: str) -> str:
    """Strip the leading command word from a message and return the prompt.

    The first whitespace-delimited word is treated as the command. A colon may
    also end the command (``"gpt:hello"`` is handled like ``"gpt: hello"``).
    Every ``@bot_name`` mention is removed first, because in group chats it is
    appended to the command.

    Returns:
        The trimmed prompt text, or an empty string when nothing follows the
        command word.
    """
    mention = f"@{bot_name}"
    cleaned = message.replace(mention, "").strip()
    # Put a space after the first colon so a "cmd:prompt" form is split on
    # whitespace like any other command.
    cleaned = cleaned.replace(":", ": ", 1).strip()

    pieces = cleaned.split(maxsplit=1)
    if len(pieces) < 2:
        return ""

    command, prompt = pieces
    if ":" not in command:
        # The inserted space landed inside the prompt, not the command; undo it.
        prompt = prompt.replace(": ", ":", 1)
    return prompt.strip()


def wrap_handler(handler: T, bot: TeleBot) -> T:
    """Wrap a message handler with prompt extraction and error reporting.

    The returned wrapper:

    * calls ``handler`` untouched when the message text contains
      ``"answer_it"`` or when the message carries a location;
    * otherwise rewrites ``message.text`` (or ``message.caption``) to the
      prompt returned by ``extract_prompt`` and calls ``handler``, or replies
      with a hint and returns if the prompt is empty;
    * catches any exception, prints the traceback and replies to the user
      with a short error message.

    Args:
        handler: The callback to wrap.
        bot: Bot used to look up its own username and to send replies.

    Returns:
        The wrapper, carrying ``handler``'s metadata via ``update_wrapper``.
    """

    def wrapper(message: Message, *args: Any, **kwargs: Any) -> Any:
        try:
            prompt = ""

            if message.text and "answer_it" in message.text:
                # for answer_it no args
                return handler(message, *args, **kwargs)
            elif message.text is not None:
                prompt = message.text = extract_prompt(
                    message.text, bot.get_me().username
                )
            elif message.caption is not None:
                prompt = message.caption = extract_prompt(
                    message.caption, bot.get_me().username
                )
            elif message.location and message.location.latitude is not None:
                # for location map handler just return
                return handler(message, *args, **kwargs)
            if not prompt:
                bot.reply_to(message, "Please provide info after start words.")
                return
            return handler(message, *args, **kwargs)
        except Exception as e:
            traceback.print_exc()
            # handle more here
            # Substring test rather than ``find(...) > 0``, which missed the
            # marker when the error text starts with it (index 0).
            if "RECITATION" in str(e):
                bot.reply_to(message, "Your prompt `RECITATION` please check the log")
            else:
                bot.reply_to(message, "Something wrong, please check the log")

    return update_wrapper(wrapper, handler)


def load_handlers(bot: TeleBot, disable_commands: list[str]) -> None:
    """Import every handler module, register it, and publish the command list.

    Each name from ``list_available_commands`` that is not in
    ``disable_commands`` is imported as a submodule of this package; if the
    module defines ``register`` it is called with ``bot``. Afterwards every
    registered message handler is wrapped with ``wrap_handler`` and its
    commands are collected and sent via ``bot.set_my_commands``, using the
    handler's docstring as the description.

    Args:
        bot: Bot the handlers are registered on.
        disable_commands: Module names to skip.
    """
    # import all submodules
    for name in list_available_commands():
        if name in disable_commands:
            continue
        module = importlib.import_module(f".{name}", __package__)
        if hasattr(module, "register"):
            print(f"Loading {name} handlers.")
            module.register(bot)
    print("Loading handlers done.")

    all_commands: list[BotCommand] = []
    for handler in bot.message_handlers:
        callback = handler["function"]
        # ``__doc__`` exists but is None on functions without a docstring, so
        # the getattr default alone is not enough to guarantee a string.
        help_text = getattr(callback, "__doc__", None) or ""
        # tricky ignore the latest_handle_messages
        if help_text == "ignore":
            continue
        # Add pre-processing and error handling to all callbacks
        handler["function"] = wrap_handler(callback, bot)
        for command in handler["filters"].get("commands", []):
            # Fall back to the command name so an undocumented handler never
            # yields a None/empty description.
            all_commands.append(BotCommand(command, help_text or command))

    if all_commands:
        bot.set_my_commands(all_commands)
        print("Setting commands done.")


def list_available_commands() -> list[str]:
    """Return the stem of each candidate handler module next to this file.

    Entries whose name starts with ``_`` (such as ``__init__.py`` and
    ``__pycache__``) are skipped. So are entries starting with ``.`` (hidden
    files such as ``.DS_Store``): they can never be importable modules, and a
    leading dot would change the meaning of the relative import performed by
    ``load_handlers``.

    Returns:
        Names in directory-iteration order (not sorted).
    """
    package_dir = Path(__file__).parent
    return [
        entry.stem
        for entry in package_dir.iterdir()
        if not entry.name.startswith(("_", "."))
    ]


def extract_url_from_text(text: str) -> list[str]:
    """Return the URLs that a fresh ``URLExtract`` instance finds in ``text``."""
    return URLExtract().find_urls(text)


def get_text_from_jina_reader(url: str, timeout: float = 30) -> str | None:
    """Fetch ``url`` through the ``r.jina.ai`` reader endpoint.

    Args:
        url: The page URL, appended to ``https://r.jina.ai/``.
        timeout: Seconds to wait for the reader before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body as text, or None if the request raised (the error
        is printed). The HTTP status code is not checked.
    """
    try:
        response = requests.get(f"https://r.jina.ai/{url}", timeout=timeout)
        return response.text
    except Exception as e:
        # Best-effort helper: report the problem and let the caller carry on.
        print(e)
        return None


def enrich_text_with_urls(text: str) -> str:
    """Replace each URL in ``text`` with the page content fetched for it.

    Every URL found by ``extract_url_from_text`` is fetched with
    ``get_text_from_jina_reader``, and all occurrences of that URL are
    replaced by the fetched text wrapped in a fenced ``markdown`` block.
    A URL whose fetch fails is left in the text unchanged.

    Args:
        text: The text to enrich.

    Returns:
        The enriched text.
    """
    for url in extract_url_from_text(text):
        try:
            page_text = get_text_from_jina_reader(url)
        except Exception:
            # Enrichment is best-effort; keep the original URL on failure.
            continue
        if page_text is None:
            # The fetch helper returns None on error; without this check the
            # literal text "None" would replace the URL.
            continue
        text = text.replace(url, f"\n```markdown\n{page_text}\n```\n")

    return text


def image_to_data_uri(file_path):
    """Read a file and return its bytes as a base64 ``data:image/png`` URI.

    The MIME type is always ``image/png``, whatever the file actually holds.
    """
    with open(file_path, "rb") as image_file:
        raw_bytes = image_file.read()
    payload = base64.b64encode(raw_bytes).decode("utf-8")
    return "data:image/png;base64," + payload


import requests
import json
import markdown  # pip install Markdown
from bs4 import BeautifulSoup  # pip install beautifulsoup4


class TelegraphAPI:
    """Small client for the telegra.ph HTTP API (create, edit and fetch pages).

    On construction the account info is fetched once, so that pages created
    later can default to the account's author name and URL.
    """

    # Seconds to wait on every HTTP call, so a stalled connection cannot block
    # the caller forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, access_token):
        self.access_token = access_token
        self.base_url = "https://api.telegra.ph"

        # Get account info on initialization. get_account_info() returns None
        # on failure; fall back to an empty dict so construction does not
        # crash with AttributeError and the author fields are simply None.
        account_info = self.get_account_info() or {}
        self.short_name = account_info.get("short_name")
        self.author_name = account_info.get("author_name")
        self.author_url = account_info.get("author_url")

    @staticmethod
    def _unwrap_result(response):
        """Return the ``result`` member of an API response.

        Raises:
            requests.exceptions.RequestException: On an HTTP error status, or
                when the JSON payload carries no ``result`` (presumably an
                API-level failure described by its ``error`` field).
        """
        response.raise_for_status()
        payload = response.json()
        if "result" not in payload:
            raise requests.exceptions.RequestException(
                f"Telegraph API error: {payload.get('error')}"
            )
        return payload["result"]

    def _post_page(self, method, fields):
        """POST ``fields`` to the given API method and return the page URL."""
        response = requests.post(
            f"{self.base_url}/{method}", data=fields, timeout=self.REQUEST_TIMEOUT
        )
        return self._unwrap_result(response)["url"]

    def create_page(
        self, title, content, author_name=None, author_url=None, return_content=False
    ):
        """
        Creates a new Telegraph page.

        Args:
            title (str): Page title (1-256 characters).
            content (list): Content of the page as a list of Node dictionaries.
            author_name (str, optional): Author name (0-128 characters). Defaults to account's author_name.
            author_url (str, optional): Profile link (0-512 characters). Defaults to account's author_url.
            return_content (bool, optional): If True, return the content field in the response.

        Returns:
            str: URL of the created page.

        Raises:
            requests.exceptions.RequestException: If the request fails or the
                response carries no result.
        """
        data = {
            "access_token": self.access_token,
            "title": title,
            "content": json.dumps(content),
            "return_content": return_content,
            # Use provided author info or fall back to account info
            "author_name": author_name or self.author_name,
            "author_url": author_url or self.author_url,
        }
        return self._post_page("createPage", data)

    def get_account_info(self):
        """
        Gets information about the Telegraph account.

        Returns:
            dict: Account information including short_name, author_name, and author_url.
                 Returns None if the HTTP status is not 200 or the response
                 carries no result.
        """
        # All default fields are requested; a ``fields`` query parameter
        # (e.g. ["author_name","author_url"]) could narrow the response.
        response = requests.get(
            f"{self.base_url}/getAccountInfo",
            params={"access_token": self.access_token},
            timeout=self.REQUEST_TIMEOUT,
        )

        if response.status_code != 200:
            print(f"Fail getting telegra.ph token info: {response.status_code}")
            return None

        payload = response.json()
        if "result" not in payload:
            # A 200 response without a result (e.g. a rejected token) is a
            # failure too, rather than a KeyError.
            print(f"Fail getting telegra.ph token info: {payload.get('error')}")
            return None
        return payload["result"]

    def edit_page(
        self,
        path,
        title,
        content,
        author_name=None,
        author_url=None,
        return_content=False,
    ):
        """
        Edits an existing Telegraph page.

        Args:
            path (str): Path of the page to edit.
            title (str): New page title (1-256 characters).
            content (list): New content of the page as a list of Node dictionaries.
            author_name (str, optional): Author name (0-128 characters). Defaults to account's author_name.
            author_url (str, optional): Profile link (0-512 characters). Defaults to account's author_url.
            return_content (bool, optional): If True, return the content field in the response.

        Returns:
            str: URL of the edited page.

        Raises:
            requests.exceptions.RequestException: If the request fails or the
                response carries no result.
        """
        data = {
            "access_token": self.access_token,
            "path": path,
            "title": title,
            "content": json.dumps(content),
            "return_content": return_content,
            # Use provided author info or fall back to account info
            "author_name": author_name or self.author_name,
            "author_url": author_url or self.author_url,
        }
        return self._post_page("editPage", data)

    def get_page(self, path):
        """
        Gets information about a Telegraph page, including its content.

        Args:
            path (str): Path of the page to get.

        Returns:
            dict: Information about the page.

        Raises:
            requests.exceptions.RequestException: If the request fails or the
                response carries no result.
        """
        url = f"{self.base_url}/getPage/{path}?return_content=true"
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        return self._unwrap_result(response)

    def create_page_md(
        self,
        title,
        markdown_text,
        author_name=None,
        author_url=None,
        return_content=False,
    ):
        """
        Creates a new Telegraph page from markdown text.

        Args:
            title (str): Page title (1-256 characters).
            markdown_text (str): Markdown text, converted to nodes with md_to_dom.
            author_name (str, optional): Author name (0-128 characters). Defaults to account's author_name.
            author_url (str, optional): Profile link (0-512 characters). Defaults to account's author_url.
            return_content (bool, optional): If True, return the content field in the response.

        Returns:
            str: URL of the created page.

        Raises:
            requests.exceptions.RequestException: If the request fails or the
                response carries no result.
        """
        content = md_to_dom(markdown_text)
        return self.create_page(title, content, author_name, author_url, return_content)

    def edit_page_md(
        self,
        path,
        title,
        markdown_text,
        author_name=None,
        author_url=None,
        return_content=False,
    ):
        """
        Edits an existing Telegraph page using markdown text.

        Takes the same arguments as edit_page, except that ``markdown_text``
        is converted to nodes with md_to_dom.

        Returns:
            str: URL of the edited page.

        Raises:
            requests.exceptions.RequestException: If the request fails or the
                response carries no result.
        """
        content = md_to_dom(markdown_text)
        return self.edit_page(
            path, title, content, author_name, author_url, return_content
        )


def md_to_dom(markdown_text):
    """Convert markdown text to a list of node dictionaries.

    The markdown is rendered to HTML and the parsed HTML tree is turned into
    nested dictionaries. Headings are remapped: ``h1`` becomes ``h3``, ``h2``
    becomes ``h4``, and ``h3``-``h6`` become a ``p`` whose content is wrapped
    in ``strong``.

    Args:
        markdown_text: The markdown text to convert.

    Returns:
        A Python list representing the DOM. Each element is a dictionary with
        the following keys:
            - 'tag': The tag name of the element.
            - 'attributes': A dictionary of attributes for the element (optional).
            - 'children': A list of child elements or stripped strings (optional).
        Non-blank top-level text becomes ``{"tag": "text", "content": ...}``.
    """

    # Convert markdown to HTML
    html = markdown.markdown(
        markdown_text,
        extensions=["markdown.extensions.extra", "markdown.extensions.sane_lists"],
    )

    # Parse the HTML with BeautifulSoup
    soup = BeautifulSoup(html, "html.parser")

    def parse_children(element):
        # Text nodes are stripped of leading/trailing whitespace; nested tags
        # are converted recursively.
        return [
            child.strip() if isinstance(child, str) else parse_element(child)
            for child in element.contents
        ]

    def parse_element(element):
        node = {"tag": element.name}
        if element.attrs:
            # NOTE(review): the Telegraph node format appears to name this key
            # "attrs" - confirm before relying on attributes being honoured.
            node["attributes"] = element.attrs

        children = parse_children(element)
        if element.name == "h1":
            node["tag"] = "h3"
        elif element.name == "h2":
            node["tag"] = "h4"
        elif element.name in ("h3", "h4", "h5", "h6"):
            # Lower-level headings become a bold paragraph. The wrapper is
            # applied around the parsed children so that it is not discarded.
            node["tag"] = "p"
            children = [{"tag": "strong", "children": children}]

        if children:
            node["children"] = children
        return node

    new_dom = []
    for element in soup.contents:
        if isinstance(element, str):
            stripped = element.strip()
            if not stripped:
                # Skip empty text nodes
                continue
            # Treat remaining text nodes as separate elements for clarity
            new_dom.append({"tag": "text", "content": stripped})
        else:
            new_dom.append(parse_element(element))

    return new_dom


# Names exported by `from <this package> import *`; other helpers defined in
# this module must be imported explicitly.
__all__ = [
    "bot_reply_first",
    "bot_reply_markdown",
    "enrich_text_with_urls",
    "image_to_data_uri",
    "TelegraphAPI",
]