From 28246121a6ae55aa1000fc5354d8fd80b143b0aa Mon Sep 17 00:00:00 2001 From: Seaswimmer Date: Tue, 28 May 2024 20:29:39 -0400 Subject: [PATCH] feat(seautils): use markdownify to convert rfc html documents to markdown --- poetry.lock | 17 ++++++++++++++++- pyproject.toml | 1 + seautils/info.json | 2 +- seautils/seautils.py | 32 ++++++++++++++++---------------- 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/poetry.lock b/poetry.lock index d30b1f7..017d640 100644 --- a/poetry.lock +++ b/poetry.lock @@ -911,6 +911,21 @@ profiling = ["gprof2dot"] rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] +[[package]] +name = "markdownify" +version = "0.12.1" +description = "Convert HTML to markdown." +optional = false +python-versions = "*" +files = [ + {file = "markdownify-0.12.1-py3-none-any.whl", hash = "sha256:a3805abd8166dbb7b27783c5599d91f54f10d79894b2621404d85b333c7ce561"}, + {file = "markdownify-0.12.1.tar.gz", hash = "sha256:1fb08c618b30e0ee7a31a39b998f44a18fb28ab254f55f4af06b6d35a2179e27"}, +] + +[package.dependencies] +beautifulsoup4 = ">=4.9,<5" +six = ">=1.15,<2" + [[package]] name = "markupsafe" version = "2.1.5" @@ -2483,4 +2498,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "55119c37c690ab197058ad091cb31bdf7c1c51ae62947e0026f4cddb423093d3" +content-hash = "229d7fd39618cf708f3cd5409dde2e6e25b822e4f936e14b3ade9800bf00daab" diff --git a/pyproject.toml b/pyproject.toml index a2afb43..872ccdf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ pillow = "^10.3.0" numpy = "^1.26.4" colorthief = "^0.2.1" beautifulsoup4 = "^4.12.3" +markdownify = "^0.12.1" [tool.poetry.group.dev] optional = true diff --git a/seautils/info.json b/seautils/info.json index 8e93f9d..7356137 100644 --- a/seautils/info.json +++ b/seautils/info.json @@ -9,5 +9,5 @@ "disabled": false, "min_bot_version": "3.5.0", "min_python_version": [3, 8, 0], - "requirements": ["beautifulsoup4"] + "requirements": ["beautifulsoup4", "markdownify"] } diff --git a/seautils/seautils.py b/seautils/seautils.py index 389ea71..a32d02c 100644 --- a/seautils/seautils.py +++ b/seautils/seautils.py @@ -17,6 +17,7 @@ import yaml from bs4 import BeautifulSoup from discord import Color, Embed, app_commands from discord.utils import CachedSlotProperty, cached_property +from markdownify import MarkdownConverter from redbot.core import commands from redbot.core.bot import Red from redbot.core.dev_commands import cleanup_code @@ -24,6 +25,9 @@ from redbot.core.utils import chat_formatting as cf from redbot.core.utils.views import SimpleMenu +def md(soup: BeautifulSoup, **options) -> Any | str: + return MarkdownConverter(**options).convert_soup(soup) + class SeaUtils(commands.Cog): """A collection of random utilities.""" @@ -188,21 +192,6 @@ class SeaUtils(commands.Cog): except (FileNotFoundError): await ctx.maybe_send_embed(message=cf.error("Neither `dig` nor `nslookup` are installed on the system. Unable to resolve DNS query.")) - async def get_results(self, ctx: commands.Context, soup: BeautifulSoup) -> list: - pre_tags = soup.find_all('pre') - content = [] - for pre_tag in pre_tags: - if await ctx.embed_requested(): - embed = Embed( - title="RFC Document", - description=pre_tag.text, - color=await ctx.embed_color() - ) - content.append(embed) - else: - content.append(pre_tag.text) - return content - @commands.command() async def rfc(self, ctx: commands.Context, number: int) -> None: """Retrieve the text of an RFC document.""" @@ -212,7 +201,18 @@ class SeaUtils(commands.Cog): if response.status == 200: html = await response.text() soup = BeautifulSoup(html, 'html.parser') - content = await self.get_results(ctx, soup) + pre_tags = soup.find_all('pre') + content = [] + for pre_tag in pre_tags: + if await ctx.embed_requested(): + embed = Embed( + title="RFC Document", + description=md(pre_tag), + color=await ctx.embed_color() + ) + content.append(embed) + else: + content.append(md(pre_tag)) await SimpleMenu(pages=content, disable_after_timeout=True, timeout=300).start(ctx) else: await ctx.maybe_send_embed(content=cf.error(f"An error occurred while fetching RFC {number}. Status code: {response.status}."))