feat(seautils): use markdownify to convert rfc html documents to markdown

This commit is contained in:
Seaswimmer 2024-05-28 20:29:39 -04:00
parent a641cae640
commit 28246121a6
Signed by untrusted user: cswimr
GPG key ID: 5D671B5D03D65A7F
4 changed files with 34 additions and 18 deletions

17
poetry.lock generated
View file

@ -911,6 +911,21 @@ profiling = ["gprof2dot"]
rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"]
testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
[[package]]
name = "markdownify"
version = "0.12.1"
description = "Convert HTML to markdown."
optional = false
python-versions = "*"
files = [
{file = "markdownify-0.12.1-py3-none-any.whl", hash = "sha256:a3805abd8166dbb7b27783c5599d91f54f10d79894b2621404d85b333c7ce561"},
{file = "markdownify-0.12.1.tar.gz", hash = "sha256:1fb08c618b30e0ee7a31a39b998f44a18fb28ab254f55f4af06b6d35a2179e27"},
]
[package.dependencies]
beautifulsoup4 = ">=4.9,<5"
six = ">=1.15,<2"
[[package]] [[package]]
name = "markupsafe" name = "markupsafe"
version = "2.1.5" version = "2.1.5"
@ -2483,4 +2498,4 @@ multidict = ">=4.0"
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.11,<3.12" python-versions = ">=3.11,<3.12"
content-hash = "55119c37c690ab197058ad091cb31bdf7c1c51ae62947e0026f4cddb423093d3" content-hash = "229d7fd39618cf708f3cd5409dde2e6e25b822e4f936e14b3ade9800bf00daab"

View file

@ -16,6 +16,7 @@ pillow = "^10.3.0"
numpy = "^1.26.4" numpy = "^1.26.4"
colorthief = "^0.2.1" colorthief = "^0.2.1"
beautifulsoup4 = "^4.12.3" beautifulsoup4 = "^4.12.3"
markdownify = "^0.12.1"
[tool.poetry.group.dev] [tool.poetry.group.dev]
optional = true optional = true

View file

@ -9,5 +9,5 @@
"disabled": false, "disabled": false,
"min_bot_version": "3.5.0", "min_bot_version": "3.5.0",
"min_python_version": [3, 8, 0], "min_python_version": [3, 8, 0],
"requirements": ["beautifulsoup4"] "requirements": ["beautifulsoup4", "markdownify"]
} }

View file

@ -17,6 +17,7 @@ import yaml
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from discord import Color, Embed, app_commands from discord import Color, Embed, app_commands
from discord.utils import CachedSlotProperty, cached_property from discord.utils import CachedSlotProperty, cached_property
from markdownify import MarkdownConverter
from redbot.core import commands from redbot.core import commands
from redbot.core.bot import Red from redbot.core.bot import Red
from redbot.core.dev_commands import cleanup_code from redbot.core.dev_commands import cleanup_code
@ -24,6 +25,9 @@ from redbot.core.utils import chat_formatting as cf
from redbot.core.utils.views import SimpleMenu from redbot.core.utils.views import SimpleMenu
def md(soup: BeautifulSoup, **options) -> Any | str:
    """Convert an already-parsed BeautifulSoup object to Markdown.

    Every keyword argument is forwarded unchanged to ``MarkdownConverter``;
    the value returned is whatever its ``convert_soup`` method produces.
    """
    converter = MarkdownConverter(**options)
    return converter.convert_soup(soup)
class SeaUtils(commands.Cog): class SeaUtils(commands.Cog):
"""A collection of random utilities.""" """A collection of random utilities."""
@ -188,21 +192,6 @@ class SeaUtils(commands.Cog):
except (FileNotFoundError): except (FileNotFoundError):
await ctx.maybe_send_embed(message=cf.error("Neither `dig` nor `nslookup` are installed on the system. Unable to resolve DNS query.")) await ctx.maybe_send_embed(message=cf.error("Neither `dig` nor `nslookup` are installed on the system. Unable to resolve DNS query."))
async def get_results(self, ctx: commands.Context, soup: BeautifulSoup) -> list:
    """Build one page per ``<pre>`` element found in ``soup``.

    For each element, ``ctx.embed_requested()`` is awaited: when it is
    truthy the page is an ``Embed`` titled "RFC Document" whose description
    is the element's text, otherwise the page is the element's text itself.

    Returns the pages in document order.
    """
    pages = []
    for block in soup.find_all('pre'):
        # Asked again for every element, exactly as the caller-visible
        # behaviour has always been; do not hoist out of the loop.
        if not await ctx.embed_requested():
            pages.append(block.text)
            continue
        page = Embed(
            title="RFC Document",
            description=block.text,
            color=await ctx.embed_color()
        )
        pages.append(page)
    return pages
@commands.command() @commands.command()
async def rfc(self, ctx: commands.Context, number: int) -> None: async def rfc(self, ctx: commands.Context, number: int) -> None:
"""Retrieve the text of an RFC document.""" """Retrieve the text of an RFC document."""
@ -212,7 +201,18 @@ class SeaUtils(commands.Cog):
if response.status == 200: if response.status == 200:
html = await response.text() html = await response.text()
soup = BeautifulSoup(html, 'html.parser') soup = BeautifulSoup(html, 'html.parser')
content = await self.get_results(ctx, soup) pre_tags = soup.find_all('pre')
content = []
for pre_tag in pre_tags:
if await ctx.embed_requested():
embed = Embed(
title="RFC Document",
description=md(pre_tag),
color=await ctx.embed_color()
)
content.append(embed)
else:
content.append(md(pre_tag))
await SimpleMenu(pages=content, disable_after_timeout=True, timeout=300).start(ctx) await SimpleMenu(pages=content, disable_after_timeout=True, timeout=300).start(ctx)
else: else:
await ctx.maybe_send_embed(content=cf.error(f"An error occurred while fetching RFC {number}. Status code: {response.status}.")) await ctx.maybe_send_embed(content=cf.error(f"An error occurred while fetching RFC {number}. Status code: {response.status}."))