commit 77f1d100bfc493c03c5646248bdfa090899a704d
Author: bung
Date:   Wed Jul 3 07:28:58 2024 -0500

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1e5eba4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+rdrama.json
+.venv
+__pycache__
+.vscode
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..40315c6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,32 @@
+## Makeshift reddit aggregator using pullpush.io and Jinja2
+
+Notable problems:
+* Using just pullpush.io means that the score counter usually sits at 1.
+* No support for BlueSky
+* Code needs comments and refactoring
+  * I got lazy towards the end
+* Recursive quotation breaks if any of the comments have markdown quotes
+* It seems like it's culling some parent comments, but I haven't bothered to check.
+
+What this does well compared to the others (at time of writing):
+* Recursive quotations
+* (presumably) Rendering markdown in Jinja to make extensions easier
+* (mixed) Uses async for faster processing
+  * This would be more useful when integrated with BlueSky or if rDrama didn't have a rate limit of 1/s for submitting comments.
+  * It makes it harder for people not familiar with async to use/reuse code
+
+## Installing
+```
+python3 -m venv .venv
+source .venv/bin/activate # (or .venv/bin/activate.fish if using fish)
+python3 -m pip install -r requirements.txt
+```
+
+## Running
+```
+source .venv/bin/activate # (or .venv/bin/activate.fish if using fish)
+python3 src/main.py
+```
+
+## Example
+https://rdrama.net/post/280889/testing-yet-another-bardfinn-digest-20240702
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c043733
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+## Async HTTP
+aiohttp
+## Template rendering
+Jinja2
+## BlueSky
+# atproto
+## Reddit
+# asyncpraw
\ No newline at end of file
diff --git a/src/digest.py b/src/digest.py
new file mode 100644
index 0000000..47ad949
--- /dev/null
+++ b/src/digest.py
@@ -0,0 +1,131 @@
+from jinja2 import Environment, FileSystemLoader
+from typing import Dict, List, Tuple
+import pullpush
+import asyncio
+from datetime import datetime, UTC
+from rdrama import RDrama
+import json
+import logging
+
+# Templates are looked up relative to the working directory, so the script
+# has to be started from the repository root.
+jinja_env = Environment(loader=FileSystemLoader("src/templates"), enable_async=True)
+# rdrama.json must provide the "client_id" and "token" keys.
+with open('rdrama.json') as fi:
+    rdrama_params = json.load(fi)
+    rdrama = RDrama(rdrama_params['client_id'], rdrama_params['token'], base_url="https://rdrama.net")
+
+
+class DigestThread:
+    """Digest of one Reddit user's posts and comments within a time window.
+
+    The data is fetched lazily through the pullpush module, rendered with the
+    Jinja templates in src/templates, and can be published as an rDrama thread.
+    """
+
+    time_format = '%Y-%m-%d %H:%M %Z'
+
+    def __init__(self, user: str, since: int, until: int, depth: int):
+        """Store the query; nothing is fetched until a render method runs.
+
+        `since` and `until` are epoch timestamps. `depth` is passed to
+        pullpush.get_comments_from_user_recursive and must be positive.
+        """
+        if depth <= 0:
+            # A real exception instead of `assert`, which `python -O` strips.
+            raise ValueError(f"depth must be positive, got {depth}")
+        self.user = user
+        self.since = since
+        self.until = until
+        self.depth = depth
+        self.comments = None
+        self.submissions = None
+        self.parent_submissions = None
+
+    async def render_thread(self) -> str:
+        """Render the digest header that becomes the rDrama submission body."""
+        await self._fetch_if_not_present()
+
+        metadata = {
+            'since': format(datetime.fromtimestamp(self.since, UTC), DigestThread.time_format),
+            'until': format(datetime.fromtimestamp(self.until, UTC), DigestThread.time_format),
+        }
+        return await jinja_env.get_template("digest_header.jinja").render_async(
+            {"comments": self.comments, "submissions": self.submissions, "metadata": metadata}
+        )
+
+    async def render_comments(self) -> List[str]:
+        """Render every fetched comment, one string per comment."""
+        await self._fetch_if_not_present()
+        template = jinja_env.get_template("digest_comment.jinja")
+
+        async def render(comment):
+            parent_submission = self.parent_submissions[pullpush.get_id_from_permalink(comment['permalink'])]
+            return await template.render_async(comment=comment, parent_submission=parent_submission)
+
+        return list(
+            await asyncio.gather(
+                *(render(comment) for comment in self.comments)
+            )
+        )
+
+    async def render_submissions(self) -> List[str]:
+        """Render every fetched submission, one string per submission."""
+        await self._fetch_if_not_present()
+        template = jinja_env.get_template("digest_submission.jinja")
+        return list(
+            await asyncio.gather(
+                *(
+                    template.render_async(submission=submission)
+                    for submission in self.submissions
+                )
+            )
+        )
+
+    async def _fetch_if_not_present(self) -> None:
+        """Populate comments, submissions and parent_submissions if still unset."""
+        # TODO: fix spaghetti with _fetch_comments being removed
+        await asyncio.gather(self._fetch_submissions(), self._fetch_comment_parent_submissions())
+
+    async def _fetch_comments(self) -> List[dict]:
+        """Fetch the user's comments (with parent chains) once and cache them."""
+        if self.comments is None:
+            self.comments = await pullpush.get_comments_from_user_recursive(
+                self.user, self.since, self.until, self.depth
+            )
+        return self.comments
+
+    async def _fetch_submissions(self) -> List[dict]:
+        """Fetch the user's submissions once and cache them."""
+        if self.submissions is None:
+            self.submissions = await pullpush.get_submissions_from_user(
+                self.user, self.since, self.until
+            )
+        return self.submissions
+
+    async def _fetch_comment_parent_submissions(self) -> Dict[str, dict]:
+        """Fetch the submissions the user's comments were posted under, keyed by ID."""
+        await self._fetch_comments()
+        if self.parent_submissions is None:
+            self.parent_submissions = await pullpush.get_submissions_by_ids(
+                pullpush.get_id_from_permalink(comment['permalink']) for comment in self.comments
+            )
+        return self.parent_submissions
+
+    async def publish(self, title: str, hole: str):
+        """Create the rDrama submission, then add each rendered item as a comment.
+
+        Raises RuntimeError if rDrama does not return an ID for the submission.
+        """
+        submission_body = await self.render_thread()
+        logging.info('Creating submission...')
+        submission_id = await rdrama.make_submission(title, None, submission_body, hole)
+        if submission_id is None:
+            # Without an ID the comments below would target the bogus parent "p_None".
+            raise RuntimeError('rDrama did not return an ID for the new submission')
+        submissions, comments = await asyncio.gather(self.render_submissions(), self.render_comments())
+        logging.info('Adding comments...')
+        await asyncio.gather(
+            *(rdrama.make_comment(submission_id, body) for body in submissions + comments)
+        )
+        logging.info('Publishing done!')
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..ebc343e
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,35 @@
+import logging
+from typing import Union
+from digest import DigestThread
+import asyncio
+import time
+from datetime import datetime, UTC
+
+
+logging.basicConfig(level=logging.INFO)
+
+
+async def main(user: str, since: int, until: int, depth: int, hole: Union[str, None]):
+    """Print the rendered digest and publish it to rDrama if the operator answers "Y"."""
+    thread = DigestThread(user, since, until, depth)
+
+    print(await thread.render_thread())
+    for submission in await thread.render_submissions():
+        print(submission)
+    for comment in await thread.render_comments():
+        print(comment)
+
+    if input("Submit?") == "Y":
+        since_text = format(datetime.fromtimestamp(since, UTC), DigestThread.time_format)
+        until_text = format(datetime.fromtimestamp(until, UTC), DigestThread.time_format)
+        submission_title = (
+            f"Testing Yet Another Bardfinn Digest: {since_text} through {until_text}"
+        )
+        await thread.publish(submission_title, hole)
+
+
+if __name__ == "__main__":
+    # Read the clock once so the 24-hour window has consistent bounds.
+    now = time.time()
+    # asyncio.run() closes the event loop on exit; the bare new_event_loop() did not.
+    asyncio.run(main("bardfinn", now - (60 * 60 * 24), now, 4, None))
diff --git a/src/pullpush.py b/src/pullpush.py
new file mode 100644
index 0000000..32cc364
--- /dev/null
+++ b/src/pullpush.py
@@ -0,0 +1,121 @@
+from typing import Iterable, List, Dict, Generator
+import asyncio
+import logging
+import aiohttp
+import re
+
+# At most three pullpush.io requests are in flight at any time.
+pullpush_semaphore = asyncio.Semaphore(3)
+
+
+async def get_comments_from_user(user: str, since: int, until: int) -> List[dict]:
+    """Search pullpush for comments by `user`; `since`/`until` are truncated to ints."""
+    since = int(since)
+    until = int(until)
+    return await pullpush_fetch(
+        "comment", {"author": user, "since": since, "until": until}
+    )
+
+
+async def get_submissions_from_user(user: str, since: int, until: int) -> List[dict]:
+    """Search pullpush for submissions by `user`; `since`/`until` are truncated to ints."""
+    since = int(since)
+    until = int(until)
+    return await pullpush_fetch(
+        "submission", {"author": user, "since": since, "until": until}
+    )
+
+
+async def get_comments_by_ids(ids: Iterable[str]) -> Dict[str, dict]:
+    """Fetch comments by ID and return them keyed by their "id" field.
+
+    IDs starting with "t3" are skipped (presumably submission fullnames).
+    No request is made when no IDs are left after that filter.
+    """
+    wanted = {comment_id for comment_id in ids if not comment_id.startswith("t3")}
+    if not wanted:
+        return {}
+    response = await pullpush_fetch("comment", {"ids": ",".join(sorted(wanted))})
+    return {comment["id"]: comment for comment in response}
+
+
+async def get_submissions_by_ids(ids: Iterable[str]) -> Dict[str, dict]:
+    """Fetch submissions by ID and return them keyed by their "id" field.
+
+    `ids` may be a one-shot iterable; it is de-duplicated before being sent,
+    and no request is made when it is empty.
+    """
+    wanted = set(ids)
+    if not wanted:
+        return {}
+    response = await pullpush_fetch("submission", {"ids": ",".join(sorted(wanted))})
+    return {submission["id"]: submission for submission in response}
+
+
+permalink_id_re = re.compile(r"/r/\w+/comments/(\w+?)/")
+
+
+def get_id_from_permalink(permalink: str):
+    """Return the submission ID embedded in a /r/<sub>/comments/<id>/ permalink."""
+    return permalink_id_re.search(permalink).group(1)
+
+
+def _strip_comment_prefix(fullname: str) -> str:
+    """Turn a "t1_"-prefixed comment fullname into the bare ID used as dict key."""
+    return fullname.removeprefix("t1_")
+
+
+async def get_comments_by_ids_recursive(
+    ids: Iterable[str], depth: int
+) -> Dict[str, dict]:
+    """Fetch comments by ID and attach up to `depth` levels of ancestors.
+
+    Each comment whose parent could be fetched gets a "parent" key holding the
+    parent comment dict. Returns an empty dict once `depth` is exhausted.
+    """
+    if depth <= 0:
+        return {}
+    comments_map = await get_comments_by_ids(ids)
+    parent_ids = {comment["parent_id"] for comment in comments_map.values()}
+    parent_comments = await get_comments_by_ids_recursive(parent_ids, depth - 1)
+    for comment in comments_map.values():
+        # parent_id carries a "t1_" prefix that the keys of parent_comments lack.
+        parent = parent_comments.get(_strip_comment_prefix(comment["parent_id"]))
+        if parent is not None:
+            comment["parent"] = parent
+    return comments_map
+
+
+async def get_comments_from_user_recursive(
+    user: str, since: int, until: int, depth: int
+) -> List[dict]:
+    """Fetch the user's comments and attach up to `depth - 1` levels of ancestors."""
+    comments = await get_comments_from_user(user, since, until)
+    parent_comments = await get_comments_by_ids_recursive(
+        (comment["parent_id"] for comment in comments), depth - 1
+    )
+    for comment in comments:
+        parent = parent_comments.get(_strip_comment_prefix(comment["parent_id"]))
+        if parent is not None:
+            comment["parent"] = parent
+    return comments
+
+
+async def pullpush_fetch(type: str, params: dict) -> List[dict]:
+    """GET /reddit/<type>/search on api.pullpush.io and return the "data" payload.
+
+    "html_decode" defaults to "True" unless the caller sets it. Raises
+    RuntimeError on any status other than 200.
+    """
+    # Merge into a new dict so the caller's `params` is not modified.
+    query = {"html_decode": "True", **params}
+
+    # Fetch from pullpush
+    async with pullpush_semaphore:
+        async with aiohttp.ClientSession("https://api.pullpush.io/") as session:
+            async with session.get(f"/reddit/{type}/search", params=query) as response:
+                logging.info(f"Retrieved from URL {response.url}")
+                if response.status != 200:
+                    # Explicit raise: a bare `assert` disappears under `python -O`.
+                    raise RuntimeError(f"pullpush returned HTTP {response.status}")
+                return (await response.json())["data"]
diff --git a/src/rdrama.py b/src/rdrama.py
new file mode 100644
index 0000000..304788d
--- /dev/null
+++ b/src/rdrama.py
@@ -0,0 +1,72 @@
+import asyncio
+import logging
+import socket
+from typing import List, Union
+import aiohttp
+from jinja2 import FileSystemLoader, Environment
+
+
+class RDrama:
+    """Small async client that posts submissions and comments to an rDrama site."""
+
+    # Shared by all instances: only one request is in flight at a time.
+    semaphore = asyncio.Semaphore(1)
+    # Seconds slept after each successful request while the semaphore is still held.
+    wait_time = 2
+
+    def __init__(self, client_id: str, token=None, base_url="https://rdrama.net/"):
+        """Store the credentials; `token` is sent verbatim as the Authorization header."""
+        self.client_id = client_id
+        self.base_url = base_url
+        self.token = token
+
+    # async def log_in(self): # TODO: awful, redo in asyncio
+    #     logging.info(f'Logging in...')
+    #     self.logged_in = True
+
+    async def _post(self, path: str, fields: dict, label: str) -> Union[str, None]:
+        """POST the non-None `fields` as form data to `path` and return the reply's "id".
+
+        Raises RuntimeError when no token is configured or the status is not 200.
+        """
+        if self.token is None:
+            # log_in() is only a commented-out stub, so calling it raised AttributeError.
+            raise RuntimeError("RDrama needs a token; logging in is not implemented")
+        form_data = aiohttp.FormData()
+        for k, v in fields.items():
+            if v is not None:
+                form_data.add_field(k, v)
+        request_headers = {"Authorization": self.token}
+        async with RDrama.semaphore:
+            async with aiohttp.ClientSession(self.base_url) as session:
+                async with session.post(
+                    path, data=form_data, headers=request_headers
+                ) as response:
+                    ret: dict = await response.json()
+                    logging.info(f"{label} {form_data} response: {ret}")
+                    if response.status != 200:
+                        # Explicit raise: a bare `assert` disappears under `python -O`.
+                        raise RuntimeError(f"rDrama returned HTTP {response.status} for {path}")
+                    await asyncio.sleep(RDrama.wait_time)
+                    return ret.get("id", None)
+
+    async def make_submission(
+        self,
+        title: str,
+        url: Union[str, None],
+        body: Union[str, None],
+        hole: Union[str, None],
+    ) -> Union[str, None]:
+        """Create a submission and return its ID (None if the reply has no "id")."""
+        logging.info("Creating new rDrama submission...")
+        form_data_dict = {"url": url, "title": title, "body": body, "hole": hole}
+        return await self._post("/submit", form_data_dict, "Submission")
+
+    async def make_comment(self, parent_id: str, body: str) -> Union[str, None]:
+        """Comment on `parent_id` (prefixed with "p_" if needed) and return the comment ID."""
+        parent_id = str(parent_id)
+        if not parent_id.startswith("p_"):
+            parent_id = "p_" + parent_id
+        logging.info("Creating new rDrama comment...")
+        form_data_dict = {"body": body, "parent_fullname": parent_id}
+        return await self._post("/comment", form_data_dict, "Comment")
diff --git a/src/templates/digest_comment.jinja b/src/templates/digest_comment.jinja
new file mode 100644
index 0000000..4dfad57
--- /dev/null
+++ b/src/templates/digest_comment.jinja
@@ -0,0 +1,13 @@
+{% macro comment_chain(comment_descent) -%}
+{% if 'parent' in comment_descent -%}
+{% for line in comment_chain(comment_descent.parent).split('\n\n') -%}
+>{{line|replace('\n',' ')|trim}}
+{% endfor -%}
+{% endif -%}
+{% for line in comment_descent.body.split('\n\n') -%}
+> {{line|replace('\n',' ')|trim}}
+{% endfor -%}
+{% endmacro -%}
+Comment in {{ comment.subreddit_name_prefixed }}:
+Main thread: [{{parent_submission.title|trim}}](https://reddit.com{{parent_submission.permalink}})
+{{ comment_chain(comment)}}
\ No newline at end of file
diff --git a/src/templates/digest_header.jinja b/src/templates/digest_header.jinja
new file mode 100644
index 0000000..484d337
--- /dev/null
+++ b/src/templates/digest_header.jinja
@@ -0,0 +1,2 @@
+## Bardfinn digest from {{ metadata.since }} until {{ metadata.until }}:
+{{ comments|length }} comment{% if comments|length != 1 %}s{% endif %} and {{ submissions|length }} post{% if submissions|length != 1 %}s{% endif %}
\ No newline at end of file
diff --git a/src/templates/digest_submission.jinja b/src/templates/digest_submission.jinja
new file mode 100644
index 0000000..c6c7c59
--- /dev/null
+++ b/src/templates/digest_submission.jinja
@@ -0,0 +1,5 @@
+Submission in {{ submission.subreddit_name_prefixed }}
+[{{ submission.title}}](https://reddit.com{{submission.permalink}})
+{% for line in submission.selftext.split('\n\n') -%}
+> {{line|replace('\n',' ')|trim}}
+{% endfor %}
\ No newline at end of file