Initial commit

master
bung 2024-07-03 07:28:58 -05:00
commit 77f1d100bf
10 changed files with 351 additions and 0 deletions

4
.gitignore vendored 100644
View File

@ -0,0 +1,4 @@
rdrama.json
.venv
__pycache__
.vscode

32
README.md 100644
View File

@ -0,0 +1,32 @@
## Makeshift Reddit aggregator using pullpush.io and Jinja2
Notable problems:
* Using just pullpush.io means that the score counter usually sits at 1.
* No support for BlueSky
* Code needs comments and refactoring
* I got lazy towards the end
* Recursive quotation breaks if any of the comments contain Markdown block quotes
* Some parent comments appear to be culled; the cause has not been investigated yet.

What this does well compared to the others (at time of writing):
* Recursive quotations
* (presumably) Rendering markdown in Jinja to make extensions easier
* (mixed) Uses async for faster processing
  * This would be more useful when integrated with BlueSky or if rDrama didn't have a rate limit of 1/s for submitting comments.
  * It makes the code harder to use/reuse for people who are not familiar with async
## Installing
```
python3 -m venv .venv
source .venv/bin/activate # (or .venv/bin/activate.fish if using fish)
python3 -m pip install -r requirements.txt
```
## Running
```
source .venv/bin/activate # (or .venv/bin/activate.fish if using fish)
python3 src/main.py
```
## Example
https://rdrama.net/post/280889/testing-yet-another-bardfinn-digest-20240702

8
requirements.txt 100644
View File

@ -0,0 +1,8 @@
## Async HTTP
aiohttp
## Template rendering
Jinja2
## BlueSky
# atproto
## Reddit
# asyncpraw

99
src/digest.py 100644
View File

@ -0,0 +1,99 @@
from jinja2 import Environment, FileSystemLoader
from typing import List, Tuple
import pullpush
import asyncio
from datetime import datetime, UTC
from rdrama import RDrama
import json
import logging
# Async-enabled Jinja environment. The template directory is given relative to
# the working directory, so the program has to be started from the repo root.
jinja_env = Environment(loader=FileSystemLoader("src/templates"), enable_async=True)

# rDrama credentials are read from a local JSON file that must provide the
# "client_id" and "token" keys. The encoding is pinned to UTF-8 so parsing does
# not depend on the platform's locale default.
with open("rdrama.json", encoding="utf-8") as fi:
    rdrama_params = json.load(fi)

# Module-wide rDrama client, used by DigestThread.publish().
rdrama = RDrama(
    rdrama_params["client_id"],
    rdrama_params["token"],
    base_url="https://rdrama.net",
)
class DigestThread:
time_format = '%Y-%m-%d %H:%M %Z'
def __init__(self, user: str, since: int, until: int, depth: int):
self.user = user
self.since = since
self.until = until
self.depth = depth
self.comments = None
self.submissions = None
self.parent_submissions = None
assert depth > 0
async def render_thread(self) -> str:
await self._fetch_if_not_present()
metadata = {
'since': format(datetime.fromtimestamp(self.since, UTC), DigestThread.time_format),
'until': format(datetime.fromtimestamp(self.until, UTC), DigestThread.time_format),
}
return await jinja_env.get_template("digest_header.jinja").render_async(
{"comments": self.comments, "submissions": self.submissions, "metadata": metadata}
)
async def render_comments(self) -> List[str]:
await self._fetch_if_not_present()
template = jinja_env.get_template("digest_comment.jinja")
async def render(comment):
parent_submission = self.parent_submissions[pullpush.get_id_from_permalink(comment['permalink'])]
return await template.render_async(comment=comment, parent_submission=parent_submission)
return list(
await asyncio.gather(
*(render(comment) for comment in self.comments)
)
)
async def render_submissions(self) -> List[str]:
await self._fetch_if_not_present()
template = jinja_env.get_template("digest_submission.jinja")
return list(
await asyncio.gather(
*(
template.render_async(submission=submission)
for submission in self.submissions
)
)
)
async def _fetch_if_not_present(self) -> Tuple[List[dict], List[dict]]:
# TODO: fix spaghetti with _fetch_comments being removed
return await asyncio.gather(self._fetch_submissions(), self._fetch_comment_parent_submissions())
async def _fetch_comments(self) -> List[dict]:
if self.comments is None:
self.comments = await pullpush.get_comments_from_user_recursive(
self.user, self.since, self.until, self.depth
)
return self.comments
async def _fetch_submissions(self) -> List[dict]:
if self.submissions is None:
self.submissions = await pullpush.get_submissions_from_user(
self.user, self.since, self.until
)
return self.submissions
async def _fetch_comment_parent_submissions(self) -> List[dict]:
await self._fetch_comments()
if self.parent_submissions is None:
self.parent_submissions = await pullpush.get_submissions_by_ids(
(pullpush.get_id_from_permalink(comment['permalink']) for comment in self.comments))
return self.parent_submissions
async def publish(self, title: str, hole: str):
submission_body = await self.render_thread()
logging.info(f'Creating submission...')
submission_id = await rdrama.make_submission(title, None, submission_body, hole)
submissions, comments = await asyncio.gather(self.render_submissions(), self.render_comments())
submission_tasks = [asyncio.create_task(rdrama.make_comment(submission_id, submission)) for submission in submissions]
comment_tasks = [asyncio.create_task(rdrama.make_comment(submission_id, comment)) for comment in comments]
logging.info(f'Adding comments...')
await asyncio.gather(*(submission_tasks + comment_tasks))
logging.info(f'Publishing done!')

33
src/main.py 100644
View File

@ -0,0 +1,33 @@
import logging
from typing import Union
from digest import DigestThread
import asyncio
import time
from datetime import datetime, UTC
# Enable INFO-level log records for the whole program.
logging.basicConfig(level=logging.INFO)
async def main(user: str, since: int, until: int, depth: int, hole: Union[str, None]):
    """Preview a digest on stdout and publish it once the operator confirms.

    Args:
        user: Reddit username to build the digest for.
        since: Start of the time window as a Unix timestamp.
        until: End of the time window as a Unix timestamp.
        depth: Passed through to ``DigestThread``.
        hole: Passed through to ``DigestThread.publish``.
    """
    digest = DigestThread(user, since, until, depth)

    # Preview: header first, then each rendered submission and comment.
    print(await digest.render_thread())
    for rendered in await digest.render_submissions():
        print(rendered)
    for rendered in await digest.render_comments():
        print(rendered)

    # Only an exact "Y" confirms publishing; any other answer ends the run.
    if input("Submit?") != "Y":
        return

    window_start = format(datetime.fromtimestamp(since, UTC), DigestThread.time_format)
    window_end = format(datetime.fromtimestamp(until, UTC), DigestThread.time_format)
    submission_title = (
        f"Testing Yet Another Bardfinn Digest: {window_start} through {window_end}"
    )
    await digest.publish(submission_title, hole)
if __name__ == "__main__":
    # Sample the clock once so both ends of the 24-hour window come from the
    # same instant.
    now = time.time()
    # asyncio.run() creates the event loop and also closes it afterwards.
    asyncio.run(main("bardfinn", now - (60 * 60 * 24), now, 4, None))

84
src/pullpush.py 100644
View File

@ -0,0 +1,84 @@
from typing import Iterable, List, Dict, Generator
import asyncio
import logging
import aiohttp
import re
# Held around every request in pullpush_fetch(), so at most three run at once.
pullpush_semaphore = asyncio.Semaphore(3)
async def get_comments_from_user(user: str, since: int, until: int) -> List[dict]:
    """Fetch the comments authored by ``user`` between ``since`` and ``until``.

    Both bounds are passed through ``int()`` before being sent as the
    ``since`` / ``until`` query parameters.
    """
    query = {"author": user, "since": int(since), "until": int(until)}
    return await pullpush_fetch("comment", query)
async def get_submissions_from_user(user: str, since: int, until: int) -> List[dict]:
    """Fetch the submissions authored by ``user`` between ``since`` and ``until``.

    Both bounds are passed through ``int()`` before being sent as the
    ``since`` / ``until`` query parameters.
    """
    query = {"author": user, "since": int(since), "until": int(until)}
    return await pullpush_fetch("submission", query)
async def get_comments_by_ids(ids: Iterable[str]) -> Dict[str, dict]:
    """Look up comments by ID and return them keyed by their ``id`` field.

    IDs starting with ``t3`` (the prefix Reddit uses for submissions, i.e. the
    parent of a top-level comment) are skipped because they do not refer to
    comments. Duplicate IDs are requested only once.

    Args:
        ids: Comment IDs to look up; any iterable of strings.

    Returns:
        A mapping from comment ID to the comment dict; empty when there is
        nothing to look up.
    """
    wanted = {comment_id for comment_id in ids if not comment_id.startswith("t3")}
    # An empty "ids" parameter would not restrict the search at all, so avoid
    # the request entirely when there is nothing to ask for.
    if not wanted:
        return {}
    # Sorted only to make the request URL deterministic.
    response = await pullpush_fetch("comment", {"ids": ",".join(sorted(wanted))})
    return {comment["id"]: comment for comment in response}
async def get_submissions_by_ids(ids: Iterable[str]) -> Dict[str, dict]:
    """Look up submissions by ID and return them keyed by their ``id`` field.

    Args:
        ids: Submission IDs to look up; any iterable of strings, including a
            one-shot generator. Duplicates are requested only once.

    Returns:
        A mapping from submission ID to the submission dict; empty when there
        is nothing to look up.
    """
    # Deduplicate *before* building the query: several comments usually share
    # one submission, and a generator can only be consumed once.
    unique_ids = set(ids)
    # An empty "ids" parameter would not restrict the search at all.
    if not unique_ids:
        return {}
    # Sorted only to make the request URL deterministic.
    response = await pullpush_fetch("submission", {"ids": ",".join(sorted(unique_ids))})
    return {submission["id"]: submission for submission in response}
# Captures the ID segment of a ".../r/<subreddit>/comments/<id>/..." permalink.
permalink_id_re = re.compile(r"/r/\w+/comments/(\w+?)/")


def get_id_from_permalink(permalink: str):
    """Return the submission ID embedded in a Reddit permalink.

    The permalink must contain ``/r/<subreddit>/comments/<id>/``; the ``<id>``
    part is returned.
    """
    found = permalink_id_re.search(permalink)
    return found.group(1)
async def get_comments_by_ids_recursive(
    ids: Iterable[str], depth: int
) -> "Dict[str, dict] | None":
    """Fetch comments by ID together with up to ``depth - 1`` levels of parents.

    Every fetched comment whose parent comment could be retrieved gets that
    parent attached under the ``"parent"`` key.

    Args:
        ids: IDs of the comments to fetch.
        depth: Number of comment levels still allowed, including this one.

    Returns:
        A mapping from comment ID to comment dict, or ``None`` when ``depth``
        is zero or negative (nothing is fetched in that case).
    """
    if depth <= 0:
        return None
    comments_map = await get_comments_by_ids(ids)
    # Parents that are submissions ("t3" prefix) are not comments; leave them
    # out so a level with nothing to fetch ends the recursion early.
    parent_ids = {
        comment["parent_id"]
        for comment in comments_map.values()
        if not comment["parent_id"].startswith("t3")
    }
    if not parent_ids:
        return comments_map
    parent_comments = await get_comments_by_ids_recursive(parent_ids, depth - 1)
    if parent_comments:
        for comment in comments_map.values():
            # parent_id carries the "t1_" prefix while the map is keyed by
            # the bare comment id, so trim it before the lookup.
            parent_key = comment["parent_id"].replace("t1_", "")
            if parent_key in parent_comments:
                comment["parent"] = parent_comments[parent_key]
    return comments_map
async def get_comments_from_user_recursive(
    user: str, since: int, until: int, depth: int
) -> List[dict]:
    """Fetch a user's comments and attach each one's chain of parent comments.

    Args:
        user: Reddit username whose comments are fetched.
        since: Start of the time window as a Unix timestamp.
        until: End of the time window as a Unix timestamp.
        depth: Number of comment levels to fetch, counting the user's own
            comment; ``depth - 1`` levels of parents are requested.

    Returns:
        The user's comments. A comment whose parent comment was retrieved has
        it attached under the ``"parent"`` key.
    """
    comments = await get_comments_from_user(user, since, until)
    # The helper returns None once the depth budget is used up (depth == 1
    # here). Treat that as "no parents" rather than testing membership on None.
    parent_comments = (
        await get_comments_by_ids_recursive(
            (comment["parent_id"] for comment in comments), depth - 1
        )
        or {}
    )
    for comment in comments:
        # parent_id carries the "t1_" prefix; the map is keyed by bare ids.
        parent_id_trimmed = comment["parent_id"].replace("t1_", "")
        if parent_id_trimmed in parent_comments:
            comment["parent"] = parent_comments[parent_id_trimmed]
    return comments
async def pullpush_fetch(type: str, params: dict) -> List[dict]:
    """Query the pullpush.io search endpoint and return its ``data`` list.

    Args:
        type: Endpoint kind placed in the URL path; the callers in this module
            use ``"comment"`` and ``"submission"``.
        params: Query parameters. ``html_decode=True`` is added unless the
            caller already set it; the caller's dict is left untouched.

    Returns:
        The value of the ``"data"`` key of the JSON response.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP error
            status (400 or above).
    """
    # Copy first so the default below does not leak into the caller's dict.
    query = dict(params)
    query.setdefault("html_decode", "True")
    # Fetch from pullpush
    async with pullpush_semaphore:
        async with aiohttp.ClientSession("https://api.pullpush.io/") as session:
            async with session.get(f"/reddit/{type}/search", params=query) as response:
                logging.info(f"Retrieved from URL {response.url}")
                # Explicit check instead of ``assert``, which -O would strip.
                response.raise_for_status()
                return (await response.json())["data"]

71
src/rdrama.py 100644
View File

@ -0,0 +1,71 @@
import asyncio
import logging
import socket
from typing import List, Union
import aiohttp
from jinja2 import FileSystemLoader, Environment
class RDrama:
    """Minimal async client for posting submissions and comments to rDrama.

    Requests are serialized through a semaphore shared by all instances, and
    each request is followed by a pause of ``wait_time`` seconds while the
    semaphore is still held.
    """

    # Shared by every instance: at most one rDrama request runs at a time.
    semaphore = asyncio.Semaphore(1)
    # Seconds to pause after each request before the next one may start.
    wait_time = 2

    def __init__(self, client_id: str, token=None, base_url="https://rdrama.net/"):
        """Store the credentials and the site base URL; no request is made."""
        self.client_id = client_id
        self.base_url = base_url
        self.token = token

    # TODO: implement an async log_in() that obtains a token. Until then a
    # token has to be passed to the constructor.

    def _require_token(self) -> None:
        """Fail early with a clear message when no API token is configured."""
        if self.token is None:
            raise RuntimeError(
                "rDrama token is missing: logging in is not implemented, "
                "so a token must be passed to RDrama()"
            )

    async def _post(self, path: str, fields: dict, log_label: str) -> Union[str, None]:
        """POST ``fields`` as form data to ``path`` and return the new item's id.

        Fields whose value is ``None`` are left out of the form.
        """
        form_data = aiohttp.FormData()
        for k, v in fields.items():
            if v is not None:
                form_data.add_field(k, v)
        request_headers = {"Authorization": self.token}
        async with RDrama.semaphore:
            async with aiohttp.ClientSession(self.base_url) as session:
                async with session.post(
                    path, data=form_data, headers=request_headers
                ) as response:
                    ret: dict = await response.json()
                    logging.info(f"{log_label} {form_data} response: {ret}")
                    # Explicit check instead of ``assert``, which -O would strip.
                    response.raise_for_status()
            # Pause while still holding the semaphore so the next request
            # cannot start immediately.
            await asyncio.sleep(RDrama.wait_time)
        return ret.get("id")

    async def make_submission(
        self,
        title: str,
        url: Union[str, None],
        body: Union[str, None],
        hole: Union[str, None],
    ) -> Union[str, None]:
        """Create a submission via ``/submit``.

        Args:
            title: Submission title.
            url: Optional link; omitted from the form when ``None``.
            body: Optional text body; omitted from the form when ``None``.
            hole: Optional ``hole`` form field; omitted when ``None``.

        Returns:
            The ``id`` field of the JSON response, or ``None`` if absent.

        Raises:
            RuntimeError: If no token is configured.
            aiohttp.ClientResponseError: On an HTTP error status.
        """
        self._require_token()
        logging.info("Creating new rDrama submission...")
        form_fields = {"url": url, "title": title, "body": body, "hole": hole}
        return await self._post("/submit", form_fields, "Submission")

    async def make_comment(self, parent_id: str, body: str) -> Union[str, None]:
        """Create a comment via ``/comment``.

        Args:
            parent_id: ID of the parent; ``"p_"`` is prepended when missing.
            body: Comment text.

        Returns:
            The ``id`` field of the JSON response, or ``None`` if absent.

        Raises:
            RuntimeError: If no token is configured.
            aiohttp.ClientResponseError: On an HTTP error status.
        """
        parent_id = str(parent_id)
        self._require_token()
        logging.info("Creating new rDrama comment...")
        if not parent_id.startswith("p_"):
            parent_id = "p_" + parent_id
        form_fields = {"body": body, "parent_fullname": parent_id}
        return await self._post("/comment", form_fields, "Comment")

View File

@ -0,0 +1,13 @@
{#- Renders one comment as a Markdown quote preceded by its chain of parent
    comments. comment_chain recurses through the "parent" key, and every pass
    through an ancestor level prefixes that ancestor's lines with another ">". -#}
{% macro comment_chain(comment_descent) -%}
{% if 'parent' in comment_descent -%}
{% for line in comment_chain(comment_descent.parent).split('\n\n') -%}
>{{line|replace('\n',' ')|trim}}
{% endfor -%}
{% endif -%}
{% for line in comment_descent.body.split('\n\n') -%}
> {{line|replace('\n',' ')|trim}}
{% endfor -%}
{% endmacro -%}
Comment in {{ comment.subreddit_name_prefixed }}:
Main thread: [{{parent_submission.title|trim}}](https://reddit.com{{parent_submission.permalink}})
{{ comment_chain(comment)}}

View File

@ -0,0 +1,2 @@
{#- Digest header: the time window plus comment/post counts. The plural "s" is
    added for every count other than exactly one, and the post count is taken
    from "submissions", the variable the renderer actually passes in. -#}
## Bardfinn digest from {{ metadata.since }} until {{ metadata.until }}:
{{ comments|length }} comment{% if comments|length != 1 %}s{% endif %} and {{ submissions|length }} post{% if submissions|length != 1 %}s{% endif %}

View File

@ -0,0 +1,5 @@
{#- Renders one submission: subreddit, linked title, then the selftext as a
    Markdown quote with one quoted line per blank-line-separated paragraph. -#}
Submission in {{ submission.subreddit_name_prefixed }}
[{{ submission.title}}](https://reddit.com{{submission.permalink}})
{% for line in submission.selftext.split('\n\n') -%}
> {{line|replace('\n',' ')|trim}}
{% endfor %}