2022-11-13 11:00:05 +00:00
|
|
|
import time
|
|
|
|
from typing import Iterable
|
2022-06-07 14:42:24 +00:00
|
|
|
import itertools
|
2022-11-15 09:19:08 +00:00
|
|
|
|
2022-06-07 14:42:24 +00:00
|
|
|
import requests
|
2022-11-15 09:19:08 +00:00
|
|
|
from flask_caching import Cache
|
|
|
|
from flask import g
|
2022-06-10 15:13:32 +00:00
|
|
|
from sqlalchemy import or_
|
2022-11-15 09:19:08 +00:00
|
|
|
|
2022-12-11 23:44:34 +00:00
|
|
|
import files.helpers.config.const as const
|
2022-10-14 17:15:29 +00:00
|
|
|
from files.classes.badges import Badge
|
2022-11-15 09:19:08 +00:00
|
|
|
from files.classes.comment import Comment
|
|
|
|
from files.classes.user import User
|
2023-02-07 03:31:49 +00:00
|
|
|
from files.helpers.sanitize import *
|
2023-02-24 17:58:09 +00:00
|
|
|
from files.helpers.alerts import push_notif
|
2022-12-04 19:48:28 +00:00
|
|
|
from files.classes.notifications import Notification
|
2022-06-07 14:42:24 +00:00
|
|
|
|
2022-10-07 08:50:20 +00:00
|
|
|
# Note: while https://api.pushshift.io/meta provides the key
|
|
|
|
# server_ratelimit_per_minute, in practice Cloudflare puts stricter,
|
2023-01-01 11:36:20 +00:00
|
|
|
# unofficially documented limits at around 60/minute. We get nowhere near this
|
|
|
|
# with current keyword quantities. If this ever changes, consider reading the
|
2022-10-07 08:50:20 +00:00
|
|
|
# value from /meta (or just guessing) and doing a random selection of keywords.
|
2022-06-07 14:42:24 +00:00
|
|
|
|
2022-11-13 11:00:05 +00:00
|
|
|
def offsite_mentions_task(cache:Cache):
    """Poll for offsite mentions and record them as notifications.

    The site-wide queries in ``const.REDDIT_NOTIFS_SITE`` are handled first.
    Then, if ``const.REDDIT_NOTIFS_USERS`` is non-empty, each of its
    ``query -> user`` entries is searched on its own and the results are
    addressed to that user. The database session is committed at the end.

    Args:
        cache: cache object forwarded to ``get_mentions``.
    """
    notify_mentions(get_mentions(cache, const.REDDIT_NOTIFS_SITE))

    per_user_queries = const.REDDIT_NOTIFS_USERS
    if per_user_queries:
        for query, send_user in per_user_queries.items():
            found = get_mentions(cache, [query], reddit_notifs_users=True)
            notify_mentions(found, send_to=send_user, mention_str='mention of you')

    g.db.commit() # commit early otherwise localhost testing fails to commit
|
2022-06-07 14:42:24 +00:00
|
|
|
|
2022-11-13 11:00:05 +00:00
|
|
|
def get_mentions(cache:Cache, queries:Iterable[str], reddit_notifs_users=False):
    """Search Pushshift for reddit submissions and comments matching ``queries``.

    Args:
        cache: cache that stores, under ``const.REDDIT_NOTIFS_CACHE_KEY``, the
            timestamp after which mentions should be searched.
        queries: search terms; they are joined with ``%7C`` (an encoded ``|``)
            into a single ``q`` parameter.
        reddit_notifs_users: when true, only one result per kind is requested
            and the cached timestamp is left untouched.

    Returns:
        A list of dicts with the keys ``permalink``, ``author`` and ``text``
        (an HTML blockquote of the comment body, or of the submission title
        and selftext).
    """
    kinds = ('submission', 'comment')
    exclude_subreddits = {'PokemonGoRaids', 'SubSimulatorGPT2', 'SubSimGPT2Interactive'}
    mentions = []

    try:
        after = int(cache.get(const.REDDIT_NOTIFS_CACHE_KEY) or time.time())
    except Exception:
        print("Failed to retrieve last mention time from cache", flush=True)
        after = int(time.time())

    size = 1 if reddit_notifs_users else 100

    # Join once: `queries` may be a one-shot iterable, and the value is the
    # same for every kind.
    query = "%7C".join(queries)

    # Every kind is queried from the same lower bound `after`; the newest
    # timestamp seen is tracked separately so that results of one kind cannot
    # hide older, still unseen results of the next kind.
    newest = after

    for kind in kinds:
        # Subreddit exclusion is applied client-side below rather than through
        # the `subreddit` query parameter.
        url = (
            f'https://api.pushshift.io/reddit/{kind}/search?html_decode=true'
            f'&q={query}'
            f'&after={after}'
            f'&size={size}')
        try:
            data = requests.get(url, timeout=15).json()['data']
        except Exception:
            # Best effort: a failed or malformed response for one kind must
            # not prevent the other kind from being processed.
            continue

        for thing in data:
            # Advance the watermark before filtering so that skipped items are
            # not fetched again on every run.
            created_utc = thing["created_utc"]
            if created_utc:
                newest = max(newest, created_utc)

            if thing['subreddit'] in exclude_subreddits: continue
            if 'bot' in thing['author'].lower(): continue
            if 'AutoModerator' == thing['author']: continue

            if kind == 'comment':
                body = thing["body"].replace('>', '> ')
                text = f'<blockquote><p>{body}</p></blockquote>'
            else:
                title = thing["title"].replace('>', '> ')

                # Special case: a spambot says 'WPD' a lot unrelated to us.
                if 'Kathrine Mclaurin' in title: continue

                text = f'<blockquote><p>{title}</p></blockquote>'

                # Tolerate submissions that carry no selftext field at all.
                selftext = thing.get("selftext")
                if selftext:
                    selftext = selftext.replace('>', '> ')[:5000]
                    text += f'<br><blockquote><p>{selftext}</p></blockquote>'

            mentions.append({
                'permalink': thing['permalink'],
                'author': thing['author'],
                'text': text,
            })

    try:
        if not reddit_notifs_users:
            cache.set(const.REDDIT_NOTIFS_CACHE_KEY, newest + 1)
    except Exception:
        print("Failed to set cache value; there may be duplication of reddit notifications", flush=True)

    return mentions
|
|
|
|
|
2022-12-01 00:24:21 +00:00
|
|
|
def notify_mentions(mentions, send_to=None, mention_str='site mention'):
    """Store each mention as a comment authored by ``const.AUTOJANNY_ID``.

    Args:
        mentions: iterable of dicts with the keys ``author``, ``permalink``
            and ``text``.
        send_to: optional user id; when truthy, a ``Notification`` for that
            user is added and a push notification is sent for every new
            comment.
        mention_str: wording placed after "New " in the notification text.
    """
    for mention in mentions:
        author = mention['author']
        permalink = mention['permalink']
        quoted_html = sanitize(mention['text'], blackjack="reddit mention", golden=False)

        notif_text = ''.join((
            f'<p>New {mention_str} by <a href="https://old.reddit.com/u/{author}" ',
            f'rel="nofollow noopener" target="_blank">/u/{author}</a></p>',
            f'<p><a href="https://old.reddit.com{permalink}?context=89" ',
            'rel="nofollow noopener" target="_blank">',
            f'https://old.reddit.com{permalink}?context=89</a></p>',
            f'{quoted_html}',
        ))

        # An identical comment body means this mention was already recorded.
        duplicate_lookup = g.db.query(Comment.id).filter_by(
            author_id=const.AUTOJANNY_ID,
            parent_submission=None,
            body_html=notif_text,
        )
        if duplicate_lookup.one_or_none():
            # NOTE(review): this stops processing ALL remaining mentions, not
            # just the duplicate one -- confirm that is intended.
            break

        new_comment = Comment(
            author_id=const.AUTOJANNY_ID,
            parent_submission=None,
            body_html=notif_text,
            distinguish_level=6,
        )
        g.db.add(new_comment)
        g.db.flush()  # flush before new_comment.id is read below
        new_comment.top_comment_id = new_comment.id

        if not send_to:
            continue

        g.db.add(Notification(comment_id=new_comment.id, user_id=send_to))

        push_notif({send_to}, f'New mention of you on reddit by /u/{author}', '', f'{SITE_FULL}/comment/{new_comment.id}?read=true#context')
|