From b1ae6a324ae3128d1b749e1fe4f8a9bc5621e7e3 Mon Sep 17 00:00:00 2001 From: Aevann Date: Sat, 9 Sep 2023 21:51:09 +0300 Subject: [PATCH] restore y'all-seeing-eye function using pullpush (initial commit) --- env_template.env | 2 +- files/helpers/config/const.py | 7 ++-- files/helpers/cron.py | 2 +- files/helpers/offsitementions.py | 55 ++++++++++++++------------------ 4 files changed, 28 insertions(+), 38 deletions(-) diff --git a/env_template.env b/env_template.env index 58f97d9787..ffdd800647 100644 --- a/env_template.env +++ b/env_template.env @@ -18,4 +18,4 @@ export DONATE_LINK='https://blahblahblah' export CF_KEY='blahblahblah' export CF_ZONE='blahblahblah' export DEBIAN_FRONTEND='noninteractive' -export WORKER_COUNT="2" +export WORKER_COUNT="1" diff --git a/files/helpers/config/const.py b/files/helpers/config/const.py index 5e656ef8b5..ca5acdbab8 100644 --- a/files/helpers/config/const.py +++ b/files/helpers/config/const.py @@ -72,7 +72,6 @@ if SITE == 'staging.rdrama.net': LOGGED_IN_CACHE_KEY = "loggedin" LOGGED_OUT_CACHE_KEY = "loggedout" CHAT_ONLINE_CACHE_KEY = "online" -REDDIT_NOTIFS_CACHE_KEY = "reddit_notifications" CASINO_RELEASE_DAY = 1662825600 @@ -272,10 +271,8 @@ if SITE_NAME == 'rDrama': 'love4fatpeople', } - REDDIT_NOTIFS_SITE.add('marsey') - REDDIT_NOTIFS_SITE.add('"r/drama"') - REDDIT_NOTIFS_SITE.add('justice4darrell') - REDDIT_NOTIFS_SITE.add('cringetopia.org') + REDDIT_NOTIFS_SITE.update({'marsey', 'r/drama', 'justice4darrell', 'cringetopia.org'}) + elif SITE_NAME == 'WPD': REDDIT_NOTIFS_SITE.update({'watchpeopledie', 'makemycoffin'}) diff --git a/files/helpers/cron.py b/files/helpers/cron.py index ba8d2e7821..1460595f3b 100644 --- a/files/helpers/cron.py +++ b/files/helpers/cron.py @@ -38,7 +38,7 @@ def cron_fn(every_5m, every_1d): check_if_end_lottery_task() spin_roulette_wheel() - #offsitementions.offsite_mentions_task(cache) + offsitementions.offsite_mentions_task(cache) _award_timers_task() _unpin_expired() _grant_one_year_badges() diff --git a/files/helpers/offsitementions.py b/files/helpers/offsitementions.py index 53fcfb5b55..0940cfcafb 100644 --- a/files/helpers/offsitementions.py +++ b/files/helpers/offsitementions.py @@ -32,32 +32,27 @@ def offsite_mentions_task(cache): g.db.commit() # commit early otherwise localhost testing fails to commit def get_mentions(cache, queries, reddit_notifs_users=False): - kinds = ['post', 'comment'] + kinds = ['submission', 'comment'] mentions = [] - exclude_subreddits = ['PokemonGoRaids', 'SubSimulatorGPT2', 'SubSimGPT2Interactive'] - try: - after = int(cache.get(const.REDDIT_NOTIFS_CACHE_KEY) or time.time()) - except: - print("Failed to retrieve last mention time from cache", flush=True) - after = time.time() - size = 1 if reddit_notifs_users else 100 for kind in kinds: - try: - url = ( - f'https://api.pushshift.io/reddit/{kind}/search?html_decode=true' - f'&q={"%7C".join(queries)}' - # f'&subreddit=!{",!".join(exclude_subreddits)}' - f'&after={after}' - f'&size={size}') - data = requests.get(url, timeout=15).json()['data'] - except Exception as e: - continue + data = [] + + for query in queries: + last_processed = 9999999999 + while True: + url = f'https://api.pullpush.io/reddit/search/{kind}?q={query}&before={last_processed}' + new_data = requests.get(url, headers=HEADERS, timeout=5, proxies=proxies).json()['data'] + data += new_data + try: last_processed = int(new_data[-1]['created_utc']) + except: break + if last_processed < 1682872206 or len(new_data) < 100: break + + data = sorted(data, key=lambda x: x['created_utc'], reverse=True) for thing in data: - if thing['subreddit'] in exclude_subreddits: continue + if thing['subreddit'] in {'IAmA', 'PokemonGoRaids', 'SubSimulatorGPT2', 'SubSimGPT2Interactive'}: continue if 'bot' in thing['author'].lower(): continue if 'AutoModerator' == thing['author']: continue - after = max(after, thing["created_utc"]) if thing["created_utc"] else after if kind == 'comment': body = thing["body"].replace('>', '> ') text = f'

{body}

' @@ -72,17 +67,12 @@ def get_mentions(cache, queries, reddit_notifs_users=False): selftext = thing["selftext"].replace('>', '> ')[:5000] text += f'

{selftext}

' - mentions.append({ 'permalink': thing['permalink'], 'author': thing['author'], + 'created_utc': thing['created_utc'], 'text': text, }) - try: - if not reddit_notifs_users: - cache.set(const.REDDIT_NOTIFS_CACHE_KEY, after + 1) - except: - print("Failed to set cache value; there may be duplication of reddit notifications", flush=True) return mentions def notify_mentions(mentions, send_to=None, mention_str='site mention'): @@ -99,17 +89,20 @@ def notify_mentions(mentions, send_to=None, mention_str='site mention'): f'{text}' ) + g.db.flush() existing_comment = g.db.query(Comment.id).filter_by( author_id=const.AUTOJANNY_ID, parent_post=None, body_html=notif_text).one_or_none() - if existing_comment: break + if existing_comment: continue new_comment = Comment( - author_id=const.AUTOJANNY_ID, - parent_post=None, - body_html=notif_text, - distinguish_level=6) + author_id=const.AUTOJANNY_ID, + parent_post=None, + body_html=notif_text, + distinguish_level=6, + created_utc=int(m['created_utc']), + ) g.db.add(new_comment) g.db.flush() new_comment.top_comment_id = new_comment.id