ban promotions, improve lolcow detection

master
Hey Moon 2022-05-07 14:36:25 -05:00
parent 56b87645bb
commit 76cc5a8530
1 changed file with 59 additions and 12 deletions

View File

@ -1,8 +1,10 @@
TEST_MODE = True
from audioop import avg
from re import sub
from time import sleep
from typing import Tuple
from numpy import average
import praw
from praw.models import Comment, Submission
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
@ -11,6 +13,8 @@ from os.path import exists, join, realpath, split
from RDramaAPIInterface import RDramaAPIInterface
# Words that disqualify a submission: titles containing any of these
# (after lowercasing and stripping non-letters) are skipped by
# get_based_submissions via contains_banned_words.
BANNED_WORDS_IN_POST = ['comment', 'promotion']
def get_real_filename(filename : str):
path_to_script = realpath(__file__)
path_to_script_directory, _ = split(path_to_script)
@ -31,6 +35,10 @@ reddit = praw.Reddit(
pushshift_api = PushshiftAPI(reddit)
def contains_banned_words(sentence):
    """Return True if *sentence* contains any word from BANNED_WORDS_IN_POST.

    The sentence is lowercased and every character other than a-z or a
    space is dropped before splitting, so punctuation stuck to a word
    (e.g. "Promotion!") does not prevent a match.
    """
    letters_and_space = 'abcdefghijklmnopqrstuvwxyz '
    cleaned = "".join(ch for ch in sentence.lower() if ch in letters_and_space)
    return any(word in BANNED_WORDS_IN_POST for word in cleaned.split(" "))
def get_based_submissions(subreddit, time_frame, limit):
subscriber_cache = {}
@ -42,7 +50,7 @@ def get_based_submissions(subreddit, time_frame, limit):
for submission in reddit.subreddit(subreddit).controversial(time_frame, limit=limit):
try:
basedness = (1-submission.upvote_ratio)*submission.num_comments
if (submission.author.name == "AutoModerator" or "comment" in submission.title.lower()):
if (submission.author.name == "AutoModerator" or contains_banned_words(submission.title)):
continue
if (submission.subreddit not in subscriber_cache):
subscriber_cache[submission.subreddit] = submission.subreddit.subscribers
@ -63,8 +71,9 @@ def get_based_submissions(subreddit, time_frame, limit):
def analyze_comments(submission : 'Submission'):
print(f"[{submission.id}]Retrieving Comments")
comments = pushshift_api.search_comments(subreddit=submission.subreddit.display_name, link_id=submission.id)
comment_list = list(comments)
print(f"[{submission.id}]Creating Network")
comment_map = {i.id:i for i in list(comments)}
comment_map = {i.id:i for i in comment_list}
child_map = {}
for comment in comment_map.values():
try:
@ -78,6 +87,7 @@ def analyze_comments(submission : 'Submission'):
sid_obj = SentimentIntensityAnalyzer()
print(f"[{submission.id}]Classifying Comments")
user_to_total_anger = {}
redditors = {}
ranked_comments = []
angry_comments = []
for comment in comment_map.values():
@ -126,7 +136,14 @@ def analyze_comments(submission : 'Submission'):
if comment.author not in user_to_total_anger:
user_to_total_anger[comment.author] = 0.0
redditors[comment.author] = {}
redditors[comment.author]['comments'] = []
redditors[comment.author]['angriness'] = 0
redditors[comment.author]['author'] = comment.author
user_to_total_anger[comment.author]+=score
redditors[comment.author]['comments'].append((score, comment_info))
redditors[comment.author]['angriness'] += score
except Exception as e:
print(f"Error while processing {comment}: {e}")
@ -135,10 +152,13 @@ def analyze_comments(submission : 'Submission'):
angry_comments.sort(key=lambda a:a[0])
lolcows = [(v, k) for k, v in user_to_total_anger.items()]
lolcows.sort(key=lambda a:a[0])
redditors_ranked = [(data['angriness'], data) for username, data in redditors.items()]
redditors_ranked.sort(key=lambda a:a[0])
return {
'based' : ranked_comments,
'angry': angry_comments,
'lolcows': lolcows
'lolcows': lolcows,
'redditors': redditors_ranked
}
#get_based_submissions("all", "hour", 25, True)
@ -202,18 +222,40 @@ def angriness_score_string(angriness):
return get_score_string(score, "😡", "🔘")
def get_reddit_full_url(partial_url):
return f"https://reddit.com{partial_url}"
def generate_lolcow_display_section(lolcows):
    """Build the "Biggest Lolcow" markdown section for the report.

    ``lolcows`` is the ``redditors_ranked`` list from ``analyze_comments``:
    ``(angriness, data)`` tuples sorted ascending, where ``data`` holds
    ``'author'`` (a Redditor), ``'comments'`` (list of ``(score, comment_info)``)
    and ``'angriness'``. Totals appear to be negative-for-angry, so index 0
    is the angriest user — TODO confirm sign convention.

    Returns a list of markdown lines (joined by the caller).
    """
    markdown_lines = []
    biggest_lolcow_info = lolcows[0]
    biggest_lolcow_score = biggest_lolcow_info[0]  # negative total; negated below for display
    biggest_lolcow = biggest_lolcow_info[1]['author']  # presumably a praw Redditor — .name used below
    number_of_comments = len(biggest_lolcow_info[1]['comments'])
    lolcow_comments = biggest_lolcow_info[1]['comments']  # list of (score, comment_info) tuples
    # allow_over=True lets totals above 5 render an overflow suffix like "(+N🐮)".
    lolcow_score_string = get_score_string(-1*biggest_lolcow_score, "🐮", "🔘", allow_over=True)
    markdown_lines.append(f"# Biggest Lolcow: /u/{biggest_lolcow.name}")
    markdown_lines.append(f"Score: {lolcow_score_string}")
    markdown_lines.append(f"Number of comments: {number_of_comments}")
    # Negate per-comment scores so the average is positive for display.
    comment_angryness_scores = [-1*a[0] for a in biggest_lolcow_info[1]['comments']]
    average_angriness = average(comment_angryness_scores)
    # NOTE(review): if per-comment scores are negative-for-angry (as the
    # negations above suggest), max(key=a[0]) picks the LEAST angry comment,
    # so the "Maximum"/"Minimum" labels below may be swapped — verify.
    maximum_angry_info = max(lolcow_comments, key=lambda a : a[0])
    minimum_angry_info = min(lolcow_comments, key=lambda a : a[0])
    markdown_lines.append(f"Average angriness: {angriness_score_string(average_angriness)}")
    markdown_lines.append(f"Maximum angriness: [{angriness_score_string(-1*maximum_angry_info[0])}]({get_reddit_full_url(maximum_angry_info[1]['comment'].permalink)})")
    markdown_lines.append(f"Minimum angriness: [{angriness_score_string(-1*minimum_angry_info[0])}]({get_reddit_full_url(minimum_angry_info[1]['comment'].permalink)})")
    return markdown_lines
def generate_submission_report(submission : 'Submission', absolute: bool):
markdown_lines = []
comment_analysis_results = analyze_comments(submission)
basedness_display_func = lambda a : get_comment_basedness_out_of_five(a, absolute)
markdown_lines.extend(generate_comment_display_section(comment_analysis_results['based'], "Most Based Comments", "Basedness", 3, detail_func=basedness_display_func))
markdown_lines.extend(generate_comment_display_section(comment_analysis_results['angry'], "Angriest Comments", "Angriness", 3, detail_func=angriness_score_string))
biggest_lolcow_info = comment_analysis_results['lolcows'][0]
biggest_lolcow_score = biggest_lolcow_info[0]
biggest_lolcow = biggest_lolcow_info[1]
markdown_lines.append(f"# Biggest lolcow")
lolcow_score_string = get_score_string(-1*biggest_lolcow_score, "🐮", "🔘")
markdown_lines.append(f"/u/{biggest_lolcow.name} {lolcow_score_string}")
markdown_lines.extend(generate_lolcow_display_section(comment_analysis_results['redditors']))
markdown_lines.append(f"")
markdown_lines.append("*:marppy: autodrama: automating away the jobs of dramautists. :marseycapitalistmanlet: Ping HeyMoon if there are any problems or you have a suggestion :marseyjamming:*")
return "\n\n".join(markdown_lines)
@ -277,8 +319,12 @@ def get_comment_basedness_out_of_five(basedness: int, absolute : bool):
score = 0
return get_score_string(score, "🔥", "🔘")
def get_score_string(score: int, filled_emoji, empty_emoji) -> str:
return "".join([filled_emoji if ((i+1) <= score) else empty_emoji for i in range(5)])
def get_score_string(score: int, filled_emoji, empty_emoji, allow_over = False) -> str:
    """Render *score* as a five-slot emoji meter.

    Slot i is filled when ``i + 1 <= score``, empty otherwise. When
    ``allow_over`` is True and the score exceeds 5, an overflow suffix
    like ``(+2<filled_emoji>)`` is appended.
    """
    slots = []
    for position in range(1, 6):
        slots.append(filled_emoji if position <= score else empty_emoji)
    meter = "".join(slots)
    if allow_over and score > 5:
        meter += f"(+{int(score)-5}{filled_emoji})"
    return meter
def create_rdrama_report(rdrama : RDramaAPIInterface, submission : 'Submission', basedness: int, absolute_basedness: bool):
score = get_basedness_score_out_of_five(basedness)
@ -326,4 +372,5 @@ else:
timeout = 10
rdrama = RDramaAPIInterface(auth, website, timeout, https=https)
daily_drama_post(rdrama)
#daily_drama_post(rdrama)
print(generate_submission_report(Submission(reddit, "uesnfd"), True))