ban promotions, improve lolcow detection

master
Hey Moon 2022-05-07 14:36:25 -05:00
parent 56b87645bb
commit 76cc5a8530
1 changed file with 59 additions and 12 deletions

View File

@ -1,8 +1,10 @@
TEST_MODE = True TEST_MODE = True
from audioop import avg
from re import sub from re import sub
from time import sleep from time import sleep
from typing import Tuple from typing import Tuple
from numpy import average
import praw import praw
from praw.models import Comment, Submission from praw.models import Comment, Submission
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
@ -11,6 +13,8 @@ from os.path import exists, join, realpath, split
from RDramaAPIInterface import RDramaAPIInterface from RDramaAPIInterface import RDramaAPIInterface
# Words that disqualify a submission: titles containing any of these are
# skipped by get_based_submissions (filters meta "comment" threads and
# promotion posts).
BANNED_WORDS_IN_POST = ['comment', 'promotion']
def get_real_filename(filename : str): def get_real_filename(filename : str):
path_to_script = realpath(__file__) path_to_script = realpath(__file__)
path_to_script_directory, _ = split(path_to_script) path_to_script_directory, _ = split(path_to_script)
@ -31,6 +35,10 @@ reddit = praw.Reddit(
pushshift_api = PushshiftAPI(reddit) pushshift_api = PushshiftAPI(reddit)
def contains_banned_words(sentence):
    """Return True if *sentence* contains any word from BANNED_WORDS_IN_POST.

    Matching is case-insensitive and punctuation-insensitive: every
    non-letter character is treated as a word separator, so "Comment!"
    or "comment-thread" still matches "comment".
    """
    # Map non-letters to a space rather than deleting them; deleting them
    # (as a plain character filter would) fuses adjacent words across
    # newlines, tabs or punctuation ("comment\npromotion" ->
    # "commentpromotion") and lets banned words slip through.
    sanitized = ''.join(
        c if c in 'abcdefghijklmnopqrstuvwxyz' else ' '
        for c in sentence.lower()
    )
    # split() with no argument ignores runs of separators and empties.
    return not set(BANNED_WORDS_IN_POST).isdisjoint(sanitized.split())
def get_based_submissions(subreddit, time_frame, limit): def get_based_submissions(subreddit, time_frame, limit):
subscriber_cache = {} subscriber_cache = {}
@ -42,7 +50,7 @@ def get_based_submissions(subreddit, time_frame, limit):
for submission in reddit.subreddit(subreddit).controversial(time_frame, limit=limit): for submission in reddit.subreddit(subreddit).controversial(time_frame, limit=limit):
try: try:
basedness = (1-submission.upvote_ratio)*submission.num_comments basedness = (1-submission.upvote_ratio)*submission.num_comments
if (submission.author.name == "AutoModerator" or "comment" in submission.title.lower()): if (submission.author.name == "AutoModerator" or contains_banned_words(submission.title)):
continue continue
if (submission.subreddit not in subscriber_cache): if (submission.subreddit not in subscriber_cache):
subscriber_cache[submission.subreddit] = submission.subreddit.subscribers subscriber_cache[submission.subreddit] = submission.subreddit.subscribers
@ -63,8 +71,9 @@ def get_based_submissions(subreddit, time_frame, limit):
def analyze_comments(submission : 'Submission'): def analyze_comments(submission : 'Submission'):
print(f"[{submission.id}]Retrieving Comments") print(f"[{submission.id}]Retrieving Comments")
comments = pushshift_api.search_comments(subreddit=submission.subreddit.display_name, link_id=submission.id) comments = pushshift_api.search_comments(subreddit=submission.subreddit.display_name, link_id=submission.id)
comment_list = list(comments)
print(f"[{submission.id}]Creating Network") print(f"[{submission.id}]Creating Network")
comment_map = {i.id:i for i in list(comments)} comment_map = {i.id:i for i in comment_list}
child_map = {} child_map = {}
for comment in comment_map.values(): for comment in comment_map.values():
try: try:
@ -78,6 +87,7 @@ def analyze_comments(submission : 'Submission'):
sid_obj = SentimentIntensityAnalyzer() sid_obj = SentimentIntensityAnalyzer()
print(f"[{submission.id}]Classifying Comments") print(f"[{submission.id}]Classifying Comments")
user_to_total_anger = {} user_to_total_anger = {}
redditors = {}
ranked_comments = [] ranked_comments = []
angry_comments = [] angry_comments = []
for comment in comment_map.values(): for comment in comment_map.values():
@ -126,7 +136,14 @@ def analyze_comments(submission : 'Submission'):
if comment.author not in user_to_total_anger: if comment.author not in user_to_total_anger:
user_to_total_anger[comment.author] = 0.0 user_to_total_anger[comment.author] = 0.0
redditors[comment.author] = {}
redditors[comment.author]['comments'] = []
redditors[comment.author]['angriness'] = 0
redditors[comment.author]['author'] = comment.author
user_to_total_anger[comment.author]+=score user_to_total_anger[comment.author]+=score
redditors[comment.author]['comments'].append((score, comment_info))
redditors[comment.author]['angriness'] += score
except Exception as e: except Exception as e:
print(f"Error while processing {comment}: {e}") print(f"Error while processing {comment}: {e}")
@ -135,10 +152,13 @@ def analyze_comments(submission : 'Submission'):
angry_comments.sort(key=lambda a:a[0]) angry_comments.sort(key=lambda a:a[0])
lolcows = [(v, k) for k, v in user_to_total_anger.items()] lolcows = [(v, k) for k, v in user_to_total_anger.items()]
lolcows.sort(key=lambda a:a[0]) lolcows.sort(key=lambda a:a[0])
redditors_ranked = [(data['angriness'], data) for username, data in redditors.items()]
redditors_ranked.sort(key=lambda a:a[0])
return { return {
'based' : ranked_comments, 'based' : ranked_comments,
'angry': angry_comments, 'angry': angry_comments,
'lolcows': lolcows 'lolcows': lolcows,
'redditors': redditors_ranked
} }
#get_based_submissions("all", "hour", 25, True) #get_based_submissions("all", "hour", 25, True)
@ -202,18 +222,40 @@ def angriness_score_string(angriness):
return get_score_string(score, "😡", "🔘") return get_score_string(score, "😡", "🔘")
def get_reddit_full_url(partial_url):
    """Expand a reddit-relative permalink (e.g. a Comment.permalink) into an absolute URL."""
    return "https://reddit.com" + partial_url
def generate_lolcow_display_section(lolcows):
    """Build the "Biggest Lolcow" markdown section for a submission report.

    lolcows: list of (angriness, data) tuples sorted ascending by angriness,
    where angriness is negated (more negative == angrier, matching the
    comment scores produced by analyze_comments) and data carries
    'author', 'angriness' and 'comments' (a list of (score, comment_info)).

    Returns a list of markdown lines; an empty list when there are no
    redditors to report (previously this raised IndexError on [0]).
    """
    if not lolcows:
        return []
    markdown_lines = []
    biggest_lolcow_score, lolcow_data = lolcows[0]
    biggest_lolcow = lolcow_data['author']
    lolcow_comments = lolcow_data['comments']
    # Scores are stored negated, so flip the sign for display; allow_over
    # shows the overflow beyond the five-emoji meter.
    lolcow_score_string = get_score_string(-1 * biggest_lolcow_score, "🐮", "🔘", allow_over=True)
    markdown_lines.append(f"# Biggest Lolcow: /u/{biggest_lolcow.name}")
    markdown_lines.append(f"Score: {lolcow_score_string}")
    markdown_lines.append(f"Number of comments: {len(lolcow_comments)}")
    # Plain stdlib mean — no need for numpy.average here.
    comment_angriness_scores = [-1 * score for score, _ in lolcow_comments]
    average_angriness = sum(comment_angriness_scores) / len(comment_angriness_scores)
    # Because raw scores are negated, the *minimum* raw score is the
    # ANGRIEST comment and the *maximum* is the least angry — using
    # max() for "Maximum angriness" picked the calmest comment.
    maximum_angry_info = min(lolcow_comments, key=lambda a: a[0])
    minimum_angry_info = max(lolcow_comments, key=lambda a: a[0])
    markdown_lines.append(f"Average angriness: {angriness_score_string(average_angriness)}")
    markdown_lines.append(f"Maximum angriness: [{angriness_score_string(-1*maximum_angry_info[0])}]({get_reddit_full_url(maximum_angry_info[1]['comment'].permalink)})")
    markdown_lines.append(f"Minimum angriness: [{angriness_score_string(-1*minimum_angry_info[0])}]({get_reddit_full_url(minimum_angry_info[1]['comment'].permalink)})")
    return markdown_lines
def generate_submission_report(submission : 'Submission', absolute: bool): def generate_submission_report(submission : 'Submission', absolute: bool):
markdown_lines = [] markdown_lines = []
comment_analysis_results = analyze_comments(submission) comment_analysis_results = analyze_comments(submission)
basedness_display_func = lambda a : get_comment_basedness_out_of_five(a, absolute) basedness_display_func = lambda a : get_comment_basedness_out_of_five(a, absolute)
markdown_lines.extend(generate_comment_display_section(comment_analysis_results['based'], "Most Based Comments", "Basedness", 3, detail_func=basedness_display_func)) markdown_lines.extend(generate_comment_display_section(comment_analysis_results['based'], "Most Based Comments", "Basedness", 3, detail_func=basedness_display_func))
markdown_lines.extend(generate_comment_display_section(comment_analysis_results['angry'], "Angriest Comments", "Angriness", 3, detail_func=angriness_score_string)) markdown_lines.extend(generate_comment_display_section(comment_analysis_results['angry'], "Angriest Comments", "Angriness", 3, detail_func=angriness_score_string))
biggest_lolcow_info = comment_analysis_results['lolcows'][0] markdown_lines.extend(generate_lolcow_display_section(comment_analysis_results['redditors']))
biggest_lolcow_score = biggest_lolcow_info[0] markdown_lines.append(f"")
biggest_lolcow = biggest_lolcow_info[1]
markdown_lines.append(f"# Biggest lolcow")
lolcow_score_string = get_score_string(-1*biggest_lolcow_score, "🐮", "🔘")
markdown_lines.append(f"/u/{biggest_lolcow.name} {lolcow_score_string}")
markdown_lines.append("*:marppy: autodrama: automating away the jobs of dramautists. :marseycapitalistmanlet: Ping HeyMoon if there are any problems or you have a suggestion :marseyjamming:*") markdown_lines.append("*:marppy: autodrama: automating away the jobs of dramautists. :marseycapitalistmanlet: Ping HeyMoon if there are any problems or you have a suggestion :marseyjamming:*")
return "\n\n".join(markdown_lines) return "\n\n".join(markdown_lines)
@ -277,8 +319,12 @@ def get_comment_basedness_out_of_five(basedness: int, absolute : bool):
score = 0 score = 0
return get_score_string(score, "🔥", "🔘") return get_score_string(score, "🔥", "🔘")
def get_score_string(score: int, filled_emoji, empty_emoji, allow_over = False) -> str:
    """Render *score* as a five-slot emoji meter.

    Slot n (1..5) shows filled_emoji when n <= score, else empty_emoji.
    With allow_over=True, a score above 5 appends an overflow suffix
    such as "(+2<filled_emoji>)".
    """
    slots = [filled_emoji if score >= slot else empty_emoji for slot in range(1, 6)]
    meter = "".join(slots)
    if allow_over and score > 5:
        meter += f"(+{int(score)-5}{filled_emoji})"
    return meter
def create_rdrama_report(rdrama : RDramaAPIInterface, submission : 'Submission', basedness: int, absolute_basedness: bool): def create_rdrama_report(rdrama : RDramaAPIInterface, submission : 'Submission', basedness: int, absolute_basedness: bool):
score = get_basedness_score_out_of_five(basedness) score = get_basedness_score_out_of_five(basedness)
@ -326,4 +372,5 @@ else:
timeout = 10 timeout = 10
rdrama = RDramaAPIInterface(auth, website, timeout, https=https) rdrama = RDramaAPIInterface(auth, website, timeout, https=https)
daily_drama_post(rdrama) #daily_drama_post(rdrama)
print(generate_submission_report(Submission(reddit, "uesnfd"), True))