diff --git a/autodrama.py b/autodrama.py index 5526453..b935552 100644 --- a/autodrama.py +++ b/autodrama.py @@ -1,8 +1,10 @@ TEST_MODE = True +from audioop import avg from re import sub from time import sleep from typing import Tuple +from numpy import average import praw from praw.models import Comment, Submission from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer @@ -11,6 +13,8 @@ from os.path import exists, join, realpath, split from RDramaAPIInterface import RDramaAPIInterface +BANNED_WORDS_IN_POST = ['comment', 'promotion'] + def get_real_filename(filename : str): path_to_script = realpath(__file__) path_to_script_directory, _ = split(path_to_script) @@ -31,6 +35,10 @@ reddit = praw.Reddit( pushshift_api = PushshiftAPI(reddit) +def contains_banned_words(sentence): + sanitized_sentence = ''.join([i for i in sentence.lower() if i in 'abcdefghijklmnopqrstuvwxyz ']) + return bool(set(BANNED_WORDS_IN_POST).intersection(sanitized_sentence.split(" "))) + def get_based_submissions(subreddit, time_frame, limit): subscriber_cache = {} @@ -42,7 +50,7 @@ def get_based_submissions(subreddit, time_frame, limit): for submission in reddit.subreddit(subreddit).controversial(time_frame, limit=limit): try: basedness = (1-submission.upvote_ratio)*submission.num_comments - if (submission.author.name == "AutoModerator" or "comment" in submission.title.lower()): + if (submission.author.name == "AutoModerator" or contains_banned_words(submission.title)): continue if (submission.subreddit not in subscriber_cache): subscriber_cache[submission.subreddit] = submission.subreddit.subscribers @@ -63,8 +71,9 @@ def get_based_submissions(subreddit, time_frame, limit): def analyze_comments(submission : 'Submission'): print(f"[{submission.id}]Retrieving Comments") comments = pushshift_api.search_comments(subreddit=submission.subreddit.display_name, link_id=submission.id) + comment_list = list(comments) print(f"[{submission.id}]Creating Network") - comment_map = {i.id:i for 
i in list(comments)} + comment_map = {i.id:i for i in comment_list} child_map = {} for comment in comment_map.values(): try: @@ -78,6 +87,7 @@ def analyze_comments(submission : 'Submission'): sid_obj = SentimentIntensityAnalyzer() print(f"[{submission.id}]Classifying Comments") user_to_total_anger = {} + redditors = {} ranked_comments = [] angry_comments = [] for comment in comment_map.values(): @@ -126,7 +136,14 @@ def analyze_comments(submission : 'Submission'): if comment.author not in user_to_total_anger: user_to_total_anger[comment.author] = 0.0 + redditors[comment.author] = {} + redditors[comment.author]['comments'] = [] + redditors[comment.author]['angriness'] = 0 + redditors[comment.author]['author'] = comment.author + user_to_total_anger[comment.author]+=score + redditors[comment.author]['comments'].append((score, comment_info)) + redditors[comment.author]['angriness'] += score except Exception as e: print(f"Error while processing {comment}: {e}") @@ -135,10 +152,13 @@ def analyze_comments(submission : 'Submission'): angry_comments.sort(key=lambda a:a[0]) lolcows = [(v, k) for k, v in user_to_total_anger.items()] lolcows.sort(key=lambda a:a[0]) + redditors_ranked = [(data['angriness'], data) for username, data in redditors.items()] + redditors_ranked.sort(key=lambda a:a[0]) return { 'based' : ranked_comments, 'angry': angry_comments, - 'lolcows': lolcows + 'lolcows': lolcows, + 'redditors': redditors_ranked } #get_based_submissions("all", "hour", 25, True) @@ -202,18 +222,40 @@ def angriness_score_string(angriness): return get_score_string(score, "😡", "🔘") +def get_reddit_full_url(partial_url): + return f"https://reddit.com{partial_url}" + +def generate_lolcow_display_section(lolcows): + markdown_lines = [] + + + biggest_lolcow_info = lolcows[0] + biggest_lolcow_score = biggest_lolcow_info[0] + biggest_lolcow = biggest_lolcow_info[1]['author'] + number_of_comments = len(biggest_lolcow_info[1]['comments']) + lolcow_comments = 
biggest_lolcow_info[1]['comments'] + lolcow_score_string = get_score_string(-1*biggest_lolcow_score, "🐮", "🔘", allow_over=True) + markdown_lines.append(f"# Biggest Lolcow: /u/{biggest_lolcow.name}") + markdown_lines.append(f"Score: {lolcow_score_string}") + markdown_lines.append(f"Number of comments: {number_of_comments}") + comment_angriness_scores = [-1*a[0] for a in biggest_lolcow_info[1]['comments']] + average_angriness = average(comment_angriness_scores) + maximum_angry_info = max(lolcow_comments, key=lambda a : a[0]) + minimum_angry_info = min(lolcow_comments, key=lambda a : a[0]) + markdown_lines.append(f"Average angriness: {angriness_score_string(average_angriness)}") + markdown_lines.append(f"Maximum angriness: [{angriness_score_string(-1*maximum_angry_info[0])}]({get_reddit_full_url(maximum_angry_info[1]['comment'].permalink)})") + markdown_lines.append(f"Minimum angriness: [{angriness_score_string(-1*minimum_angry_info[0])}]({get_reddit_full_url(minimum_angry_info[1]['comment'].permalink)})") + + return markdown_lines + def generate_submission_report(submission : 'Submission', absolute: bool): markdown_lines = [] comment_analysis_results = analyze_comments(submission) basedness_display_func = lambda a : get_comment_basedness_out_of_five(a, absolute) markdown_lines.extend(generate_comment_display_section(comment_analysis_results['based'], "Most Based Comments", "Basedness", 3, detail_func=basedness_display_func)) markdown_lines.extend(generate_comment_display_section(comment_analysis_results['angry'], "Angriest Comments", "Angriness", 3, detail_func=angriness_score_string)) - biggest_lolcow_info = comment_analysis_results['lolcows'][0] - biggest_lolcow_score = biggest_lolcow_info[0] - biggest_lolcow = biggest_lolcow_info[1] - markdown_lines.append(f"# Biggest lolcow") - lolcow_score_string = get_score_string(-1*biggest_lolcow_score, "🐮", "🔘") - markdown_lines.append(f"/u/{biggest_lolcow.name} {lolcow_score_string}") + 
markdown_lines.extend(generate_lolcow_display_section(comment_analysis_results['redditors'])) + markdown_lines.append(f"") markdown_lines.append("*:marppy: autodrama: automating away the jobs of dramautists. :marseycapitalistmanlet: Ping HeyMoon if there are any problems or you have a suggestion :marseyjamming:*") return "\n\n".join(markdown_lines) @@ -277,8 +319,12 @@ def get_comment_basedness_out_of_five(basedness: int, absolute : bool): score = 0 return get_score_string(score, "🔥", "🔘") -def get_score_string(score: int, filled_emoji, empty_emoji) -> str: - return "".join([filled_emoji if ((i+1) <= score) else empty_emoji for i in range(5)]) +def get_score_string(score: int, filled_emoji, empty_emoji, allow_over = False) -> str: + to_return = "".join([filled_emoji if ((i+1) <= score) else empty_emoji for i in range(5)]) + if (allow_over): + if (score > 5): + to_return += f"(+{int(score)-5}{filled_emoji})" + return to_return def create_rdrama_report(rdrama : RDramaAPIInterface, submission : 'Submission', basedness: int, absolute_basedness: bool): score = get_basedness_score_out_of_five(basedness) @@ -326,4 +372,5 @@ else: timeout = 10 rdrama = RDramaAPIInterface(auth, website, timeout, https=https) -daily_drama_post(rdrama) +#daily_drama_post(rdrama) +print(generate_submission_report(Submission(reddit, "uesnfd"), True)) \ No newline at end of file