ban promotions, improve lolcow detection

master
Hey Moon 2022-05-07 14:36:25 -05:00
parent 56b87645bb
commit 76cc5a8530
1 changed file with 59 additions and 12 deletions

View File

@ -1,8 +1,10 @@
TEST_MODE = True TEST_MODE = True
from audioop import avg
from re import sub from re import sub
from time import sleep from time import sleep
from typing import Tuple from typing import Tuple
from numpy import average
import praw import praw
from praw.models import Comment, Submission from praw.models import Comment, Submission
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
@ -11,6 +13,8 @@ from os.path import exists, join, realpath, split
from RDramaAPIInterface import RDramaAPIInterface from RDramaAPIInterface import RDramaAPIInterface
# Words that disqualify a submission: titles containing any of these are
# skipped by get_based_submissions (filters meta "comment" threads and
# promotion posts).
BANNED_WORDS_IN_POST = ['comment', 'promotion']
def get_real_filename(filename : str): def get_real_filename(filename : str):
path_to_script = realpath(__file__) path_to_script = realpath(__file__)
path_to_script_directory, _ = split(path_to_script) path_to_script_directory, _ = split(path_to_script)
@ -31,6 +35,10 @@ reddit = praw.Reddit(
pushshift_api = PushshiftAPI(reddit) pushshift_api = PushshiftAPI(reddit)
def contains_banned_words(sentence):
    """Return True if *sentence* contains any word from BANNED_WORDS_IN_POST.

    Matching is case-insensitive and punctuation-insensitive: every
    non-letter character is treated as a word separator, so "Comment!"
    or "comment-thread" still matches "comment".
    """
    # Map non-letters to a space rather than deleting them; deleting them
    # (as a plain character filter would) fuses adjacent words across
    # newlines, tabs or punctuation ("comment\npromotion" ->
    # "commentpromotion") and lets banned words slip through.
    sanitized = ''.join(
        c if c in 'abcdefghijklmnopqrstuvwxyz' else ' '
        for c in sentence.lower()
    )
    # split() with no argument ignores runs of separators and empties.
    return not set(BANNED_WORDS_IN_POST).isdisjoint(sanitized.split())
def get_based_submissions(subreddit, time_frame, limit): def get_based_submissions(subreddit, time_frame, limit):
subscriber_cache = {} subscriber_cache = {}
@ -42,7 +50,7 @@ def get_based_submissions(subreddit, time_frame, limit):
for submission in reddit.subreddit(subreddit).controversial(time_frame, limit=limit): for submission in reddit.subreddit(subreddit).controversial(time_frame, limit=limit):
try: try:
basedness = (1-submission.upvote_ratio)*submission.num_comments basedness = (1-submission.upvote_ratio)*submission.num_comments
if (submission.author.name == "AutoModerator" or "comment" in submission.title.lower()): if (submission.author.name == "AutoModerator" or contains_banned_words(submission.title)):
continue continue
if (submission.subreddit not in subscriber_cache): if (submission.subreddit not in subscriber_cache):
subscriber_cache[submission.subreddit] = submission.subreddit.subscribers subscriber_cache[submission.subreddit] = submission.subreddit.subscribers
@ -63,8 +71,9 @@ def get_based_submissions(subreddit, time_frame, limit):
def analyze_comments(submission : 'Submission'): def analyze_comments(submission : 'Submission'):
print(f"[{submission.id}]Retrieving Comments") print(f"[{submission.id}]Retrieving Comments")
comments = pushshift_api.search_comments(subreddit=submission.subreddit.display_name, link_id=submission.id) comments = pushshift_api.search_comments(subreddit=submission.subreddit.display_name, link_id=submission.id)
comment_list = list(comments)
print(f"[{submission.id}]Creating Network") print(f"[{submission.id}]Creating Network")
comment_map = {i.id:i for i in list(comments)} comment_map = {i.id:i for i in comment_list}
child_map = {} child_map = {}
for comment in comment_map.values(): for comment in comment_map.values():
try: try:
@ -78,6 +87,7 @@ def analyze_comments(submission : 'Submission'):
sid_obj = SentimentIntensityAnalyzer() sid_obj = SentimentIntensityAnalyzer()
print(f"[{submission.id}]Classifying Comments") print(f"[{submission.id}]Classifying Comments")
user_to_total_anger = {} user_to_total_anger = {}
redditors = {}
ranked_comments = [] ranked_comments = []
angry_comments = [] angry_comments = []
for comment in comment_map.values(): for comment in comment_map.values():
@ -126,7 +136,14 @@ def analyze_comments(submission : 'Submission'):
if comment.author not in user_to_total_anger: if comment.author not in user_to_total_anger:
user_to_total_anger[comment.author] = 0.0 user_to_total_anger[comment.author] = 0.0
redditors[comment.author] = {}
redditors[comment.author]['comments'] = []
redditors[comment.author]['angriness'] = 0
redditors[comment.author]['author'] = comment.author
user_to_total_anger[comment.author]+=score user_to_total_anger[comment.author]+=score
redditors[comment.author]['comments'].append((score, comment_info))
redditors[comment.author]['angriness'] += score
except Exception as e: except Exception as e:
print(f"Error while processing {comment}: {e}") print(f"Error while processing {comment}: {e}")
@ -135,10 +152,13 @@ def analyze_comments(submission : 'Submission'):
angry_comments.sort(key=lambda a:a[0]) angry_comments.sort(key=lambda a:a[0])
lolcows = [(v, k) for k, v in user_to_total_anger.items()] lolcows = [(v, k) for k, v in user_to_total_anger.items()]
lolcows.sort(key=lambda a:a[0]) lolcows.sort(key=lambda a:a[0])
redditors_ranked = [(data['angriness'], data) for username, data in redditors.items()]
redditors_ranked.sort(key=lambda a:a[0])
return { return {
'based' : ranked_comments, 'based' : ranked_comments,
'angry': angry_comments, 'angry': angry_comments,
'lolcows': lolcows 'lolcows': lolcows,
'redditors': redditors_ranked
} }
#get_based_submissions("all", "hour", 25, True) #get_based_submissions("all", "hour", 25, True)
@ -202,18 +222,40 @@ def angriness_score_string(angriness):
return get_score_string(score, "😡", "🔘") return get_score_string(score, "😡", "🔘")
def get_reddit_full_url(partial_url):
    """Expand a reddit-relative permalink (e.g. a Comment.permalink) into an absolute URL."""
    return "https://reddit.com" + partial_url
def generate_lolcow_display_section(lolcows):
    """Build the "Biggest Lolcow" markdown section for a submission report.

    lolcows: list of (angriness, data) tuples sorted ascending by angriness,
    where angriness is negated (more negative == angrier, matching the
    comment scores produced by analyze_comments) and data carries
    'author', 'angriness' and 'comments' (a list of (score, comment_info)).

    Returns a list of markdown lines; an empty list when there are no
    redditors to report (previously this raised IndexError on [0]).
    """
    if not lolcows:
        return []
    markdown_lines = []
    biggest_lolcow_score, lolcow_data = lolcows[0]
    biggest_lolcow = lolcow_data['author']
    lolcow_comments = lolcow_data['comments']
    # Scores are stored negated, so flip the sign for display; allow_over
    # shows the overflow beyond the five-emoji meter.
    lolcow_score_string = get_score_string(-1 * biggest_lolcow_score, "🐮", "🔘", allow_over=True)
    markdown_lines.append(f"# Biggest Lolcow: /u/{biggest_lolcow.name}")
    markdown_lines.append(f"Score: {lolcow_score_string}")
    markdown_lines.append(f"Number of comments: {len(lolcow_comments)}")
    # Plain stdlib mean — no need for numpy.average here.
    comment_angriness_scores = [-1 * score for score, _ in lolcow_comments]
    average_angriness = sum(comment_angriness_scores) / len(comment_angriness_scores)
    # Because raw scores are negated, the *minimum* raw score is the
    # ANGRIEST comment and the *maximum* is the least angry — using
    # max() for "Maximum angriness" picked the calmest comment.
    maximum_angry_info = min(lolcow_comments, key=lambda a: a[0])
    minimum_angry_info = max(lolcow_comments, key=lambda a: a[0])
    markdown_lines.append(f"Average angriness: {angriness_score_string(average_angriness)}")
    markdown_lines.append(f"Maximum angriness: [{angriness_score_string(-1*maximum_angry_info[0])}]({get_reddit_full_url(maximum_angry_info[1]['comment'].permalink)})")
    markdown_lines.append(f"Minimum angriness: [{angriness_score_string(-1*minimum_angry_info[0])}]({get_reddit_full_url(minimum_angry_info[1]['comment'].permalink)})")
    return markdown_lines
def generate_submission_report(submission : 'Submission', absolute: bool): def generate_submission_report(submission : 'Submission', absolute: bool):
markdown_lines = [] markdown_lines = []
comment_analysis_results = analyze_comments(submission) comment_analysis_results = analyze_comments(submission)
basedness_display_func = lambda a : get_comment_basedness_out_of_five(a, absolute) basedness_display_func = lambda a : get_comment_basedness_out_of_five(a, absolute)
markdown_lines.extend(generate_comment_display_section(comment_analysis_results['based'], "Most Based Comments", "Basedness", 3, detail_func=basedness_display_func)) markdown_lines.extend(generate_comment_display_section(comment_analysis_results['based'], "Most Based Comments", "Basedness", 3, detail_func=basedness_display_func))
markdown_lines.extend(generate_comment_display_section(comment_analysis_results['angry'], "Angriest Comments", "Angriness", 3, detail_func=angriness_score_string)) markdown_lines.extend(generate_comment_display_section(comment_analysis_results['angry'], "Angriest Comments", "Angriness", 3, detail_func=angriness_score_string))
biggest_lolcow_info = comment_analysis_results['lolcows'][0] markdown_lines.extend(generate_lolcow_display_section(comment_analysis_results['redditors']))
biggest_lolcow_score = biggest_lolcow_info[0] markdown_lines.append(f"")
biggest_lolcow = biggest_lolcow_info[1]
markdown_lines.append(f"# Biggest lolcow")
lolcow_score_string = get_score_string(-1*biggest_lolcow_score, "🐮", "🔘")
markdown_lines.append(f"/u/{biggest_lolcow.name} {lolcow_score_string}")
markdown_lines.append("*:marppy: autodrama: automating away the jobs of dramautists. :marseycapitalistmanlet: Ping HeyMoon if there are any problems or you have a suggestion :marseyjamming:*") markdown_lines.append("*:marppy: autodrama: automating away the jobs of dramautists. :marseycapitalistmanlet: Ping HeyMoon if there are any problems or you have a suggestion :marseyjamming:*")
return "\n\n".join(markdown_lines) return "\n\n".join(markdown_lines)
@ -277,8 +319,12 @@ def get_comment_basedness_out_of_five(basedness: int, absolute : bool):
score = 0 score = 0
return get_score_string(score, "🔥", "🔘") return get_score_string(score, "🔥", "🔘")
def get_score_string(score: int, filled_emoji, empty_emoji, allow_over = False) -> str:
    """Render *score* as a five-slot emoji meter.

    Slot n (1..5) shows filled_emoji when n <= score, else empty_emoji.
    With allow_over=True, a score above 5 appends an overflow suffix
    such as "(+2<filled_emoji>)".
    """
    slots = [filled_emoji if score >= slot else empty_emoji for slot in range(1, 6)]
    meter = "".join(slots)
    if allow_over and score > 5:
        meter += f"(+{int(score)-5}{filled_emoji})"
    return meter
def create_rdrama_report(rdrama : RDramaAPIInterface, submission : 'Submission', basedness: int, absolute_basedness: bool): def create_rdrama_report(rdrama : RDramaAPIInterface, submission : 'Submission', basedness: int, absolute_basedness: bool):
score = get_basedness_score_out_of_five(basedness) score = get_basedness_score_out_of_five(basedness)
@ -326,4 +372,5 @@ else:
timeout = 10 timeout = 10
rdrama = RDramaAPIInterface(auth, website, timeout, https=https) rdrama = RDramaAPIInterface(auth, website, timeout, https=https)
daily_drama_post(rdrama) #daily_drama_post(rdrama)
print(generate_submission_report(Submission(reddit, "uesnfd"), True))