ban promotions, improve lolcow detection
parent
56b87645bb
commit
76cc5a8530
71
autodrama.py
71
autodrama.py
|
@ -1,8 +1,10 @@
|
||||||
TEST_MODE = True
|
TEST_MODE = True
|
||||||
|
|
||||||
|
from audioop import avg
|
||||||
from re import sub
|
from re import sub
|
||||||
from time import sleep
|
from time import sleep
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
|
from numpy import average
|
||||||
import praw
|
import praw
|
||||||
from praw.models import Comment, Submission
|
from praw.models import Comment, Submission
|
||||||
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
|
||||||
|
@ -11,6 +13,8 @@ from os.path import exists, join, realpath, split
|
||||||
|
|
||||||
from RDramaAPIInterface import RDramaAPIInterface
|
from RDramaAPIInterface import RDramaAPIInterface
|
||||||
|
|
||||||
|
BANNED_WORDS_IN_POST = ['comment', 'promotion']
|
||||||
|
|
||||||
def get_real_filename(filename : str):
|
def get_real_filename(filename : str):
|
||||||
path_to_script = realpath(__file__)
|
path_to_script = realpath(__file__)
|
||||||
path_to_script_directory, _ = split(path_to_script)
|
path_to_script_directory, _ = split(path_to_script)
|
||||||
|
@ -31,6 +35,10 @@ reddit = praw.Reddit(
|
||||||
|
|
||||||
pushshift_api = PushshiftAPI(reddit)
|
pushshift_api = PushshiftAPI(reddit)
|
||||||
|
|
||||||
|
def contains_banned_words(sentence, banned_words=None):
    """Return True if *sentence* contains any banned word as a whole word.

    The sentence is lowercased and every character outside a-z is replaced
    with a space before splitting, so punctuation acts as a word boundary.
    (Deleting punctuation instead — the previous behavior — fused adjacent
    words: "comment/promotion" became "commentpromotion" and matched
    neither banned word.)

    :param sentence: text to scan, e.g. a submission title.
    :param banned_words: optional iterable of lowercase words to match;
        defaults to the module-level BANNED_WORDS_IN_POST.
    :return: True when at least one banned word appears in the sentence.
    """
    if banned_words is None:
        banned_words = BANNED_WORDS_IN_POST
    # Map non-letters to spaces (not empty string) so they separate words.
    sanitized_sentence = ''.join([i if i in 'abcdefghijklmnopqrstuvwxyz' else ' ' for i in sentence.lower()])
    return bool(set(banned_words).intersection(sanitized_sentence.split()))
|
||||||
|
|
||||||
def get_based_submissions(subreddit, time_frame, limit):
|
def get_based_submissions(subreddit, time_frame, limit):
|
||||||
subscriber_cache = {}
|
subscriber_cache = {}
|
||||||
|
|
||||||
|
@ -42,7 +50,7 @@ def get_based_submissions(subreddit, time_frame, limit):
|
||||||
for submission in reddit.subreddit(subreddit).controversial(time_frame, limit=limit):
|
for submission in reddit.subreddit(subreddit).controversial(time_frame, limit=limit):
|
||||||
try:
|
try:
|
||||||
basedness = (1-submission.upvote_ratio)*submission.num_comments
|
basedness = (1-submission.upvote_ratio)*submission.num_comments
|
||||||
if (submission.author.name == "AutoModerator" or "comment" in submission.title.lower()):
|
if (submission.author.name == "AutoModerator" or contains_banned_words(submission.title)):
|
||||||
continue
|
continue
|
||||||
if (submission.subreddit not in subscriber_cache):
|
if (submission.subreddit not in subscriber_cache):
|
||||||
subscriber_cache[submission.subreddit] = submission.subreddit.subscribers
|
subscriber_cache[submission.subreddit] = submission.subreddit.subscribers
|
||||||
|
@ -63,8 +71,9 @@ def get_based_submissions(subreddit, time_frame, limit):
|
||||||
def analyze_comments(submission : 'Submission'):
|
def analyze_comments(submission : 'Submission'):
|
||||||
print(f"[{submission.id}]Retrieving Comments")
|
print(f"[{submission.id}]Retrieving Comments")
|
||||||
comments = pushshift_api.search_comments(subreddit=submission.subreddit.display_name, link_id=submission.id)
|
comments = pushshift_api.search_comments(subreddit=submission.subreddit.display_name, link_id=submission.id)
|
||||||
|
comment_list = list(comments)
|
||||||
print(f"[{submission.id}]Creating Network")
|
print(f"[{submission.id}]Creating Network")
|
||||||
comment_map = {i.id:i for i in list(comments)}
|
comment_map = {i.id:i for i in comment_list}
|
||||||
child_map = {}
|
child_map = {}
|
||||||
for comment in comment_map.values():
|
for comment in comment_map.values():
|
||||||
try:
|
try:
|
||||||
|
@ -78,6 +87,7 @@ def analyze_comments(submission : 'Submission'):
|
||||||
sid_obj = SentimentIntensityAnalyzer()
|
sid_obj = SentimentIntensityAnalyzer()
|
||||||
print(f"[{submission.id}]Classifying Comments")
|
print(f"[{submission.id}]Classifying Comments")
|
||||||
user_to_total_anger = {}
|
user_to_total_anger = {}
|
||||||
|
redditors = {}
|
||||||
ranked_comments = []
|
ranked_comments = []
|
||||||
angry_comments = []
|
angry_comments = []
|
||||||
for comment in comment_map.values():
|
for comment in comment_map.values():
|
||||||
|
@ -126,7 +136,14 @@ def analyze_comments(submission : 'Submission'):
|
||||||
|
|
||||||
if comment.author not in user_to_total_anger:
|
if comment.author not in user_to_total_anger:
|
||||||
user_to_total_anger[comment.author] = 0.0
|
user_to_total_anger[comment.author] = 0.0
|
||||||
|
redditors[comment.author] = {}
|
||||||
|
redditors[comment.author]['comments'] = []
|
||||||
|
redditors[comment.author]['angriness'] = 0
|
||||||
|
redditors[comment.author]['author'] = comment.author
|
||||||
|
|
||||||
user_to_total_anger[comment.author]+=score
|
user_to_total_anger[comment.author]+=score
|
||||||
|
redditors[comment.author]['comments'].append((score, comment_info))
|
||||||
|
redditors[comment.author]['angriness'] += score
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error while processing {comment}: {e}")
|
print(f"Error while processing {comment}: {e}")
|
||||||
|
|
||||||
|
@ -135,10 +152,13 @@ def analyze_comments(submission : 'Submission'):
|
||||||
angry_comments.sort(key=lambda a:a[0])
|
angry_comments.sort(key=lambda a:a[0])
|
||||||
lolcows = [(v, k) for k, v in user_to_total_anger.items()]
|
lolcows = [(v, k) for k, v in user_to_total_anger.items()]
|
||||||
lolcows.sort(key=lambda a:a[0])
|
lolcows.sort(key=lambda a:a[0])
|
||||||
|
redditors_ranked = [(data['angriness'], data) for username, data in redditors.items()]
|
||||||
|
redditors_ranked.sort(key=lambda a:a[0])
|
||||||
return {
|
return {
|
||||||
'based' : ranked_comments,
|
'based' : ranked_comments,
|
||||||
'angry': angry_comments,
|
'angry': angry_comments,
|
||||||
'lolcows': lolcows
|
'lolcows': lolcows,
|
||||||
|
'redditors': redditors_ranked
|
||||||
}
|
}
|
||||||
#get_based_submissions("all", "hour", 25, True)
|
#get_based_submissions("all", "hour", 25, True)
|
||||||
|
|
||||||
|
@ -202,18 +222,40 @@ def angriness_score_string(angriness):
|
||||||
|
|
||||||
return get_score_string(score, "😡", "🔘")
|
return get_score_string(score, "😡", "🔘")
|
||||||
|
|
||||||
|
def get_reddit_full_url(partial_url):
    """Turn a site-relative reddit permalink into an absolute URL."""
    base = "https://reddit.com"
    return base + partial_url
|
||||||
|
|
||||||
|
def generate_lolcow_display_section(lolcows):
    """Render the "Biggest Lolcow" markdown section of a submission report.

    :param lolcows: list of (score, data) tuples sorted angriest-first;
        ``data`` is a dict with 'author' (a praw Redditor), 'comments'
        (list of (score, comment_info) tuples) and 'angriness' keys, as
        built by analyze_comments().
    :return: list of markdown lines; empty when there are no lolcows.
    """
    # Guard: a thread with no scored commenters would otherwise raise
    # IndexError on lolcows[0] — contribute nothing to the report instead.
    if not lolcows:
        return []

    markdown_lines = []
    biggest_lolcow_score, biggest_lolcow_data = lolcows[0]
    biggest_lolcow = biggest_lolcow_data['author']
    lolcow_comments = biggest_lolcow_data['comments']

    # Scores are accumulated negated upstream, so -1* restores a positive
    # display value.
    lolcow_score_string = get_score_string(-1*biggest_lolcow_score, "🐮", "🔘", allow_over=True)
    markdown_lines.append(f"# Biggest Lolcow: /u/{biggest_lolcow.name}")
    markdown_lines.append(f"Score: {lolcow_score_string}")
    markdown_lines.append(f"Number of comments: {len(lolcow_comments)}")

    # Plain sum/len replaces numpy.average — same displayed value, no
    # array dependency for a short python list.
    comment_angriness_scores = [-1*entry[0] for entry in lolcow_comments]
    average_angriness = sum(comment_angriness_scores) / len(comment_angriness_scores)

    # NOTE(review): with negated scores, max(key=score) selects the
    # *least* negative (least angry) comment — confirm the Maximum /
    # Minimum labels below match the intended sign convention.
    maximum_angry_info = max(lolcow_comments, key=lambda a : a[0])
    minimum_angry_info = min(lolcow_comments, key=lambda a : a[0])

    markdown_lines.append(f"Average angriness: {angriness_score_string(average_angriness)}")
    markdown_lines.append(f"Maximum angriness: [{angriness_score_string(-1*maximum_angry_info[0])}]({get_reddit_full_url(maximum_angry_info[1]['comment'].permalink)})")
    markdown_lines.append(f"Minimum angriness: [{angriness_score_string(-1*minimum_angry_info[0])}]({get_reddit_full_url(minimum_angry_info[1]['comment'].permalink)})")

    return markdown_lines
|
||||||
|
|
||||||
def generate_submission_report(submission : 'Submission', absolute: bool):
|
def generate_submission_report(submission : 'Submission', absolute: bool):
|
||||||
markdown_lines = []
|
markdown_lines = []
|
||||||
comment_analysis_results = analyze_comments(submission)
|
comment_analysis_results = analyze_comments(submission)
|
||||||
basedness_display_func = lambda a : get_comment_basedness_out_of_five(a, absolute)
|
basedness_display_func = lambda a : get_comment_basedness_out_of_five(a, absolute)
|
||||||
markdown_lines.extend(generate_comment_display_section(comment_analysis_results['based'], "Most Based Comments", "Basedness", 3, detail_func=basedness_display_func))
|
markdown_lines.extend(generate_comment_display_section(comment_analysis_results['based'], "Most Based Comments", "Basedness", 3, detail_func=basedness_display_func))
|
||||||
markdown_lines.extend(generate_comment_display_section(comment_analysis_results['angry'], "Angriest Comments", "Angriness", 3, detail_func=angriness_score_string))
|
markdown_lines.extend(generate_comment_display_section(comment_analysis_results['angry'], "Angriest Comments", "Angriness", 3, detail_func=angriness_score_string))
|
||||||
biggest_lolcow_info = comment_analysis_results['lolcows'][0]
|
markdown_lines.extend(generate_lolcow_display_section(comment_analysis_results['redditors']))
|
||||||
biggest_lolcow_score = biggest_lolcow_info[0]
|
markdown_lines.append(f"")
|
||||||
biggest_lolcow = biggest_lolcow_info[1]
|
|
||||||
markdown_lines.append(f"# Biggest lolcow")
|
|
||||||
lolcow_score_string = get_score_string(-1*biggest_lolcow_score, "🐮", "🔘")
|
|
||||||
markdown_lines.append(f"/u/{biggest_lolcow.name} {lolcow_score_string}")
|
|
||||||
markdown_lines.append("*:marppy: autodrama: automating away the jobs of dramautists. :marseycapitalistmanlet: Ping HeyMoon if there are any problems or you have a suggestion :marseyjamming:*")
|
markdown_lines.append("*:marppy: autodrama: automating away the jobs of dramautists. :marseycapitalistmanlet: Ping HeyMoon if there are any problems or you have a suggestion :marseyjamming:*")
|
||||||
return "\n\n".join(markdown_lines)
|
return "\n\n".join(markdown_lines)
|
||||||
|
|
||||||
|
@ -277,8 +319,12 @@ def get_comment_basedness_out_of_five(basedness: int, absolute : bool):
|
||||||
score = 0
|
score = 0
|
||||||
return get_score_string(score, "🔥", "🔘")
|
return get_score_string(score, "🔥", "🔘")
|
||||||
|
|
||||||
def get_score_string(score: int, filled_emoji, empty_emoji, allow_over = False) -> str:
    """Build a five-slot emoji meter for *score*.

    Slot i (1..5) shows ``filled_emoji`` when i <= score, otherwise
    ``empty_emoji``. With ``allow_over``, a score above 5 appends the
    overflow as "(+N<filled_emoji>)".

    :param score: numeric score; may be fractional (callers pass floats).
    :param filled_emoji: marker for a filled slot.
    :param empty_emoji: marker for an empty slot.
    :param allow_over: when True, render the amount above 5 as a suffix.
    :return: the rendered meter string.
    """
    to_return = "".join([filled_emoji if ((i+1) <= score) else empty_emoji for i in range(5)])
    # Truncate before comparing: for 5 < score < 6 the old check rendered
    # a meaningless "(+0…)" suffix because int(score)-5 is zero.
    overflow = int(score) - 5
    if allow_over and overflow > 0:
        to_return += f"(+{overflow}{filled_emoji})"
    return to_return
|
||||||
|
|
||||||
def create_rdrama_report(rdrama : RDramaAPIInterface, submission : 'Submission', basedness: int, absolute_basedness: bool):
|
def create_rdrama_report(rdrama : RDramaAPIInterface, submission : 'Submission', basedness: int, absolute_basedness: bool):
|
||||||
score = get_basedness_score_out_of_five(basedness)
|
score = get_basedness_score_out_of_five(basedness)
|
||||||
|
@ -326,4 +372,5 @@ else:
|
||||||
timeout = 10
|
timeout = 10
|
||||||
rdrama = RDramaAPIInterface(auth, website, timeout, https=https)
|
rdrama = RDramaAPIInterface(auth, website, timeout, https=https)
|
||||||
|
|
||||||
daily_drama_post(rdrama)
|
#daily_drama_post(rdrama)
|
||||||
|
print(generate_submission_report(Submission(reddit, "uesnfd"), True))
|
Loading…
Reference in New Issue