remove markdown
parent
39d1aac958
commit
bbe412b81c
11
autodrama.py
11
autodrama.py
|
@ -12,6 +12,8 @@ from psaw import PushshiftAPI
|
||||||
from os.path import exists, join, realpath, split
|
from os.path import exists, join, realpath, split
|
||||||
import langdetect
|
import langdetect
|
||||||
from RDramaAPIInterface import RDramaAPIInterface
|
from RDramaAPIInterface import RDramaAPIInterface
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from markdown import markdown
|
||||||
|
|
||||||
BANNED_WORDS_IN_POST = ['comment', 'promotion']
|
BANNED_WORDS_IN_POST = ['comment', 'promotion']
|
||||||
LANGUAGE_DETECTION_ACCURACY_THRESHOLD = 10
|
LANGUAGE_DETECTION_ACCURACY_THRESHOLD = 10
|
||||||
|
@ -97,6 +99,12 @@ def get_based_submissions(subreddit, time_frame, limit):
|
||||||
print(f"Error while processing {submission} : {e}")
|
print(f"Error while processing {submission} : {e}")
|
||||||
return submissions
|
return submissions
|
||||||
|
|
||||||
|
def strip_markdown(markdown_string):
    """Return the text of *markdown_string* with Markdown markup removed.

    The input is rendered to HTML with ``markdown`` and the result is parsed
    with BeautifulSoup's ``html.parser`` backend; every text node of the
    parsed document is then concatenated with no separator.

    Args:
        markdown_string: Markdown source text to convert.

    Returns:
        The concatenated text nodes of the rendered HTML.
    """
    html = markdown(markdown_string)
    soup = BeautifulSoup(html, "html.parser")
    # find_all(string=True) is the current spelling of the legacy
    # findAll(text=True) call: it selects every text node in the tree.
    return ''.join(soup.find_all(string=True))
|
||||||
|
|
||||||
def analyze_comments(submission : 'Submission'):
|
def analyze_comments(submission : 'Submission'):
|
||||||
print(f"[{submission.id}]Retrieving Comments")
|
print(f"[{submission.id}]Retrieving Comments")
|
||||||
comments = pushshift_api.search_comments(subreddit=submission.subreddit.display_name, link_id=submission.id)
|
comments = pushshift_api.search_comments(subreddit=submission.subreddit.display_name, link_id=submission.id)
|
||||||
|
@ -208,7 +216,9 @@ def generate_comment_display_section(submissions : 'Tuple[float, Submission]', s
|
||||||
|
|
||||||
if (parent != None):
|
if (parent != None):
|
||||||
parent_body = remove_quoted_text(parent.body)
|
parent_body = remove_quoted_text(parent.body)
|
||||||
|
parent_body = strip_markdown(parent_body)
|
||||||
parent_body = parent_body.replace("\n", "")
|
parent_body = parent_body.replace("\n", "")
|
||||||
|
|
||||||
if len(parent_body) > max_len:
|
if len(parent_body) > max_len:
|
||||||
parent_body = parent_body[0:max_len-3] + "..."
|
parent_body = parent_body[0:max_len-3] + "..."
|
||||||
markdown_lines.append(f"> {parent_body} ({parent.score})")
|
markdown_lines.append(f"> {parent_body} ({parent.score})")
|
||||||
|
@ -217,6 +227,7 @@ def generate_comment_display_section(submissions : 'Tuple[float, Submission]', s
|
||||||
comment_indent = ">"
|
comment_indent = ">"
|
||||||
|
|
||||||
comment_body = remove_quoted_text(comment.body)
|
comment_body = remove_quoted_text(comment.body)
|
||||||
|
comment_body = strip_markdown(comment_body)
|
||||||
comment_body = comment_body.replace("\n", "")
|
comment_body = comment_body.replace("\n", "")
|
||||||
if len(comment_body) > max_len:
|
if len(comment_body) > max_len:
|
||||||
comment_body = comment_body[0:max_len-3] + "..."
|
comment_body = comment_body[0:max_len-3] + "..."
|
||||||
|
|
Loading…
Reference in New Issue