remove markdown
parent
39d1aac958
commit
bbe412b81c
11
autodrama.py
11
autodrama.py
|
@ -12,6 +12,8 @@ from psaw import PushshiftAPI
|
|||
from os.path import exists, join, realpath, split
|
||||
import langdetect
|
||||
from RDramaAPIInterface import RDramaAPIInterface
|
||||
from bs4 import BeautifulSoup
|
||||
from markdown import markdown
|
||||
|
||||
BANNED_WORDS_IN_POST = ['comment', 'promotion']
|
||||
LANGUAGE_DETECTION_ACCURACY_THRESHOLD = 10
|
||||
|
@ -97,6 +99,12 @@ def get_based_submissions(subreddit, time_frame, limit):
|
|||
print(f"Error while processing {submission} : {e}")
|
||||
return submissions
|
||||
|
||||
def strip_markdown(markdown_string):
    """Return the plain text of *markdown_string* with markup removed.

    The input is rendered to HTML with ``markdown`` and the resulting
    document is parsed with BeautifulSoup; every text node is then
    concatenated in document order, so tags produced by the Markdown
    syntax (emphasis, links, headings, ...) are dropped and only their
    text content remains.

    Args:
        markdown_string: Markdown-formatted source text.

    Returns:
        The concatenated text nodes of the rendered HTML as one string.
    """
    html = markdown(markdown_string)
    soup = BeautifulSoup(html, "html.parser")
    # find_all(string=True) is the supported spelling of the legacy
    # findAll(text=True) call; it selects the same text nodes without
    # triggering bs4 deprecation warnings.
    return ''.join(soup.find_all(string=True))
|
||||
|
||||
def analyze_comments(submission : 'Submission'):
|
||||
print(f"[{submission.id}]Retrieving Comments")
|
||||
comments = pushshift_api.search_comments(subreddit=submission.subreddit.display_name, link_id=submission.id)
|
||||
|
@ -208,7 +216,9 @@ def generate_comment_display_section(submissions : 'Tuple[float, Submission]', s
|
|||
|
||||
if (parent != None):
|
||||
parent_body = remove_quoted_text(parent.body)
|
||||
parent_body = strip_markdown(parent_body)
|
||||
parent_body = parent_body.replace("\n", "")
|
||||
|
||||
if len(parent_body) > max_len:
|
||||
parent_body = parent_body[0:max_len-3] + "..."
|
||||
markdown_lines.append(f"> {parent_body} ({parent.score})")
|
||||
|
@ -217,6 +227,7 @@ def generate_comment_display_section(submissions : 'Tuple[float, Submission]', s
|
|||
comment_indent = ">"
|
||||
|
||||
comment_body = remove_quoted_text(comment.body)
|
||||
comment_body = strip_markdown(comment_body)
|
||||
comment_body = comment_body.replace("\n", "")
|
||||
if len(comment_body) > max_len:
|
||||
comment_body = comment_body[0:max_len-3] + "..."
|
||||
|
|
Loading…
Reference in New Issue