From bbe412b81c452b07f4669c4fc9c2b6e4d24943a0 Mon Sep 17 00:00:00 2001
From: Hey Moon <HeyMoon@rdrama.net>
Date: Sun, 8 May 2022 10:21:05 -0500
Subject: [PATCH] remove markdown

---
 autodrama.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/autodrama.py b/autodrama.py
index cb13630..d35dedc 100644
--- a/autodrama.py
+++ b/autodrama.py
@@ -12,6 +12,8 @@ from psaw import PushshiftAPI
 from os.path import exists, join, realpath, split
 import langdetect
 from RDramaAPIInterface import RDramaAPIInterface
+from bs4 import BeautifulSoup
+from markdown import markdown
 
 BANNED_WORDS_IN_POST = ['comment', 'promotion']
 LANGUAGE_DETECTION_ACCURACY_THRESHOLD = 10
@@ -97,6 +99,12 @@ def get_based_submissions(subreddit, time_frame, limit):
             print(f"Error while processing {submission} : {e}")
     return submissions
 
+def strip_markdown(markdown_string):
+    """Render markdown_string to HTML with markdown() and return its text nodes joined together, tags dropped."""
+    soup = BeautifulSoup(markdown(markdown_string), "html.parser")
+    # find_all(string=True) is the current spelling of the deprecated findAll(text=True); same nodes returned.
+    return ''.join(soup.find_all(string=True))
+
 def analyze_comments(submission : 'Submission'):
     print(f"[{submission.id}]Retrieving Comments")
     comments = pushshift_api.search_comments(subreddit=submission.subreddit.display_name, link_id=submission.id)
@@ -208,7 +216,9 @@ def generate_comment_display_section(submissions : 'Tuple[float, Submission]', s
 
             if (parent != None):
                 parent_body = remove_quoted_text(parent.body)
+                parent_body = strip_markdown(parent_body)
                 parent_body = parent_body.replace("\n", "")
+                
                 if len(parent_body) > max_len:
                     parent_body = parent_body[0:max_len-3] + "..."
                 markdown_lines.append(f"> {parent_body} ({parent.score})")
@@ -217,6 +227,7 @@ def generate_comment_display_section(submissions : 'Tuple[float, Submission]', s
                 comment_indent = ">"
 
             comment_body = remove_quoted_text(comment.body)
+            comment_body = strip_markdown(comment_body)
             comment_body = comment_body.replace("\n", "")
             if len(comment_body) > max_len:
                 comment_body = comment_body[0:max_len-3] + "..."