From 701f64c91b6440c4771ca0081cafe110c442dca3 Mon Sep 17 00:00:00 2001 From: TLSM Date: Sun, 2 Oct 2022 04:55:39 -0400 Subject: [PATCH] Change comment search to use tsvector. --- files/classes/comment.py | 2 ++ files/routes/search.py | 32 ++++++++------------------------ files/templates/search.html | 4 ---- sql/20221002-body-tsvector.sql | 3 +++ 4 files changed, 13 insertions(+), 28 deletions(-) create mode 100644 sql/20221002-body-tsvector.sql diff --git a/files/classes/comment.py b/files/classes/comment.py index 999e9f9d6..930985107 100644 --- a/files/classes/comment.py +++ b/files/classes/comment.py @@ -4,6 +4,7 @@ from urllib.parse import urlencode, urlparse, parse_qs from flask import * from sqlalchemy import * from sqlalchemy.orm import relationship +from sqlalchemy.dialects.postgresql import TSVECTOR from files.__main__ import Base from files.classes.votes import CommentVote from files.helpers.const import * @@ -59,6 +60,7 @@ class Comment(Base): realupvotes = Column(Integer, default=1) body = Column(String) body_html = Column(String) + body_ts = Column(TSVECTOR) ban_reason = Column(String) wordle_result = Column(String) treasure_amount = Column(String) diff --git a/files/routes/search.py b/files/routes/search.py index 13bb77ed5..5a0a652c7 100644 --- a/files/routes/search.py +++ b/files/routes/search.py @@ -14,11 +14,10 @@ valid_params = [ 'author', 'domain', 'over18', - "post", - "before", - "after", - "title", - "exact", + 'post', + 'before', + 'after', + 'title', search_operator_hole, ] @@ -90,14 +89,7 @@ def searchposts(v): ) else: posts = posts.filter(Submission.author_id == author.id) - if 'exact' in criteria and 'full_text' in criteria: - regex_str = '[[:<:]]'+criteria['full_text']+'[[:>:]]' # https://docs.oracle.com/cd/E17952_01/mysql-5.5-en/regexp.html "word boundaries" - if 'title' in criteria: - words = [Submission.title.regexp_match(regex_str)] - else: - words = [or_(Submission.title.regexp_match(regex_str), Submission.body.regexp_match(regex_str))] - posts = posts.filter(*words) - elif 'q' in criteria: + if 'q' in criteria: if('title' in criteria): words = [or_(Submission.title.ilike('%'+x+'%')) \ for x in criteria['q']] @@ -183,9 +175,6 @@ def searchposts(v): @app.get("/search/comments") @auth_required def searchcomments(v): - - return {"error": "Searching comments is disabled temporarily."}, 403 - query = request.values.get("q", '').strip() try: page = max(1, int(request.values.get("page", 1))) @@ -217,14 +206,9 @@ def searchcomments(v): else: comments = comments.filter(Comment.author_id == author.id) - if 'exact' in criteria and 'full_text' in criteria: - regex_str = '[[:<:]]'+criteria['full_text']+'[[:>:]]' # https://docs.oracle.com/cd/E17952_01/mysql-5.5-en/regexp.html "word boundaries" - words = [Comment.body.regexp_match(regex_str)] - comments = comments.filter(*words) - elif 'q' in criteria: - words = [or_(Comment.body.ilike('%'+x+'%')) \ - for x in criteria['q']] - comments = comments.filter(*words) + if 'q' in criteria: + comments = comments.filter(Comment.body_ts.op('@@')( + func.plainto_tsquery(' & '.join(criteria['q'])))) if 'over18' in criteria: comments = comments.filter(Comment.over_18 == True) diff --git a/files/templates/search.html b/files/templates/search.html index 66ca09709..27f0d361b 100644 --- a/files/templates/search.html +++ b/files/templates/search.html @@ -76,10 +76,6 @@ {% endif %} -
-
Exact Match Only:
- -

diff --git a/sql/20221002-body-tsvector.sql b/sql/20221002-body-tsvector.sql new file mode 100644 index 000000000..1151ef682 --- /dev/null +++ b/sql/20221002-body-tsvector.sql @@ -0,0 +1,3 @@ +ALTER TABLE public.comments ADD COLUMN body_ts tsvector + GENERATED ALWAYS AS (to_tsvector('english', body)) STORED; +CREATE INDEX comments_body_ts_idx ON public.comments USING GIN (body_ts);