Change comment search to use tsvector.

master
Snakes 2022-10-02 04:55:39 -04:00
parent f28e605a58
commit 701f64c91b
Signed by: Snakes
GPG Key ID: E745A82778055C7E
4 changed files with 13 additions and 28 deletions

View File

@ -4,6 +4,7 @@ from urllib.parse import urlencode, urlparse, parse_qs
from flask import *
from sqlalchemy import *
from sqlalchemy.orm import relationship
from sqlalchemy.dialects.postgresql import TSVECTOR
from files.__main__ import Base
from files.classes.votes import CommentVote
from files.helpers.const import *
@ -59,6 +60,7 @@ class Comment(Base):
realupvotes = Column(Integer, default=1)
body = Column(String)
body_html = Column(String)
body_ts = Column(TSVECTOR)
ban_reason = Column(String)
wordle_result = Column(String)
treasure_amount = Column(String)

View File

@ -14,11 +14,10 @@ valid_params = [
'author',
'domain',
'over18',
"post",
"before",
"after",
"title",
"exact",
'post',
'before',
'after',
'title',
search_operator_hole,
]
@ -90,14 +89,7 @@ def searchposts(v):
)
else: posts = posts.filter(Submission.author_id == author.id)
if 'exact' in criteria and 'full_text' in criteria:
regex_str = '[[:<:]]'+criteria['full_text']+'[[:>:]]' # https://docs.oracle.com/cd/E17952_01/mysql-5.5-en/regexp.html "word boundaries"
if 'title' in criteria:
words = [Submission.title.regexp_match(regex_str)]
else:
words = [or_(Submission.title.regexp_match(regex_str), Submission.body.regexp_match(regex_str))]
posts = posts.filter(*words)
elif 'q' in criteria:
if 'q' in criteria:
if('title' in criteria):
words = [or_(Submission.title.ilike('%'+x+'%')) \
for x in criteria['q']]
@ -183,9 +175,6 @@ def searchposts(v):
@app.get("/search/comments")
@auth_required
def searchcomments(v):
return {"error": "Searching comments is disabled temporarily."}, 403
query = request.values.get("q", '').strip()
try: page = max(1, int(request.values.get("page", 1)))
@ -217,14 +206,9 @@ def searchcomments(v):
else: comments = comments.filter(Comment.author_id == author.id)
if 'exact' in criteria and 'full_text' in criteria:
regex_str = '[[:<:]]'+criteria['full_text']+'[[:>:]]' # https://docs.oracle.com/cd/E17952_01/mysql-5.5-en/regexp.html "word boundaries"
words = [Comment.body.regexp_match(regex_str)]
comments = comments.filter(*words)
elif 'q' in criteria:
words = [or_(Comment.body.ilike('%'+x+'%')) \
for x in criteria['q']]
comments = comments.filter(*words)
if 'q' in criteria:
    # Comment.body_ts is generated with to_tsvector('english', body), so the
    # query text must be parsed with the same 'english' configuration.
    # Without it PostgreSQL falls back to default_text_search_config, and
    # stemming/stop-word handling may no longer line up with the stored
    # vectors.
    #
    # plainto_tsquery does not recognise tsquery operators: it normalises the
    # text and ANDs the surviving words itself, so the search terms are simply
    # joined with spaces.
    ts_query = func.plainto_tsquery('english', ' '.join(criteria['q']))
    comments = comments.filter(Comment.body_ts.op('@@')(ts_query))
if 'over18' in criteria: comments = comments.filter(Comment.over_18 == True)

View File

@ -76,10 +76,6 @@
<button onClick="addParam()" class="searchparam mb-1">title:true</button>
</div>
{% endif %}
<div>
<div style="display: inline-block; width: 150px; text-align: center;">Exact Match Only:</div>
<button onClick="addParam()" class="searchparam mb-1">exact:true</button>
</div>
</div>
</div>
<br>

View File

@ -0,0 +1,3 @@
-- Full-text search support for comments.
-- body_ts is a stored generated column: PostgreSQL derives it from body
-- using the 'english' text search configuration.
ALTER TABLE public.comments
    ADD COLUMN body_ts tsvector
        GENERATED ALWAYS AS (to_tsvector('english', body)) STORED;

-- GIN index over the generated tsvector column.
CREATE INDEX comments_body_ts_idx
    ON public.comments
    USING GIN (body_ts);