2021-03-18 20:25:21 +00:00
|
|
|
|
use crate::{settings::structs::Settings, ApiError, IpAddr};
|
2020-09-14 15:29:50 +00:00
|
|
|
|
use actix_web::dev::ConnectionInfo;
|
2020-12-17 19:01:33 +00:00
|
|
|
|
use chrono::{DateTime, FixedOffset, NaiveDateTime};
|
2020-09-14 15:29:50 +00:00
|
|
|
|
use itertools::Itertools;
|
|
|
|
|
use rand::{distributions::Alphanumeric, thread_rng, Rng};
|
|
|
|
|
use regex::{Regex, RegexBuilder};
|
2021-06-18 18:38:34 +00:00
|
|
|
|
use url::Url;
|
2020-09-14 15:29:50 +00:00
|
|
|
|
|
|
|
|
|
lazy_static! {
  /// Matches a syntactically plausible e-mail address: a local part, `@`, and one or more
  /// dot-separated domain labels.
  ///
  /// The local-part character class contains the ASCII apostrophe `'`. An earlier revision
  /// listed the typographic `’` instead, which rejected addresses such as
  /// `o'brien@example.com`.
  static ref EMAIL_REGEX: Regex = Regex::new(r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$").expect("compile regex");

  /// Case-insensitive pattern for the built-in slur list, extended with the optional
  /// `additional_slurs` alternation taken from the settings.
  static ref SLUR_REGEX: Regex = {
    let mut slurs = r"(fag(g|got|tard)?\b|cock\s?sucker(s|ing)?|ni((g{2,}|q)+|[gq]{2,})[e3r]+(s|z)?|mudslime?s?|kikes?|\bspi(c|k)s?\b|\bchinks?|gooks?|bitch(es|ing|y)?|whor(es?|ing)|\btr(a|@)nn?(y|ies?)|\b(b|re|r)tard(ed)?s?)".to_string();
    // The built-in list is one parenthesised group, so the configured pattern can simply be
    // appended as a further top-level alternative.
    if let Some(additional_slurs) = Settings::get().additional_slurs {
      slurs.push('|');
      slurs.push_str(&additional_slurs);
    }
    RegexBuilder::new(&slurs).case_insensitive(true).build().expect("compile regex")
  };

  /// Matches `/u/<name>` references where the name starts with an ASCII letter.
  static ref USERNAME_MATCHES_REGEX: Regex = Regex::new(r"/u/[a-zA-Z][0-9a-zA-Z_]*").expect("compile regex");
  // TODO keep this old one, it didn't work with port well tho
  // static ref MENTIONS_REGEX: Regex = Regex::new(r"@(?P<name>[\w.]+)@(?P<domain>[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)").expect("compile regex");
  /// Matches `@name@domain` mentions and exposes the `name` and `domain` capture groups.
  /// The domain class includes `:` so that a port suffix is captured as well.
  static ref MENTIONS_REGEX: Regex = Regex::new(r"@(?P<name>[\w.]+)@(?P<domain>[a-zA-Z0-9._:-]+)").expect("compile regex");
  /// An actor name: at least three ASCII letters, digits or underscores, and nothing else.
  static ref VALID_ACTOR_NAME_REGEX: Regex = Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex");
  /// A post title must contain at least one non-whitespace character.
  static ref VALID_POST_TITLE_REGEX: Regex = Regex::new(r".*\S.*").expect("compile regex");
  /// A Matrix user id of the form `@localpart:server.tld`.
  static ref VALID_MATRIX_ID_REGEX: Regex = Regex::new(r"^@[A-Za-z0-9._=-]+:[A-Za-z0-9.-]+\.[A-Za-z]{2,}$").expect("compile regex");
  // taken from https://en.wikipedia.org/wiki/UTM_parameters
  /// Matches a query parameter name that is exactly one of the known tracking parameters.
  ///
  /// The alternation is wrapped in a non-capturing group so that `^` and `$` apply to every
  /// alternative. Without the group the anchors bind only to the first and last alternative,
  /// and any key merely containing e.g. `utm_medium` would match.
  static ref CLEAN_URL_PARAMS_REGEX: Regex = Regex::new(r"^(?:utm_source|utm_medium|utm_campaign|utm_term|utm_content|gclid|gclsrc|dclid|fbclid)$").expect("compile regex");
}
|
|
|
|
|
|
|
|
|
|
pub fn naive_from_unix(time: i64) -> NaiveDateTime {
|
|
|
|
|
NaiveDateTime::from_timestamp(time, 0)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn convert_datetime(datetime: NaiveDateTime) -> DateTime<FixedOffset> {
|
2020-12-17 19:01:33 +00:00
|
|
|
|
DateTime::<FixedOffset>::from_utc(datetime, FixedOffset::east(0))
|
2020-09-14 15:29:50 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn remove_slurs(test: &str) -> String {
|
|
|
|
|
SLUR_REGEX.replace_all(test, "*removed*").to_string()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub(crate) fn slur_check(test: &str) -> Result<(), Vec<&str>> {
|
|
|
|
|
let mut matches: Vec<&str> = SLUR_REGEX.find_iter(test).map(|mat| mat.as_str()).collect();
|
|
|
|
|
|
|
|
|
|
// Unique
|
|
|
|
|
matches.sort_unstable();
|
|
|
|
|
matches.dedup();
|
|
|
|
|
|
|
|
|
|
if matches.is_empty() {
|
|
|
|
|
Ok(())
|
|
|
|
|
} else {
|
|
|
|
|
Err(matches)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-22 18:04:32 +00:00
|
|
|
|
pub fn check_slurs(text: &str) -> Result<(), ApiError> {
|
2020-09-14 15:29:50 +00:00
|
|
|
|
if let Err(slurs) = slur_check(text) {
|
2021-02-22 18:04:32 +00:00
|
|
|
|
Err(ApiError::err(&slurs_vec_to_str(slurs)))
|
2020-09-14 15:29:50 +00:00
|
|
|
|
} else {
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-22 18:04:32 +00:00
|
|
|
|
pub fn check_slurs_opt(text: &Option<String>) -> Result<(), ApiError> {
|
2020-09-14 15:29:50 +00:00
|
|
|
|
match text {
|
|
|
|
|
Some(t) => check_slurs(t),
|
|
|
|
|
None => Ok(()),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Formats the matched slurs as a single message: the prefix `No slurs - ` followed by the
/// entries of `slurs` joined with `", "`.
pub(crate) fn slurs_vec_to_str(slurs: Vec<&str>) -> String {
  format!("No slurs - {}", slurs.join(", "))
}
|
|
|
|
|
|
|
|
|
|
pub fn generate_random_string() -> String {
|
2020-12-21 14:34:59 +00:00
|
|
|
|
thread_rng()
|
|
|
|
|
.sample_iter(&Alphanumeric)
|
|
|
|
|
.map(char::from)
|
|
|
|
|
.take(30)
|
|
|
|
|
.collect()
|
2020-09-14 15:29:50 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn markdown_to_html(text: &str) -> String {
|
|
|
|
|
comrak::markdown_to_html(text, &comrak::ComrakOptions::default())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TODO nothing is done with community / group webfingers yet, so just ignore those for now
/// A single `@name@domain` mention.
///
/// Derives `Debug` so values can be logged and used in `assert_eq!`, plus `Eq` and `Hash`
/// so mentions can be de-duplicated.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct MentionData {
  /// The part between the two `@` signs.
  pub name: String,
  /// The part after the second `@`.
  pub domain: String,
}
|
|
|
|
|
|
|
|
|
|
impl MentionData {
|
|
|
|
|
pub fn is_local(&self) -> bool {
|
2021-08-04 21:13:51 +00:00
|
|
|
|
Settings::get().hostname.eq(&self.domain)
|
2020-09-14 15:29:50 +00:00
|
|
|
|
}
|
|
|
|
|
pub fn full_name(&self) -> String {
|
|
|
|
|
format!("@{}@{}", &self.name, &self.domain)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn scrape_text_for_mentions(text: &str) -> Vec<MentionData> {
|
|
|
|
|
let mut out: Vec<MentionData> = Vec::new();
|
|
|
|
|
for caps in MENTIONS_REGEX.captures_iter(text) {
|
|
|
|
|
out.push(MentionData {
|
|
|
|
|
name: caps["name"].to_string(),
|
|
|
|
|
domain: caps["domain"].to_string(),
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
out.into_iter().unique().collect()
|
|
|
|
|
}
|
|
|
|
|
|
2021-07-23 01:53:44 +00:00
|
|
|
|
pub fn is_valid_actor_name(name: &str) -> bool {
|
2021-08-04 21:13:51 +00:00
|
|
|
|
name.chars().count() <= Settings::get().actor_name_max_length
|
2021-07-23 01:53:44 +00:00
|
|
|
|
&& VALID_ACTOR_NAME_REGEX.is_match(name)
|
2020-09-14 15:29:50 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Can't do a regex here, reverse lookarounds not supported
|
2021-04-01 17:57:45 +00:00
|
|
|
|
pub fn is_valid_display_name(name: &str) -> bool {
|
2021-04-01 18:09:53 +00:00
|
|
|
|
!name.starts_with('@')
|
|
|
|
|
&& !name.starts_with('\u{200b}')
|
|
|
|
|
&& name.chars().count() >= 3
|
2021-08-04 21:13:51 +00:00
|
|
|
|
&& name.chars().count() <= Settings::get().actor_name_max_length
|
2020-09-14 15:29:50 +00:00
|
|
|
|
}
|
|
|
|
|
|
2021-04-07 11:38:00 +00:00
|
|
|
|
pub fn is_valid_matrix_id(matrix_id: &str) -> bool {
|
|
|
|
|
VALID_MATRIX_ID_REGEX.is_match(matrix_id)
|
|
|
|
|
}
|
|
|
|
|
|
2020-09-14 15:29:50 +00:00
|
|
|
|
pub fn is_valid_post_title(title: &str) -> bool {
|
|
|
|
|
VALID_POST_TITLE_REGEX.is_match(title)
|
|
|
|
|
}
|
|
|
|
|
|
2021-03-18 20:25:21 +00:00
|
|
|
|
pub fn get_ip(conn_info: &ConnectionInfo) -> IpAddr {
|
|
|
|
|
IpAddr(
|
|
|
|
|
conn_info
|
|
|
|
|
.realip_remote_addr()
|
|
|
|
|
.unwrap_or("127.0.0.1:12345")
|
|
|
|
|
.split(':')
|
|
|
|
|
.next()
|
|
|
|
|
.unwrap_or("127.0.0.1")
|
|
|
|
|
.to_string(),
|
|
|
|
|
)
|
2020-09-14 15:29:50 +00:00
|
|
|
|
}
|
2021-06-18 18:38:34 +00:00
|
|
|
|
|
|
|
|
|
pub fn clean_url_params(mut url: Url) -> Url {
|
|
|
|
|
let new_query = url
|
|
|
|
|
.query_pairs()
|
|
|
|
|
.filter(|q| !CLEAN_URL_PARAMS_REGEX.is_match(&q.0))
|
|
|
|
|
.map(|q| format!("{}={}", q.0, q.1))
|
|
|
|
|
.join("&");
|
|
|
|
|
url.set_query(Some(&new_query));
|
|
|
|
|
url
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
  use crate::utils::clean_url_params;
  use url::Url;

  /// Tracking parameters are removed while the other parameters keep their order.
  #[test]
  fn test_clean_url_params() {
    let input = Url::parse("https://example.com/path/123?utm_content=buffercf3b2&utm_medium=social&username=randomuser&id=123").unwrap();
    let want = Url::parse("https://example.com/path/123?username=randomuser&id=123").unwrap();

    let got = clean_url_params(input);

    assert_eq!(want.to_string(), got.to_string());
  }
}
|