Tweaked the matching logic to allow hyphens (-) in usernames; added more comments
parent
dbf25c6d5c
commit
de052304d0
|
@ -1,22 +1,32 @@
|
||||||
import { Comment } from '../models/Comment';
|
import { Comment } from '../models/Comment';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A utility class for parsing Reddit comments.
|
||||||
|
*/
|
||||||
export class CommentParser {
|
export class CommentParser {
|
||||||
/**
|
/**
|
||||||
* Extracts Reddit usernames from the body of a single comment.
|
* Extracts Reddit usernames from the body of a single comment.
|
||||||
* @param comment A single Comment object to be processed.
|
* This method looks for patterns that match Reddit usernames, which start with "/u/" or "u/",
|
||||||
* @returns An array of unique Reddit usernames found in the comment.
|
* and collects them into a Set to ensure uniqueness. It then returns an array of these unique usernames,
|
||||||
|
* all in lowercase to maintain consistency.
|
||||||
|
*
|
||||||
|
* @param {Comment} comment - A single Comment object to be processed.
|
||||||
|
* @returns {string[]} An array of unique Reddit usernames found in the comment.
|
||||||
*/
|
*/
|
||||||
public static extractUsernames(comment: Comment): string[] {
|
public static extractUsernames(comment: Comment): string[] {
|
||||||
const regexPattern: RegExp = /(^|\s|\\r\\n|\\t|[".,;(){}\[\]!?@#])(\/?u\/[a-zA-Z0-9_]+)/g;
|
// Define the regex pattern to match Reddit usernames. It looks for the "/u/" or "u/" prefix,
|
||||||
|
// located at the start of the text or immediately after certain characters (like whitespace or punctuation),
|
||||||
|
// followed by the username consisting of alphanumeric characters, underscores, and hyphens.
|
||||||
|
const regexPattern: RegExp = /(^|\s|\\r\\n|\\t|[".,;(){}\[\]!?@#])(\/?u\/[a-zA-Z0-9_\-]+)/g;
|
||||||
const foundUsernames: Set<string> = new Set();
|
const foundUsernames: Set<string> = new Set();
|
||||||
|
|
||||||
const matches = comment.body.match(regexPattern);
|
const matches = comment.body.match(regexPattern);
|
||||||
if (matches) {
|
if (matches) {
|
||||||
matches.forEach(match => {
|
matches.forEach(match => {
|
||||||
// Ensure the username is captured in a standardized format
|
// Extract the username part from the match, ensuring it's in a consistent format.
|
||||||
const usernameMatch = match.trim().match(/\/?u\/([a-zA-Z0-9_]+)/);
|
const usernameMatch = match.trim().match(/\/?u\/([a-zA-Z0-9_\-]+)/);
|
||||||
if (usernameMatch) {
|
if (usernameMatch) {
|
||||||
// Standardize to "username" format
|
// Convert the username to lowercase and add it to the Set to ensure uniqueness.
|
||||||
const username = `${usernameMatch[1].toLowerCase()}`;
|
const username = `${usernameMatch[1].toLowerCase()}`;
|
||||||
foundUsernames.add(username);
|
foundUsernames.add(username);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue