LoglineLeviathan/data/parser/generated_wordlist.py
2025-09-03 13:20:23 +02:00

23 lines
728 B
Python

import re
import os
def load_wordlist(file_path):
    """Read a wordlist file and return its non-empty entries.

    Every line is stripped of surrounding whitespace. Lines that are empty
    after stripping are skipped, because an empty entry turned into a regex
    alternative matches the empty string at every position of the input.

    Args:
        file_path: Path to a UTF-8 text file holding one entry per line.

    Returns:
        A list of the stripped, non-empty lines in file order.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged and additionally drops a leading
    # byte-order mark, which would otherwise stay attached to the first entry
    # (str.strip() does not treat U+FEFF as whitespace).
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        stripped_lines = (line.strip() for line in file)
        return [word for word in stripped_lines if word]
def parse(text):
    """Find every occurrence of a wordlist entry in *text*.

    The wordlist is read from ``generated_wordlist.txt`` in the same directory
    as this module on every call, so changes to that file are picked up by the
    next call. Matching is case-insensitive, and whitespace inside an entry
    matches any run of one or more whitespace characters in *text*.

    Args:
        text: The string to search.

    Returns:
        A list of ``(matched_text, start, end)`` tuples in order of
        appearance, where ``start``/``end`` are the offsets of the match in
        *text*. The list is empty when the wordlist has no usable entries.
    """
    wordlist_path = os.path.join(os.path.dirname(__file__), 'generated_wordlist.txt')

    # Blank entries must not reach the pattern: an empty alternative matches
    # the empty string at every offset and floods the result with
    # zero-length matches. The set also removes duplicate entries.
    entries = {word.strip() for word in load_wordlist(wordlist_path)}
    entries.discard('')
    if not entries:
        return []

    # Alternation takes the first alternative that matches at a position, so
    # longer entries go first; otherwise "cat" would shadow "category".
    # The secondary key keeps the order deterministic.
    ordered = sorted(entries, key=lambda word: (-len(word), word))

    # Escape each whitespace-separated token on its own and join the tokens
    # with \s+, which does not depend on how re.escape treats spaces.
    alternatives = [
        r'\s+'.join(re.escape(token) for token in word.split())
        for word in ordered
    ]
    pattern = re.compile('|'.join(alternatives), re.IGNORECASE)

    return [
        (match.group(), match.start(), match.end())
        for match in pattern.finditer(text)
    ]