23 lines
728 B
Python
23 lines
728 B
Python
import re
|
|
import os
|
|
|
|
def load_wordlist(file_path):
    """Read a word list from a text file, one entry per line.

    The file is opened as UTF-8 text. Leading and trailing whitespace
    (including the line terminator) is stripped from every line; blank
    lines are kept and come back as empty strings.

    Args:
        file_path: Path of the word-list file to read.

    Returns:
        A list of the stripped lines, in file order.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        # Materialise inside the ``with`` block: ``map`` is lazy and the
        # file must still be open while the lines are consumed.
        stripped_lines = map(str.strip, handle)
        return list(stripped_lines)
|
|
|
|
def parse(text):
    """Find every occurrence of a word-list entry in *text*.

    The word list is loaded with ``load_wordlist`` from
    ``generated_wordlist.txt`` in the same directory as this module.
    Matching is case-insensitive. Whitespace inside a multi-word entry
    matches any run of whitespace in *text*. Entries are tried in file
    order, so at a given position the earliest listed entry that matches
    wins.

    Args:
        text: The string to search.

    Returns:
        A list of ``(matched_text, start, end)`` tuples in order of
        appearance, where ``text[start:end] == matched_text``. The list is
        empty when nothing matches or the word list has no usable entries.
    """
    wordlist_path = os.path.join(os.path.dirname(__file__), 'generated_wordlist.txt')
    wordlist = load_wordlist(wordlist_path)

    alternatives = []
    for word in wordlist:
        tokens = word.split()
        if not tokens:
            # A blank entry would add an empty alternative, which matches
            # the empty string at every position of the text.
            continue
        # Escape each token on its own and join with ``\s+``. Replacing
        # ' ' in the output of re.escape() does not work: re.escape()
        # turns a space into backslash-space, so the replacement leaves a
        # stray backslash and the pattern demands a literal '\' followed
        # by the letter 's'.
        alternatives.append(r'\s+'.join(re.escape(token) for token in tokens))

    if not alternatives:
        # An empty alternation would also produce only zero-length matches.
        return []

    pattern = re.compile('|'.join(alternatives), re.IGNORECASE)
    return [(match.group(), *match.span()) for match in pattern.finditer(text)]
|
|
|