initial commit

This commit is contained in:
overcuriousity 2025-09-03 13:20:23 +02:00
parent 13855a70ae
commit 759acc855d
57 changed files with 7306 additions and 2 deletions

View File

@ -1,3 +1,53 @@
# LoglineLeviathan
## LoglineLeviathan // Analyze/Export-Module
Large-Scale Text Parser which extracts modular configurable artifacts with context
## **Installation**
### Windows:
No .exe is available yet. Follow the Linux instructions below and adapt them to your Windows shell.
If you installed via pip install -r requirements.txt, you should run
```
pip uninstall python-magic
pip install python-magic-bin==0.4.14
```
afterwards.
> Important: The directories "data" with the entities.yaml and "output" need to be present.
### Linux / Python-Sourcecode:
This guide applies to building the application from source on a Linux host.
1. Required prerequisites: python3 (3.11 or newer), python3-pip and python3.11-venv (or whatever version you have), git.
2. Clone the git repository:
git clone https://github.com/overcuriousity/LoglineLeviathan
3. Shell:
```
cd LoglineLeviathan && python3 -m venv venv && source venv/bin/activate && pip install -r requirements.txt
```
4. Start:
```
python3 run.py
```
## **Usage**
### Analysis:
> On startup, a new database will be created by default and populated with the available entities. If a database from a prior session is present, it will be used.
After startup, no files are selected for ingestion. Starting from there, you have the following possibilities:
- Select files with "Add Files to Selection": Opens a file browser and lets you select one or more files.
- Choose directory with "Add Directory and Subdirectories": Recursively adds all files in the chosen directory and all of its subdirectories.
> Resetting the file selection is only possible via the "Clear Files from Selection"-Button.
- Choose existing database.
Button "Start/Resume File Analysis" starts the file ingestion and database population.

226
data/entities.yaml Normal file
View File

@ -0,0 +1,226 @@
bip39:
entity_type: bip39
gui_name: BIP39 Wordlist
gui_tooltip: Outputs BIP39 wordlists, which is parsed from the text by the required
length, with 0-5 characters in between the words.
parent_type: category_cryptocurrency
parser_enabled: true
regex_pattern: null
script_parser: bip39.py
btcaddr:
entity_type: btcaddr
gui_name: Bitcoin Address
gui_tooltip: Outputs BTC addresses of the common formats P2PKH, P2SH and Bech32.
parent_type: category_bitcoin
parser_enabled: true
regex_pattern: \b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b
script_parser: btcaddr.py
btctxid:
entity_type: btctxid
gui_name: Bitcoin TXID
gui_tooltip: Outputs BTC TXIDs.
parent_type: category_bitcoin
parser_enabled: true
regex_pattern: \b[a-fA-F0-9]{64}\b
script_parser: null
category_bitcoin:
entity_type: category_bitcoin
gui_name: Bitcoin
gui_tooltip: Bitcoin related entities.
parent_type: category_cryptocurrency
parser_enabled: true
regex_pattern: null
script_parser: null
category_communication:
entity_type: category_communication
gui_name: Communication
gui_tooltip: Communication related entities.
parent_type: root
parser_enabled: true
regex_pattern: null
script_parser: null
category_cryptocurrency:
entity_type: category_cryptocurrency
gui_name: Cryptocurrency
gui_tooltip: Cryptocurrency related entities.
parent_type: root
parser_enabled: true
regex_pattern: null
script_parser: null
category_cybersecurity:
entity_type: category_cybersecurity
gui_name: Cybersecurity
gui_tooltip: Cybersecurity related entities.
parent_type: root
parser_enabled: true
regex_pattern: null
script_parser: null
category_internet:
entity_type: category_internet
gui_name: Internet
gui_tooltip: Internet related entities.
parent_type: root
parser_enabled: true
regex_pattern: null
script_parser: null
category_monero:
entity_type: category_monero
gui_name: Monero
gui_tooltip: Monero related entities.
parent_type: category_cryptocurrency
parser_enabled: true
regex_pattern: null
script_parser: null
category_networking:
entity_type: category_networking
gui_name: Networking
gui_tooltip: Networking related entities.
parent_type: root
parser_enabled: true
regex_pattern: null
script_parser: null
category_special:
entity_type: category_special
gui_name: Special Parsers
gui_tooltip: Special parsers, e.g. created wordlists.
parent_type: root
parser_enabled: true
regex_pattern: null
script_parser: null
gdocurl:
entity_type: gdocurl
gui_name: Google Docs URL
gui_tooltip: Outputs any possible Google Docs URLs.
parent_type: url
parser_enabled: true
regex_pattern: \bhttps:\/\/docs\.google\.com\/[\w\/.-]*\/d\/[a-zA-Z0-9_-]+(?:\/\S*)?
script_parser: null
generated_wordlist_match:
entity_type: generated_wordlist_match
gui_name: Generated Wordlist Match
gui_tooltip: Outputs any wordlist matches which are specified by the generated wordlist
present in the parser directory.
parent_type: category_special
parser_enabled: true
regex_pattern: null
script_parser: generated_wordlist.py
github:
entity_type: github
gui_name: GitHub
gui_tooltip: Outputs any possible GitHub repositories.
parent_type: url
parser_enabled: true
regex_pattern: \bhttps?:\/\/github\.com\/[A-Za-z0-9_.-]+\/[A-Za-z0-9_.-]+\/?\S*
script_parser: null
ipv4:
entity_type: ipv4
gui_name: IPv4 Address
gui_tooltip: Outputs any IPv4 addresses.
parent_type: category_networking
parser_enabled: true
regex_pattern: \b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b
script_parser: ipv4.py
ipv4pr:
entity_type: ipv4pr
gui_name: Private Address Range
gui_tooltip: Outputs any IPv4 addresses of the private address range.
parent_type: ipv4
parser_enabled: true
regex_pattern: \b(10\.\d{1,3}\.\d{1,3}\.\d{1,3}|172\.(1[6-9]|2[0-9]|3[0-1])\.\d{1,3}\.\d{1,3}|192\.168\.\d{1,3}\.\d{1,3})\b
script_parser: ipv4pr.py
ipv4pu:
entity_type: ipv4pu
gui_name: Public Address Range
gui_tooltip: Outputs any IPv4 addresses of the public address range.
parent_type: ipv4
parser_enabled: true
regex_pattern: \b((?!10\.)(?!172\.(1[6-9]|2[0-9]|3[0-1]))(?!192\.168)(?:[0-9]{1,3}\.){3}[0-9]{1,3})\b
script_parser: ipv4pu.py
ipv6:
entity_type: ipv6
gui_name: IPv6 Address
gui_tooltip: Outputs any IPv6 addresses.
parent_type: category_networking
parser_enabled: true
regex_pattern: (([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))
script_parser: ipv6.py
macaddr:
entity_type: macaddr
gui_name: MAC Address
gui_tooltip: Outputs any possible MAC addresses.
parent_type: category_networking
parser_enabled: true
regex_pattern: \b(?:[0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}\b
script_parser: null
mailaddr:
entity_type: mailaddr
gui_name: EMail Address
gui_tooltip: Outputs any possible email-addresses.
parent_type: category_communication
parser_enabled: true
regex_pattern: \b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b
script_parser: null
onionurl:
entity_type: onionurl
gui_name: Onion URL
gui_tooltip: Outputs any possible onion URL.
parent_type: category_internet
parser_enabled: true
regex_pattern: \bhttps?:\/\/[a-z2-7]{16,56}\.onion(?:\/\S*)?
script_parser: null
telnum:
entity_type: telnum
gui_name: Possible Telephone Number
gui_tooltip: Outputs any possible telephone numbers, this may have some 0-positives.
parent_type: category_communication
parser_enabled: true
regex_pattern: \b(?:\+\d{1,4}\s?)?\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b
script_parser: telnum.py
toxid:
entity_type: toxid
gui_name: Tox ID
gui_tooltip: Outputs any possible tox ID, including QTOX. Unverified Regex Pattern.
parent_type: category_communication
parser_enabled: true
regex_pattern: (?<![0-9a-fA-F])[0-9a-fA-F]{76}(?![0-9a-fA-F])
script_parser: null
url:
entity_type: url
gui_name: URL
gui_tooltip: Outputs any possible URL.
parent_type: category_internet
parser_enabled: true
regex_pattern: \b(?:https?|s?ftp):\/\/[\w\/.-]+(?:\.[a-z]{2,})+\S*
script_parser: url.py
vulnerability_CVE:
entity_type: vulnerability_CVE
gui_name: CVE String
gui_tooltip: Outputs any possible CVE Vulnerability Identifier.
parent_type: category_cybersecurity
parser_enabled: true
regex_pattern: cve-\d{4}-\d+
script_parser: null
xmraddr:
entity_type: xmraddr
gui_name: Monero Address
gui_tooltip: Outputs Monero addresses.
parent_type: category_monero
parser_enabled: true
regex_pattern: \b4[0-9AB][1-9A-HJ-NP-Za-km-z]{93}\b
script_parser: xmraddr.py
category_metadata:
entity_type: category_metadata
gui_name: Metadata
gui_tooltip: Metadata related entities.
parent_type: root
parser_enabled: false
regex_pattern: null
script_parser: null
timestamp:
entity_type: timestamp
gui_name: Timestamp
gui_tooltip: Timestamp-like entities.
parent_type: category_metadata
parser_enabled: false
regex_pattern: \b\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\b
script_parser: timestamp.py

0
data/parser/__init__.py Normal file
View File

26
data/parser/bip39 copy.py Normal file

File diff suppressed because one or more lines are too long

28
data/parser/bip39.py Normal file

File diff suppressed because one or more lines are too long

24
data/parser/btcaddr.py Normal file
View File

@ -0,0 +1,24 @@
import re
def parse(text):
    """Find Bitcoin addresses in *text*.

    Recognises legacy Base58 addresses (P2PKH starting with ``1`` and P2SH
    starting with ``3``) and Bech32 addresses starting with ``bc1``. No
    checksum validation is performed.

    Args:
        text: The string to scan.

    Returns:
        A list of ``(address, start_pos, end_pos)`` tuples in order of
        appearance; the positions are the span of the address within *text*.
    """
    # P2PKH ("1...") and P2SH ("3...") use the same Base58 alphabet and
    # length range, so a single alternative covers both.
    base58_regex = r'\b[13][1-9A-HJ-NP-Za-km-z]{25,34}\b'
    bech32_regex = r'\bbc1[qpz][0-9a-z]{39,59}\b'
    bech32_regex1 = r'\bbc1[qpz0-9ac-hj-np-z]{38,58}\b'
    # The alternatives are joined with a bare "|": any whitespace around the
    # bar would become part of the pattern and would have to occur in the
    # text next to the address.
    combined_regex = '|'.join((base58_regex, bech32_regex, bech32_regex1))
    return [
        (match.group(), match.start(), match.end())
        for match in re.finditer(combined_regex, text)
    ]
# TODO: integrate regexes for xpub, ypub and zpub keys
# TODO: add a checksum check for matched addresses

View File

@ -0,0 +1,22 @@
import re
import os
def load_wordlist(file_path):
    """Read the wordlist at *file_path*, one entry per line.

    Surrounding whitespace is stripped and blank lines are skipped, because
    an empty entry would turn into an empty regex alternative that matches
    at every position of the scanned text.

    Args:
        file_path: Path of a UTF-8 encoded text file.

    Returns:
        The non-empty entries in file order.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        stripped_lines = (line.strip() for line in file)
        return [entry for entry in stripped_lines if entry]


def parse(text):
    """Find case-insensitive occurrences of the generated wordlist in *text*.

    The wordlist is read from ``generated_wordlist.txt`` in the directory of
    this module. Whitespace inside a multi-word entry matches any run of
    whitespace in the text.

    Args:
        text: The string to scan.

    Returns:
        A list of ``(matched_text, start_pos, end_pos)`` tuples in order of
        appearance; empty when the wordlist has no entries.
    """
    wordlist_path = os.path.join(os.path.dirname(__file__), 'generated_wordlist.txt')
    wordlist = [word for word in load_wordlist(wordlist_path) if word.strip()]
    if not wordlist:
        # An empty alternation would match the empty string everywhere.
        return []

    # Escape each whitespace-separated part on its own and join the parts
    # with \s+. Escaping the whole entry first would escape the spaces too,
    # and a later textual replace would leave a stray backslash in the regex.
    alternatives = (
        r'\s+'.join(re.escape(part) for part in word.split())
        for word in wordlist
    )
    pattern = '(' + '|'.join(alternatives) + ')'

    return [
        (match.group(), match.start(), match.end())
        for match in re.finditer(pattern, text, re.IGNORECASE)
    ]

22
data/parser/ipv4.py Normal file
View File

@ -0,0 +1,22 @@
import re
import ipaddress
def is_valid_ipv4_address(ip_addr):
    """Report whether *ip_addr* parses as an IPv4 address.

    Both public and private addresses count as valid; anything that does not
    parse, or parses as IPv6, yields ``False``.
    """
    try:
        parsed = ipaddress.ip_address(ip_addr)
    except ValueError:
        return False
    return isinstance(parsed, ipaddress.IPv4Address)
def parse(text):
    """Collect every valid IPv4 address found in *text*.

    Candidates are dotted quads of 0-255 octets that are not embedded in a
    longer digit run; each candidate is confirmed with
    ``is_valid_ipv4_address`` before it is reported.

    Returns:
        A list of ``(address, start_pos, end_pos)`` tuples in order of
        appearance.
    """
    octet = r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
    ipv4_regex = r'(?<!\d)' + r'\.'.join([octet] * 4) + r'(?!\d)'
    return [
        (candidate.group(), candidate.start(), candidate.end())
        for candidate in re.finditer(ipv4_regex, text)
        if is_valid_ipv4_address(candidate.group())
    ]

22
data/parser/ipv4pr.py Normal file
View File

@ -0,0 +1,22 @@
import re
import ipaddress
def is_private_ip(ip_addr):
    """Report whether *ip_addr* is an address the ``ipaddress`` module classifies as private.

    Strings that do not parse as an IP address yield ``False``.
    """
    try:
        address = ipaddress.ip_address(ip_addr)
    except ValueError:
        return False
    return address.is_private
def parse(text):
    """Collect every private-range IPv4 address found in *text*.

    Candidates are word-bounded dotted quads of 0-255 octets; only those
    accepted by ``is_private_ip`` are reported.

    Returns:
        A list of ``(address, start_pos, end_pos)`` tuples in order of
        appearance.
    """
    octet = r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
    ipv4_regex = r'\b' + r'\.'.join([octet] * 4) + r'\b'
    return [
        (candidate.group(), candidate.start(), candidate.end())
        for candidate in re.finditer(ipv4_regex, text)
        if is_private_ip(candidate.group())
    ]

21
data/parser/ipv4pu.py Normal file
View File

@ -0,0 +1,21 @@
import re
import ipaddress
def is_public_ip(ip_addr):
    """Report whether *ip_addr* is neither private, reserved nor loopback.

    Strings that do not parse as an IP address yield ``False``.
    """
    try:
        address = ipaddress.ip_address(ip_addr)
        return not (address.is_private or address.is_reserved or address.is_loopback)
    except ValueError:
        return False
def parse(text):
    """Collect every public IPv4 address found in *text*.

    Candidates are word-bounded dotted quads of 0-255 octets; only those
    accepted by ``is_public_ip`` are reported.

    Returns:
        A list of ``(address, start_pos, end_pos)`` tuples in order of
        appearance.
    """
    octet = r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
    ipv4_regex = r'\b' + r'\.'.join([octet] * 4) + r'\b'
    return [
        (candidate.group(), candidate.start(), candidate.end())
        for candidate in re.finditer(ipv4_regex, text)
        if is_public_ip(candidate.group())
    ]

20
data/parser/ipv6.py Normal file
View File

@ -0,0 +1,20 @@
import re
import ipaddress
def is_valid_ipv6_address(ip_addr):
    """Report whether *ip_addr* parses as an IPv6 address.

    Anything that does not parse, or parses as IPv4, yields ``False``.
    """
    try:
        parsed = ipaddress.ip_address(ip_addr)
    except ValueError:
        return False
    return isinstance(parsed, ipaddress.IPv6Address)
def parse(text):
    """Collect every valid IPv6 address found in *text*.

    A permissive regex proposes candidates, and each one is confirmed with
    ``is_valid_ipv6_address`` before it is reported.

    Returns:
        A list of ``(address, start_pos, end_pos)`` tuples in order of
        appearance.
    """
    # NOTE(review): the alternatives are tried in order and the pattern is
    # not anchored, so a compressed address such as "2001:db8::1" is
    # reported as the shorter (still valid) prefix "2001:db8::".
    ipv6_regex = r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))'
    return [
        (candidate.group(), candidate.start(), candidate.end())
        for candidate in re.finditer(ipv6_regex, text, re.IGNORECASE)
        if is_valid_ipv6_address(candidate.group())
    ]

100
data/parser/telnum.py Normal file
View File

@ -0,0 +1,100 @@
import phonenumbers
import logging
import re
# Regions tried, in this order, when validating a candidate number.
# dict.fromkeys drops the repeated codes while keeping the original order, so
# no region is validated twice for the same candidate.
_DEFAULT_REGIONS = tuple(dict.fromkeys((
    'US', 'GB', 'DE', 'FR', 'ES', 'IT', 'RU', 'CN', 'IN', 'JP',
    'BR', 'ZA', 'NG', 'EG', 'TR', 'ID', 'AU', 'CA', 'MX', 'AR',
    'KR', 'TH', 'VN', 'PH', 'MY', 'SA', 'IR', 'PK', 'BD', 'UA',
    'PL', 'NL', 'BE', 'CH', 'AT', 'SE', 'NO', 'DK', 'FI', 'IL',
    'SG', 'HK', 'NZ', 'AE', 'KE', 'CO', 'VE', 'PE', 'CL', 'GR',
    'PT', 'CZ', 'RO', 'HU', 'BG', 'SK', 'SI', 'HR', 'RS', 'LT',
    'LV', 'EE', 'CY', 'LU', 'MT', 'IS', 'KZ', 'UZ', 'AM', 'AZ',
    'GE', 'MN', 'KG', 'TJ', 'TM', 'BT', 'NP', 'LK', 'MM', 'KH',
    'LA', 'BN', 'FJ', 'PW', 'SB', 'VU', 'FM', 'WS', 'TO', 'TV',
    'KI', 'NR', 'MQ', 'GF', 'RE', 'YT', 'PF', 'NC', 'WF', 'TF',
    'AI', 'AG', 'AW', 'BS', 'BB', 'BZ', 'BM', 'VG', 'KY', 'CU',
    'CW', 'DM', 'DO', 'GD', 'GP', 'HT', 'JM', 'MQ', 'MS', 'PR',
    'KN', 'LC', 'VC', 'SX', 'TT', 'TC', 'VI', 'BO', 'BQ', 'EC',
    'GY', 'PY', 'SR', 'UY', 'DZ', 'AO', 'BJ', 'BW', 'BF', 'BI',
    'CV', 'CM', 'CF', 'TD', 'KM', 'CG', 'CD', 'DJ', 'GQ', 'ER',
    'SZ', 'ET', 'GA', 'GM', 'GH', 'GN', 'GW', 'CI', 'LS', 'LR',
    'LY', 'MG', 'MW', 'ML', 'MR', 'MU', 'MA', 'MZ', 'NA', 'NE',
    'NG', 'RW', 'ST', 'SN', 'SC', 'SL', 'SO', 'SS', 'SD', 'TZ',
    'TG', 'TN', 'UG', 'ZM', 'ZW',
)))

# Context patterns that mark a nearby digit run as "not a phone number".
# Timestamp shapes come first, followed by other number-heavy log fragments.
_NON_PHONE_CONTEXT_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}',          # YYYY-MM-DD HH:MM:SS
    r'\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}',          # YYYY/MM/DD HH:MM:SS
    r'\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}',          # DD/MM/YYYY HH:MM:SS
    r'\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}',          # MM-DD-YYYY HH:MM:SS
    r'\d{8}_\d{6}',                                  # YYYYMMDD_HHMMSS
    r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}',          # YYYY-MM-DDTHH:MM:SS
    r'\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2}',        # DD.MM.YYYY HH:MM:SS
    r'\d{4}\d{2}\d{2} \d{2}:\d{2}:\d{2}',            # YYYYMMDD HH:MM:SS
    r'\d{1,2}-[A-Za-z]{3}-\d{4} \d{2}:\d{2}:\d{2}',  # D-Mon-YYYY HH:MM:SS
    r'(?:19|20)\d{10}',                              # YYYYMMDDHHMM
    r'\d{5,}\s?bytes',  # File size in bytes
    r'https?://\S+',  # URLs
    r'\bversion \d+',  # 'version' followed by numbers
    r'cve-\d{4}-\d+',  # CVE identifiers
    r'\S+\.onion\S*',  # Onion addresses
    r'Product ID: \S+',  # Product IDs
    r'\|\s*[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\s*\|',  # UUIDs
    r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b',  # IP addresses
    r'Mem: \d+\s+\d+\s+\d+',  # Memory sizes
    r'Total: \d+\s+\d+\s+\d+',  # Total memory
    r'block_size=\d+',  # Block size
    r'-rw-------\s+\d+\s+\S+\s+\S+\s+\d+\s+\S+\s+\d{1,2}\s+\d{1,2}:\d{2}',  # File details
    r'\d+\.\d+\.\d+\.\d+\s+\d+\s+\S+\s+\d+',  # IP and port patterns
))

# Loose shape of a phone number; real validation is done by `phonenumbers`.
_PHONE_REGEX = re.compile(r'\b(\+?\d{1,3}[\s-]?)?(\(?\d{1,4}\)?[\s-]?)?\d{3,5}[\s-]?\d{3,5}\b')

# Number of characters inspected on each side of a candidate.
_CONTEXT_WINDOW = 50


def _is_unlikely_phone_context(extended_text):
    """Return True when *extended_text* contains a known non-phone pattern."""
    return any(pattern.search(extended_text) for pattern in _NON_PHONE_CONTEXT_PATTERNS)


def parse(text, default_regions=None):
    """Find telephone numbers in *text*.

    Candidates are taken from a loose regex. A candidate is dropped when the
    text within 50 characters around it looks like a timestamp, URL, IP
    address or another number-heavy log fragment. The remaining candidates
    are reported if ``phonenumbers`` considers them valid for at least one
    of the regions.

    Args:
        text: The string to scan.
        default_regions: Iterable of region codes to try in order. ``None``
            (the default) selects the built-in region list.

    Returns:
        A list of ``(raw_number, start_pos, end_pos)`` tuples in order of
        appearance.
    """
    # `is None` rather than truthiness: an explicitly empty iterable must
    # stay empty instead of falling back to the built-in list.
    regions = _DEFAULT_REGIONS if default_regions is None else default_regions
    matches = []

    for number_match in _PHONE_REGEX.finditer(text):
        raw_number = number_match.group()
        start_pos, end_pos = number_match.span()

        # Extend the search window for additional context
        extended_start = max(0, start_pos - _CONTEXT_WINDOW)
        extended_end = min(len(text), end_pos + _CONTEXT_WINDOW)
        if _is_unlikely_phone_context(text[extended_start:extended_end]):
            continue  # Skip if the context indicates it's not a phone number

        for region in regions:
            try:
                parsed_number = phonenumbers.parse(raw_number, region)
            except phonenumbers.NumberParseException:
                continue
            if phonenumbers.is_valid_number(parsed_number):
                matches.append((raw_number, start_pos, end_pos))
                break
        else:
            # No region accepted the candidate.
            logging.debug(f"Failed to parse number: {raw_number}")

    return matches

26
data/parser/timestamp.py Normal file
View File

@ -0,0 +1,26 @@
import re
# Timestamp patterns as (regex, strptime-style format) pairs.
# parse() in this module only uses the regex of each pair.
timestamp_patterns = [
    (r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', '%Y-%m-%d %H:%M:%S'), # ISO-like date with space separator
    (r'\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}', '%Y/%m/%d %H:%M:%S'), # Year-first date with slashes
    (r'\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}', '%d/%m/%Y %H:%M:%S'), # Day-first date with slashes
    (r'\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}', '%m-%d-%Y %H:%M:%S'), # Month-first date with dashes
    (r'\d{8}_\d{6}', '%Y%m%d_%H%M%S'), # Compact date and time joined by an underscore
    (r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}', '%Y-%m-%dT%H:%M:%S'), # ISO 8601 with "T" separator
    (r'\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2}', '%d.%m.%Y %H:%M:%S'),# Day-first date with dots
    (r'\d{4}\d{2}\d{2} \d{2}:\d{2}:\d{2}', '%Y%m%d %H:%M:%S'), # Compact date followed by a time
    (r'\d{1,2}-[A-Za-z]{3}-\d{4} \d{2}:\d{2}:\d{2}', '%d-%b-%Y %H:%M:%S'), # Day, abbreviated month name, year
    (r'(?:19|20)\d{10}', '%Y%m%d%H%M'), # Twelve digits starting with 19 or 20
    # Add more patterns as needed
]
def parse(text):
    """Collect every substring of *text* that matches a timestamp pattern.

    Results are grouped by pattern, in the order the patterns are listed in
    ``timestamp_patterns``, and by position within each group.

    Returns:
        A list of ``(timestamp_text, start_pos, end_pos)`` tuples.
    """
    return [
        (found.group(), found.start(), found.end())
        for regex, _format in timestamp_patterns
        for found in re.finditer(regex, text)
    ]

21
data/parser/url.py Normal file
View File

@ -0,0 +1,21 @@
import tldextract
import re
def parse(text):
    """Collect http(s)/ftp URLs in *text* whose host has a domain and a suffix.

    Each regex candidate is passed to ``tldextract.extract`` and kept only
    when both the extracted domain and suffix are non-empty.

    Returns:
        A list of ``(url, start_pos, end_pos)`` tuples in order of appearance.
    """
    # Regular expression for detecting potential URLs
    url_regex = r'\b(?:https?|ftp):\/\/[^\s]+'
    results = []
    for candidate in re.finditer(url_regex, text):
        url = candidate.group()
        parts = tldextract.extract(url)
        if not (parts.domain and parts.suffix):
            continue
        results.append((url, candidate.start(), candidate.end()))
    return results

12
data/parser/xmraddr.py Normal file
View File

@ -0,0 +1,12 @@
import re
def parse(text):
    """Collect Monero-address-shaped strings in *text*.

    A candidate is a word-bounded run of 95 Base58 characters that starts
    with ``4`` or ``8``. No checksum validation is performed.

    Returns:
        A list of ``(address, start_pos, end_pos)`` tuples in order of
        appearance.
    """
    xmr_regex = r'\b4[123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz]{94}\b|\b8[123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz]{94}\b'
    return [
        (found.group(), found.start(), found.end())
        for found in re.finditer(xmr_regex, text)
    ]

BIN
icon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

View File

@ -0,0 +1 @@
from .__main__ import main

View File

@ -0,0 +1,32 @@
"""
You're welcome! I'm glad you like the name "Logline Leviathan". It's a fitting name for a program that can delve into the depths of unstructured text data like a leviathan, extracting valuable insights from the chaotic ocean of information. I hope your program is successful in its mission to help investigators navigate the dark, digital realm of cyberpunk."""
import sys
from PyQt5.QtWidgets import QApplication
from pathlib import Path
import argparse
# Add the parent directory of 'logline_leviathan' to sys.path so that the
# absolute 'logline_leviathan.*' imports below resolve.
# The directory is appended only when it is not already listed.
parent_dir = str(Path(__file__).resolve().parent.parent)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
from logline_leviathan.gui.mainwindow import MainWindow
from logline_leviathan.database.database_manager import create_database
def initialize_database():
    """Create the database by calling ``create_database`` with its defaults."""
    create_database()
def main():
    """Parse the command line, open the main window and run the Qt event loop.

    The optional positional ``directory`` argument (default: empty string) is
    handed to ``MainWindow`` together with the application object and the
    database initialisation callback. The process exits with the event
    loop's return code.
    """
    arg_parser = argparse.ArgumentParser(description='Analyze Export')
    arg_parser.add_argument('directory', nargs='?', default='', help='Directory to analyze')
    cli_args = arg_parser.parse_args()

    qt_app = QApplication(sys.argv)
    # The initialiser is passed as a callable, not called here.
    window = MainWindow(qt_app, initialize_database, cli_args.directory)
    window.show()

    exit_code = qt_app.exec_()
    sys.exit(exit_code)
if __name__ == "__main__":
main()

View File

View File

@ -0,0 +1,107 @@
from sqlalchemy import create_engine, Column, Integer, String, ForeignKey, Text, DateTime, Boolean
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy.orm import sessionmaker
from contextlib import contextmanager
import logging
# Session factory bound to an engine for the SQLite URL 'sqlite:///entities.db'.
SessionFactory = sessionmaker(bind=create_engine('sqlite:///entities.db'))
# Declarative base class from which the ORM models in this module derive.
Base = declarative_base()
class DistinctEntitiesTable(Base):
    """ORM model for ``distinct_entities_table``.

    Holds an entity value together with its entity type; individual
    occurrences (``EntitiesTable`` rows) refer back to a row of this table.
    NOTE(review): the name suggests one row per distinct value, but no
    uniqueness constraint is declared here.
    """
    __tablename__ = 'distinct_entities_table'
    distinct_entities_id = Column(Integer, primary_key=True)  # primary key of distinct_entities_table
    distinct_entity = Column(String, index=True)  # the entity value itself, e.g. 192.168.1.1 or bc1qy3h5l8n9 (indexed)
    entity_types_id = Column(Integer, ForeignKey('entity_types_table.entity_type_id'))  # foreign key into entity_types_table
    regex_library = relationship("EntityTypesTable")  # the referenced entity type row
    individual_entities = relationship("EntitiesTable", back_populates="entity")  # occurrences; counterpart of EntitiesTable.entity
class EntitiesTable(Base):
    """ORM model for ``entities_table``: a single occurrence of an entity in a file."""
    __tablename__ = 'entities_table'
    entities_id = Column(Integer, primary_key=True)  # primary key of entities_table
    distinct_entities_id = Column(Integer, ForeignKey('distinct_entities_table.distinct_entities_id'))  # foreign key into distinct_entities_table
    entity_types_id = Column(Integer, ForeignKey('entity_types_table.entity_type_id'))  # foreign key into entity_types_table
    regex_library = relationship("EntityTypesTable")  # the referenced entity type row
    file_id = Column(Integer, ForeignKey('file_metadata.file_id'))  # foreign key into file_metadata
    line_number = Column(Integer)  # line inside the file referenced by file_id
    entry_timestamp = Column(DateTime)  # timestamp obtained via regex from the original input file
    flag = Column(Boolean, default=False, index=True)  # user-settable flag (indexed) for customizing data inspection
    entity = relationship("DistinctEntitiesTable", back_populates="individual_entities")  # counterpart of DistinctEntitiesTable.individual_entities
    file = relationship("FileMetadata")  # the referenced file metadata row
    context = relationship("ContextTable", uselist=False, back_populates="individual_entity")  # scalar (uselist=False) link to the context row
class ContextTable(Base):
    """ORM model for ``context_table``: text surrounding an entity occurrence.

    Three context sizes are stored per occurrence; how many lines each size
    covers is decided by the code that fills the table, not here.
    """
    __tablename__ = 'context_table'
    context_id = Column(Integer, primary_key=True)  # primary key of context_table
    entities_id = Column(Integer, ForeignKey('entities_table.entities_id'))  # foreign key into entities_table
    context_small = Column(Text)  # small context: lines before and after the entity, taken from the original file
    context_medium = Column(Text)  # medium context: lines before and after the entity, taken from the original file
    context_large = Column(Text, index=True)  # large context; the only context column that is indexed
    #context_indexed = Column(Text, index=True) # disabled: separate indexed context column
    individual_entity = relationship("EntitiesTable", back_populates="context")  # counterpart of EntitiesTable.context
class FileMetadata(Base):
    """ORM model for ``file_metadata``: one row describing an input file."""
    __tablename__ = 'file_metadata'
    file_id = Column(Integer, primary_key=True)  # primary key of file_metadata
    file_name = Column(String, index=True)  # name of the original input file (indexed)
    file_path = Column(String)  # path of the original input file
    file_mimetype = Column(String)  # MIME type of the original input file
class EntityTypesTable(Base):
    """ORM model for ``entity_types_table``: the definition of an entity type."""
    __tablename__ = 'entity_types_table'
    entity_type_id = Column(Integer, primary_key=True)  # primary key of entity_types_table
    entity_type = Column(String)  # short form of the entity type, e.g. ipv4, ipv6, btcaddr
    regex_pattern = Column(String)  # regex pattern which could be used for parsing the files
    script_parser = Column(String)  # name of the Python script which could be used for parsing the files
    gui_tooltip = Column(String)  # tooltip shown in the GUI
    gui_name = Column(String)  # GUI name, more descriptive than entity_type
    parent_type = Column(String, default='root')  # parent in the hierarchy from the YAML specs; defaults to 'root'
    parser_enabled = Column(Boolean, default=True)  # whether the parser is enabled; defaults to True
def create_database(db_path='sqlite:///entities.db'):
    """Create all ORM tables in the database at *db_path* if they are missing.

    After the schema is created the entity-types table is probed once. An
    empty table is only logged; this function does not populate it.

    Args:
        db_path: SQLAlchemy database URL of the database to initialise.
    """
    engine = create_engine(db_path)
    logging.debug("Create Database Engine")
    try:
        Base.metadata.create_all(engine)
        logging.debug("Created all Metadata")

        # Bind the session to this engine so the probe inspects the database
        # that was just initialised, even when db_path is not the default URL
        # the module-level SessionFactory is bound to.
        session = SessionFactory(bind=engine)
        logging.debug("Started new session with session factory")
        try:
            if not session.query(EntityTypesTable).first():
                logging.debug("EntityTypesTable is empty; no entity types have been loaded yet")
        finally:
            # Close the session even if the probe query fails.
            session.close()
    finally:
        engine.dispose()
        logging.debug("Disposed Engine")
def get_db_session():
    """Return a new session from the module-level ``SessionFactory``.

    The caller is responsible for closing the returned session.
    """
    return SessionFactory()
if __name__ == "__main__":
create_database()
@contextmanager
def session_scope():
    """Yield a session that is committed on success and rolled back on error.

    The session comes from the module-level ``SessionFactory``. Any exception
    raised inside the ``with`` body (or by the commit) triggers a rollback
    and is re-raised; the session is closed in every case.
    """
    db_session = SessionFactory()
    try:
        yield db_session
        db_session.commit()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()

View File

@ -0,0 +1,357 @@
import logging
import os
import yaml
from PyQt5.QtWidgets import QDialog, QVBoxLayout, QMessageBox, QLabel, QRadioButton, QPushButton
from logline_leviathan.gui.ui_helper import UIHelper
from logline_leviathan.database.database_manager import *
class DatabaseOperations:
def __init__(self, main_window, db_init_func):
    """Store the collaborators used by the database operations.

    Args:
        main_window: Kept on the instance as ``main_window``.
        db_init_func: Zero-argument callable; ``ensureDatabaseExists`` calls
            it when the database file is missing.
    """
    self.main_window = main_window
    self.db_init_func = db_init_func
    self.selected_resolutions = []  # starts empty
def ensureDatabaseExists(self):
db_path = 'entities.db'
db_exists = os.path.exists(db_path)
if not db_exists:
logging.info("Database does not exist. Creating new database...")
self.db_init_func() # This should call create_database
else:
logging.info("Database exists.")
def loadRegexFromYAML(self):
    """Load the entity definitions from './data/entities.yaml'.

    The file is read as UTF-8 so the result does not depend on the platform's
    default encoding. An empty file is treated as "no entities".

    Returns:
        The parsed mapping of entity name to entity fields, after it has
        passed ``notify_duplicates_from_yaml``.

    Raises:
        ValueError: Propagated from ``notify_duplicates_from_yaml`` when the
            file contains duplicate entries.
    """
    with open('./data/entities.yaml', 'r', encoding='utf-8') as file:
        # safe_load returns None for an empty document; fall back to an
        # empty mapping so the duplicate check can iterate it.
        yaml_data = yaml.safe_load(file) or {}
    return self.notify_duplicates_from_yaml(yaml_data)
def notify_duplicates_from_yaml(self, yaml_data):
duplicates = []
seen_fields = {'entity_type': {}, 'gui_name': {}, 'gui_tooltip': {}, 'regex_pattern': {}, 'script_parser': {}}
for entity_name, entity_data in yaml_data.items():
# Iterate through each field and check for duplicates
for field in seen_fields:
value = entity_data.get(field)
if value: # Only check non-empty values
if value in seen_fields[field]:
duplicates.append({
"duplicate_field": field,
"entity_name": entity_name,
"original_entity_name": seen_fields[field][value]
})
seen_fields[field][value] = entity_name
if duplicates:
self.show_duplicate_error_dialog(duplicates)
raise ValueError("Duplicate entries found in YAML file. Aborting.")
return yaml_data
def show_duplicate_error_dialog(self, duplicates):
    """Show a modal ``DuplicateErrorDialog`` for *duplicates*."""
    DuplicateErrorDialog(duplicates).exec_()
def show_resolve_inconsistencies_dialog(self, db_entity, yaml_entity):
    """Ask the user how to resolve a conflict between a DB and a YAML entity.

    Returns:
        The first resolution reported by the dialog, or ``None`` when the
        dialog was not accepted or reported no resolution.
    """
    dialog = ResolveInconsistenciesDialog([(db_entity, yaml_entity)])
    if dialog.exec_() != QDialog.Accepted:
        return None
    chosen = dialog.getSelectedResolutions()
    # Only one conflict was passed in, so only the first entry is relevant.
    return chosen[0] if chosen else None
def populate_and_update_entities_from_yaml(self, yaml_data):
    """Synchronise the entity-types table with the entries of *yaml_data*.

    For every YAML entry the matching DB row is looked up by ``entity_type``,
    falling back to ``find_potentially_modified_entity``. A row that is found
    keeps its ``parser_enabled`` value (it is written into the YAML entry's
    dict). If ``is_duplicate_or_inconsistent`` reports a problem the user is
    asked for a resolution; otherwise the YAML values are copied onto the
    row. Entries without any matching row are inserted as new rows.

    NOTE(review): the nesting below was reconstructed from a listing without
    indentation; confirm the pairing of the ``else`` branches and the
    position of the final commit against the original file.

    Args:
        yaml_data: Mapping of entity name to a dict of entity fields; every
            dict must contain ``entity_type``. The dicts are modified in place.
    """
    with session_scope() as session:
        db_entities = session.query(EntityTypesTable).all()
        # Index the existing rows by their short type name.
        db_entity_dict = {entity.entity_type: entity for entity in db_entities}

        for entity_name, entity_data in yaml_data.items():
            entity_type = entity_data['entity_type']
            db_entity = db_entity_dict.get(entity_type)

            if db_entity is None:
                # No row with this type name: look for a row sharing another field.
                db_entity = self.find_potentially_modified_entity(db_entities, entity_data)

            if db_entity:
                # The enabled/disabled state stored in the DB takes precedence over the YAML value.
                parser_enabled_db = db_entity.parser_enabled
                entity_data['parser_enabled'] = parser_enabled_db

                if self.is_duplicate_or_inconsistent(db_entity, entity_data, db_entities):
                    logging.warning(f"Issue found with entity {db_entity} and {entity_data}. Handling resolution.")
                    resolution = self.show_resolve_inconsistencies_dialog(db_entity, entity_data)
                    if resolution:
                        self.apply_resolution([(resolution, db_entity)], session)  # Pass db_entity as part of the resolution
                else:
                    # Consistent: copy every YAML field onto the existing row.
                    for key, value in entity_data.items():
                        setattr(db_entity, key, value)
            else:
                new_entity = EntityTypesTable(**entity_data)
                session.add(new_entity)

        session.commit()
def find_potentially_modified_entity(self, db_entities, yaml_entity):
    """Return the first DB entity sharing an identifying field with *yaml_entity*.

    The identifying fields are ``entity_type``, ``gui_name``, ``gui_tooltip``,
    ``regex_pattern`` and ``script_parser``. ``parser_enabled`` is not
    compared: it is a boolean flag, so comparing it would make every enabled
    DB entity "match" every enabled YAML entity.

    Args:
        db_entities: Iterable of objects exposing the identifying fields as
            attributes.
        yaml_entity: Dict of entity fields; missing or empty fields are
            ignored.

    Returns:
        The first matching element of *db_entities*, or ``None``.
    """
    identifying_keys = ('entity_type', 'gui_name', 'gui_tooltip', 'regex_pattern', 'script_parser')
    # Keep only fields that carry a value; empty ones cannot identify anything.
    wanted = {
        key: yaml_entity.get(key)
        for key in identifying_keys
        if yaml_entity.get(key)
    }
    for db_ent in db_entities:
        if any(getattr(db_ent, key, None) == value for key, value in wanted.items()):
            return db_ent
    return None
def is_duplicate_or_inconsistent(self, db_entity, yaml_entity, db_entities):
    """Report whether *yaml_entity* conflicts with the database content.

    Two checks are made:
    1. Inconsistency: a non-``None`` YAML field differs from the same field
       of *db_entity* (``parser_enabled`` is not compared).
    2. Duplicate: a non-``None`` YAML field equals the same field of a DB
       entity that has a different ``entity_type``.

    Returns:
        ``True`` when either check finds a conflict, otherwise ``False``.
    """
    compared_keys = ('entity_type', 'gui_name', 'gui_tooltip', 'regex_pattern', 'script_parser')

    if db_entity:
        # Exclude 'parser_enabled' from the inconsistency check
        inconsistent = any(
            yaml_entity.get(key) is not None
            and getattr(db_entity, key, None) != yaml_entity.get(key)
            for key in compared_keys
        )
        if inconsistent:
            logging.debug(f"Found inconsistent entity: DB-Entity: {db_entity} YAML-Entity: {yaml_entity}")
            return True

    # Check for duplicate across all entities
    own_type = yaml_entity['entity_type']
    for other in db_entities:
        if other.entity_type == own_type:
            continue  # same entity type, not a duplicate of something else
        shares_value = any(
            getattr(other, key) == yaml_entity[key] and yaml_entity[key] is not None
            for key in compared_keys
        )
        if shares_value:
            logging.debug(f"Found duplicate entity: {other}")
            return True

    return False
def update_database_entry(self, db_entity, yaml_entity):
    """Copy every key/value pair of *yaml_entity* onto *db_entity* as attributes."""
    for field in yaml_entity:
        setattr(db_entity, field, yaml_entity[field])
def apply_resolution(self, resolutions, session):
    """Apply the user's conflict resolutions to the database or the YAML file.

    Each element of *resolutions* has the shape
    ``((resolution, entity), db_entity)``:

    * ``'yaml'``: *entity* is unpacked as keyword arguments for a new
      ``EntityTypesTable`` row that replaces *db_entity*; rows that pointed
      at the old entity type are re-pointed to the new one.
    * ``'db'``: the fields of *entity* (read as attributes) overwrite the
      entry of the same ``entity_type`` in the YAML data.

    The YAML file './data/entities.yaml' is read at the start and written
    back at the end.

    NOTE(review): the nesting below was reconstructed from a listing without
    indentation; confirm which statements belong to ``if db_entity:`` and
    where the final write sits against the original file.

    Args:
        resolutions: Iterable of ``((resolution, entity), db_entity)`` items.
        session: Database session used for the delete/insert operations.
    """
    with open('./data/entities.yaml', 'r') as file:
        yaml_data = yaml.safe_load(file)

    for (resolution, entity), db_entity in resolutions:
        if resolution == 'yaml':
            logging.debug(f"Resolving YAML entity: {entity} with resolution: yaml and db_entity: {db_entity}")
            if db_entity:
                # Remember which rows reference the old type before deleting it.
                foreign_keys = self.capture_foreign_keys(db_entity.entity_type_id, session)
                session.delete(db_entity)
                new_entity = EntityTypesTable(**entity)
                session.add(new_entity)
                # Flush before the new entity_type_id is read for reassignment.
                session.flush()
                self.reassign_foreign_keys(new_entity, foreign_keys, session)
        elif resolution == 'db':
            if entity:  # Existing database entity is chosen
                yaml_data[entity.entity_type] = {
                    'entity_type': entity.entity_type,
                    'gui_name': entity.gui_name,
                    'gui_tooltip': entity.gui_tooltip,
                    'parent_type': entity.parent_type,
                    'regex_pattern': entity.regex_pattern,
                    'script_parser': entity.script_parser,
                    'parser_enabled': entity.parser_enabled
                }

    # Write the (possibly updated) YAML data back to the file.
    with open('./data/entities.yaml', 'w') as file:
        yaml.dump(yaml_data, file)
def capture_foreign_keys(self, entity_id, session):
    """Collect the primary keys of all rows that reference an entity type.

    Args:
        entity_id: Value matched against the ``entity_types_id`` column.
        session: Database session used for the two lookups.

    Returns:
        Dict with the keys ``'distinct_entities'`` and ``'entities'``, each
        holding the list of matching row ids from the respective table.
    """
    distinct_rows = session.query(DistinctEntitiesTable).filter_by(entity_types_id=entity_id).all()
    captured = {'distinct_entities': [row.distinct_entities_id for row in distinct_rows]}

    entity_rows = session.query(EntitiesTable).filter_by(entity_types_id=entity_id).all()
    captured['entities'] = [row.entities_id for row in entity_rows]
    return captured
def reassign_foreign_keys(self, new_entity, foreign_keys, session):
    """Point previously captured referencing rows at a new entity-type row.

    Args:
        new_entity: Row whose ``entity_type_id`` becomes the new reference.
        foreign_keys: Dict as produced by ``capture_foreign_keys`` with the
            optional keys ``'distinct_entities'`` and ``'entities'``.
        session: Database session used to load the referencing rows.

    Rows that can no longer be loaded are skipped with a warning instead of
    aborting the whole reassignment with an AttributeError on ``None``.
    """
    # Reassigning references in DistinctEntitiesTable
    for distinct_id in foreign_keys.get('distinct_entities', []):
        distinct_entity = session.query(DistinctEntitiesTable).get(distinct_id)
        if distinct_entity is None:
            logging.warning(f"Distinct entity {distinct_id} not found; reference not reassigned.")
            continue
        distinct_entity.entity_types_id = new_entity.entity_type_id

    # Reassigning references in EntitiesTable
    for entity_id in foreign_keys.get('entities', []):
        entity = session.query(EntitiesTable).get(entity_id)
        if entity is None:
            logging.warning(f"Entity {entity_id} not found; reference not reassigned.")
            continue
        entity.entity_types_id = new_entity.entity_type_id
def checkScriptPresence(self):
    """Warn about parser scripts that entity types reference but that are missing.

    Every entity type with a non-empty ``script_parser`` is checked for a
    file of that name below ``./data/parser``. If any are missing, a warning
    message box listing them is shown.

    Returns:
        List of the missing script names (empty if none are missing).
    """
    parser_directory = './data/parser'
    with session_scope() as session:
        missing_scripts = [
            entity.script_parser
            for entity in session.query(EntityTypesTable).all()
            if entity.script_parser
            and not os.path.exists(os.path.join(parser_directory, entity.script_parser))
        ]

    if not missing_scripts:
        return missing_scripts

    msg = QMessageBox()
    msg.setIcon(QMessageBox.Warning)
    msg.setWindowTitle("Fehlende Skripte")
    msg.setText(".\nDas ist nicht zwingend ein Problem, aber falls nötig,\nsollten die Skritpte in ./data/parser/ ergänzt werden.\nListe der erwarteten Skripte:")
    msg.setInformativeText("\n".join(missing_scripts))
    msg.exec_()  # Blocks until the user dismisses the box
    return missing_scripts
def purgeWordlistEntries(self):
    """Delete all occurrences (and their context) of generated wordlist matches.

    Looks up the entity type ``'generated_wordlist_match'``; if it does not
    exist nothing is changed. Otherwise, for every distinct entity of that
    type, all of its entity rows and the context rows attached to them are
    deleted and the session is committed.

    Raises:
        Exception: Any error is logged and re-raised unchanged.
    """
    # NOTE(review): the distinct-entity rows themselves are not deleted here,
    # only the entity/context rows hanging off them - confirm this is intended.
    try:
        with session_scope() as session:
            wordlist_type = (
                session.query(EntityTypesTable)
                .filter_by(entity_type='generated_wordlist_match')
                .one_or_none()
            )
            if not wordlist_type:
                logging.info("No 'generated_wordlist_match' entity type found. No action taken.")
                return

            distinct_rows = (
                session.query(DistinctEntitiesTable)
                .filter_by(entity_types_id=wordlist_type.entity_type_id)
                .all()
            )
            for distinct_row in distinct_rows:
                occurrences = (
                    session.query(EntitiesTable)
                    .filter_by(distinct_entities_id=distinct_row.distinct_entities_id)
                    .all()
                )
                for occurrence in occurrences:
                    # Context rows first, then the occurrence they belong to.
                    session.query(ContextTable).filter_by(entities_id=occurrence.entities_id).delete()
                    session.delete(occurrence)

            session.commit()
    except Exception as error:
        logging.error(f"Error during wordlist entries purge: {str(error)}")
        raise
class ResolveInconsistenciesDialog(QDialog):
    """Dialog asking, for each inconsistency, whether to keep the DB or the YAML entry.

    ``inconsistencies`` is an iterable of ``(db_entity, yaml_entity)`` pairs.
    After the dialog was accepted, ``getSelectedResolutions()`` returns one
    ``(resolution, entity)`` tuple per pair, in the same order, where
    ``resolution`` is ``'db'``, ``'yaml'`` or ``None`` if nothing was chosen.
    """

    def __init__(self, inconsistencies, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Inkonsistenzen auflösen")
        self.inconsistencies = inconsistencies
        self.resolution_choices = []  # one (db_radio, yaml_radio) tuple per inconsistency
        self.selected_entity = None
        self.selected_entities = []
        self.selected_resolutions = []
        self.initUI()

    def initUI(self):
        """Build one label/radio-button group per inconsistency plus OK/Cancel."""
        # Imported locally because the module's import block is not part of this edit.
        from PyQt5.QtWidgets import QButtonGroup

        layout = QVBoxLayout(self)
        for db_entity, yaml_entity in self.inconsistencies:
            db_entity_str = self.format_entity_for_display(db_entity)
            yaml_entity_str = self.format_entity_for_display(yaml_entity)

            db_label = QLabel(f"Datenbank-Eintrag: {db_entity_str}")
            yaml_label = QLabel(f"YAML-Eintrag: {yaml_entity_str}")
            db_radio = QRadioButton("Datenbank-Eintrag behalten")
            yaml_radio = QRadioButton("YAML-Eintrag behalten")

            # Radio buttons sharing a parent are auto-exclusive as one big
            # group, which would allow only a single checked button in the
            # whole dialog. An explicit group per inconsistency restricts the
            # exclusivity to the two buttons that belong together.
            pair_group = QButtonGroup(self)
            pair_group.addButton(db_radio)
            pair_group.addButton(yaml_radio)

            layout.addWidget(db_label)
            layout.addWidget(db_radio)
            layout.addWidget(yaml_label)
            layout.addWidget(yaml_radio)
            self.resolution_choices.append((db_radio, yaml_radio))

        # Buttons for OK and Cancel
        btn_ok = QPushButton("OK", self)
        btn_ok.clicked.connect(self.on_ok)
        btn_cancel = QPushButton("Abbruch", self)
        btn_cancel.clicked.connect(self.reject)
        layout.addWidget(btn_ok)
        layout.addWidget(btn_cancel)

    def on_ok(self):
        """Store the user's choice for every inconsistency and accept the dialog."""
        self.selected_resolutions = []  # Reset the list before storing new selections
        for (db_radio, yaml_radio), (db_entity, yaml_entity) in zip(self.resolution_choices, self.inconsistencies):
            if db_radio.isChecked():
                self.selected_resolutions.append(('db', db_entity))
            elif yaml_radio.isChecked():
                self.selected_resolutions.append(('yaml', yaml_entity))
            else:
                # Keep list positions aligned with self.inconsistencies.
                self.selected_resolutions.append((None, None))
        self.accept()

    def getSelectedResolutions(self):
        """Return the ``(resolution, entity)`` tuples stored by ``on_ok``."""
        return self.selected_resolutions

    def format_entity_for_display(self, entity):
        """Return a ``"name: value"``-per-line rendering of a YAML dict or DB row."""
        if isinstance(entity, dict):
            # YAML entity is already a dictionary
            return "\n".join(f"{key}: {value}" for key, value in entity.items())
        # Database entity: render the known columns
        return "\n".join(
            f"{attr}: {getattr(entity, attr)}"
            for attr in ['entity_type', 'gui_name', 'gui_tooltip', 'parent_type', 'regex_pattern', 'script_parser', 'parser_enabled']
        )
class DuplicateErrorDialog(QDialog):
    """Dialog listing duplicate entity definitions found in ./data/entities.yaml.

    ``duplicates`` is an iterable of the offending entries; each is rendered
    as its own label. The user can open the YAML file or dismiss the dialog.
    """

    def __init__(self, duplicates, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Duplikate gefunden")
        self.duplicates = duplicates
        self.initUI()

    def initUI(self):
        """Build the explanation label, one label per duplicate and the buttons."""
        layout = QVBoxLayout(self)

        # Display duplicate entries
        error_label = QLabel("Duplikate wurden in ./data/entities.yaml gefunden. Diese sollten manuell aufgelöst werden:")
        layout.addWidget(error_label)
        for dup in self.duplicates:
            dup_str = self.format_entity_for_display(dup)
            dup_label = QLabel(dup_str)
            layout.addWidget(dup_label)

        # Buttons
        open_button = QPushButton("YAML-Datei oeffnen", self)
        open_button.clicked.connect(self.openYAML)
        exit_button = QPushButton("Abbruch", self)
        exit_button.clicked.connect(self.close)
        layout.addWidget(open_button)
        layout.addWidget(exit_button)

    def format_entity_for_display(self, entity):
        """Return a printable multi-line description of a duplicate entry.

        Dicts are rendered as ``"key: value"`` lines. Objects exposing an
        ``entity_type`` attribute are rendered column by column; anything
        else falls back to ``str()``. A string is always returned, because
        ``QLabel`` cannot be constructed from ``None``.
        """
        if isinstance(entity, dict):
            return "\n".join(f"{key}: {value}" for key, value in entity.items())
        if hasattr(entity, 'entity_type'):
            columns = ('entity_type', 'gui_name', 'gui_tooltip', 'parent_type',
                       'regex_pattern', 'script_parser', 'parser_enabled')
            return "\n".join(f"{attr}: {getattr(entity, attr, None)}" for attr in columns)
        return str(entity)

    def openYAML(self):
        """Open the entity definition file through the UI helper."""
        ui_helper = UIHelper(main_window=self)
        ui_helper.openFile('data/entities.yaml')

View File

@ -0,0 +1,89 @@
import shutil
import logging
import time
import os
from PyQt5.QtWidgets import QFileDialog
from sqlalchemy.exc import SQLAlchemyError
from logline_leviathan.database.database_manager import session_scope
from logline_leviathan.database.database_operations import DatabaseOperations
class DatabaseUtility():
    """Purge, import and export the application's database file ``entities.db``.

    All three actions refuse to run (and show the main window's processing
    warning instead) while the main window reports an analysis in progress.
    """

    # Path of the working database, relative to the current directory.
    DB_FILE = 'entities.db'

    def __init__(self, main_window):
        self.main_window = main_window
        self.database_operations = DatabaseOperations(self, main_window.db_init_func)

    def purgeDatabase(self):
        """Delete the database file and recreate an empty, populated database.

        Errors are logged and swallowed so the GUI keeps running.
        """
        if self.main_window.isProcessing():
            self.main_window.showProcessingWarning()
            return
        try:
            with session_scope() as db_session:
                # Close and dispose of any existing database session so the
                # file is not held open when it is removed.
                if db_session:
                    db_session.close()
                    db_session.bind.dispose()

            self._remove_database_file()

            # Reinitialize the database
            self.main_window.db_init_func()
            self.main_window.statusLabel.setText(" Leere Datenbank initalisiert. Mit der Analyse fortfahren.")
            logging.debug("Database created.")
            yaml_data = self.database_operations.loadRegexFromYAML()
            self.database_operations.populate_and_update_entities_from_yaml(yaml_data)
            self.main_window.refreshApplicationState()
            self.main_window.generate_report_window.updateCheckboxes()
            self.main_window.generate_wordlist_window.updateCheckboxes()
        except SQLAlchemyError as e:
            logging.error(f"Error creating database: {e}")
        except Exception as e:
            logging.error(f"General error: {e}")

    def _remove_database_file(self, retries=3, delay=0.1):
        """Remove the database file, retrying briefly if the OS still holds it.

        Raises:
            OSError: If the last attempt fails; the original traceback is kept.
        """
        for attempt in range(retries):
            try:
                if os.path.exists(self.DB_FILE):
                    os.remove(self.DB_FILE)
                return
            except OSError:
                if attempt == retries - 1:
                    raise
                time.sleep(delay)

    def importDatabase(self):
        """Let the user pick a ``.db`` file and copy it over the working database."""
        if self.main_window.isProcessing():
            self.main_window.showProcessingWarning()
            return
        options = QFileDialog.Options()
        db_file, _ = QFileDialog.getOpenFileName(self.main_window, "Select External Database", "", "Database Files (*.db);;All Files (*)", options=options)
        # Compare the extension case-insensitively so "CASE.DB" is accepted too.
        if db_file and db_file.lower().endswith(".db"):
            try:
                # Picking the working database itself is valid; copying a
                # file onto itself would raise shutil.SameFileError.
                already_active = os.path.exists(self.DB_FILE) and os.path.samefile(db_file, self.DB_FILE)
                if not already_active:
                    shutil.copy(db_file, self.DB_FILE)
                self.main_window.current_db_path = db_file
                self.main_window.statusLabel.setText(" Bestehende Datenbank für diese Sitzung ausgewählt.")
                self.main_window.refreshApplicationState()
                self.main_window.generate_report_window.updateCheckboxes()
                self.main_window.generate_wordlist_window.updateCheckboxes()
            except Exception as e:
                logging.error(f"Error selecting external database: {e}")
                self.main_window.statusLabel.setText(f" Fehler bei der Auswahl der Datenbank: {e}")
        else:
            self.main_window.statusLabel.setText(" Keine gueltige Datenbank ausgewählt.")

    def exportDatabase(self):
        """Copy the working database to a location chosen by the user."""
        if self.main_window.isProcessing():
            self.main_window.showProcessingWarning()
            return
        options = QFileDialog.Options()
        default_filename = "entities_" + time.strftime('%Y%m%d_%H%M%S') + ".db"
        save_path, _ = QFileDialog.getSaveFileName(self.main_window, "Save Database File", default_filename, "Database Files (*.db);;All Files (*)", options=options)
        if save_path:
            try:
                shutil.copy(self.DB_FILE, save_path)
                self.main_window.statusLabel.setText(f" Datenbank erfolgreich exportiert nach {save_path}")
            except Exception as e:
                logging.error(f"Error exporting database: {e}")
                self.main_window.statusLabel.setText(f" Fehler beim Exportieren der Datenbank: {e}")

View File

@ -0,0 +1,816 @@
from sqlalchemy import or_, and_, not_, String
from PyQt5.QtWidgets import QProgressBar, QMainWindow, QTableWidget, QTableWidgetItem, QLineEdit, QStyledItemDelegate, QTextEdit, QWidget, QVBoxLayout, QHBoxLayout, QPushButton, QComboBox, QStyle, QLabel
from logline_leviathan.database.database_manager import get_db_session, EntitiesTable, DistinctEntitiesTable, EntityTypesTable, ContextTable, FileMetadata, session_scope
from PyQt5.QtCore import pyqtSignal, Qt, QThread, pyqtSignal, QTimer
from PyQt5.QtGui import QTextDocument, QTextOption
from fuzzywuzzy import fuzz
import re
import logging
import html
class QueryThread(QThread):
    """Worker thread that runs one database search and scores the hits.

    Emits ``queryCompleted(scored_results, search_terms)`` exactly once per
    run, where ``scored_results`` is a list of ``(row, score)`` tuples. On a
    blank query or on any error both lists are empty.
    """

    queryCompleted = pyqtSignal(list, list)  # Signal to indicate completion

    def __init__(self, db_query_instance, query_text):
        super(QueryThread, self).__init__()
        self.db_query_instance = db_query_instance
        self.query_text = query_text

    def run(self):
        scored_results = []
        search_terms = []
        try:
            # A blank query can never match; skip the database round trip.
            if self.query_text.strip():
                base_query, search_terms = self.db_query_instance.prepare_query(self.query_text)
                query_lambda = self.db_query_instance.parse_query(self.query_text)
                # Pass the lambda function directly to filter
                results = base_query.filter(query_lambda).all()
                scored_results = [
                    (result, self.db_query_instance.calculate_match_score(result, self.query_text))
                    for result in results
                ]
        except Exception:
            # An exception escaping run() would terminate the application and
            # leave the window waiting for a completion signal forever.
            logging.exception("Database query failed for %r", self.query_text)
            scored_results = []
            search_terms = []
        self.queryCompleted.emit(scored_results, search_terms)
class DatabaseGUIQuery:
    """Builds and scores free-text searches over the entity database.

    Query syntax: whitespace separated terms, ``"quoted phrases"``, ``*`` as
    wildcard, and an optional leading ``+`` / ``-`` per term which only
    influences the match score.
    """

    def __init__(self):
        self.db_session = get_db_session()
        self.entity_types = EntityTypesTable
        self.entities = EntitiesTable
        self.distinct_entities = DistinctEntitiesTable
        self.context = ContextTable
        self.file_metadata = FileMetadata

    def parse_query(self, query):
        """Translate a query string into a filter callable.

        Returns:
            A zero-argument callable producing an OR over one LIKE group per
            term, or - if the query contains no usable term - a callable that
            returns ``False`` (it accepts an optional positional argument for
            backward compatibility).
        """
        # Split and strip special characters for database query
        tokens = re.findall(r'"[^"]+"|\S+', query)
        # Tokens consisting only of operator/quote characters (e.g. a lone
        # "+") strip to '' and would become the pattern '%%', matching every row.
        stripped_tokens = [stripped for stripped in (token.strip('+-"') for token in tokens) if stripped]
        if not stripped_tokens:
            return lambda _=None: False

        filters = []
        for token in stripped_tokens:
            search_condition = f'%{token.replace("*", "%")}%'
            condition = or_(
                self.distinct_entities.distinct_entity.like(search_condition),
                self.entity_types.entity_type.like(search_condition),
                self.entity_types.gui_name.like(search_condition),
                self.entity_types.gui_tooltip.like(search_condition),
                self.entity_types.script_parser.like(search_condition),
                self.file_metadata.file_name.like(search_condition),
                self.file_metadata.file_path.like(search_condition),
                self.file_metadata.file_mimetype.like(search_condition),
                self.entities.line_number.cast(String).like(search_condition),
                self.context.context_large.like(search_condition)
                # Add other fields as needed
            )
            filters.append(condition)
        return lambda: or_(*filters)

    def parse_search_terms(self, query):
        """Return the whitespace-separated tokens that carry no ``+``/``-`` prefix."""
        return [token for token in query.split() if not token.startswith(('+', '-'))]

    def prepare_query(self, query):
        """Return ``(base_query, search_terms)`` for ``query``.

        ``base_query`` selects distinct entity, entity type name, file name,
        line number, timestamp and large context across the joined tables;
        no filter is applied yet.
        """
        search_terms = self.parse_search_terms(query)
        # Construct the base query with proper joins
        base_query = self.db_session.query(
            self.distinct_entities.distinct_entity,
            self.entity_types.gui_name,
            self.file_metadata.file_name,
            self.entities.line_number,
            self.entities.entry_timestamp,
            self.context.context_large
        ).join(
            self.entities, self.distinct_entities.distinct_entities_id == self.entities.distinct_entities_id
        ).join(
            self.file_metadata, self.entities.file_id == self.file_metadata.file_id
        ).join(
            self.context, self.entities.entities_id == self.context.entities_id
        ).join(
            self.entity_types, self.entities.entity_types_id == self.entity_types.entity_type_id
        ).distinct()
        return base_query, search_terms

    def display_results(self, results, search_terms):
        """Open a results window showing ``results``.

        ``ResultsWindow`` takes the query object (not the results) as its
        first argument, so the window is created for this instance and the
        data is handed over through its completion slot.
        """
        # NOTE(review): assumes ``results`` are (row, score) tuples as emitted
        # by QueryThread - confirm against the callers of this method.
        self.results_window = ResultsWindow(self)
        self.results_window.on_query_completed(results, search_terms)
        self.results_window.show()

    def calculate_match_score(self, result, query):
        """Score how well one result row matches ``query`` on a 0-100 scale.

        Args:
            result: Row exposing ``distinct_entity``, ``file_name``,
                ``entry_timestamp``, ``line_number`` and ``context_large``.
            query: The raw query string.

        Returns:
            The normalised score; higher means a better match.
        """
        # Adjusted weights and thresholds
        distinct_entity_weight = 4
        file_name_weight = 4
        timestamp_weight = 1
        line_number_weight = 1
        context_weight = 5
        multiple_term_weight = 1
        order_weight = 8  # Weight for quoted queries whose terms appear in order
        fuzzy_match_weight = 0.3  # More discerning fuzzy match
        threshold_for_fuzzy = 90  # Higher threshold for fuzzy matches
        proximity_weight = 2  # Weight for proximity of term and entity
        positive_operand_weight = 10  # Weight for terms with '+'
        negative_operand_penalty = -5  # Penalty for terms with '-'
        exact_match_weight = 10  # Weight for exact sequence match
        score = 0

        # Extracting operands and terms; operator-only tokens yield an empty
        # term, which is a substring of everything and would inflate the score.
        tokens = re.findall(r'"[^"]+"|\S+', query)
        processed_terms = []
        for token in tokens:
            term = token.strip('+-"').lower()
            if term:
                processed_terms.append((token.startswith('+'), token.startswith('-'), term))

        # Normalize result fields; NULL text columns are treated as empty.
        lower_distinct_entity = (result.distinct_entity or '').lower()
        lower_file_name = (result.file_name or '').lower()
        timestamp_str = str(result.entry_timestamp).lower()
        line_number_str = str(result.line_number).lower()
        words_in_context = (result.context_large or '').lower().split()
        context_text = ' '.join(words_in_context)

        # Check matches in various fields with operand consideration.
        # Note: the context is matched word-wise, the other fields by substring.
        field_checks = (
            (lower_distinct_entity, distinct_entity_weight),
            (lower_file_name, file_name_weight),
            (timestamp_str, timestamp_weight),
            (line_number_str, line_number_weight),
            (words_in_context, context_weight),
        )
        for is_positive, is_negative, term in processed_terms:
            for haystack, neutral_weight in field_checks:
                if term in haystack:
                    if is_positive:
                        score += positive_operand_weight
                    elif is_negative:
                        score += negative_operand_penalty
                    else:
                        score += neutral_weight

        # The cleaned search terms in the exact order they appear in the query
        exact_terms_substring = ' '.join(term for _, _, term in processed_terms)
        terms_in_order = bool(exact_terms_substring) and exact_terms_substring in context_text
        if terms_in_order:
            score += exact_match_weight
        # Quoted queries get an additional bonus for the same condition
        if '"' in query and exact_terms_substring in context_text:
            score += order_weight

        # Additional weight for multiple different terms
        unique_terms = {term for _, _, term in processed_terms}
        score += len(unique_terms) * multiple_term_weight

        # Proximity score: distance (in words) between term and entity;
        # position 0 is used when the entity is not a single context word.
        entity_pos = words_in_context.index(lower_distinct_entity) if lower_distinct_entity in words_in_context else 0
        for _, _, term in processed_terms:
            if term in words_in_context:
                distance = abs(words_in_context.index(term) - entity_pos)
                score += max(0, proximity_weight - distance * 0.01)

        # Fuzzy matching against every word of the combined row text
        all_words = f"{result.distinct_entity} {result.file_name} {result.entry_timestamp} {result.line_number} {result.context_large}".lower().split()
        for _, _, term in processed_terms:
            fuzzy_score = max((fuzz.partial_ratio(term, word) for word in all_words), default=0)
            if fuzzy_score > threshold_for_fuzzy:
                score += (fuzzy_score / 100) * fuzzy_match_weight

        # Normalize the score
        term_count = len(processed_terms)
        max_possible_positive_score = (
            distinct_entity_weight + file_name_weight +
            timestamp_weight + line_number_weight +
            context_weight * term_count +  # Assuming each term can match in the context
            order_weight + exact_match_weight +
            term_count * multiple_term_weight +  # Each term contributes to multiple_term_weight
            term_count * positive_operand_weight  # Each term could have a positive operand
        )
        max_possible_negative_score = term_count * negative_operand_penalty
        max_possible_score = max_possible_positive_score + abs(max_possible_negative_score)
        # Normalizing the score to a scale of 100
        return (score / max_possible_score) * 100

    def get_entity_types(self):
        """Return the GUI names of all entity types that have a regex or a script parser."""
        with session_scope() as session:
            return [entity_type.gui_name for entity_type in session.query(EntityTypesTable)
                    .filter(or_(EntityTypesTable.regex_pattern.isnot(None),
                                EntityTypesTable.script_parser.isnot(None)))
                    .all()]
# One (header label, pixel width) pair per results-table column, in display
# order. Adjust the widths here as needed.
_RESULT_COLUMNS = (
    ('Distinct Entity', 200),
    ('Entity Type', 100),
    ('File Name', 250),
    ('Line Number', 100),
    ('Timestamp', 120),
    ('Context', 600),
    ('Match Score', 80),
)
COLUMN_WIDTHS = [width for _, width in _RESULT_COLUMNS]
COLUMN_NAMES = [label for label, _ in _RESULT_COLUMNS]
# Default height of a table row and fixed width of a column filter box, in pixels.
DEFAULT_ROW_HEIGHT = 120
FILTER_EDIT_WIDTH = 150
class ResultsWindow(QMainWindow):
    """Search window: runs queries in a worker thread and lists the scored hits.

    The hits of a finished query are kept in ``total_data`` as
    ``(row, score)`` tuples and moved into the table in chunks, driven by a
    timer. Per-column text filters and an entity-type filter hide table rows
    that do not match.
    """

    def __init__(self, db_query_instance, parent=None):
        super(ResultsWindow, self).__init__(parent)
        self.db_query_instance = db_query_instance
        self.loaded_data_count = 0
        self.total_data = []
        self.current_filters = {}  # column index -> lower-cased filter text
        # Set up front so slots that fire before the first query find them.
        self.search_terms = []
        self.query_thread = None
        self._highlight_delegate = None
        self.setWindowTitle("Suchergebnis")
        self.setGeometry(800, 600, 1500, 600)  # x, y, width, height

        # Create central widget and set layout
        centralWidget = QWidget(self)
        self.setCentralWidget(centralWidget)
        mainLayout = QVBoxLayout(centralWidget)

        queryFieldLayout = QHBoxLayout()
        self.databaseQueryLineEdit = QueryLineEdit(self)
        self.databaseQueryLineEdit.setPlaceholderText(" Suchbegriff eingeben...")
        self.databaseQueryLineEdit.returnPressed.connect(self.execute_query_from_results_window)
        self.databaseQueryLineEdit.setStyleSheet("""
            QLineEdit {
                background-color: #3C4043;
                color: white;
                min-height: 20px;
            }
        """)
        queryFieldLayout.addWidget(self.databaseQueryLineEdit)

        # Progress bar signalling a running query; range (0, 1) is the idle
        # state, range (0, 0) switches it to the busy animation.
        self.queryProgressBar = QProgressBar(self)
        self.queryProgressBar.setRange(0, 1)
        self.queryProgressBar.setFixedWidth(100)
        queryFieldLayout.addWidget(self.queryProgressBar)

        executeQueryButton = QPushButton("Suche ausführen", self)
        executeQueryButton.clicked.connect(self.execute_query_from_results_window)
        queryFieldLayout.addWidget(executeQueryButton)
        mainLayout.addLayout(queryFieldLayout)

        # Create a horizontal layout for filter options
        filterLayout = QHBoxLayout()
        mainLayout.addLayout(filterLayout)

        # Add the table widget to the main layout
        self.tableWidget = QTableWidget()
        mainLayout.addWidget(self.tableWidget)

        # Stylesheet for the entire ResultsWindow
        stylesheet = """
            /* Styles for QTableWidget and headers */
            QTableWidget, QHeaderView::section {
                background-color: #2A2F35;
                color: white;
                border: 1px solid #4A4A4A;
            }
            /* Style for QLineEdit */
            QLineEdit {
                background-color: #3A3F44;
                color: white;
                border: 1px solid #4A4A4A;
            }
            /* Style for QPushButton */
            QPushButton {
                background-color: #4B5563;
                color: white;
                border-radius: 4px;
                padding: 5px;
                margin: 5px;
            }
            QPushButton:hover {
                background-color: #5C677D;
            }
            QPushButton:pressed {
                background-color: #2A2F35;
            }
            /* Style for empty rows and other areas */
            QWidget {
                background-color: #2A2F35;
                color: white;
            }
        """
        self.setStyleSheet(stylesheet)

        # Apply default row height after setting up the table
        self.tableWidget.verticalHeader().setDefaultSectionSize(DEFAULT_ROW_HEIGHT)

        self.clearAllButton = QPushButton("Alle Filteroptionen loeschen", self)
        self.clearAllButton.clicked.connect(self.clear_all_filters)
        filterLayout.addWidget(self.clearAllButton)

        self.entityTypeComboBox = QComboBox()
        filterLayout.addWidget(self.entityTypeComboBox)

        # One text filter per column, except the columns filtered otherwise.
        # Resulting list order: Distinct Entity, File Name, Line Number,
        # Timestamp, Match Score.
        self.filterWidgets = []
        for i, column_name in enumerate(COLUMN_NAMES):
            if column_name in ['Entity Type', 'Context']:
                continue
            filter_edit = QLineEdit(self)
            filter_edit.setFixedWidth(FILTER_EDIT_WIDTH)
            filter_edit.setPlaceholderText(f"Filtern nach {column_name}")
            # col=i binds the current column index (avoids late binding).
            filter_edit.textChanged.connect(lambda text, col=i: self.apply_filter(text, col))
            self.filterWidgets.append(filter_edit)
            filterLayout.addWidget(filter_edit)

        # Timer that moves further chunks of the result list into the table
        self.dataLoadTimer = QTimer(self)
        self.dataLoadTimer.timeout.connect(self.load_more_data)

        # Create and add the Dismiss button
        self.dismissButton = QPushButton("Schließen", self)
        self.dismissButton.clicked.connect(self.dataLoadTimer.stop)
        self.dismissButton.clicked.connect(self.close)
        mainLayout.addWidget(self.dismissButton)

        self.populate_entity_type_combobox()
        self.adjust_column_widths()

    def populate_entity_type_combobox(self):
        """Fill the entity-type filter with all searchable entity types."""
        # Reuse the query object handed to the window instead of creating a
        # throwaway DatabaseGUIQuery (and with it another database session).
        entity_types = self.db_query_instance.get_entity_types()
        self.entityTypeComboBox.addItem("Alle verfügbaren Typen", None)  # Default option
        for entity_type in entity_types:
            self.entityTypeComboBox.addItem(entity_type, entity_type)
        self.entityTypeComboBox.currentIndexChanged.connect(self.filter_by_entity_type)

    def clear_table(self):
        """Remove all rows, columns and headers from the table."""
        self.tableWidget.clear()
        self.tableWidget.setRowCount(0)
        self.tableWidget.setColumnCount(0)

    def adjust_column_widths(self):
        """Apply the configured width to every column."""
        for column, width in enumerate(COLUMN_WIDTHS):
            self.tableWidget.setColumnWidth(column, width)

    def execute_query_from_results_window(self):
        """Start a background search for the text in the query field."""
        query_text = self.databaseQueryLineEdit.text()
        if not query_text:
            return
        if self.query_thread is not None and self.query_thread.isRunning():
            # Dropping the reference to a running QThread would destroy it
            # mid-run; ignore the request until the current search is done.
            return
        # Forget the previous result list so the timer cannot load stale rows
        # into the cleared table while the new query is running.
        self.total_data = []
        self.loaded_data_count = 0
        self.clear_table()
        self.dataLoadTimer.start(2000)
        self.queryProgressBar.setRange(0, 0)  # busy indicator
        self.query_thread = QueryThread(self.db_query_instance, query_text)
        self.query_thread.queryCompleted.connect(self.on_query_completed)
        self.query_thread.start()

    def set_query_and_execute(self, query_text):
        """Put ``query_text`` into the query field and run the search."""
        self.databaseQueryLineEdit.setText(query_text)
        self.execute_query_from_results_window()

    def on_query_completed(self, results, search_terms):
        """Take over the results of a finished query and show the first chunk."""
        logging.debug(f"Query completed with {len(results)} results")
        self.queryProgressBar.setRange(0, 1)  # back to idle
        self.total_data = results
        self.search_terms = search_terms
        self.loaded_data_count = 0
        self.setup_table(search_terms)
        self.apply_all_filters()

    def setup_table(self, search_terms=None):
        """Create the columns, install highlighting and load the first chunk."""
        search_terms = [] if search_terms is None else search_terms
        self.tableWidget.setColumnCount(len(COLUMN_NAMES))
        self.tableWidget.setHorizontalHeaderLabels(COLUMN_NAMES)

        # A single delegate serves the whole table. The view does not take
        # ownership, so the delegate of the previous query is released here.
        previous_delegate = self._highlight_delegate
        self._highlight_delegate = HighlightDelegate(self, search_terms)
        self.tableWidget.setItemDelegate(self._highlight_delegate)
        if previous_delegate is not None:
            previous_delegate.deleteLater()

        self.adjust_column_widths()
        # Disable sorting when initially populating data
        self.tableWidget.setSortingEnabled(False)
        self.load_more_data()
        # Enable sorting by 'Match Score' after data is populated
        self.tableWidget.setSortingEnabled(True)
        self.tableWidget.sortItems(6, Qt.DescendingOrder)

    def add_table_row(self, row_index, result, score):
        """Insert one result at ``row_index`` and trim the table if it grew too large."""
        # With sorting enabled the table re-sorts as soon as the sort-column
        # item is set and the row may move away from row_index, so sorting is
        # suspended while the cells are filled.
        sorting_was_enabled = self.tableWidget.isSortingEnabled()
        self.tableWidget.setSortingEnabled(False)

        self.tableWidget.insertRow(row_index)
        # Distinct Entity
        self.tableWidget.setItem(row_index, 0, QTableWidgetItem(str(result[0])))
        # Entity Type
        self.tableWidget.setItem(row_index, 1, QTableWidgetItem(str(result[1])))
        # File Name - using CellWidget, paired with the file-name filter box
        file_name_widget = CellWidget(str(result[2]), self.filterWidgets[1], self.search_terms)
        self.tableWidget.setCellWidget(row_index, 2, file_name_widget)
        file_name_item = QTableWidgetItem()
        file_name_item.setData(Qt.UserRole, str(result[2]))
        self.tableWidget.setItem(row_index, 2, file_name_item)
        # Line Number
        self.tableWidget.setItem(row_index, 3, QTableWidgetItem(str(result[3])))
        # Timestamp - using CellWidget, paired with the timestamp filter box
        timestamp_widget = CellWidget(str(result[4]), self.filterWidgets[3], self.search_terms)
        self.tableWidget.setCellWidget(row_index, 4, timestamp_widget)
        timestamp_item = QTableWidgetItem()
        timestamp_item.setData(Qt.UserRole, str(result[4]))
        self.tableWidget.setItem(row_index, 4, timestamp_item)
        # Context - using ScrollableTextWidget
        scrollable_widget = ScrollableTextWidget(result[5], self.search_terms, str(result[0]))
        self.tableWidget.setCellWidget(row_index, 5, scrollable_widget)
        # Match Score
        match_score_item = NumericTableWidgetItem("{:.4f}".format(float(score)))
        self.tableWidget.setItem(row_index, 6, match_score_item)

        self.tableWidget.setSortingEnabled(sorting_was_enabled)

        # Keep the table bounded: above 500 rows drop the lowest-scoring 10 %.
        if self.tableWidget.rowCount() > 500:
            self.remove_lowest_scoring_rows(10)

    def load_more_data(self):
        """Move the next chunk of results into the table.

        Only results scoring above the average of the rows already shown and
        matching the active filters are inserted.
        """
        if not self.is_new_data_available():
            return
        start_index = self.loaded_data_count
        chunk_size = 50  # Adjust this number based on performance
        end_index = min(start_index + chunk_size, len(self.total_data))

        average_score = self.calculate_average_score()
        sorted_chunk = sorted(self.total_data[start_index:end_index], key=lambda x: x[1], reverse=True)

        # Suspend sorting once for the whole chunk instead of per row.
        sorting_was_enabled = self.tableWidget.isSortingEnabled()
        self.tableWidget.setSortingEnabled(False)
        try:
            for row_data in sorted_chunk:
                result, score = row_data[0], row_data[1]
                if score <= average_score:
                    continue
                # The candidate is not in the table yet, so the filters are
                # checked against its own values rather than a table row.
                if self._texts_match_filters(self._candidate_cell_texts(result, score)):
                    self.insert_row_in_sorted_order(row_data)
        finally:
            self.tableWidget.setSortingEnabled(sorting_was_enabled)

        # Reapply filters after loading new data
        self.apply_all_filters()
        self.update_data_tracking(end_index)
        self.tableWidget.update()  # Refresh the table

    def _candidate_cell_texts(self, result, score):
        """Return the seven column texts a result would show once inserted."""
        texts = [str(result[column]) for column in range(6)]
        texts.append("{:.4f}".format(float(score)))
        return texts

    def _texts_match_filters(self, cell_texts):
        """Return True if every active filter text occurs in its column's text."""
        return all(
            filter_text.lower() in cell_texts[column].lower()
            for column, filter_text in self.current_filters.items()
        )

    def remove_lowest_scoring_rows(self, percentage):
        """Remove the given percentage of rows, lowest match scores first."""
        total_rows = self.tableWidget.rowCount()
        rows_to_remove = total_rows * percentage // 100
        # Collect scores and associated row indices
        score_rows = []
        for row in range(total_rows):
            score_item = self.tableWidget.item(row, 6)  # column 6 is Match Score
            if score_item:
                score_rows.append((float(score_item.text()), row))
        # Sort by scores (ascending) and select the lowest ones
        score_rows.sort(key=lambda x: x[0])
        lowest_score_rows = score_rows[:rows_to_remove]
        # Remove from the bottom up so the remaining indices stay valid
        for _, row in sorted(lowest_score_rows, key=lambda x: x[1], reverse=True):
            self.tableWidget.removeRow(row)

    def is_new_data_available(self):
        """Return True while results are waiting to be moved into the table."""
        return self.loaded_data_count < len(self.total_data)

    def calculate_average_score(self):
        """Return the mean match score of all table rows (0 for an empty table)."""
        total_score = 0
        row_count = self.tableWidget.rowCount()
        for row_index in range(row_count):
            score_item = self.tableWidget.item(row_index, 6)  # column 6 is Match Score
            total_score += float(score_item.text()) if score_item else 0
        return total_score / row_count if row_count > 0 else 0

    def update_data_tracking(self, end_index):
        """Record that ``total_data[:end_index]`` has been processed."""
        self.loaded_data_count = end_index

    def insert_row_in_sorted_order(self, row_data):
        """Insert ``(result, score)`` before the first row with a lower score."""
        row_index = 0
        score = row_data[1]
        while row_index < self.tableWidget.rowCount():
            current_score_item = self.tableWidget.item(row_index, 6)  # column 6 is Match Score
            current_score = float(current_score_item.text()) if current_score_item else 0
            if score > current_score:
                break
            row_index += 1
        self.add_table_row(row_index, row_data[0], score)

    def matches_current_filters(self, row_index, row_data):
        """Return True if table row ``row_index`` satisfies every active filter."""
        for column, filter_text in self.current_filters.items():
            if not self.is_match(row_index, column, filter_text, row_data):
                return False
        return True

    def is_match(self, row_index, column, filter_text, row_data):
        """Return True if ``filter_text`` occurs (case-insensitively) in the cell.

        ``row_data`` is accepted for compatibility and not used; the text is
        read from the table cell at ``(row_index, column)``.
        """
        text = self.get_cell_data(row_index, column)
        return filter_text.lower() in text.lower()

    def apply_filter(self, text, column):
        """Set the text filter of ``column`` and re-filter the table."""
        self.current_filters[column] = text.lower()
        self.apply_all_filters()

    def extract_row_data(self, row_index):
        """Return the plain text of every cell of table row ``row_index``."""
        return [
            self.get_cell_data(row_index, column)
            for column in range(self.tableWidget.columnCount())
        ]

    def get_cell_data(self, row_index, column):
        """Return the plain text shown in a cell, whatever widget displays it."""
        widget = self.tableWidget.cellWidget(row_index, column)
        if isinstance(widget, CellWidget):
            # CellWidget contains a QLabel with HTML-formatted text
            document = QTextDocument()
            document.setHtml(widget.label.text())
            return document.toPlainText()
        if isinstance(widget, ScrollableTextWidget):
            # ScrollableTextWidget contains a QTextEdit with HTML-formatted text
            return widget.text_edit.toPlainText()
        # Standard QTableWidgetItem
        item = self.tableWidget.item(row_index, column)
        return item.text() if item else ""

    def apply_all_filters(self):
        """Show the rows matching all active filters and hide the others."""
        for row_index in range(self.tableWidget.rowCount()):
            # The extracted texts are exactly what is_match would read again,
            # so they are matched directly.
            row_data = self.extract_row_data(row_index)
            if self._texts_match_filters(row_data):
                self.tableWidget.showRow(row_index)
            else:
                self.tableWidget.hideRow(row_index)

    def filter_by_entity_type(self):
        """Apply the entity type selected in the combo box as a column filter."""
        selected_type = self.entityTypeComboBox.currentData()
        entity_type_column = COLUMN_NAMES.index('Entity Type')
        if selected_type is None:
            # The default entry stands for "all types": drop the filter
            self.current_filters.pop(entity_type_column, None)
        else:
            self.current_filters[entity_type_column] = selected_type.lower()
        # Reapply all filters including the entity type filter
        self.apply_all_filters()

    def on_filter_change(self):
        """Re-filter the table."""
        self.apply_all_filters()

    def clear_all_filters(self):
        """Clear all text filters; the entity-type selection stays in effect."""
        for filter_widget in self.filterWidgets:
            # Signals are blocked so that not every cleared box re-filters
            # the whole table; one pass is done at the end.
            filter_widget.blockSignals(True)
            filter_widget.clear()
            filter_widget.blockSignals(False)
        self.current_filters.clear()
        for row in range(self.tableWidget.rowCount()):
            self.tableWidget.showRow(row)
        # The entity type filter is independent of the text filters
        self.filter_by_entity_type()

    @staticmethod
    def strip_html_tags(text):
        """Return ``text`` with everything that looks like an HTML tag removed."""
        return re.sub('<[^<]+?>', '', text)
class QueryLineEdit(QLineEdit):
    """Line edit that emits ``returnPressed`` when the user confirms the input."""

    # Shadows the signal of the same name inherited from QLineEdit; slots
    # connected through this class receive only the emissions made below.
    returnPressed = pyqtSignal()

    def keyPressEvent(self, event):
        # Key_Return is the main Return key, Key_Enter the one on the numeric
        # keypad; both confirm the input.
        if event.key() in (Qt.Key_Return, Qt.Key_Enter):
            self.returnPressed.emit()
        else:
            super().keyPressEvent(event)
class HighlightDelegate(QStyledItemDelegate):
    """Item delegate that paints cell text as rich text with highlighted search terms."""

    # Markup wrapped around every highlighted occurrence.
    _HIGHLIGHT_OPEN = "<span style='background-color: yellow; color: black;'>"
    _HIGHLIGHT_CLOSE = "</span>"

    def __init__(self, parent=None, search_terms=None):
        super().__init__(parent)
        self.search_terms = search_terms or []

    def paint(self, painter, option, index):
        """Render the cell's text, word-wrapped to the cell width."""
        painter.save()
        options = QTextOption()
        options.setWrapMode(QTextOption.WrapAtWordBoundaryOrAnywhere)
        document = QTextDocument()
        document.setDefaultTextOption(options)
        document.setDefaultFont(option.font)
        # Prepare highlighted text
        text = index.model().data(index)
        document.setHtml(self.get_highlighted_text(text))
        # Set the width of the document to the cell width
        document.setTextWidth(option.rect.width())
        # Clip to the cell so wrapped text cannot spill over neighbouring cells
        painter.setClipRect(option.rect)
        painter.translate(option.rect.topLeft())
        document.drawContents(painter)
        painter.restore()

    def get_highlighted_text(self, text):
        """Return ``text`` as HTML with the '+'-prefixed search terms highlighted.

        The text itself is HTML-escaped, so markup characters in the data are
        shown literally. Matching is case-insensitive and keeps the original
        spelling of the matched text. Newlines become ``<br>``.
        """
        text = "" if text is None else str(text)

        # Only terms carrying a leading '+' are highlighted; other special
        # characters are stripped from the term before matching.
        # NOTE(review): DatabaseGUIQuery.parse_search_terms drops all
        # '+'-prefixed tokens, so terms from that source never qualify -
        # confirm which side reflects the intended behaviour.
        clean_terms = set()
        for term in self.search_terms:
            if not term.startswith('+'):
                continue
            clean_term = re.sub(r'[^\w\s]', '', term.lstrip('+-')).lower()
            if clean_term:  # an empty term would match at every position
                clean_terms.add(clean_term)

        if clean_terms:
            # One pass over the plain text: markup inserted for one term can
            # never be matched (and corrupted) by another term. Longer terms
            # come first so they win over their own prefixes.
            pattern = re.compile(
                "|".join(re.escape(term) for term in sorted(clean_terms, key=len, reverse=True)),
                re.IGNORECASE,
            )
            pieces = []
            position = 0
            for match in pattern.finditer(text):
                pieces.append(html.escape(text[position:match.start()]))
                pieces.append(self._HIGHLIGHT_OPEN + html.escape(match.group(0)) + self._HIGHLIGHT_CLOSE)
                position = match.end()
            pieces.append(html.escape(text[position:]))
            body = "".join(pieces)
        else:
            body = html.escape(text)

        return f"<span style='color: white;'>{body}</span>".replace("\n", "<br>")
class ScrollableTextWidget(QWidget):
    """Read-only, scrollable rich-text cell showing a context snippet.

    The distinct entity is highlighted in blue and the search terms in
    yellow. After construction the view is scrolled to the first occurrence
    of the distinct entity.
    """

    def __init__(self, text, search_terms, distinct_entity, parent=None):
        super().__init__(parent)
        layout = QVBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)
        self.text_edit = CustomTextEdit(self)
        self.text_edit.setReadOnly(True)
        # Apply styles including scrollbar styles
        self.text_edit.setStyleSheet("""
QTextEdit {
background-color: #2A2F35; /* Dark blue-ish background */
color: white; /* White text */
}
QTextEdit QScrollBar:vertical {
border: none;
background-color: #3A3F44; /* Dark scrollbar background */
width: 8px; /* Width of the scrollbar */
}
QTextEdit QScrollBar::handle:vertical {
background-color: #6E6E6E; /* Scroll handle color */
border-radius: 4px; /* Rounded corners for the handle */
}
QTextEdit QScrollBar::add-line:vertical, QTextEdit QScrollBar::sub-line:vertical {
background: none;
}
""")
        # Set the text with highlighting
        self.setHighlightedText(text, search_terms, distinct_entity)
        layout.addWidget(self.text_edit)
        # Scroll to the distinct entity
        self.scroll_to_text(distinct_entity)

    def setHighlightedText(self, text, search_terms, distinct_entity):
        """Display ``text`` with the entity (blue) and search terms (yellow) marked.

        Highlighting runs on the text itself, before the white wrapper span is
        added, and all terms are applied in one pass. This keeps terms like
        "span", "color" or "blue" from matching inside markup that this method
        generated. Matched text keeps its original casing; terms that are empty
        after clean-up are skipped.
        """
        if text is None:
            text = ""

        cleaned_terms = []
        for term in search_terms or []:
            clean_term = re.sub(r'[^\w\s]', '', term.lstrip('+-'))
            if clean_term.strip() and clean_term.lower() not in (t.lower() for t in cleaned_terms):
                cleaned_terms.append(clean_term)
        term_pattern = None
        if cleaned_terms:
            # Longest first so a short term cannot shadow a longer one.
            cleaned_terms.sort(key=len, reverse=True)
            term_pattern = re.compile("|".join(re.escape(t) for t in cleaned_terms), re.IGNORECASE)

        def mark_terms(fragment):
            if term_pattern is None:
                return fragment
            return term_pattern.sub(
                lambda match: f"<span style='background-color: yellow; color: black;'>{match.group()}</span>",
                fragment,
            )

        if distinct_entity:
            # The entity is compared in HTML-escaped form, as before.
            entity_pattern = re.compile(re.escape(html.escape(distinct_entity)), re.IGNORECASE)
            pieces = []
            cursor = 0
            for match in entity_pattern.finditer(text):
                pieces.append(mark_terms(text[cursor:match.start()]))
                # Terms inside the entity are still marked, nested in the blue span.
                pieces.append(
                    f"<span style='background-color: blue; color: white;'>{mark_terms(match.group())}</span>"
                )
                cursor = match.end()
            pieces.append(mark_terms(text[cursor:]))
            highlighted = "".join(pieces)
        else:
            highlighted = mark_terms(text)

        # Wrap the result in a span to maintain the default text colour
        text_with_color = f"<span style='color: white;'>{highlighted}</span>"
        self.text_edit.setHtml(text_with_color.replace("\n", "<br>"))

    def scroll_to_text(self, text):
        """Move the text cursor to the first occurrence of ``text``, if any."""
        if text:
            cursor = self.text_edit.document().find(text)
            # find() yields a null cursor when nothing matches; keep the
            # current cursor in that case instead of installing a null one.
            if not cursor.isNull():
                self.text_edit.setTextCursor(cursor)
class CustomTextEdit(QTextEdit):
    """QTextEdit that consumes wheel events only while its scrollbar is visible."""

    def __init__(self, parent=None):
        super().__init__(parent)
        # Show the vertical scrollbar only when the content needs it.
        self.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)

    def wheelEvent(self, event):
        """Scroll the text edit, then decide whether the event is consumed."""
        # The text edit always gets to process the wheel event first.
        super().wheelEvent(event)
        if not self.verticalScrollBar().isVisible():
            # No scrollbar shown: mark the event as not handled.
            event.ignore()
            return
        # Scrollbar shown: mark the event as handled.
        event.accept()
class CellWidget(QWidget):
    """Table cell with a highlighted label and a button that filters by its text.

    Clicking the button copies the cell's original text into ``filter_edit``.
    """

    def __init__(self, text, filter_edit, search_terms, parent=None):
        super(CellWidget, self).__init__(parent)
        self.layout = QHBoxLayout(self)
        self.label = QLabel(text)
        self.setHighlightedText(text, search_terms)
        self.button = QPushButton()
        icon = self.button.style().standardIcon(QStyle.SP_CommandLink)  # Example of a standard icon
        self.button.setIcon(icon)
        self.button.setFixedSize(20, 20)  # Adjust size as needed
        self.button.clicked.connect(lambda: filter_edit.setText(text))
        self.layout.addWidget(self.label)
        self.layout.addWidget(self.button)
        self.layout.setContentsMargins(0, 0, 0, 0)
        self.setLayout(self.layout)

    def setHighlightedText(self, text, search_terms):
        """Show ``text`` in the label with every search term highlighted.

        Highlighting is applied to the text itself, before the white wrapper
        span is added, and in a single pass. Terms such as "span" or "color"
        therefore cannot match inside the generated markup. Matched text keeps
        its original casing, and terms that are empty after stripping operands
        and special characters are ignored.
        """
        if text is None:
            text = ""
        cleaned_terms = []
        for term in search_terms or []:
            # Strip leading operands (+ or -) and special characters
            clean_term = re.sub(r'[^\w\s]', '', term.lstrip('+-'))
            if clean_term.strip() and clean_term.lower() not in (t.lower() for t in cleaned_terms):
                cleaned_terms.append(clean_term)
        if cleaned_terms:
            # Longest first so a short term cannot shadow a longer one.
            cleaned_terms.sort(key=len, reverse=True)
            pattern = re.compile("|".join(re.escape(t) for t in cleaned_terms), re.IGNORECASE)
            text = pattern.sub(
                lambda match: f"<span style='background-color: yellow; color: black;'>{match.group()}</span>",
                text,
            )
        # Wrap the result in a span to maintain the default text colour
        self.label.setText(f"<span style='color: white;'>{text}</span>")
class NumericTableWidgetItem(QTableWidgetItem):
    """Table item that sorts by numeric value instead of by text."""

    def __lt__(self, other):
        """Compare as floats; use the base-class ordering for non-numeric text.

        Empty or non-numeric cells used to raise ``ValueError`` from
        ``float()`` in the middle of a sort. They now fall back to the
        ordering of ``QTableWidgetItem``.
        """
        try:
            return float(self.text()) < float(other.text())
        except (TypeError, ValueError):
            return super().__lt__(other)

View File

@ -0,0 +1,228 @@
from sqlalchemy import or_, String
from logline_leviathan.database.database_manager import get_db_session, EntitiesTable, DistinctEntitiesTable, EntityTypesTable, ContextTable, FileMetadata, session_scope
from PyQt5.QtCore import pyqtSignal, QThread, pyqtSignal
from fuzzywuzzy import fuzz
import re
class QueryThread(QThread):
    """Worker thread that runs a search query and reports scored results.

    On completion ``queryCompleted`` is emitted with a dictionary mapping
    each result's ``entities_id`` to its match score.
    """

    queryCompleted = pyqtSignal(dict)  # Signal to indicate completion with a dictionary

    def __init__(self, db_query_instance, query_text):
        super().__init__()
        self.db_query_instance = db_query_instance
        self.query_text = query_text

    def run(self):
        """Execute the query, score every row and emit the result mapping."""
        query_api = self.db_query_instance
        # The search terms returned alongside the base query are not needed here.
        base_query, _search_terms = query_api.prepare_query(self.query_text)
        criterion = query_api.parse_query(self.query_text)
        rows = base_query.filter(criterion).all()
        # Key the scores by entities_id
        scores = {}
        for row in rows:
            scores[row.entities_id] = query_api.calculate_match_score(row, self.query_text)
        self.queryCompleted.emit(scores)
class DatabaseGUIQuery:
    """Builds, filters and scores the free-text searches issued from the GUI."""

    def __init__(self):
        self.db_session = get_db_session()
        self.entity_types = EntityTypesTable
        self.entities = EntitiesTable
        self.distinct_entities = DistinctEntitiesTable
        self.context = ContextTable
        self.file_metadata = FileMetadata

    def _searchable_columns(self):
        """Return the column expressions every search token is matched against."""
        return (
            self.distinct_entities.distinct_entity,
            self.entity_types.entity_type,
            self.entity_types.gui_name,
            self.entity_types.gui_tooltip,
            self.file_metadata.file_name,
            self.file_metadata.file_path,
            self.file_metadata.file_mimetype,
            self.entities.line_number.cast(String),
            self.context.context_large,
        )

    def parse_query(self, query):
        """Translate a query string into a callable producing the filter criterion.

        Unquoted tokens are matched with ``ilike`` ('*' acts as a wildcard);
        double-quoted phrases are matched with ``like``. A row qualifies when
        any token matches any searchable column.

        NOTE(review): a blank query returns a one-argument lambda, every other
        query a zero-argument lambda. Confirm that callers handle both.
        NOTE(review): whether ``like`` is actually case-sensitive depends on
        the database backend.
        """
        if not query.strip():
            return lambda _: False
        # Extract quoted and unquoted parts
        quoted_parts = re.findall(r'"([^"]+)"', query)
        unquoted_parts = re.split(r'"[^"]+"', query)
        unquoted_tokens = []
        for part in unquoted_parts:
            unquoted_tokens.extend(re.findall(r'\S+', part))
        columns = self._searchable_columns()
        filters = []
        # Unquoted tokens: 'ilike' for case-insensitive search
        for token in unquoted_tokens:
            search_condition = f'%{token.replace("*", "%")}%'
            filters.append(or_(*[column.ilike(search_condition) for column in columns]))
        # Quoted phrases: 'like' on the literal phrase
        for token in quoted_parts:
            exact_condition = f'%{token}%'
            filters.append(or_(*[column.like(exact_condition) for column in columns]))
        return lambda: or_(*filters)

    def parse_search_terms(self, query):
        """Return the whitespace-separated tokens that carry no '+'/'-' operand."""
        return [token for token in query.split() if not token.startswith(('-', '+'))]

    def prepare_query(self, query):
        """Return ``(base_query, search_terms)`` for ``query``.

        The base query joins entities with their distinct entity, file
        metadata, context and entity type, and selects distinct rows.
        Filters are applied by the caller.
        """
        search_terms = self.parse_search_terms(query)
        # Construct the base query with proper joins
        base_query = self.db_session.query(
            self.distinct_entities.distinct_entity,
            self.entity_types.gui_name,
            self.file_metadata.file_name,
            self.entities.line_number,
            self.entities.entry_timestamp,
            self.context.context_large,
            self.entities.flag,
            self.entities.entities_id
        ).join(
            self.entities, self.distinct_entities.distinct_entities_id == self.entities.distinct_entities_id
        ).join(
            self.file_metadata, self.entities.file_id == self.file_metadata.file_id
        ).join(
            self.context, self.entities.entities_id == self.context.entities_id
        ).join(
            self.entity_types, self.entities.entity_types_id == self.entity_types.entity_type_id
        ).distinct()
        return base_query, search_terms

    def calculate_match_score(self, result, query):
        """Score how well ``result`` matches ``query`` on a 0-100-style scale.

        Changes compared with the previous implementation:
        * ``None`` in any result field is treated as empty text. It no longer
          raises ``AttributeError``, and a missing timestamp or line number no
          longer matches as the literal word "none".
        * Tokens that are empty after stripping operands and quotes (such as a
          lone '+') are ignored instead of matching every field.
        * The fuzzy step tolerates a result with no words at all.
        * Loop-invariant values are computed once.

        Args:
            result: row with ``distinct_entity``, ``file_name``,
                ``entry_timestamp``, ``line_number`` and ``context_large``.
            query: the raw query string, including '+', '-' and quotes.

        Returns:
            The normalised score as a number.
        """
        # Weights and thresholds
        distinct_entity_weight = 4
        file_name_weight = 4
        timestamp_weight = 1
        line_number_weight = 1
        context_weight = 5
        multiple_term_weight = 1
        order_weight = 8  # Weight for the exact order of terms (quoted queries)
        fuzzy_match_weight = 0.3
        threshold_for_fuzzy = 90
        proximity_weight = 2
        positive_operand_weight = 10  # Weight for terms with '+'
        negative_operand_penalty = -5  # Penalty for terms with '-'
        exact_match_weight = 10  # Weight for an exact sequence match

        def as_text(value):
            return "" if value is None else str(value).lower()

        # Extract operands and terms, dropping tokens that clean up to nothing
        tokens = re.findall(r'"[^"]+"|\S+', query)
        processed_terms = []
        for token in tokens:
            term = token.strip('+-"').lower()
            if term:
                processed_terms.append((token.startswith('+'), token.startswith('-'), term))

        # Normalise result fields
        lower_distinct_entity = as_text(result.distinct_entity)
        lower_file_name = as_text(result.file_name)
        timestamp_str = as_text(result.entry_timestamp)
        line_number_str = as_text(result.line_number)
        lower_context = as_text(result.context_large)
        words_in_context = lower_context.split()
        joined_context = ' '.join(words_in_context)

        score = 0
        # Field matches, taking the operand into account. The context match is
        # a whole-word match; all other fields use substring matching.
        for is_positive, is_negative, term in processed_terms:
            field_hits = (
                (term in lower_distinct_entity, distinct_entity_weight),
                (term in lower_file_name, file_name_weight),
                (term in timestamp_str, timestamp_weight),
                (term in line_number_str, line_number_weight),
                (term in words_in_context, context_weight),
            )
            for hit, neutral_weight in field_hits:
                if hit:
                    score += positive_operand_weight if is_positive else (
                        negative_operand_penalty if is_negative else neutral_weight)

        # All cleaned terms in query order, as one phrase
        exact_terms_substring = ' '.join(term for _, _, term in processed_terms)
        phrase_in_context = bool(exact_terms_substring) and exact_terms_substring in joined_context
        if phrase_in_context:
            score += exact_match_weight
        # Quoted queries get the additional order bonus
        if '"' in query and phrase_in_context:
            score += order_weight

        # Additional weight for multiple different terms
        unique_terms = {term for _, _, term in processed_terms}
        score += len(unique_terms) * multiple_term_weight

        # Proximity of each term to the entity inside the context
        entity_pos = words_in_context.index(lower_distinct_entity) if lower_distinct_entity in words_in_context else 0
        for _, _, term in processed_terms:
            if term in words_in_context:
                distance = abs(words_in_context.index(term) - entity_pos)
                score += max(0, proximity_weight - distance * 0.01)  # Reduce score based on distance

        # Fuzzy matching against every word of every field
        all_words = ' '.join(
            (lower_distinct_entity, lower_file_name, timestamp_str, line_number_str, lower_context)
        ).split()
        for _, _, term in processed_terms:
            fuzzy_score = max((fuzz.partial_ratio(term, word) for word in all_words), default=0)
            if fuzzy_score > threshold_for_fuzzy:
                score += (fuzzy_score / 100) * fuzzy_match_weight

        # Normalise the score
        max_possible_positive_score = (
            distinct_entity_weight + file_name_weight +
            timestamp_weight + line_number_weight +
            context_weight * len(processed_terms) +  # Assuming each term can match in the context
            order_weight + exact_match_weight +
            len(processed_terms) * multiple_term_weight +  # Each term contributes to multiple_term_weight
            len(processed_terms) * positive_operand_weight  # Each term could have a positive operand
        )
        max_possible_negative_score = len(processed_terms) * negative_operand_penalty
        max_possible_score = max_possible_positive_score + abs(max_possible_negative_score)
        return (score / max_possible_score) * 100

    def get_entity_types(self):
        """Return the GUI names of all entity types that have a regex or script parser."""
        with session_scope() as session:
            return [entity_type.gui_name for entity_type in session.query(EntityTypesTable)
                    .filter(or_(EntityTypesTable.regex_pattern.isnot(None),
                                EntityTypesTable.script_parser.isnot(None)))
                    .all()]

View File

View File

@ -0,0 +1,115 @@
import logging
import pandas as pd
from datetime import datetime as dt
from PyQt5.QtCore import Qt
from sqlalchemy import func, cast, String, distinct
from logline_leviathan.database.database_manager import ContextTable, EntityTypesTable, DistinctEntitiesTable, EntitiesTable, FileMetadata
def generate_dataframe(db_session, tree_items, file_items, context_selection, only_crossmatches=False, start_date=None, end_date=None, include_flagged=False, only_flagged=False, only_unflagged=False):
    """Collect the entities selected for export into a pandas DataFrame.

    Args:
        db_session: SQLAlchemy session used for all queries.
        tree_items: entity-type tree items; only checked ones are exported.
        file_items: object whose ``getCheckedFiles()`` gives the file names
            the export is restricted to (no restriction when empty).
        context_selection: GUI label selecting the context size. The compact
            option aggregates one row per distinct entity; the other options
            yield one row per occurrence.
        only_crossmatches: keep only entities found in more than one file.
        start_date, end_date: inclusive day range, applied only when both are set.
        include_flagged, only_flagged, only_unflagged: flag filtering; the
            latter two take effect only when ``include_flagged`` is true.

    Returns:
        DataFrame whose columns depend on ``context_selection``.

    Raises:
        ValueError: if ``db_session`` is falsy.
    """
    if not db_session:
        raise ValueError("Database session is None")

    # Checked entity types and files drive the export
    selected_entity_types = [item.entity_type for item in tree_items if item.checkState(0) == Qt.Checked]
    checked_files = list(file_items.getCheckedFiles())
    logging.debug(f"Generating dataframe, selected entity types: {selected_entity_types}, passed timestamp range: {start_date} - {end_date}")

    context_field = {
        'Kompakte Zusammenfassung ohne Kontext': None,
        'Kontext - gleiche Zeile': ContextTable.context_small,
        'Kontext - mittelgroß': ContextTable.context_medium,
        'Kontext - umfangreich': ContextTable.context_large
    }.get(context_selection)

    # Expand the day range to full-day datetimes when both bounds are given
    date_window = None
    if start_date and end_date:
        date_window = (dt.combine(start_date, dt.min.time()), dt.combine(end_date, dt.max.time()))

    # Subquery counting the distinct files each distinct entity occurs in
    file_counts = db_session.query(
        EntitiesTable.distinct_entities_id,
        func.count(distinct(EntitiesTable.file_id)).label('file_count')
    ).group_by(EntitiesTable.distinct_entities_id)
    if only_crossmatches:
        file_counts = file_counts.having(func.count(distinct(EntitiesTable.file_id)) > 1)
    file_count_subquery = file_counts.subquery()

    def restrict(query):
        # Timestamp, file and flag filters shared by both query shapes.
        if date_window is not None:
            query = query.filter(EntitiesTable.entry_timestamp.between(date_window[0], date_window[1]))
        if checked_files:
            query = query.filter(FileMetadata.file_name.in_(checked_files))
        if include_flagged:
            if only_flagged:
                query = query.filter(EntitiesTable.flag == True)
            elif only_unflagged:
                query = query.filter(EntitiesTable.flag == False)
        return query

    all_data = []  # Rows accumulated over all entity types
    for entity_type in selected_entity_types:
        if context_selection == 'Kompakte Zusammenfassung ohne Kontext':
            # One aggregated row per distinct entity
            summary_query = db_session.query(
                EntityTypesTable.entity_type,
                DistinctEntitiesTable.distinct_entity,
                func.count(EntitiesTable.entities_id).label('occurrences'),
                func.group_concat(
                    FileMetadata.file_name + ':line' + cast(EntitiesTable.line_number, String)
                ).label('sources'),
                func.group_concat(
                    cast(EntitiesTable.entry_timestamp, String)
                ).label('timestamps')
            ).join(EntityTypesTable, DistinctEntitiesTable.entity_types_id == EntityTypesTable.entity_type_id
            ).join(EntitiesTable, DistinctEntitiesTable.distinct_entities_id == EntitiesTable.distinct_entities_id
            ).join(FileMetadata, EntitiesTable.file_id == FileMetadata.file_id
            ).join(file_count_subquery, DistinctEntitiesTable.distinct_entities_id == file_count_subquery.c.distinct_entities_id
            ).filter(EntityTypesTable.entity_type == entity_type
            ).group_by(DistinctEntitiesTable.distinct_entity)
            for record in restrict(summary_query).all():
                sources = '' if record[3] is None else record[3].replace(',', ' // ')
                timestamps = '' if record[4] is None else record[4].replace(',', ' // ')
                all_data.append([record[0], record[1], record[2], timestamps, sources, ''])
        else:
            # One row per occurrence, with the selected context column
            detail_query = db_session.query(
                EntityTypesTable.entity_type,
                DistinctEntitiesTable.distinct_entity,
                func.count(EntitiesTable.entities_id).over(partition_by=DistinctEntitiesTable.distinct_entity).label('occurrences'),
                FileMetadata.file_name,
                EntitiesTable.line_number,
                context_field,
                EntitiesTable.entry_timestamp
            ).select_from(EntitiesTable
            ).join(DistinctEntitiesTable, EntitiesTable.distinct_entities_id == DistinctEntitiesTable.distinct_entities_id
            ).join(EntityTypesTable, DistinctEntitiesTable.entity_types_id == EntityTypesTable.entity_type_id
            ).join(FileMetadata, EntitiesTable.file_id == FileMetadata.file_id
            ).outerjoin(ContextTable, EntitiesTable.entities_id == ContextTable.entities_id
            ).join(file_count_subquery, DistinctEntitiesTable.distinct_entities_id == file_count_subquery.c.distinct_entities_id
            ).filter(EntityTypesTable.entity_type == entity_type)
            for record in restrict(detail_query).all():
                entry_timestamp = record[6].strftime('%Y-%m-%d %H:%M:%S') if record[6] is not None else ''
                context_info = record[5] if record[5] is not None else ''
                all_data.append([record[0], record[1], record[2], entry_timestamp, record[3], record[4], context_info])

    # Column layout follows the selected context mode
    if context_selection == 'Kompakte Zusammenfassung ohne Kontext':
        columns = ["Entity Type", "Entity", "Occurrences", "Timestamp", "Sources", "Context"]
    else:
        columns = ["Entity Type", "Entity", "Occurrences", "Timestamp", "Source File", "Line Number", "Context"]
    return pd.DataFrame(all_data, columns=columns)

View File

@ -0,0 +1,71 @@
from logline_leviathan.exporter.export_constructor import generate_dataframe
import re
import pandas as pd
def create_regex_pattern_from_entity(entity):
    """Compile a case-insensitive pattern matching any word of ``entity``.

    The entity is split on whitespace and each word is matched literally.
    Longer words come first in the alternation, so a word that is a prefix
    of another cannot shadow it. An empty or whitespace-only entity yields
    a pattern that never matches; previously it produced an empty pattern
    that matched at every position of the text.
    """
    # dict.fromkeys removes duplicates and keeps a deterministic order.
    words = sorted(dict.fromkeys(entity.split()), key=len, reverse=True)
    if not words:
        return re.compile(r'(?!)')  # negative lookahead on nothing: never matches
    regex_pattern = "|".join(re.escape(word) for word in words)
    return re.compile(regex_pattern, re.IGNORECASE)
def highlight_entities_in_context(context, entity_regex):
    """Return ``context`` with every match of ``entity_regex`` wrapped in <mark> tags."""
    return re.sub(entity_regex, lambda hit: f"<mark>{hit.group()}</mark>", context)
def generate_html_file(output_file_path, db_session, checkboxes, files, context_selection, only_crossmatches, start_date=None, end_date=None, include_flagged=False, only_flagged=False, only_unflagged=False):
    """Export the selected entities as a static HTML table.

    The data comes from ``generate_dataframe``. Entity words are wrapped in
    ``<mark>`` inside the Context column, line breaks become ``<br>`` and the
    table is embedded in a small styled HTML page written to
    ``output_file_path`` (UTF-8).

    NOTE(review): the table is rendered with ``escape=False``, so cell
    contents reach the report as raw HTML. Confirm the data is escaped
    upstream.
    """
    df = generate_dataframe(db_session, checkboxes, files, context_selection, only_crossmatches, start_date, end_date, include_flagged, only_flagged, only_unflagged)

    # The compact summary joins several values per cell; put each on its own line
    if context_selection == 'Kompakte Zusammenfassung ohne Kontext':
        for joined_column in ('Sources', 'Timestamp'):
            df[joined_column] = df[joined_column].apply(lambda cell: cell.replace(' // ', ' // <br>'))

    # Highlight the words of each row's entity inside that row's context
    marked_contexts = []
    for entity, context in zip(df['Entity'], df['Context']):
        marked_contexts.append(highlight_entities_in_context(context, create_regex_pattern_from_entity(entity)))
    df['Context'] = marked_contexts

    def newlines_to_breaks(cell):
        return cell.replace('\n', '<br>') if cell else cell

    df['Context'] = df['Context'].apply(newlines_to_breaks)

    html_table = df.to_html(classes="table table-bordered", escape=False, index=False)
    html_template = f"""
<!DOCTYPE html>
<html>
<head>
<title>Logline Leviathan Report</title>
<style>
.table {{
width: 100%;
max-width: 100%;
margin-bottom: 1rem;
background-color: transparent;
}}
.table th, .table td {{
padding: 0.75rem;
vertical-align: top;
border-top: 1px solid #dee2e6;
max-width: 300px; /* Max width */
word-wrap: break-word; /* Enable word wrapping */
}}
.table-bordered {{
border: 1px solid #dee2e6;
}}
.table-bordered th, .table-bordered td {{
border: 1px solid #dee2e6;
}}
</style>
</head>
<body>
{html_table}
</body>
</html>"""
    with open(output_file_path, 'w', encoding='utf-8') as report:
        report.write(html_template)

View File

@ -0,0 +1,94 @@
import re
from logline_leviathan.exporter.export_constructor import generate_dataframe
def create_regex_pattern_from_entity(entity):
    """Compile a case-insensitive pattern matching any word of ``entity``.

    The entity is split on whitespace and each word is matched literally.
    Longer words come first in the alternation, so a word that is a prefix
    of another cannot shadow it. An empty or whitespace-only entity yields
    a pattern that never matches; previously it produced an empty pattern
    that matched at every position of the text.
    """
    # dict.fromkeys removes duplicates and keeps a deterministic order.
    words = sorted(dict.fromkeys(entity.split()), key=len, reverse=True)
    if not words:
        return re.compile(r'(?!)')  # negative lookahead on nothing: never matches
    regex_pattern = "|".join(re.escape(word) for word in words)
    return re.compile(regex_pattern, re.IGNORECASE)
def highlight_entities_in_context(context, entity_regex):
    """Return ``context`` with every match of ``entity_regex`` wrapped in <mark> tags."""
    return re.sub(entity_regex, lambda hit: f"<mark>{hit.group()}</mark>", context)
def generate_niceoutput_file(output_file_path, db_session, checkboxes, files, context_selection, only_crossmatches, start_date=None, end_date=None, include_flagged=False, only_flagged=False, only_unflagged=False):
    """Export the selected entities as an interactive DataTables HTML report.

    The data comes from ``generate_dataframe``. Entity words are wrapped in
    ``<mark>`` inside the Context column, line breaks become ``<br>`` and the
    table is embedded in a page that loads jQuery/DataTables from public
    CDNs. The page is written to ``output_file_path`` (UTF-8).

    Fix: the per-column dropdown filter now passes the selected value through
    ``$.fn.dataTable.util.escapeRegex`` before anchoring it with ``^…$``.
    Previously, values containing regex metacharacters (dots, parentheses,
    '+', …) were interpreted as a pattern and filtered the wrong rows.

    NOTE(review): the table is rendered with ``escape=False``, so cell
    contents reach the report as raw HTML. Confirm the data is escaped
    upstream.
    """
    df = generate_dataframe(db_session, checkboxes, files, context_selection, only_crossmatches, start_date, end_date, include_flagged, only_flagged, only_unflagged)

    # The compact summary joins several values per cell; put each on its own line
    if context_selection == 'Kompakte Zusammenfassung ohne Kontext':
        for joined_column in ('Sources', 'Timestamp'):
            df[joined_column] = df[joined_column].apply(lambda cell: cell.replace(' // ', ' // <br>'))

    # Highlight the words of each row's entity inside that row's context
    for index, row in df.iterrows():
        entity_regex = create_regex_pattern_from_entity(row['Entity'])
        df.at[index, 'Context'] = highlight_entities_in_context(row['Context'], entity_regex)

    # Replace newline characters with HTML line breaks in the 'Context' column
    df['Context'] = df['Context'].apply(lambda cell: cell.replace('\n', '<br>') if cell else cell)

    html_table = df.to_html(classes="display responsive nowrap", table_id="example", escape=False, index=False)
    # str.format template: literal braces of the JavaScript part are doubled
    html_template = """
<!DOCTYPE html>
<html>
<head>
<title>Logline Leviathan Report</title>
<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.11.5/css/jquery.dataTables.min.css"/>
<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/buttons/2.2.2/css/buttons.dataTables.min.css"/>
<script type="text/javascript" src="https://code.jquery.com/jquery-3.5.1.js"></script>
<script type="text/javascript" src="https://cdn.datatables.net/1.11.5/js/jquery.dataTables.min.js"></script>
<script type="text/javascript" src="https://cdn.datatables.net/buttons/2.2.2/js/dataTables.buttons.min.js"></script>
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.1.3/jszip.min.js"></script>
<script type="text/javascript" src="https://cdn.datatables.net/buttons/2.2.2/js/buttons.html5.min.js"></script>
<script type="text/javascript" src="https://cdn.datatables.net/buttons/2.2.2/js/buttons.print.min.js"></script>
</head>
<body>
{0}
<script type="text/javascript">
$(document).ready(function () {{
// DataTables initialization
var table = $('#example').DataTable({{
"dom": 'Blfrtip',
"buttons": ['copy', 'csv', 'excel', 'pdf', 'print'],
"searching": true,
"fixedHeader": true,
"autoWidth": false,
"lengthChange": true,
"pageLength": 10,
"orderCellsTop": true,
}});
// Create dropdown filtering menus
$('#example thead tr').clone(true).appendTo('#example thead');
$('#example thead tr:eq(1) th').each(function (i) {{
var title = $(this).text();
if (title === 'Entity Type' || title === 'Entity' || title === 'Occurrences' || title === 'Timestamp' || title === 'Sources' || title === 'Source File' || title === 'Line Number') {{
var select = $('<select><option value=""></option></select>')
.appendTo($(this).empty())
.on('change', function () {{
var val = $.fn.dataTable.util.escapeRegex($(this).val());
table.column(i)
.search(val ? '^' + val + '$' : '', true, false)
.draw();
}});
table.column(i).data().unique().sort().each(function (d, j) {{
select.append('<option value="'+d+'">'+d+'</option>')
}});
}} else {{
$(this).html('');
}}
}});
}});
</script>
</body>
</html>""".format(html_table)
    # Write the finished page to disk
    with open(output_file_path, 'w', encoding='utf-8') as file:
        file.write(html_template)

View File

@ -0,0 +1,46 @@
from logline_leviathan.database.database_manager import ContextTable, EntityTypesTable, DistinctEntitiesTable, EntitiesTable, FileMetadata
from sqlalchemy import func, distinct
from PyQt5.QtCore import Qt
def generate_wordlist(output_file_path, db_session, checkboxes, only_crossmatches, start_date=None, end_date=None, include_flagged=False, only_flagged=False, only_unflagged=False):
    """Write the distinct entities of the selected types to a text file, one per line.

    Changes compared with the previous implementation:
    * Plain ``date`` bounds are widened to the full day (00:00:00 to
      23:59:59.999999), as ``generate_dataframe`` does. Entries later on the
      end date are no longer dropped. ``datetime`` bounds are used as given.
    * The query selects DISTINCT entities, so an entity with several
      occurrences is written once instead of once per occurrence.

    Args:
        output_file_path: target file, written as UTF-8.
        db_session: SQLAlchemy session used for the query.
        checkboxes: entity-type tree items; only checked ones are exported.
        only_crossmatches: keep only entities found in more than one file.
        start_date, end_date: inclusive range, applied only when both are set.
        include_flagged, only_flagged, only_unflagged: flag filtering; the
            latter two take effect only when ``include_flagged`` is true.

    Raises:
        ValueError: if ``checkboxes`` is empty.
    """
    if not checkboxes:
        raise ValueError("No entities selected")

    # Get selected entity types from checkboxes
    selected_entity_types = [item.entity_type for item in checkboxes if item.checkState(0) == Qt.Checked]

    # Prepare the initial query with proper joins
    query = db_session.query(
        DistinctEntitiesTable.distinct_entity
    ).join(
        EntitiesTable, DistinctEntitiesTable.distinct_entities_id == EntitiesTable.distinct_entities_id
    ).join(
        EntityTypesTable, EntitiesTable.entity_types_id == EntityTypesTable.entity_type_id
    ).filter(
        EntityTypesTable.entity_type.in_(selected_entity_types)
    )

    # Add timestamp filtering if necessary
    if start_date and end_date:
        from datetime import date, datetime, time  # local: not imported at file level
        range_start, range_end = start_date, end_date
        if isinstance(range_start, date) and not isinstance(range_start, datetime):
            range_start = datetime.combine(range_start, time.min)
        if isinstance(range_end, date) and not isinstance(range_end, datetime):
            range_end = datetime.combine(range_end, time.max)
        query = query.filter(EntitiesTable.entry_timestamp.between(range_start, range_end))

    # Handle crossmatches, flagged, and unflagged conditions
    if only_crossmatches:
        query = query.group_by(DistinctEntitiesTable.distinct_entity).having(func.count(distinct(EntitiesTable.file_id)) > 1)
    if include_flagged:
        if only_flagged:
            query = query.filter(EntitiesTable.flag == True)
        elif only_unflagged:
            query = query.filter(EntitiesTable.flag == False)

    # One line per entity, not one per occurrence
    results = query.distinct().all()

    with open(output_file_path, 'w', encoding='utf-8') as file:
        file.writelines(
            result.distinct_entity + '\n'
            for result in results
            if result.distinct_entity is not None
        )

View File

@ -0,0 +1,47 @@
import pandas as pd
from logline_leviathan.exporter.export_constructor import generate_dataframe
def ensure_utf8(s):
    """Return ``s`` with anything not encodable as UTF-8 replaced.

    Strings are round-tripped through UTF-8 with ``errors='replace'``;
    values of any other type are returned unchanged.
    """
    if not isinstance(s, str):
        return s
    encoded = s.encode('utf-8', errors='replace')
    return encoded.decode('utf-8')
def _unique_sheet_name(raw_name, used_names):
    """Turn ``raw_name`` into a valid worksheet title not yet in ``used_names``.

    Characters that are not allowed in sheet titles are replaced by '_', the
    title is limited to 31 characters and a numeric suffix is appended on a
    (case-insensitive) collision. ``used_names`` is updated in place.
    """
    import re  # local: this module does not import re at file level
    name = re.sub(r'[\\/*?:\[\]]', '_', str(raw_name)).strip() or 'Sheet'
    name = name[:31]
    candidate = name
    counter = 2
    while candidate.lower() in used_names:
        suffix = f'_{counter}'
        candidate = name[:31 - len(suffix)] + suffix
        counter += 1
    used_names.add(candidate.lower())
    return candidate


def generate_xlsx_file(output_file_path, db_session, checkboxes, files, context_selection, only_crossmatches, start_date=None, end_date=None, include_flagged=False, only_flagged=False, only_unflagged=False):
    """Export the selected entities to an .xlsx workbook, one sheet per entity type.

    Changes compared with the previous implementation:
    * An empty result now produces a workbook with one header-only sheet.
      Previously no sheet was written and saving the workbook failed.
    * Sheet titles are derived from the entity type but sanitised, because
      some characters are not permitted in worksheet titles.
    * ASCII control characters, which openpyxl rejects in cell values, are
      removed from all string cells.

    The Context column is additionally trimmed, stripped of non-ASCII
    characters and truncated to 32767 characters (Excel's cell limit).
    """
    import re  # local: this module does not import re at file level

    df = generate_dataframe(db_session, checkboxes, files, context_selection, only_crossmatches, start_date, end_date, include_flagged, only_flagged, only_unflagged)

    # Process context field
    if 'Context' in df.columns:
        df['Context'] = df['Context'].str.strip()  # Trim whitespaces
        df['Context'] = df['Context'].str.replace(r'[^\x00-\x7F]+', '', regex=True)  # Remove non-ASCII characters
        df['Context'] = df['Context'].apply(lambda x: x[:32767] if isinstance(x, str) else x)  # Excel cell limit

    # Fix the column order for whichever layout the DataFrame has
    if 'Sources' in df.columns:
        df = df[["Entity Type", "Entity", "Occurrences", "Timestamp", "Sources", "Context"]].copy()
    elif 'Source File' in df.columns and 'Line Number' in df.columns:
        df = df[["Entity Type", "Entity", "Occurrences", "Timestamp", "Source File", "Line Number", "Context"]].copy()

    # Control characters other than tab, LF and CR
    illegal_chars = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f]')

    def clean_cell(value):
        value = ensure_utf8(value)
        return illegal_chars.sub('', value) if isinstance(value, str) else value

    for col in df.select_dtypes(include=[object]):
        df[col] = df[col].apply(clean_cell)

    # The workbook is saved automatically when the context manager exits
    with pd.ExcelWriter(output_file_path, engine='openpyxl') as writer:
        entity_types = list(df['Entity Type'].unique())
        if not entity_types:
            # A workbook needs at least one sheet; write the headers only.
            df.to_excel(writer, sheet_name='Export', index=False)
        used_names = set()
        for entity_type in entity_types:
            sheet_name = _unique_sheet_name(entity_type, used_names)
            df_filtered = df[df['Entity Type'] == entity_type]
            df_filtered.to_excel(writer, sheet_name=sheet_name, index=False)
            worksheet = writer.sheets[sheet_name]
            # Uniform column width; chr(65) is 'A', which is sufficient for
            # the at most seven columns written here.
            for idx in range(len(df_filtered.columns)):
                worksheet.column_dimensions[chr(65 + idx)].width = 20

View File

@ -0,0 +1,88 @@
import logging
from docx import Document
from datetime import datetime
from logline_leviathan.file_processor.parser_thread import parse_content
from logline_leviathan.file_processor.file_database_ops import handle_file_metadata, handle_individual_entity, handle_distinct_entity, handle_context_snippet
import re
def read_docx_content(file_path):
    """Return the paragraphs of a DOCX file as a list of lines.

    Paragraph texts are joined with newlines and split again with line
    endings kept. Any failure is logged and reported as ``None``.
    """
    try:
        paragraphs = Document(file_path).paragraphs
        joined_text = '\n'.join(paragraph.text for paragraph in paragraphs)
        return joined_text.splitlines(True)  # Keep end-of-line characters
    except Exception as e:
        logging.error(f"Error reading DOCX file {file_path}: {e}")
        return None
def get_line_numbers_from_pos(content, start_pos, end_pos):
    """Map a character span of the joined content to zero-based line indices.

    Fix: the previous implementation kept the running offset from the first
    loop and then re-added the start line's length in the second loop, so the
    reported end line could be too early for matches spanning lines. Both
    indices are now found in a single pass over cumulative line lengths.

    Args:
        content: list of lines (with their line endings) whose concatenation
            is the text the positions refer to.
        start_pos: offset of the first character of the span.
        end_pos: offset just past the last character of the span.

    Returns:
        ``(start_line, end_line)`` with ``end_line >= start_line``. Positions
        beyond the end of the content are clamped to the last line; empty
        content yields ``(0, 0)``.
    """
    if not content:
        return 0, 0
    last_index = len(content) - 1
    start_line = end_line = None
    consumed = 0  # characters covered by the lines seen so far
    for i, line in enumerate(content):
        consumed += len(line)
        if start_line is None and start_pos < consumed:
            start_line = i
        if start_line is not None and end_pos <= consumed:
            end_line = i
            break
    if start_line is None:
        start_line = last_index
    if end_line is None:
        end_line = last_index
    return start_line, end_line
def process_docx_file(file_path, file_mimetype, thread_instance, db_session, abort_flag):
    """Extract entities from one DOCX file and store them in the database.

    Args:
        file_path: Path of the DOCX file.
        file_mimetype: MIME type stored with the file metadata.
        thread_instance: Worker thread; its ``update_status`` signal is
            emitted and it is passed on to ``handle_individual_entity``.
        db_session: Database session used for all queries and commits.
        abort_flag: Zero-argument callable that returns True once processing
            should stop.

    Returns:
        Number of matches for which an entity row exists after processing;
        0 if the file could not be read or an error occurred.
    """
    try:
        file_metadata = handle_file_metadata(db_session, file_path, file_mimetype)
        if file_metadata is None:
            logging.error(f"Skipping DOCX file {file_path}: file metadata could not be stored")
            return 0
        content = read_docx_content(file_path)
        if content is None:
            # read_docx_content has already logged why the file is unreadable
            return 0
        full_content = ''.join(content)  # Join all lines into a single string
        thread_instance.update_status.emit(f" Verarbeite DOCX-Datei: {file_path}")
        # Call the parser and get matches along with entity types
        parsed_entities = parse_content(full_content, abort_flag, db_session)
        entity_count = 0
        for entity_type_id, match_text, start_pos, end_pos in parsed_entities:
            if abort_flag():
                # Stop instead of writing further rows after an abort request
                break
            if not match_text.strip():
                continue
            timestamp = find_timestamp_before_match(full_content, start_pos)
            match_start_line, match_end_line = get_line_numbers_from_pos(content, start_pos, end_pos)
            entity = handle_distinct_entity(db_session, match_text, entity_type_id)
            if entity is None:
                # The helper logged the failure; skip this match only
                continue
            individual_entity = handle_individual_entity(db_session, entity, file_metadata, match_start_line, timestamp, entity_type_id, abort_flag, thread_instance)
            if individual_entity:
                entity_count += 1
                handle_context_snippet(db_session, individual_entity, content, match_start_line, match_end_line)
        return entity_count
    except Exception as e:
        db_session.rollback()
        logging.error(f"Error processing DOCX file {file_path}: {e}")
        return 0
# (compiled pattern, strptime format) pairs; earlier entries win when two
# candidates start at the same offset.
_TIMESTAMP_PATTERNS = [
    (re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}'), '%Y-%m-%d %H:%M:%S'),  # ISO 8601 Extended
    (re.compile(r'\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}'), '%Y/%m/%d %H:%M:%S'),  # ISO 8601 with slashes
    (re.compile(r'\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}'), '%d/%m/%Y %H:%M:%S'),  # European Date Format
    (re.compile(r'\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}'), '%m-%d-%Y %H:%M:%S'),  # US Date Format
    (re.compile(r'\d{8}_\d{6}'), '%Y%m%d_%H%M%S'),  # Compact Format
    (re.compile(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}'), '%Y-%m-%dT%H:%M:%S'),  # ISO 8601 Basic
    (re.compile(r'\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2}'), '%d.%m.%Y %H:%M:%S'),  # German Date Format
    (re.compile(r'\d{4}\d{2}\d{2} \d{2}:\d{2}:\d{2}'), '%Y%m%d %H:%M:%S'),  # Basic Format without Separators
    (re.compile(r'\d{1,2}-[A-Za-z]{3}-\d{4} \d{2}:\d{2}:\d{2}'), '%d-%b-%Y %H:%M:%S'),  # English Date Format with Month Name
    (re.compile(r'(?:19|20)\d{10}'), '%Y%m%d%H%M'),  # Compact Numeric Format
    # Add more patterns as needed
]


def find_timestamp_before_match(content, match_start_pos):
    """Return the timestamp closest before ``match_start_pos`` in ``content``.

    All known formats are searched in ``content[:match_start_pos]``; the
    parseable candidate that starts latest wins, whatever its format.
    (Taking the first format that matched anywhere let a single early
    timestamp shadow nearer ones written in another format.)

    Returns:
        The timestamp formatted as ``'%Y-%m-%d %H:%M:%S'``, or ``None`` if no
        parseable timestamp precedes the match.
    """
    search_content = content[:match_start_pos]
    best_start = -1
    best_timestamp = None
    for pattern, date_format in _TIMESTAMP_PATTERNS:
        for timestamp_match in reversed(list(pattern.finditer(search_content))):
            if timestamp_match.start() <= best_start:
                # Everything further back is farther away than the current best
                break
            try:
                parsed = datetime.strptime(timestamp_match.group(), date_format)
            except ValueError:
                continue  # Looks like a timestamp but is not a valid date
            best_start = timestamp_match.start()
            best_timestamp = parsed
            break
    if best_timestamp is None:
        return None
    # Convert the matched timestamp to the standardized format
    return best_timestamp.strftime('%Y-%m-%d %H:%M:%S')

View File

@ -0,0 +1,136 @@
import logging
import os
from logline_leviathan.database.database_manager import FileMetadata, DistinctEntitiesTable, EntitiesTable, ContextTable, session_scope
from datetime import datetime
def handle_file_metadata(db_session, file_path, file_mimetype, sheet_name=None):
    """Fetch or create the FileMetadata row for a file (or one sheet of it).

    Args:
        db_session: Database session used for the lookup and the commit.
        file_path: Path of the file; its basename becomes the stored name.
        file_mimetype: MIME type to store; an existing row is updated to it.
        sheet_name: Optional sheet name appended to the stored file name as
            ``<basename>_<sheet_name>``.

    Returns:
        The FileMetadata row, or ``None`` if the database operation failed
        (the error is logged and the session rolled back).
    """
    try:
        # Construct file name with or without sheet name
        base_file_name = os.path.basename(file_path)
        modified_file_name = f"{base_file_name}_{sheet_name}" if sheet_name else base_file_name
        # Search for existing metadata using the modified file name
        file_metadata = db_session.query(FileMetadata).filter_by(file_path=file_path, file_name=modified_file_name).first()
        if not file_metadata:
            logging.debug(f"File metadata {file_metadata} does not exist.")
            file_metadata = FileMetadata(file_name=modified_file_name, file_path=file_path, file_mimetype=file_mimetype)
            db_session.add(file_metadata)
        else:
            logging.debug(f"File metadata {file_metadata} already exists.")
            # Update the MIME type if the record already exists
            file_metadata.file_mimetype = file_mimetype
            logging.debug(f"Updated file mimetype: {file_metadata.file_mimetype}")
        logging.debug(f"committing file metadata {file_metadata}")
        db_session.commit()
        return file_metadata
    except Exception as e:
        # Leave the shared session usable for the next call
        db_session.rollback()
        logging.error(f"Error handling file metadata for {file_path}: {e}")
        return None
def handle_distinct_entity(db_session, match_text, entity_type_id):
    """Fetch or create the distinct-entity row for a matched text.

    Args:
        db_session: Database session used for the lookup and the commit.
        match_text: Matched text stored as the distinct entity.
        entity_type_id: Id of the entity type the match belongs to.

    Returns:
        The DistinctEntitiesTable row, or ``None`` if the database operation
        failed (the error is logged and the session rolled back).
    """
    try:
        distinct_entity = db_session.query(DistinctEntitiesTable).filter_by(distinct_entity=match_text, entity_types_id=entity_type_id).first()
        if not distinct_entity:
            logging.debug(f"Distinct entity {match_text} does not exist.")
            distinct_entity = DistinctEntitiesTable(distinct_entity=match_text, entity_types_id=entity_type_id)
            db_session.add(distinct_entity)
            logging.debug(f"committing distinct entity {distinct_entity}")
            db_session.commit()
        else:
            logging.debug(f"Distinct entity {distinct_entity} already exists.")
        return distinct_entity
    except Exception as e:
        # Leave the shared session usable for the next call
        db_session.rollback()
        logging.error(f"Error handling distinct entity {match_text}: {e}")
        return None
def handle_individual_entity(db_session, entity, file_metadata, line_number, timestamp, entity_types_id, abort_flag, thread_instance):
    """Fetch or create the row for one occurrence of an entity in a file.

    An occurrence is identified by distinct entity, file and line number.
    When a new row is created, the thread's entity counter is incremented
    under its mutex and a rate update is requested.

    Args:
        db_session: Database session used for the lookup and the commit.
        entity: Distinct-entity row (``distinct_entities_id`` is read).
        file_metadata: File metadata row (``file_id`` is read).
        line_number: Line number of the occurrence.
        timestamp: ``None``, a datetime, or a ``'%Y-%m-%d %H:%M:%S'`` string;
            a string in any other format is logged and stored as ``None``.
        entity_types_id: Id of the entity type.
        abort_flag: Zero-argument callable; when it returns True nothing is
            written and ``None`` is returned.
        thread_instance: Worker thread that owns the counter and its mutex.

    Returns:
        The EntitiesTable row, or ``None`` on abort or failure (the error is
        logged and the session rolled back).
    """
    try:
        if abort_flag():
            return None
        if timestamp and isinstance(timestamp, str):
            try:
                timestamp = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
            except ValueError:
                logging.warning(f"Invalid timestamp format: {timestamp}")
                timestamp = None
        individual_entity = db_session.query(EntitiesTable).filter_by(
            distinct_entities_id=entity.distinct_entities_id,
            file_id=file_metadata.file_id,
            line_number=line_number
        ).first()
        if not individual_entity:
            logging.debug(f"Individual entity {individual_entity} does not exist.")
            individual_entity = EntitiesTable(
                distinct_entities_id=entity.distinct_entities_id,
                file_id=file_metadata.file_id,
                line_number=line_number,
                entry_timestamp=timestamp,
                entity_types_id=entity_types_id
            )
            db_session.add(individual_entity)
            logging.debug(f"committing individual entity {individual_entity}")
            db_session.commit()
            thread_instance.total_entities_count_lock.lock()  # Lock the mutex
            try:
                thread_instance.total_entities_count += 1
            finally:
                thread_instance.total_entities_count_lock.unlock()  # Unlock the mutex
            thread_instance.calculate_and_emit_rate()
        else:
            logging.debug(f"Individual entity {individual_entity} already exists.")
        return individual_entity
    except Exception as e:
        # Leave the shared session usable for the next call
        db_session.rollback()
        # file_metadata may be None when storing the metadata failed earlier;
        # the handler itself must not raise in that case.
        source_path = getattr(file_metadata, 'file_path', None)
        logging.error(f"Error handling individual entity in {source_path}, line {line_number}: {e}")
        return None
#def count_newlines(content, start, end):
# return content[start:end].count('\n')
def handle_context_snippet(db_session, individual_entity, content, start_line, end_line):
    """Store the small/medium/large context snippets of an entity occurrence.

    Nothing is written if a context row already exists for the occurrence.

    Args:
        db_session: Database session used for the lookup and the commit.
        individual_entity: Occurrence row (``entities_id`` is read).
        content: List of text lines the line indices refer to.
        start_line: Index of the first line of the match.
        end_line: Index of the last line of the match.

    Errors are logged and the session is rolled back; nothing is returned.
    """
    try:
        # Check if a similar context already exists
        existing_context = db_session.query(ContextTable).filter_by(entities_id=individual_entity.entities_id).first()
        if existing_context:
            logging.debug(f"Existing context {existing_context} already exists.")
            return
        # Number of extra lines taken before and after the match
        context_sizes = {
            'Kontext - gleiche Zeile': 0,
            'Kontext - mittelgroß': 8,
            'Kontext - umfangreich': 15
            #'Index Context': 30
        }
        context_snippets = {}
        for size, lines in context_sizes.items():
            context_start = max(0, start_line - lines)
            context_end = min(len(content), end_line + lines + 1)
            # Callers pass lines that still carry their line ending; strip it
            # before joining so the snippet is not double-spaced.
            context_snippets[size] = "\n".join(
                line.rstrip("\r\n") for line in content[context_start:context_end]
            )
        context = ContextTable(entities_id=individual_entity.entities_id,
                               context_small=context_snippets['Kontext - gleiche Zeile'],
                               context_medium=context_snippets['Kontext - mittelgroß'],
                               context_large=context_snippets['Kontext - umfangreich']
                               )
        db_session.add(context)
        logging.debug(f"committing context {context}")
        db_session.commit()
    except Exception as e:
        # Leave the shared session usable for the next call
        db_session.rollback()
        logging.error(f"Error handling context snippet: {e}")

View File

@ -0,0 +1,218 @@
from multiprocessing.spawn import import_main_path
import sys
import time
import os
from PyQt5.QtCore import QThread, pyqtSignal, QMutex
from logline_leviathan.database.database_manager import session_scope
from logline_leviathan.gui.checkbox_panel import CheckboxPanel
from .text_processor import process_text_file
from .xlsx_processor import process_xlsx_file
from .pdf_processor import process_pdf_file
from .docx_processor import process_docx_file
import magic
import logging
import pathlib
class FileProcessorThread(QThread):
    """Worker thread that classifies each selected file and runs the matching processor.

    Signals:
        update_progress(int): 1-based index of the file that just finished.
        update_status(str): Status line for the user interface.
        update_tree_signal(): Emitted after each processed file.
        update_checkboxes_signal(): Emitted after each processed file.
        update_rate(float, int, float, int, float, float): entity rate, total
            entity count, file rate, processed file count, estimated remaining
            seconds and data rate in KiB/s; emitted at most once per second.
    """

    update_progress = pyqtSignal(int)
    update_status = pyqtSignal(str)
    update_tree_signal = pyqtSignal()
    update_checkboxes_signal = pyqtSignal()
    update_rate = pyqtSignal(float, int, float, int, float, float)
    last_update_time = 0

    # Fallback used when libmagic cannot classify a file: extension -> MIME type
    _EXTENSION_MIME_TYPES = {
        '.txt': 'text/plain',
        '.pdf': 'application/pdf',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.csv': 'text/csv',
        '.html': 'text/html',
        '.htm': 'text/html',
        '.xml': 'text/xml',
        '.json': 'application/json',
        '.yaml': 'text/yaml',
        '.yml': 'text/yaml',
        '.md': 'text/markdown',
        '.rtf': 'application/rtf',
        '.odt': 'application/vnd.oasis.opendocument.text',
        '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
        '.odp': 'application/vnd.oasis.opendocument.presentation',
        '.log': 'text/plain',
        '.ini': 'text/plain',
        '.conf': 'text/plain',
        '.cfg': 'text/plain',
        '.js': 'application/javascript',
        '.css': 'text/css',
        '.php': 'text/php',
        '.py': 'text/x-python',
        '.rb': 'text/x-ruby',
        '.java': 'text/x-java-source',
        '.c': 'text/x-c',
        '.cpp': 'text/x-c++',
        '.h': 'text/x-c-header',
        '.hpp': 'text/x-c++-header',
        '.sh': 'application/x-sh',
        '.bat': 'application/x-bat',
        '.ps1': 'application/x-powershell',
        '.sql': 'text/x-sql',
        # Add more mappings as needed
    }

    def __init__(self, file_paths):
        """Prepare counters and sizes for the given list of file paths."""
        super().__init__()
        self.start_time = time.time()
        self.total_entities_count = 0
        self.total_entities_count_lock = QMutex()
        self.abort_mutex = QMutex()
        self.abort_flag = False
        self.file_paths = file_paths
        self.unsupported_files_count = 0
        self.processed_files_count = 0
        self.total_data_processed_kb = 0
        self.total_files_size_kb = sum(os.path.getsize(f) / 1024 for f in file_paths)
        self.unsupported_files_list = []  # at most 20 entries, for display
        self.all_unsupported_files = []
        self.checkbox_panel = CheckboxPanel()

    @property
    def abort_flag(self):
        """Abort request flag; reads and writes are guarded by ``abort_mutex``."""
        self.abort_mutex.lock()
        flag = self._abort_flag
        self.abort_mutex.unlock()
        return flag

    @abort_flag.setter
    def abort_flag(self, value):
        self.abort_mutex.lock()
        self._abort_flag = value
        self.abort_mutex.unlock()

    def classify_file_type(self, file_path):
        """Return the MIME type of ``file_path``.

        libmagic is tried first (a second time with a ``pathlib.Path`` if the
        plain path fails). If that does not work -- including when the file
        is not found -- the type is derived from the file extension, with
        ``'application/octet-stream'`` as default, so a string is always
        returned.
        """
        try:
            return magic.Magic(mime=True).from_file(file_path)
        except FileNotFoundError:
            logging.error(f"File not found: {file_path}. Encoding: {sys.getfilesystemencoding()}", exc_info=True)
        except Exception:
            try:
                clean_file_path = pathlib.Path(file_path)
                return magic.Magic(mime=True).from_file(clean_file_path)
            except Exception as e:
                logging.error(f"The magic library failed classifying the file type: {e} // falling back to file extension")
        _, file_extension = os.path.splitext(file_path)
        # Default to binary type if unknown
        return self._EXTENSION_MIME_TYPES.get(file_extension.lower(), 'application/octet-stream')

    def run(self):
        """Process all files in order until done or aborted."""
        logging.debug("Thread run method started.")
        try:
            total_files = len(self.file_paths)
            for index, file_path in enumerate(self.file_paths):
                #if not self.debugFileProcessor(file_path):
                #    continue
                file_size_kb = os.path.getsize(file_path) / 1024  # Get file size in KiB
                self.total_data_processed_kb += file_size_kb
                if self.abort_flag:
                    self.update_status.emit("Analyse abgebrochen")
                    return
                logging.debug(f"Attempting to process file: {file_path}")
                file_type = self.classify_file_type(file_path)
                logging.info(f"ANALYZING {file_path} TYPE {file_type}")
                with session_scope() as session:
                    if 'text/' in file_type:
                        process_text_file(file_path, file_type, self, session, lambda: self.abort_flag)
                    elif 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' in file_type:
                        process_xlsx_file(file_path, file_type, self, session, lambda: self.abort_flag)
                    elif 'application/pdf' in file_type or file_type == ".pdf":
                        process_pdf_file(file_path, file_type, self, session, lambda: self.abort_flag)
                    elif 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' in file_type:
                        # Handling DOCX file
                        process_docx_file(file_path, file_type, self, session, lambda: self.abort_flag)
                    else:
                        logging.info(f"Skipping unsupported file type: {file_type}")
                        self.all_unsupported_files.append(file_path)
                        self.unsupported_files_count += 1
                        if len(self.unsupported_files_list) < 20:
                            self.unsupported_files_list.append(f"{file_path} (Type: {file_type})")
                        continue
                self.update_tree_signal.emit()
                self.update_checkboxes_signal.emit()
                self.processed_files_count = index + 1
                self.update_progress.emit(index + 1)
            # Uses the list length instead of the loop variable so that an
            # empty selection does not raise a NameError here.
            self.update_status.emit(f" Verarbeitung abgeschlossen. {total_files - self.unsupported_files_count} von {total_files} Dateien verarbeitet.")
        except Exception as e:
            # exc_info is a logging option; a signal's emit() only takes the
            # declared signal arguments.
            logging.error(f"Error processing files: {e}", exc_info=True)
            self.update_status.emit(f"Fehler beim Verarbeiten von Dateien {e}")

    def calculate_and_emit_rate(self):
        """Emit ``update_rate`` if at least one second passed since the last emit."""
        current_time = time.time()
        if current_time - self.last_update_time >= 1:  # Check if 1 second has passed
            entity_rate = self.calculate_rate()
            file_rate = self.calculate_file_rate()
            data_rate_kibs = self.calculate_data_rate()
            estimated_time = self.calculate_estimated_time_to_completion(data_rate_kibs)
            self.update_rate.emit(entity_rate, self.total_entities_count, file_rate, self.processed_files_count, estimated_time, data_rate_kibs)
            self.last_update_time = current_time

    def calculate_data_rate(self):
        """Return the processed KiB per second since the thread object was created."""
        elapsed_time = time.time() - self.start_time
        return self.total_data_processed_kb / elapsed_time if elapsed_time > 0 else 0

    def calculate_estimated_time_to_completion(self, data_rate_kibs):
        """Return the remaining seconds at ``data_rate_kibs``; infinity if the rate is zero."""
        remaining_data_kb = self.total_files_size_kb - self.total_data_processed_kb
        if data_rate_kibs > 0:
            estimated_time = remaining_data_kb / data_rate_kibs
        else:
            estimated_time = float('inf')  # Indefinite time if rate is zero
        return estimated_time

    def calculate_file_rate(self):
        """Return the processed files per second since the thread object was created."""
        elapsed_time = time.time() - self.start_time
        return self.processed_files_count / elapsed_time if elapsed_time > 0 else 0

    def calculate_rate(self):
        """Return the stored entities per second since the thread object was created."""
        elapsed_time = time.time() - self.start_time
        rate = self.total_entities_count / elapsed_time if elapsed_time > 0 else 0
        return rate

    def abort(self):
        """Request that processing stops; ``run`` checks the flag before each file."""
        self.abort_flag = True

    def getUnsupportedFilesCount(self):
        """Return how many files were skipped as unsupported."""
        return self.unsupported_files_count

    def getUnsupportedFilesList(self):
        """Return the display list of skipped files (capped at 20 entries)."""
        return self.unsupported_files_list

    def debugFileProcessor(self, file_path):
        """Log diagnostics for ``file_path``; return False if it is missing or unreadable."""
        logging.debug(f"Attempting to process file: {file_path}")
        if not os.path.exists(file_path):
            logging.warning(f"File does not exist: {file_path}")
            return False
        elif not os.access(file_path, os.R_OK):
            logging.warning(f"File is not accessible: {file_path}")
            return False
        try:
            detected_encoding = magic.from_file(file_path, mime=True)
            logging.debug(f"Detected encoding for {file_path}: {detected_encoding}")
        except Exception as e:
            logging.error(f"Failed to detect encoding for {file_path}: {e}", exc_info=True)
        file_type = self.classify_file_type(file_path)
        logging.debug(f"Classified file type for {file_path}: {file_type}")
        return True

View File

@ -0,0 +1,240 @@
from multiprocessing.spawn import import_main_path
import sys
import time
import os
from PyQt5.QtCore import QThread, pyqtSignal, QMutex
from logline_leviathan.database.database_manager import session_scope
from logline_leviathan.gui.checkbox_panel import CheckboxPanel
from .text_processor import process_text_file
from .xlsx_processor import process_xlsx_file
from .pdf_processor import process_pdf_file
from .docx_processor import process_docx_file
import magic
import logging
import pathlib
from sqlalchemy import text
from sqlalchemy.exc import OperationalError
class FileProcessorThread(QThread):
    """Worker thread that classifies each selected file and runs the matching processor.

    Before each file the database connection is probed and, if necessary,
    retried with exponential backoff.

    Signals:
        update_progress(int): 1-based index of the file that just finished.
        update_status(str): Status line for the user interface.
        update_tree_signal(): Emitted after each processed file.
        update_checkboxes_signal(): Emitted after each processed file.
        update_rate(float, int, float, int, float, float): entity rate, total
            entity count, file rate, processed file count, estimated remaining
            seconds and data rate in KiB/s; emitted at most once per second.
    """

    update_progress = pyqtSignal(int)
    update_status = pyqtSignal(str)
    update_tree_signal = pyqtSignal()
    update_checkboxes_signal = pyqtSignal()
    update_rate = pyqtSignal(float, int, float, int, float, float)
    last_update_time = 0

    # Fallback used when libmagic cannot classify a file: extension -> MIME type
    _EXTENSION_MIME_TYPES = {
        '.txt': 'text/plain',
        '.pdf': 'application/pdf',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.csv': 'text/csv',
        '.html': 'text/html',
        '.htm': 'text/html',
        '.xml': 'text/xml',
        '.json': 'application/json',
        '.yaml': 'text/yaml',
        '.yml': 'text/yaml',
        '.md': 'text/markdown',
        '.rtf': 'application/rtf',
        '.odt': 'application/vnd.oasis.opendocument.text',
        '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
        '.odp': 'application/vnd.oasis.opendocument.presentation',
        '.log': 'text/plain',
        '.ini': 'text/plain',
        '.conf': 'text/plain',
        '.cfg': 'text/plain',
        '.js': 'application/javascript',
        '.css': 'text/css',
        '.php': 'text/php',
        '.py': 'text/x-python',
        '.rb': 'text/x-ruby',
        '.java': 'text/x-java-source',
        '.c': 'text/x-c',
        '.cpp': 'text/x-c++',
        '.h': 'text/x-c-header',
        '.hpp': 'text/x-c++-header',
        '.sh': 'application/x-sh',
        '.bat': 'application/x-bat',
        '.ps1': 'application/x-powershell',
        '.sql': 'text/x-sql',
        # Add more mappings as needed
    }

    def __init__(self, file_paths):
        """Prepare counters and sizes for the given list of file paths."""
        super().__init__()
        self.start_time = time.time()
        self.total_entities_count = 0
        self.total_entities_count_lock = QMutex()
        self.abort_mutex = QMutex()
        self.abort_flag = False
        self.file_paths = file_paths
        self.unsupported_files_count = 0
        self.processed_files_count = 0
        self.total_data_processed_kb = 0
        self.total_files_size_kb = sum(os.path.getsize(f) / 1024 for f in file_paths)
        self.unsupported_files_list = []  # at most 20 entries, for display
        self.all_unsupported_files = []
        self.checkbox_panel = CheckboxPanel()

    @property
    def abort_flag(self):
        """Abort request flag; reads and writes are guarded by ``abort_mutex``."""
        self.abort_mutex.lock()
        flag = self._abort_flag
        self.abort_mutex.unlock()
        return flag

    @abort_flag.setter
    def abort_flag(self, value):
        self.abort_mutex.lock()
        self._abort_flag = value
        self.abort_mutex.unlock()

    def classify_file_type(self, file_path):
        """Return the MIME type of ``file_path``.

        libmagic is tried first (a second time with a ``pathlib.Path`` if the
        plain path fails). If that does not work -- including when the file
        is not found -- the type is derived from the file extension, with
        ``'application/octet-stream'`` as default, so a string is always
        returned.
        """
        try:
            return magic.Magic(mime=True).from_file(file_path)
        except FileNotFoundError:
            logging.error(f"File not found: {file_path}. Encoding: {sys.getfilesystemencoding()}", exc_info=True)
        except Exception:
            try:
                clean_file_path = pathlib.Path(file_path)
                return magic.Magic(mime=True).from_file(clean_file_path)
            except Exception as e:
                logging.error(f"The magic library failed classifying the file type: {e} // falling back to file extension")
        _, file_extension = os.path.splitext(file_path)
        # Default to binary type if unknown
        return self._EXTENSION_MIME_TYPES.get(file_extension.lower(), 'application/octet-stream')

    def run(self):
        """Process all files in order until done or aborted."""
        logging.debug("Thread run method started.")
        try:
            total_files = len(self.file_paths)
            for index, file_path in enumerate(self.file_paths):
                #if not self.debugFileProcessor(file_path):
                #    continue
                file_size_kb = os.path.getsize(file_path) / 1024  # Get file size in KiB
                self.total_data_processed_kb += file_size_kb
                if self.abort_flag:
                    self.update_status.emit("Analyse abgebrochen")
                    return
                logging.debug(f"Attempting to process file: {file_path}")
                file_type = self.classify_file_type(file_path)
                logging.info(f"ANALYZING {file_path} TYPE {file_type}")
                # Check and potentially re-establish the database connection
                if not self.check_and_restore_db_connection():
                    logging.error(f"Database connection could not be established for {file_path}. Skipping file.")
                    continue
                with session_scope() as session:
                    if 'text/' in file_type:
                        process_text_file(file_path, file_type, self, session, lambda: self.abort_flag)
                    elif 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' in file_type:
                        process_xlsx_file(file_path, file_type, self, session, lambda: self.abort_flag)
                    elif 'application/pdf' in file_type or file_type == ".pdf":
                        process_pdf_file(file_path, file_type, self, session, lambda: self.abort_flag)
                    elif 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' in file_type:
                        # Handling DOCX file
                        process_docx_file(file_path, file_type, self, session, lambda: self.abort_flag)
                    else:
                        logging.info(f"Skipping unsupported file type: {file_type}")
                        self.all_unsupported_files.append(file_path)
                        self.unsupported_files_count += 1
                        if len(self.unsupported_files_list) < 20:
                            self.unsupported_files_list.append(f"{file_path} (Type: {file_type})")
                        continue
                self.update_tree_signal.emit()
                self.update_checkboxes_signal.emit()
                self.processed_files_count = index + 1
                self.update_progress.emit(index + 1)
            # Uses the list length instead of the loop variable so that an
            # empty selection does not raise a NameError here.
            self.update_status.emit(f" Verarbeitung abgeschlossen. {total_files - self.unsupported_files_count} von {total_files} Dateien verarbeitet.")
        except Exception as e:
            # exc_info is a logging option; a signal's emit() only takes the
            # declared signal arguments.
            logging.error(f"Error processing files: {e}", exc_info=True)
            self.update_status.emit(f"Fehler beim Verarbeiten von Dateien {e}")

    def check_and_restore_db_connection(self):
        """Probe the database with ``SELECT 1``, retrying on OperationalError.

        Up to five attempts are made, waiting 2, 4, 8 and 16 seconds between
        them. Returns True as soon as a probe succeeds, False otherwise.
        """
        max_attempts = 5
        for attempt in range(1, max_attempts + 1):
            try:
                with session_scope() as session:
                    session.execute(text('SELECT 1'))
                return True
            except OperationalError:
                if attempt < max_attempts:
                    # Exponential backoff; no point in waiting after the
                    # final attempt has already failed.
                    time.sleep(2 ** attempt)
        logging.error("Failed to re-establish database connection after several attempts.")
        return False

    def calculate_and_emit_rate(self):
        """Emit ``update_rate`` if at least one second passed since the last emit."""
        current_time = time.time()
        if current_time - self.last_update_time >= 1:  # Check if 1 second has passed
            entity_rate = self.calculate_rate()
            file_rate = self.calculate_file_rate()
            data_rate_kibs = self.calculate_data_rate()
            estimated_time = self.calculate_estimated_time_to_completion(data_rate_kibs)
            self.update_rate.emit(entity_rate, self.total_entities_count, file_rate, self.processed_files_count, estimated_time, data_rate_kibs)
            self.last_update_time = current_time

    def calculate_data_rate(self):
        """Return the processed KiB per second since the thread object was created."""
        elapsed_time = time.time() - self.start_time
        return self.total_data_processed_kb / elapsed_time if elapsed_time > 0 else 0

    def calculate_estimated_time_to_completion(self, data_rate_kibs):
        """Return the remaining seconds at ``data_rate_kibs``; infinity if the rate is zero."""
        remaining_data_kb = self.total_files_size_kb - self.total_data_processed_kb
        if data_rate_kibs > 0:
            estimated_time = remaining_data_kb / data_rate_kibs
        else:
            estimated_time = float('inf')  # Indefinite time if rate is zero
        return estimated_time

    def calculate_file_rate(self):
        """Return the processed files per second since the thread object was created."""
        elapsed_time = time.time() - self.start_time
        return self.processed_files_count / elapsed_time if elapsed_time > 0 else 0

    def calculate_rate(self):
        """Return the stored entities per second since the thread object was created."""
        elapsed_time = time.time() - self.start_time
        rate = self.total_entities_count / elapsed_time if elapsed_time > 0 else 0
        return rate

    def abort(self):
        """Request that processing stops; ``run`` checks the flag before each file."""
        self.abort_flag = True

    def getUnsupportedFilesCount(self):
        """Return how many files were skipped as unsupported."""
        return self.unsupported_files_count

    def getUnsupportedFilesList(self):
        """Return the display list of skipped files (capped at 20 entries)."""
        return self.unsupported_files_list

    def debugFileProcessor(self, file_path):
        """Log diagnostics for ``file_path``; return False if it is missing or unreadable."""
        logging.debug(f"Attempting to process file: {file_path}")
        if not os.path.exists(file_path):
            logging.warning(f"File does not exist: {file_path}")
            return False
        elif not os.access(file_path, os.R_OK):
            logging.warning(f"File is not accessible: {file_path}")
            return False
        try:
            detected_encoding = magic.from_file(file_path, mime=True)
            logging.debug(f"Detected encoding for {file_path}: {detected_encoding}")
        except Exception as e:
            logging.error(f"Failed to detect encoding for {file_path}: {e}", exc_info=True)
        file_type = self.classify_file_type(file_path)
        logging.debug(f"Classified file type for {file_path}: {file_type}")
        return True

View File

@ -0,0 +1,101 @@
# parse_content receives the full_content string from process_text_file, process_xlsx_file, process_pdf_file or a similar caller, together with the abort_flag callable and a database session
import os
import sys
import re
import logging
import importlib.util
import multiprocessing
from logline_leviathan.database.database_manager import EntityTypesTable
#logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
#multiprocessing.set_start_method('spawn')
def parse_with_script(parser_module_name, full_content):
    """Run the ``parse`` function of a parser script from ``data/parser``.

    Args:
        parser_module_name: Script name, with or without a trailing ``.py``.
        full_content: Text handed to the script's ``parse`` function.

    Returns:
        Whatever ``parse(full_content)`` returns, or ``[]`` if the script is
        missing, cannot be loaded, or raises (the error is logged).
    """
    # Strip only a trailing ".py"; str.replace would also remove a ".py"
    # that occurs inside the name.
    if parser_module_name.endswith('.py'):
        parser_module_name = parser_module_name[:-len('.py')]
    if getattr(sys, 'frozen', False):
        # The base path is the directory of the executable
        base_dir = os.path.dirname(sys.executable)
    else:
        # Running in a normal Python environment: three directory levels
        # above this file
        base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    # Construct the path to the parser module in the 'data/parser' directory
    parser_module_path = os.path.join(base_dir, 'data', 'parser', parser_module_name + '.py')
    if not os.path.exists(parser_module_path):
        logging.error(f"Parser module not found: {parser_module_path}")
        return []
    try:
        # Dynamically import the module using its file path; a script that
        # fails while loading is reported the same way as one that fails
        # while parsing.
        spec = importlib.util.spec_from_file_location(parser_module_name, parser_module_path)
        parser_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(parser_module)
        return parser_module.parse(full_content)
    except Exception as e:
        logging.error(f"Error using parser module {parser_module_name}: {e}")
        return []
def parse_with_regex(regex_pattern, full_content):
    """Collect every match of ``regex_pattern`` in ``full_content``.

    Returns a list of ``(matched_text, start, end)`` tuples, or ``[]`` if the
    pattern is not a valid regular expression (the error is logged).
    """
    hits = []
    try:
        for found in re.finditer(regex_pattern, full_content):
            hits.append((found.group(), found.start(), found.end()))
    except re.error as e:
        logging.error(f"Invalid regex pattern: {regex_pattern}. Error: {e}")
        return []
    return hits
def parse_entity_type(entity_type, full_content):
    """Find all matches of one entity type in ``full_content``.

    A script parser is used when ``entity_type.script_parser`` is set and the
    script exists under ``data/parser`` (relative to the working directory);
    otherwise the entity type's regex pattern is used, if any.

    Returns a list of tuples made of the entity type id followed by the
    fields of each parser result; ``[]`` if nothing applies or an error
    occurs (the error is logged).
    """
    try:
        script_name = entity_type.script_parser
        if script_name and os.path.exists(os.path.join('data', 'parser', script_name)):
            # Use the script_parser name directly
            module_name = script_name.replace('.py', '')
            raw_matches = parse_with_script(module_name, full_content)
        elif entity_type.regex_pattern:
            raw_matches = parse_with_regex(entity_type.regex_pattern, full_content)
        else:
            return []
        return [(entity_type.entity_type_id, *raw_match) for raw_match in raw_matches]
    except Exception as e:
        logging.error(f"Error in parse_entity_type for {entity_type}: {e}")
        return []
def parse_content(full_content, abort_flag, db_session):
    """Run every enabled entity-type parser over ``full_content``.

    The entity types are parsed in a multiprocessing pool.

    Args:
        full_content: Text to parse.
        abort_flag: Zero-argument callable; once it returns True the
            remaining results are no longer collected.
        db_session: Database session used to load the enabled entity types.

    Returns:
        List of 4-element matches. Results with any other length are logged
        and dropped, because callers unpack every match into four values.
    """
    # "== True" builds the SQL comparison on the column and must stay as is
    entity_types = db_session.query(EntityTypesTable).filter(EntityTypesTable.parser_enabled == True).all()
    if not entity_types:
        # Nothing to parse with: avoid starting worker processes for nothing
        return []
    matches = []
    with multiprocessing.Pool() as pool:
        results = [pool.apply_async(parse_entity_type, (et, full_content)) for et in entity_types]
        for result in results:
            if abort_flag():
                logging.debug("Aborting parsing due to flag")
                break
            try:
                matches.extend(result.get())
            except Exception as e:
                logging.error(f"Error parsing entity type: {e}")
    valid_matches = []
    for match in matches:
        if len(match) == 4:
            valid_matches.append(match)
        else:
            logging.error(f"Unexpected format for parsd entity: {match}")
    return valid_matches

View File

@ -0,0 +1,153 @@
import logging
import re
import os
from datetime import datetime
from logline_leviathan.file_processor.parser_thread import parse_content
from logline_leviathan.file_processor.file_database_ops import handle_file_metadata, handle_individual_entity, handle_distinct_entity, handle_context_snippet
import fitz
logging.getLogger('pdfminer').setLevel(logging.INFO)
def read_pdf_content(file_path):
    """Return the plain text of every page of a PDF as a list of strings.

    Returns ``None`` (after logging the error) if the file cannot be read.
    """
    try:
        page_texts = []
        with fitz.open(file_path) as pdf:
            for page in pdf:
                page_texts.append(page.get_text("text"))
        return page_texts
    except Exception as e:
        logging.error(f"Error reading PDF file {file_path}: {e}")
        return None
def _line_span_in_text(text, start_pos, end_pos):
    """Return the 0-based (first, last) line indices of ``text[start_pos:end_pos]``.

    Lines are delimited by ``'\\n'`` only, so the indices match
    ``text.split('\\n')``.
    """
    start_line = text.count('\n', 0, start_pos)
    # Offset of the last matched character (end_pos is exclusive)
    last_char_pos = max(start_pos, end_pos - 1)
    end_line = text.count('\n', 0, last_char_pos)
    return start_line, end_line


def process_pdf_file(file_path, file_mimetype, thread_instance, db_session, abort_flag):
    """Extract entities from one PDF file, page by page, and store them.

    Line numbers and context snippets are derived from the same page text
    the match offsets refer to; line numbers are 0-based and relative to the
    page.

    Args:
        file_path: Path of the PDF file.
        file_mimetype: MIME type stored with the file metadata.
        thread_instance: Worker thread; its ``update_status`` signal is
            emitted and it is passed on to ``handle_individual_entity``.
        db_session: Database session used for all queries and commits.
        abort_flag: Zero-argument callable that returns True once processing
            should stop.

    Returns:
        Number of matches for which an entity row exists after processing
        (up to the abort, if any); 0 if an error occurred.
    """
    try:
        logging.info(f"Starting processing of PDF file: {file_path}")
        with fitz.open(file_path) as pdf:  # Open the PDF with fitz
            entity_count = 0
            file_metadata = handle_file_metadata(db_session, file_path, file_mimetype)
            if file_metadata is None:
                logging.error(f"Skipping PDF file {file_path}: file metadata could not be stored")
                return 0
            for page_number, page in enumerate(pdf):
                if abort_flag():
                    logging.info("Processing aborted.")
                    return entity_count
                content = page.get_text("text")
                if not content:
                    continue  # Skip empty pages
                thread_instance.update_status.emit(f" Verarbeite PDF-Datei: {file_path}, Seite {page_number + 1}")
                # Split once per page; handle_context_snippet slices this list
                # by line index (a one-element list holding the whole page
                # produced empty context for every line after the first).
                page_lines = content.split('\n')
                parsed_entities = parse_content(content, abort_flag, db_session)
                for entity_type_id, match_text, start_pos, end_pos in parsed_entities:
                    if abort_flag():
                        logging.info("Processing aborted.")
                        return entity_count
                    if not match_text.strip():
                        continue
                    timestamp = find_timestamp_before_match(content, start_pos, file_path)
                    match_start_line, match_end_line = _line_span_in_text(content, start_pos, end_pos)
                    entity = handle_distinct_entity(db_session, match_text, entity_type_id)
                    if entity is None:
                        # The helper logged the failure; skip this match only
                        continue
                    individual_entity = handle_individual_entity(db_session, entity, file_metadata, match_start_line, timestamp, entity_type_id, abort_flag, thread_instance)
                    if individual_entity:
                        handle_context_snippet(db_session, individual_entity, page_lines, match_start_line, match_end_line)
                        entity_count += 1
            logging.info(f"Finished processing PDF file: {file_path}")
            return entity_count
    except Exception as e:
        db_session.rollback()
        logging.error(f"Error processing PDF file {file_path}: {e}")
        return 0


def alternative_get_line_numbers_from_pos(pdf, page_number, start_pos, end_pos):
    """Earlier line-lookup variant; not called by the code visible in this module.

    NOTE(review): logic left unchanged. It compares the offsets with the
    length of each single line rather than with a running total, so its
    results look unreliable -- prefer get_line_numbers_from_pos and confirm
    the intent before reusing this function.
    """
    cumulative_line_number = 0
    start_line_number = end_line_number = None
    for current_page in range(page_number + 1):
        page = pdf[current_page]
        text_blocks = page.get_text("dict")["blocks"]
        for block in text_blocks:
            if 'lines' in block:
                for line_number, line in enumerate(block['lines']):
                    if current_page == page_number:
                        line_text = "".join([span['text'] for span in line['spans']])
                        current_pos = len(line_text)
                        if start_pos < current_pos and start_line_number is None:
                            start_line_number = cumulative_line_number + line_number
                        if end_pos <= current_pos:
                            end_line_number = cumulative_line_number + line_number
                            return start_line_number, end_line_number
                    cumulative_line_number += 1
    return start_line_number, end_line_number


def get_line_numbers_from_pos(pdf, page_number, start_pos, end_pos):
    """Return the 0-based (start_line, end_line) of a match on one PDF page.

    The offsets must refer to ``pdf[page_number].get_text("text")``, i.e. the
    text the page was parsed from. Working on that same text keeps the line
    indices consistent with the offsets: newlines are counted, numbering
    does not restart per text block, and a match starting on line 0 is
    reported as such.
    """
    page_text = pdf[page_number].get_text("text")
    return _line_span_in_text(page_text, start_pos, end_pos)
# (compiled pattern, strptime format) pairs; earlier entries win when two
# candidates start at the same offset. None of the patterns can match across
# a line break.
_TIMESTAMP_PATTERNS = [
    (re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}'), '%Y-%m-%d %H:%M:%S'),  # ISO 8601 Extended
    (re.compile(r'\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}'), '%Y/%m/%d %H:%M:%S'),  # ISO 8601 with slashes
    (re.compile(r'\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}'), '%d/%m/%Y %H:%M:%S'),  # European Date Format
    (re.compile(r'\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}'), '%m-%d-%Y %H:%M:%S'),  # US Date Format
    (re.compile(r'\d{8}_\d{6}'), '%Y%m%d_%H%M%S'),  # Compact Format
    (re.compile(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}'), '%Y-%m-%dT%H:%M:%S'),  # ISO 8601 Basic
    (re.compile(r'\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2}'), '%d.%m.%Y %H:%M:%S'),  # German Date Format
    (re.compile(r'\d{4}\d{2}\d{2} \d{2}:\d{2}:\d{2}'), '%Y%m%d %H:%M:%S'),  # Basic Format without Separators
    (re.compile(r'\d{1,2}-[A-Za-z]{3}-\d{4} \d{2}:\d{2}:\d{2}'), '%d-%b-%Y %H:%M:%S'),  # English Date Format with Month Name
    (re.compile(r'(?:19|20)\d{10}'), '%Y%m%d%H%M'),  # Compact Numeric Format
    # Add more patterns as needed
]


def _latest_timestamp_in(search_content):
    """Return the parseable timestamp that starts latest in ``search_content``, or None."""
    best_start = -1
    best_timestamp = None
    for pattern, date_format in _TIMESTAMP_PATTERNS:
        for timestamp_match in reversed(list(pattern.finditer(search_content))):
            if timestamp_match.start() <= best_start:
                # Everything further back is farther away than the current best
                break
            try:
                parsed = datetime.strptime(timestamp_match.group(), date_format)
            except ValueError:
                continue  # Looks like a timestamp but is not a valid date
            best_start = timestamp_match.start()
            best_timestamp = parsed
            break
    if best_timestamp is None:
        return None
    # Convert the matched timestamp to the standardized format
    return best_timestamp.strftime('%Y-%m-%d %H:%M:%S')


def find_timestamp_before_match(content, match_start_pos, file_path):
    """Return the timestamp closest before a match, falling back to the file name.

    The text before ``match_start_pos`` is searched for all known formats and
    the candidate that starts latest wins, whatever its format. (Taking the
    first format that matched anywhere let a single early timestamp shadow
    nearer ones written in another format.) If the text holds no timestamp,
    the base name of ``file_path`` is searched the same way.

    Returns:
        The timestamp formatted as ``'%Y-%m-%d %H:%M:%S'``, or ``None``.
    """
    # First, try to find a timestamp in the content
    timestamp = _latest_timestamp_in(content[:match_start_pos])
    if timestamp:
        return timestamp
    # If not found in content, try to find a timestamp in the file path
    return _latest_timestamp_in(os.path.basename(file_path))

View File

@ -0,0 +1,104 @@
import re
import os
import logging
from logline_leviathan.file_processor.parser_thread import parse_content
from datetime import datetime
from logline_leviathan.file_processor.file_database_ops import handle_file_metadata, handle_individual_entity, handle_context_snippet, handle_distinct_entity
from logline_leviathan.database.database_manager import session_scope
def read_file_content(file_path):
    """Read a UTF-8 text file and return its lines, line endings included.

    Any failure (missing file, decoding error, ...) is logged and reported
    to the caller as None.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            lines = handle.readlines()
    except Exception as read_error:
        logging.error(f"Error reading file {file_path}: {read_error}")
        return None
    return lines
def process_text_file(file_path, file_mimetype, thread_instance, db_session, abort_flag):
    """Parse one text file and store every entity found in it.

    Steps: register the file's metadata, read the file, run ``parse_content``
    over the joined text and, for every non-blank match, store the distinct
    entity, the individual occurrence (with line number and nearest preceding
    timestamp) and its context snippet.

    Returns the number of individual entities stored. Returns 0 when the file
    cannot be read or any step raises; in both cases the session is rolled
    back.
    """
    try:
        file_metadata = handle_file_metadata(db_session, file_path, file_mimetype)
        content = read_file_content(file_path)
        if content is None:
            # read_file_content already logged the cause. Previously this case
            # was only caught by accident via a TypeError from ''.join(None).
            db_session.rollback()
            return 0
        full_content = ''.join(content)  # offsets from the parser refer to this string
        thread_instance.update_status.emit(f" Verarbeite textbasierte Datei: {file_path}")
        parsed_entities = parse_content(full_content, abort_flag, db_session)
        entity_count = 0
        for entity_type_id, match_text, start_pos, end_pos in parsed_entities:
            if not match_text.strip():
                continue
            timestamp = find_timestamp_before_match(full_content, start_pos, file_path)
            match_start_line, match_end_line = get_line_numbers_from_pos(content, start_pos, end_pos)
            entity = handle_distinct_entity(db_session, match_text, entity_type_id)
            individual_entity = handle_individual_entity(
                db_session, entity, file_metadata, match_start_line, timestamp,
                entity_type_id, abort_flag, thread_instance,
            )
            if individual_entity:
                entity_count += 1
                handle_context_snippet(db_session, individual_entity, content, match_start_line, match_end_line)
        return entity_count
    except Exception as e:
        db_session.rollback()
        logging.error(f"Error processing text file {file_path}: {e}")
        return 0
def get_line_numbers_from_pos(content, start_pos, end_pos):
    """Map a character span of the joined text to 0-based line indices.

    ``content`` is the list of lines whose plain concatenation produced the
    text the offsets refer to. ``start_pos`` is the first character of the
    span, ``end_pos`` the offset just past its last character.

    Returns ``(start_line, end_line)``. If ``start_pos`` lies beyond the
    text, ``(0, 0)`` is returned; if only ``end_pos`` does, the end line is
    clamped to the last line.
    """
    start_line = None
    line_end = 0  # offset just past the current line within the joined text
    for index, line in enumerate(content):
        line_end += len(line)
        if start_line is None:
            if start_pos >= line_end:
                continue
            start_line = index
        # Each line length is counted exactly once. The previous version added
        # the start line twice and so under-reported the end line of spans
        # that cross a line break.
        if end_pos <= line_end:
            return start_line, index
    if start_line is None:
        return 0, 0
    return start_line, len(content) - 1
def find_timestamp_before_match(content, match_start_pos, file_path):
    """Return the timestamp preceding a match as 'YYYY-MM-DD HH:MM:SS', or None.

    The text in front of ``match_start_pos`` is searched first. When nothing
    parseable is found there, the base name of ``file_path`` is searched.
    Formats are tried in the order listed below; within one format the
    occurrence closest to the end of the searched text is tried first.
    """
    # (regular expression, strptime format) pairs in priority order.
    known_formats = (
        (r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', '%Y-%m-%d %H:%M:%S'),  # ISO 8601 extended
        (r'\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}', '%Y/%m/%d %H:%M:%S'),  # year first, slashes
        (r'\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}', '%d/%m/%Y %H:%M:%S'),  # European date
        (r'\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}', '%m-%d-%Y %H:%M:%S'),  # US date
        (r'\d{8}_\d{6}', '%Y%m%d_%H%M%S'),  # compact with underscore
        (r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}', '%Y-%m-%dT%H:%M:%S'),  # ISO 8601 with 'T'
        (r'\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2}', '%d.%m.%Y %H:%M:%S'),  # German date
        (r'\d{4}\d{2}\d{2} \d{2}:\d{2}:\d{2}', '%Y%m%d %H:%M:%S'),  # date without separators
        (r'\d{1,2}-[A-Za-z]{3}-\d{4} \d{2}:\d{2}:\d{2}', '%d-%b-%Y %H:%M:%S'),  # English month name
        (r'(?:19|20)\d{10}', '%Y%m%d%H%M'),  # 12-digit numeric, minute precision
    )

    def search_timestamps(search_content):
        for regex, strptime_format in known_formats:
            candidates = list(re.finditer(regex, search_content, re.DOTALL))
            # Walk backwards so the occurrence nearest the end wins.
            for candidate in candidates[::-1]:
                raw_value = candidate.group().replace('\n', '')
                try:
                    parsed = datetime.strptime(raw_value, strptime_format)
                except ValueError:
                    # Looked like a timestamp but is not a valid date/time.
                    continue
                return parsed.strftime('%Y-%m-%d %H:%M:%S')
        return None

    # Content first; the file name is only a fallback.
    return search_timestamps(content[:match_start_pos]) or search_timestamps(os.path.basename(file_path))

View File

@ -0,0 +1,120 @@
import logging
import datetime
import re
import os
from openpyxl import load_workbook
from logline_leviathan.file_processor.parser_thread import parse_content
from logline_leviathan.file_processor.file_database_ops import handle_file_metadata, handle_individual_entity, handle_context_snippet, handle_distinct_entity
def read_xlsx_content(file_path):
    """Open an XLSX file with openpyxl and return the workbook, or None on any error.

    The error is logged before None is returned.
    """
    try:
        return load_workbook(filename=file_path)
    except Exception as load_error:
        logging.error(f"Error reading XLSX file {file_path}: {load_error}")
    return None
def get_line_numbers_from_pos(content, start_pos, end_pos, separator_len=1):
    """Map a character span of the joined sheet text to 0-based row indices.

    ``content`` is the list of row strings. The offsets refer to the text
    obtained by joining those rows with a separator of ``separator_len``
    characters (``process_xlsx_file`` joins with a single newline, hence the
    default of 1). ``start_pos`` is the first character of the span,
    ``end_pos`` the offset just past its last character. A position that
    falls on a separator is attributed to the row in front of it.

    Returns ``(start_line, end_line)``. If ``start_pos`` lies beyond the
    text, ``(0, 0)`` is returned; if only ``end_pos`` does, the end row is
    clamped to the last row.
    """
    start_line = None
    row_end = 0  # offset just past the current row and its separator
    for index, line in enumerate(content):
        # The separator must be counted too. Ignoring it made the computed
        # row drift by one character per preceding row.
        row_end += len(line) + separator_len
        if start_line is None:
            if start_pos >= row_end:
                continue
            start_line = index
        if end_pos <= row_end:
            return start_line, index
    if start_line is None:
        return 0, 0
    return start_line, len(content) - 1
def find_timestamp_before_match(content, match_start_pos, file_path):
    """Return the timestamp preceding a match as 'YYYY-MM-DD HH:MM:SS', or None.

    The text in front of ``match_start_pos`` is searched first. When nothing
    parseable is found there, the base name of ``file_path`` is searched.
    Formats are tried in the order listed below; within one format the
    occurrence closest to the end of the searched text is tried first.
    """
    # This module imports the ``datetime`` *module*, so ``datetime.strptime``
    # raised AttributeError on the first timestamp match. Import the class
    # locally so the function works whichever way the module imports it.
    from datetime import datetime as _datetime

    # (regular expression, strptime format) pairs in priority order.
    timestamp_patterns = (
        (r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', '%Y-%m-%d %H:%M:%S'),  # ISO 8601 extended
        (r'\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}', '%Y/%m/%d %H:%M:%S'),  # year first, slashes
        (r'\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}', '%d/%m/%Y %H:%M:%S'),  # European date
        (r'\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}', '%m-%d-%Y %H:%M:%S'),  # US date
        (r'\d{8}_\d{6}', '%Y%m%d_%H%M%S'),  # compact with underscore
        (r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}', '%Y-%m-%dT%H:%M:%S'),  # ISO 8601 with 'T'
        (r'\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2}', '%d.%m.%Y %H:%M:%S'),  # German date
        (r'\d{4}\d{2}\d{2} \d{2}:\d{2}:\d{2}', '%Y%m%d %H:%M:%S'),  # date without separators
        (r'\d{1,2}-[A-Za-z]{3}-\d{4} \d{2}:\d{2}:\d{2}', '%d-%b-%Y %H:%M:%S'),  # English month name
        (r'(?:19|20)\d{10}', '%Y%m%d%H%M'),  # 12-digit numeric, minute precision
    )

    def search_timestamps(search_content):
        for pattern, date_format in timestamp_patterns:
            # None of the patterns can match a newline, so the matched text
            # is passed to strptime unchanged.
            for timestamp_match in reversed(list(re.finditer(pattern, search_content))):
                try:
                    matched_timestamp = _datetime.strptime(timestamp_match.group(), date_format)
                except ValueError:
                    # Looked like a timestamp but is not a valid date/time.
                    continue
                return matched_timestamp.strftime('%Y-%m-%d %H:%M:%S')
        return None

    # Content first; the file name is only a fallback.
    timestamp = search_timestamps(content[:match_start_pos])
    if timestamp:
        return timestamp
    return search_timestamps(os.path.basename(file_path))
def process_xlsx_file(file_path, file_mimetype, thread_instance, db_session, abort_flag):
    """Parse every sheet of an XLSX workbook and store the entities found.

    For each sheet the file metadata is registered (with the sheet name),
    every row is flattened to one space-separated string, the rows are joined
    with newlines and handed to ``parse_content``. Each non-blank match is
    stored together with its row index, nearest preceding timestamp and
    context snippet.

    Returns the number of individual entities stored so far when processing
    finishes or is aborted, and 0 if the workbook cannot be opened or an
    exception occurs (the session is rolled back in the latter case).
    """
    try:
        logging.info(f"Starting processing of XLSX file: {file_path}")
        workbook = read_xlsx_content(file_path)
        if workbook is None:
            return 0

        stored = 0
        for worksheet in workbook:
            title = worksheet.title
            metadata = handle_file_metadata(db_session, file_path, file_mimetype, sheet_name=title)
            if abort_flag():
                logging.info("Processing aborted.")
                return stored

            # One string per row: cell values separated by single spaces,
            # empty cells contributing an empty string.
            row_texts = []
            for row in worksheet.iter_rows():
                cell_texts = ['' if cell.value is None else str(cell.value) for cell in row]
                row_texts.append(' '.join(cell_texts))
            sheet_text = '\n'.join(row_texts)

            thread_instance.update_status.emit(f"Processing Excel file: {file_path} Sheet {title}")
            for entity_type_id, match_text, start_pos, end_pos in parse_content(sheet_text, abort_flag, db_session):
                if not match_text.strip():
                    continue
                first_row, last_row = get_line_numbers_from_pos(row_texts, start_pos, end_pos)
                found_timestamp = find_timestamp_before_match(sheet_text, start_pos, file_path)
                distinct = handle_distinct_entity(db_session, match_text, entity_type_id)
                occurrence = handle_individual_entity(
                    db_session, distinct, metadata, first_row, found_timestamp,
                    entity_type_id, abort_flag, thread_instance,
                )
                if not occurrence:
                    continue
                handle_context_snippet(db_session, occurrence, row_texts, first_row, last_row)
                stored += 1

        logging.info(f"Finished processing XLSX file: {file_path}")
        return stored
    except Exception as e:
        db_session.rollback()
        logging.error(f"Error processing XLSX file {file_path}: {e}")
        return 0

View File

View File

@ -0,0 +1,398 @@
from PyQt5.QtWidgets import QWidget, QVBoxLayout, QCheckBox, QToolTip, QTreeWidget, QTreeWidgetItem, QHBoxLayout, QLabel, QScrollArea
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QColor
import logging
from logline_leviathan.database.database_manager import EntitiesTable, DistinctEntitiesTable, EntityTypesTable, FileMetadata, session_scope
class CustomCheckBox(QCheckBox):
    """White-text QCheckBox that shows its tooltip at the cursor on every mouse move."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.setStyleSheet("QCheckBox { color: white; }")
        # Mouse tracking delivers move events even when no button is pressed.
        self.setMouseTracking(True)

    def mouseMoveEvent(self, event):
        """Show the tooltip at the current global cursor position, then defer to QCheckBox."""
        cursor_position = event.globalPos()
        QToolTip.showText(cursor_position, self.toolTip())
        super().mouseMoveEvent(event)
class FileCheckboxItem(QWidget):
    """One row consisting of an initially checked check box and a white text label."""

    def __init__(self, text, parent=None):
        super().__init__(parent)

        self.checkBox = QCheckBox()
        self.checkBox.setChecked(True)

        self.label = QLabel(text)
        self.label.setStyleSheet("QLabel { color: white; }")  # Set text color

        row = QHBoxLayout(self)
        for child in (self.checkBox, self.label):
            row.addWidget(child)
        row.addStretch(1)  # keeps check box and label left-aligned
        self.setLayout(row)
class CheckboxPanel(QWidget):
    """Tree of entity types with check boxes, used to select types for export.

    Entity types whose ``parent_type`` is ``'root'`` become top-level items;
    their descendants are attached recursively. Types whose ``entity_type``
    starts with ``"category_"`` are grouping nodes without a check box. Every
    item carries ``entity_type_id`` and ``entity_type`` attributes.
    """

    def __init__(self):
        super().__init__()
        layout = QVBoxLayout(self)
        self.treeWidget = QTreeWidget()
        self.treeWidget.setHeaderHidden(True)
        self.treeWidget.setStyleSheet("""
QTreeWidget::branch {color: white; /* White color for branches */
}
""")
        layout.addWidget(self.treeWidget)

    def _addChildren(self, parentItem, parent_entity_type, db_session, used_ids, depth=0):
        """Recursively attach all entity types below ``parent_entity_type`` to ``parentItem``.

        ``used_ids`` is the set of entity type ids that have at least one
        distinct entity. ``depth`` is the nesting level of ``parentItem`` and
        is only passed along for diagnostics. Errors are logged and end the
        processing of the current sibling list.
        """
        try:
            child_entity_types = (
                db_session.query(EntityTypesTable)
                .filter(EntityTypesTable.parent_type == parent_entity_type)
                .all()
            )
            for child_entity_type in child_entity_types:
                isCheckable = not child_entity_type.entity_type.startswith("category_")
                text = f"{child_entity_type.gui_name}"
                childItem = QTreeWidgetItem(parentItem)
                if isCheckable:
                    # Only checkable items display a count, so only they need the query.
                    count = (
                        db_session.query(EntitiesTable)
                        .filter(EntitiesTable.entity_types_id == child_entity_type.entity_type_id)
                        .count()
                    )
                    childItem.setFlags(childItem.flags() | Qt.ItemIsUserCheckable)
                    childItem.setCheckState(0, Qt.Unchecked)
                    text += f" ({count} Erwähnungen)"
                childItem.setText(0, text)
                childItem.setToolTip(0, child_entity_type.gui_tooltip)
                childItem.entity_type_id = child_entity_type.entity_type_id
                childItem.entity_type = child_entity_type.entity_type

                # The explicit comparisons are kept on purpose: a value that is
                # neither True nor False (e.g. None) passes the "green" test
                # and also the "red" test.
                if child_entity_type.entity_type_id in used_ids and not child_entity_type.parser_enabled == False:  # noqa: E712
                    color = QColor('green')
                elif isCheckable and not child_entity_type.parser_enabled == True:  # noqa: E712
                    color = QColor('red')
                else:
                    color = QColor('white')
                childItem.setForeground(0, color)

                # depth + 1 is the level of the grandchildren. The previous
                # code incremented depth once per sibling instead.
                self._addChildren(childItem, child_entity_type.entity_type, db_session, used_ids, depth + 1)
        except Exception as e:
            logging.error(f"Error adding children: {e}")

    def updateCheckboxes(self, db_session):
        """Rebuild the whole tree from the database and expand it.

        The ``db_session`` argument is kept for compatibility with existing
        callers but is not used: the method opens its own session via
        ``session_scope()``, as it always has.
        """
        with session_scope() as session:
            try:
                entity_types = session.query(EntityTypesTable).all()
                used_ids = {d.entity_types_id for d in session.query(DistinctEntitiesTable.entity_types_id).distinct()}
                self.treeWidget.clear()
                for entity_type in entity_types:
                    if entity_type.parent_type != 'root':  # children are added recursively below
                        continue
                    text = f"{entity_type.gui_name}"
                    treeItem = QTreeWidgetItem()
                    treeItem.setToolTip(0, entity_type.gui_tooltip)
                    treeItem.entity_type_id = entity_type.entity_type_id
                    treeItem.entity_type = entity_type.entity_type
                    if not entity_type.entity_type.startswith("category_"):
                        count = (
                            session.query(EntitiesTable)
                            .filter(EntitiesTable.entity_types_id == entity_type.entity_type_id)
                            .count()
                        )
                        treeItem.setFlags(treeItem.flags() | Qt.ItemIsUserCheckable)
                        treeItem.setCheckState(0, Qt.Unchecked)
                        text = f"{entity_type.gui_name} ({count} Erwähnungen)"
                    treeItem.setText(0, text)
                    self.treeWidget.addTopLevelItem(treeItem)
                    self._addChildren(treeItem, entity_type.entity_type, session, used_ids)
                self.treeWidget.expandAll()
            except Exception:
                logging.error("Error updating checkboxes", exc_info=True)

    def filterCheckboxes(self, filter_text):
        """Hide every item that neither matches ``filter_text`` nor has a matching descendant.

        Matching is a case-insensitive substring test against the item text
        and its tooltip.
        """
        def filterTreeItem(treeItem):
            try:
                needle = filter_text.lower()
                match = needle in treeItem.text(0).lower() or needle in treeItem.toolTip(0).lower()
            except Exception as e:
                logging.error(f"Error checking filter match for tree item: {e}")
                match = False

            # Every child must be visited (no short-circuit) so that each one
            # gets its own hidden state updated.
            childMatch = False
            for j in range(treeItem.childCount()):
                if filterTreeItem(treeItem.child(j)):
                    childMatch = True

            if match or childMatch:
                treeItem.setHidden(False)
                parent = treeItem.parent()
                while parent:
                    parent.setHidden(False)
                    parent = parent.parent()
                return True
            treeItem.setHidden(True)
            return False

        for i in range(self.treeWidget.topLevelItemCount()):
            filterTreeItem(self.treeWidget.topLevelItem(i))

    def checkAllVisible(self):
        """Check every visible, checkable item whose entity type has stored entities."""
        with session_scope() as db_session:
            used_ids = self.getUsedIds(db_session)
            self._setCheckStateForVisibleItems(Qt.Checked, used_ids)

    def uncheckAllVisible(self):
        """Uncheck every visible, checkable item whose entity type has stored entities."""
        with session_scope() as db_session:
            used_ids = self.getUsedIds(db_session)
            self._setCheckStateForVisibleItems(Qt.Unchecked, used_ids)

    def _setCheckStateForVisibleItems(self, state, used_ids):
        """Apply ``state`` to all checkable, non-hidden items whose type id is in ``used_ids``."""
        def setCheckState(item):
            try:
                if (item.flags() & Qt.ItemIsUserCheckable) and not item.isHidden():
                    if hasattr(item, 'entity_type_id') and item.entity_type_id in used_ids:
                        item.setCheckState(0, state)
                # Descend regardless of the item's own state so that items
                # below non-checkable category nodes are reached.
                for i in range(item.childCount()):
                    setCheckState(item.child(i))
            except Exception as e:
                logging.error(f"Error in setCheckState: {e}")

        try:
            for i in range(self.treeWidget.topLevelItemCount()):
                setCheckState(self.treeWidget.topLevelItem(i))
        except Exception as e:
            logging.error(f"Error in _setCheckStateForVisibleItems: {e}")

    def getUsedIds(self, db_session):
        """Return the set of entity type ids present in DistinctEntitiesTable (empty set on error)."""
        try:
            return {d.entity_types_id for d in db_session.query(DistinctEntitiesTable.entity_types_id).distinct()}
        except Exception as e:
            logging.error(f"Error in getUsedIds: {e}")
            return set()

    def expandAllTreeItems(self):
        """Expand every item of the tree."""
        for i in range(self.treeWidget.topLevelItemCount()):
            self._expandCollapseRecursive(self.treeWidget.topLevelItem(i), True)

    def collapseAllTreeItems(self):
        """Collapse every item of the tree."""
        for i in range(self.treeWidget.topLevelItemCount()):
            self._expandCollapseRecursive(self.treeWidget.topLevelItem(i), False)

    def _expandCollapseRecursive(self, treeItem, expand=True):
        """Set the expanded state of ``treeItem`` and of all its descendants."""
        if treeItem is not None:
            treeItem.setExpanded(expand)
            for j in range(treeItem.childCount()):
                self._expandCollapseRecursive(treeItem.child(j), expand)
class DatabasePanel(QWidget):
    """Read-only tree showing, per entity type, how many entities the database holds.

    Category nodes (``entity_type`` starting with ``"category_"``) show the
    total over all their descendants; other nodes show their own count.
    Items are coloured green when the type has distinct entities and its
    parser is enabled, red when a non-category type has its parser disabled,
    and white otherwise.
    """

    def __init__(self):
        super().__init__()
        layout = QVBoxLayout(self)
        self.treeWidget = QTreeWidget()
        self.treeWidget.setHeaderHidden(True)  # Hide the header
        self.treeWidget.setStyleSheet("""QTreeWidget::branch {color: white; /* White color for branches */}""")
        layout.addWidget(self.treeWidget)

    def _getTotalCountForChildren(self, entity_type, db_session):
        """Return the entity count of ``entity_type`` plus that of all its descendants."""
        total_count = (
            db_session.query(EntitiesTable)
            .filter(EntitiesTable.entity_types_id == entity_type.entity_type_id)
            .count()
        )
        child_entity_types = (
            db_session.query(EntityTypesTable)
            .filter(EntityTypesTable.parent_type == entity_type.entity_type)
            .all()
        )
        for child_entity_type in child_entity_types:
            total_count += self._getTotalCountForChildren(child_entity_type, db_session)
        return total_count

    def _addChildren(self, parentItem, parent_entity_type, db_session, used_ids, depth=0):
        """Recursively attach all entity types below ``parent_entity_type`` to ``parentItem``.

        ``used_ids`` is the set of entity type ids that have at least one
        distinct entity. ``depth`` is the nesting level of ``parentItem`` and
        is only passed along for diagnostics. Errors are logged and end the
        processing of the current sibling list.
        """
        try:
            child_entity_types = (
                db_session.query(EntityTypesTable)
                .filter(EntityTypesTable.parent_type == parent_entity_type)
                .all()
            )
            for child_entity_type in child_entity_types:
                is_category = child_entity_type.entity_type.startswith("category_")
                if not is_category:
                    count = (
                        db_session.query(EntitiesTable)
                        .filter(EntitiesTable.entity_types_id == child_entity_type.entity_type_id)
                        .count()
                    )
                    text = f" {count} - {child_entity_type.gui_name} ({child_entity_type.entity_type})"
                else:
                    total_count = self._getTotalCountForChildren(child_entity_type, db_session)
                    text = f" {total_count} - {child_entity_type.gui_name} (Total)"

                childItem = QTreeWidgetItem(parentItem)
                childItem.setText(0, text)
                childItem.setToolTip(0, child_entity_type.gui_tooltip)
                childItem.entity_type_id = child_entity_type.entity_type_id
                childItem.entity_type = child_entity_type.entity_type

                if child_entity_type.entity_type_id in used_ids and child_entity_type.parser_enabled:
                    color = QColor('green')
                elif not is_category and not child_entity_type.parser_enabled:
                    color = QColor('red')
                else:
                    color = QColor('white')
                childItem.setForeground(0, color)

                # depth + 1 is the level of the grandchildren. The previous
                # code incremented depth once per sibling instead.
                self._addChildren(childItem, child_entity_type.entity_type, db_session, used_ids, depth + 1)
        except Exception as e:
            logging.error(f"Error in _addChildren: {e}")

    def updateTree(self, db_session):
        """Rebuild the whole tree from the database and expand it.

        The ``db_session`` argument is kept for compatibility with existing
        callers but is not used: the method opens its own session via
        ``session_scope()``, as it always has.
        """
        with session_scope() as session:
            try:
                entity_types = session.query(EntityTypesTable).all()
                used_ids = {d.entity_types_id for d in session.query(DistinctEntitiesTable.entity_types_id).distinct()}
                self.treeWidget.clear()
                for entity_type in entity_types:
                    if entity_type.parent_type != 'root':  # children are added recursively below
                        continue
                    is_category = entity_type.entity_type.startswith("category_")
                    if not is_category:
                        count = (
                            session.query(EntitiesTable)
                            .filter(EntitiesTable.entity_types_id == entity_type.entity_type_id)
                            .count()
                        )
                        text = f"{count} - {entity_type.gui_name} {entity_type.entity_type}"
                    else:
                        total_count = self._getTotalCountForChildren(entity_type, session)
                        text = f"{total_count} - {entity_type.gui_name} (Total)"

                    treeItem = QTreeWidgetItem()
                    treeItem.setText(0, text)
                    treeItem.setToolTip(0, entity_type.gui_tooltip)
                    treeItem.entity_type_id = entity_type.entity_type_id
                    treeItem.entity_type = entity_type.entity_type

                    if entity_type.entity_type_id in used_ids and entity_type.parser_enabled:
                        color = QColor('green')
                    elif not is_category and not entity_type.parser_enabled:
                        color = QColor('red')
                    else:
                        color = QColor('white')
                    treeItem.setForeground(0, color)

                    self.treeWidget.addTopLevelItem(treeItem)
                    self._addChildren(treeItem, entity_type.entity_type, session, used_ids)
                self.treeWidget.expandAll()
            except Exception:
                logging.error("Error updating database tree", exc_info=True)

    def expandAllTreeItems(self):
        """Expand every item of the tree."""
        for i in range(self.treeWidget.topLevelItemCount()):
            self._expandCollapseRecursive(self.treeWidget.topLevelItem(i), True)

    def collapseAllTreeItems(self):
        """Collapse every item of the tree."""
        for i in range(self.treeWidget.topLevelItemCount()):
            self._expandCollapseRecursive(self.treeWidget.topLevelItem(i), False)

    def _expandCollapseRecursive(self, treeItem, expand=True):
        """Set the expanded state of ``treeItem`` and of all its descendants."""
        if treeItem is not None:
            treeItem.setExpanded(expand)
            for j in range(treeItem.childCount()):
                self._expandCollapseRecursive(treeItem.child(j), expand)
class FileCheckboxPanel(QWidget):
    """Scrollable list with one check box row per file recorded in the database."""

    def __init__(self):
        super().__init__()
        self.mainLayout = QVBoxLayout(self)
        self.scrollArea = QScrollArea(self)
        self.scrollArea.setWidgetResizable(True)
        self.scrollAreaContents = QWidget()
        self.scrollLayout = QVBoxLayout(self.scrollAreaContents)
        self.scrollArea.setWidget(self.scrollAreaContents)
        self.mainLayout.addWidget(self.scrollArea)
        self.items = []  # FileCheckboxItem rows currently shown

    def _clearRows(self):
        """Remove every row widget and spacer from the scroll layout."""
        # Taking all layout items (not just the widgets in self.items) also
        # removes the stretch added by the previous refresh. Leaving it in
        # place stacked up spacers and pushed new rows below them.
        while self.scrollLayout.count():
            layout_item = self.scrollLayout.takeAt(0)
            widget = layout_item.widget()
            if widget is not None:
                widget.deleteLater()
        self.items.clear()

    def updateCheckboxes(self, db_session):
        """Rebuild the list from FileMetadata, labelling each row "<file name> (<entity count>)".

        The ``db_session`` argument is kept for compatibility with existing
        callers but is not used: the method opens its own session via
        ``session_scope()``, as it always has. Errors are logged.
        """
        try:
            self._clearRows()
            with session_scope() as session:
                for file in session.query(FileMetadata).all():
                    entity_count = (
                        session.query(EntitiesTable)
                        .filter(EntitiesTable.file_id == file.file_id)
                        .count()
                    )
                    custom_widget = FileCheckboxItem(f"{file.file_name} ({entity_count})")
                    # Remember the exact name so it never has to be parsed
                    # back out of the label text.
                    custom_widget.file_name = file.file_name
                    self.scrollLayout.addWidget(custom_widget)
                    self.items.append(custom_widget)
                # A single trailing stretch pushes the rows to the top.
                self.scrollLayout.addStretch(1)
        except Exception:
            logging.error("Error updating file checkboxes", exc_info=True)

    def filterCheckboxes(self, filter_text):
        """Show only the rows whose label contains ``filter_text`` (case-insensitive)."""
        needle = filter_text.lower()
        for item in self.items:
            if needle in item.label.text().lower():
                item.show()
            else:
                item.hide()

    def checkAllVisible(self):
        """Check the check box of every row that is not hidden."""
        for item in self.items:
            if not item.isHidden():
                item.checkBox.setChecked(True)

    def uncheckAllVisible(self):
        """Uncheck the check box of every row that is not hidden."""
        for item in self.items:
            if not item.isHidden():
                item.checkBox.setChecked(False)

    def getCheckedFiles(self):
        """Return the file names of all rows whose check box is checked."""
        checked_files = []
        for custom_widget in self.items:
            if not custom_widget.checkBox.isChecked():
                continue
            file_name = getattr(custom_widget, 'file_name', None)
            if file_name is None:
                # Fallback for rows created elsewhere: strip only the trailing
                # " (<count>)" so names that contain " (" stay intact.
                file_name = custom_widget.label.text().rsplit(" (", 1)[0]
            checked_files.append(file_name)
        return checked_files

    def _setCheckStateForVisibleItems(self, state):
        """Pass ``state`` to ``setChecked`` for every row that is not hidden."""
        for custom_widget in self.items:
            if not custom_widget.isHidden():
                custom_widget.checkBox.setChecked(state)

View File

@ -0,0 +1,77 @@
from PyQt5.QtWidgets import QTableWidget, QTableWidgetItem, QDialog, QVBoxLayout, QHBoxLayout, QComboBox, QPushButton
from logline_leviathan.database.database_manager import EntitiesTable, DistinctEntitiesTable, EntityTypesTable, ContextTable, FileMetadata, session_scope
from sqlalchemy import func, label
from sqlalchemy.orm import aliased
class CustomizeResultsDialog(QDialog):
    """Dialog that lets the user pick the report columns, one combo box per column.

    After the dialog has been accepted with OK, ``selectedColumns`` holds the
    current text of every combo box in left-to-right order.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        # NOTE(review): session_scope() is used as a context manager elsewhere
        # (``with session_scope() as ...``); here its return value is stored
        # without being entered and is not used inside this class. Kept
        # unchanged in case external code reads the attribute.
        self.db_session = session_scope()
        self.setWindowTitle("Report anpassen")
        stylesheet = """
/* Style for the main window */
QWidget {
background-color: #282C34; /* Dark grey background */
color: white; /* White text */
}
/* Style for buttons */
QPushButton {
background-color: #4B5563; /* Dark grey background */
color: white; /* White text */
border-style: outset;
border-width: 2px;
border-radius: 1px; /* Rounded corners */
border-color: #4A4A4A;
padding: 6px;
min-width: 60px;
min-height: 20px;
}
QPushButton:hover {
background-color: #6E6E6E; /* Slightly lighter grey on hover */
}
QPushButton:pressed {
background-color: #484848; /* Even darker grey when pressed */
}
"""
        # NOTE(review): this attribute hides the inherited QWidget.layout()
        # method on the instance; the name is kept for compatibility.
        self.layout = QVBoxLayout(self)
        self.setStyleSheet(stylesheet)
        self.comboBoxLayout = QHBoxLayout()
        self.layout.addLayout(self.comboBoxLayout)
        # Start with a single column selector.
        self.addComboBox()
        # Button to add more column selectors.
        self.addButton = QPushButton("Spalte hinzufügen", self)
        self.addButton.clicked.connect(self.addComboBox)
        self.layout.addWidget(self.addButton)
        # OK and Cancel buttons. OK must go through on_accept so that the
        # selection is captured; it used to be wired straight to accept(),
        # which left selectedColumns empty.
        self.okButton = QPushButton("OK", self)
        self.okButton.clicked.connect(self.on_accept)
        self.cancelButton = QPushButton("Abbruch", self)
        self.cancelButton.clicked.connect(self.reject)
        self.buttonLayout = QHBoxLayout()
        self.buttonLayout.addWidget(self.okButton)
        self.buttonLayout.addWidget(self.cancelButton)
        self.layout.addLayout(self.buttonLayout)
        self.selectedColumns = []

    def addComboBox(self):
        """Append one more column selector offering all available column kinds."""
        comboBox = QComboBox(self)
        comboBox.addItems(['Entitätentyp', 'Entität', 'Anzahl Erwähnungen', 'Dateiname', 'Zeilennummer', 'Zeitstempel', 'Kontext - gleiche Zeile', 'Kontext - mittelgroß', 'Kontext - umfangreich'])
        self.comboBoxLayout.addWidget(comboBox)

    def comboBoxes(self):
        """Return all column selector widgets in layout order."""
        return [self.comboBoxLayout.itemAt(i).widget() for i in range(self.comboBoxLayout.count())]

    def on_accept(self, *_):
        """Store the chosen column names in ``selectedColumns`` and accept the dialog.

        Extra positional arguments (such as the ``checked`` flag emitted by
        ``QPushButton.clicked``) are ignored.
        """
        self.selectedColumns = [comboBox.currentText() for comboBox in self.comboBoxes()]
        self.accept()

View File

@ -0,0 +1,328 @@
import os
from PyQt5.QtWidgets import (QMessageBox, QWidget, QApplication,
QFileDialog, QLabel, QPushButton, QGridLayout, QGroupBox, QHBoxLayout, QVBoxLayout, QLineEdit)
from PyQt5.QtCore import Qt
from logline_leviathan.gui.checkbox_panel import *
from logline_leviathan.gui.ui_helper import UIHelper
from logline_leviathan.database.database_manager import session_scope
from logline_leviathan.exporter.wordlist_export import generate_wordlist
from logline_leviathan.gui.checkbox_panel import *
import shutil
import glob
class DBBrowserWindow(QWidget):
def __init__(self, app):
    """Create the database-browser window and build its user interface.

    ``app`` is stored on the instance and handed to the UI setup method.
    """
    super().__init__()
    self.app = app
    # The setup method visible in this class is spelled
    # ``initialize_dbbdrowser_window``, so calling the correctly spelled name
    # raised AttributeError. Prefer the correct spelling if it exists and fall
    # back to the misspelled one otherwise.
    # NOTE(review): the setup method calls updateWordlistPathLabel(), which
    # reads self.WordlistPath; no assignment of that attribute is visible
    # before this point - confirm it is initialised elsewhere.
    setup = getattr(self, 'initialize_dbbrowser_window', None) or self.initialize_dbbdrowser_window
    setup(app)
def initialize_dbbdrowser_window(dbbrowser_window, app):
    """Build the complete widget tree of the database-browser window.

    ``dbbrowser_window`` plays the role of ``self``; ``app`` is accepted but
    not used here. From top to bottom the window contains: a status label,
    the "SELEKTION" group (tool buttons, filter field, entity type tree), a
    button that copies the generated wordlist to the parser directory, and a
    grid with the cross-match option, the path label and the action buttons.
    """
    window = dbbrowser_window

    base_style = """
/* Style for the main window */
QWidget {
background-color: #282C34; /* Dark grey background */
color: white; /* White text */
}
/* Style for buttons */
QPushButton {
background-color: #4B5563; /* Dark grey background */
color: white; /* White text */
border-style: outset;
border-width: 2px;
border-radius: 1px; /* Rounded corners */
border-color: #4A4A4A;
padding: 6px;
min-width: 50px;
min-height: 15px;
}
QPushButton:hover {
background-color: #6E6E6E; /* Slightly lighter grey on hover */
}
QPushButton:pressed {
background-color: #484848; /* Even darker grey when pressed */
}
"""
    accent_button_style = """
QPushButton {
background-color: #3C8CCE; /* Lighter blue background */
color: white; /* White text */
border-style: outset;
border-width: 2px;
border-radius: 1px; /* Rounded corners */
border-color: #4A4A4A;
padding: 6px;
min-width: 50px;
min-height: 15px;
}
QPushButton:hover {
background-color: #7EC0EE; /* Even lighter blue on hover */
}
QPushButton:pressed {
background-color: #4A86E8; /* Slightly darker blue when pressed */
}
"""

    window.setWindowTitle('Logline Leviathan - Database-Browser')
    window.mainLayout = QVBoxLayout(window)
    window.db_session = None
    window.setStyleSheet(base_style)

    # --- status line -------------------------------------------------------
    status = QLabel(' Erwarte Selektion der Entitätentypen', window)
    status.setWordWrap(True)
    status.setMinimumHeight(40)
    status.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
    window.statusLabel = status
    window.mainLayout.addWidget(status)

    # --- selection group: tool buttons, filter field, entity type tree -----
    selectionGroup = QGroupBox("SELEKTION", window)
    selectionLayout = QVBoxLayout(selectionGroup)
    window.checkboxPanel = CheckboxPanel()

    toolRow = QHBoxLayout()
    # The lambdas look the panel up at click time, exactly like before.
    tool_buttons = (
        ("Alle markieren", lambda: window.checkboxPanel.checkAllVisible()),
        ("Keine markieren", lambda: window.checkboxPanel.uncheckAllVisible()),
        ("Expandieren", lambda: window.checkboxPanel.expandAllTreeItems()),
        ("Komprimieren", lambda: window.checkboxPanel.collapseAllTreeItems()),
    )
    for caption, handler in tool_buttons:
        toolButton = QPushButton(caption, window)
        toolButton.clicked.connect(handler)
        toolRow.addWidget(toolButton)

    toolRow.addWidget(QLabel("Filtern:"))

    filterField = QLineEdit(window)
    filterField.setPlaceholderText(" nach Typ, Tooltip oder Kurzbezeichnung filtern...")
    filterField.setStyleSheet("""
QLineEdit {
background-color: #3C4043; /* Background color */
color: white; /* Text color */
min-height: 20px;
}
""")
    toolRow.addWidget(filterField)

    selectionLayout.addLayout(toolRow)
    selectionLayout.addWidget(window.checkboxPanel)
    # Typing in the filter field filters the tree immediately.
    filterField.textChanged.connect(window.checkboxPanel.filterCheckboxes)
    window.mainLayout.addWidget(selectionGroup)

    # --- copy generated wordlist into the parser directory ------------------
    copyButton = QPushButton('Soeben generierte Wordlist zur Analyse hinzufügen (kopiert erzeugte Datei)', window)
    copyButton.clicked.connect(window.copyWordlistToParserDir)
    window.mainLayout.addWidget(copyButton)

    # --- bottom grid: options (row 0) and action buttons (rows 1 and 2) ----
    grid = QGridLayout()

    window.openWordlistPathButton = QPushButton('Ziel-Dateipfad...', window)
    window.openWordlistPathButton.clicked.connect(window.openWordlistPath)
    grid.addWidget(window.openWordlistPathButton, 1, 1)

    window.startExportButton = QPushButton('Wordlist erstellen', window)
    window.startExportButton.clicked.connect(window.start_export_process)
    window.startExportButton.setStyleSheet(accent_button_style)
    grid.addWidget(window.startExportButton, 1, 2)

    window.selectOutputFileButton = QPushButton('Zieldateipfad setzen...', window)
    window.selectOutputFileButton.clicked.connect(window.selectOutputFile)
    grid.addWidget(window.selectOutputFileButton, 2, 1)

    window.exitButton = QPushButton('Schließen', window)
    window.exitButton.clicked.connect(window.close)
    grid.addWidget(window.exitButton, 2, 2)

    window.crossmatchesCheckbox = QCheckBox('Nur Kreuztreffer (Entitäten, die in mehreren Dateien vorkommen)', window)
    grid.addWidget(window.crossmatchesCheckbox, 0, 1)

    window.WordlistPathLabel = QLabel('', window)
    window.updateWordlistPathLabel()  # fills in the initial text
    grid.addWidget(window.WordlistPathLabel, 0, 2)

    window.mainLayout.addLayout(grid)
    window.setLayout(window.mainLayout)
def updateCheckboxes(self):
    """Refresh the entity type tree of the checkbox panel inside a fresh session scope."""
    with session_scope() as active_session:
        refresh = self.checkboxPanel.updateCheckboxes
        refresh(active_session)
def getSelectedCheckboxes(self):
    """Return all checked items of the checkbox panel's tree widget.

    The tree is walked depth-first in pre-order (a parent before its
    children, siblings in index order). Every item whose column-0 check
    state equals ``Qt.Checked`` is collected.

    Returns:
        list: The checked tree items in traversal order.
    """
    tree = self.checkboxPanel.treeWidget
    checked_items = []
    # Explicit stack instead of recursion; entries are pushed in reverse so
    # that they are popped in their original index order.
    pending = [tree.topLevelItem(row) for row in reversed(range(tree.topLevelItemCount()))]
    while pending:
        node = pending.pop()
        if node.checkState(0) == Qt.Checked:
            checked_items.append(node)
        pending.extend(node.child(pos) for pos in reversed(range(node.childCount())))
    return checked_items
def updateWordlistPathLabel(self):
    """Show the wordlist output directory in the path label.

    ``self.WordlistPath`` is used as a directory when exporting (the export
    joins it with the wordlist file name), so that directory itself is
    displayed instead of its parent. When the attribute does not name an
    existing directory (for example because it holds a file path), the
    parent directory is shown, which is what was displayed previously.
    A trailing ``/`` is always appended to the displayed text.
    """
    target = self.WordlistPath
    if not os.path.isdir(target):
        target = os.path.dirname(target)
    self.WordlistPathLabel.setText(f'{target}/')
def openWordlistPath(self):
    """Open the wordlist output directory through the UI helper.

    The directory stored in ``self.WordlistPath`` is opened directly. The
    previous implementation rebuilt the path as ``<parent>/wordlist``, which
    only matched when the directory happened to be named ``wordlist``.
    If ``self.WordlistPath`` is not an existing directory (e.g. it holds a
    file path), its parent directory is opened instead.
    """
    target = self.WordlistPath
    if not os.path.isdir(target):
        target = os.path.dirname(target)
    self.ui_helper.openFile(target)
def selectOutputFile(self):
    """Let the user pick the directory that wordlists are written to.

    ``self.WordlistPath`` is consumed as a directory by the export (which
    joins it with ``wordlist.txt``) and by the copy-to-parser step (which
    globs ``*.txt`` inside it). The previous save-file dialog stored a file
    path with an html/xlsx extension there, which broke both consumers, and
    it dereferenced the current output-format item without a None check.
    A directory dialog is used instead.

    On confirmation ``self.WordlistPath`` and ``self.outputDir`` are set to
    the chosen directory and the path label is refreshed; cancelling the
    dialog changes nothing.
    """
    options = QFileDialog.Options() | QFileDialog.ShowDirsOnly
    selected_directory = QFileDialog.getExistingDirectory(
        self,
        "Selektieren des Ziel-Dateipfads",
        self.WordlistPath,  # start browsing at the current output location
        options=options
    )
    if not selected_directory:
        return
    self.WordlistPath = selected_directory
    self.outputDir = selected_directory
    self.updateWordlistPathLabel()
def get_unique_filename(self, base_path):
    """Return a path derived from ``base_path`` that does not exist yet.

    If ``base_path`` itself is free it is returned unchanged. Otherwise
    ``_1``, ``_2``, ... is inserted between the file name and its extension
    until a path is found for which ``os.path.exists`` is false.

    Args:
        base_path: Desired output path.

    Returns:
        str: ``base_path`` or the first free numbered variant of it.
    """
    if not os.path.exists(base_path):
        return base_path
    folder, leaf = os.path.split(base_path)
    stem, suffix = os.path.splitext(leaf)
    index = 1
    while True:
        candidate = os.path.join(folder, f"{stem}_{index}{suffix}")
        if not os.path.exists(candidate):
            return candidate
        index += 1
def copyWordlistToParserDir(self):
    """Copy the newest generated wordlist into the parser directory.

    Looks for ``*.txt`` files inside ``self.WordlistPath``, picks the one
    with the greatest ``os.path.getctime`` value and copies it to
    ``<cwd>/data/parser/generated_wordlist.txt`` (the directory is created
    if needed; an existing destination file is overwritten). Success is
    written to the status label; any exception, including the case of no
    ``.txt`` file being present, is reported through ``self.message``.
    """
    try:
        target_dir = os.path.join(os.getcwd(), 'data', 'parser')
        os.makedirs(target_dir, exist_ok=True)
        candidates = glob.glob(os.path.join(self.WordlistPath, '*.txt'))
        if len(candidates) == 0:
            raise FileNotFoundError("No .txt files found in the WordlistPath directory.")
        latest = max(candidates, key=os.path.getctime)
        destination = os.path.join(target_dir, 'generated_wordlist.txt')
        shutil.copy2(latest, destination)
        self.statusLabel.setText(f" Wordlist erfolgreich kopiert nach {destination}")
    except Exception as error:
        self.message("Fehler beim kopieren", f"Fehler beim kopieren: {error}")
def start_export_process(self):
    """Generate a wordlist file from the checked entity types.

    The output goes to ``wordlist.txt`` inside ``self.WordlistPath``; if
    that name is taken, ``get_unique_filename`` supplies a numbered
    variant. When nothing is checked a warning dialog is shown and nothing
    is generated. Any exception is written to the status label and logged.
    """
    target_path = self.get_unique_filename(os.path.join(self.WordlistPath, "wordlist.txt"))
    try:
        with session_scope() as db_session:
            chosen_items = self.getSelectedCheckboxes()
            if chosen_items:
                crossmatches_only = self.crossmatchesCheckbox.isChecked()
                generate_wordlist(target_path, db_session, chosen_items, crossmatches_only)
                self.statusLabel.setText(f" Generierte Liste gespeichert unter {target_path}")
            else:
                self.message("Generieren nicht möglich", "Keine Typen selektiert. Auswahl vornehmen.")
    except Exception as error:
        failure_text = str(error)
        self.statusLabel.setText(f" Fehler beim speichern: {failure_text}")
        logging.error(f"Export Error: {failure_text}")
def message(self, title, text, extra_widget=None):
    """Show a dark-themed warning dialog and run it with ``exec_()``.

    Args:
        title: Window title of the dialog.
        text: Main message text.
        extra_widget: Optional widget; when truthy the informative text is
            set to an empty string and the widget is added to the dialog's
            layout at row 1, column 1.
    """
    # The stylesheet text is kept flush-left so its bytes stay unchanged.
    dark_theme = """
QMessageBox {
background-color: #282C34; /* Dark grey background */
}
QLabel {
color: white; /* White text */
}
QPushButton {
color: white; /* White text for buttons */
background-color: #4B5563; /* Dark grey background for buttons */
border-style: solid;
border-width: 2px;
border-radius: 5px;
border-color: #4A4A4A;
padding: 6px;
min-width: 80px;
min-height: 30px;
}
"""
    dialog = QMessageBox()
    dialog.setStyleSheet(dark_theme)
    dialog.setIcon(QMessageBox.Warning)
    dialog.setWindowTitle(title)
    dialog.setText(text)
    if extra_widget:
        dialog.setInformativeText('')
        dialog.layout().addWidget(extra_widget, 1, 1)
    dialog.exec_()

View File

@ -0,0 +1,188 @@
import os
import logging
from PyQt5.QtWidgets import (QMessageBox, QWidget, QApplication,
QFileDialog)
from PyQt5.QtCore import Qt
from logline_leviathan.gui.initui_report_window import initialize_generate_report_window
from logline_leviathan.gui.checkbox_panel import CheckboxPanel, FileCheckboxPanel
from logline_leviathan.gui.ui_helper import UIHelper
from logline_leviathan.gui.customize_results import CustomizeResultsDialog
from logline_leviathan.database.database_manager import session_scope
from logline_leviathan.exporter.html_export import generate_html_file
from logline_leviathan.exporter.xlsx_export import generate_xlsx_file
from logline_leviathan.exporter.nice_export import generate_niceoutput_file
class GenerateReportWindow(QWidget):
def __init__(self, app):
    """Create the report window, build its UI and load the checkbox data.

    Args:
        app: Application object; stored on the instance and passed on to
            ``initialize_generate_report_window``.
    """
    super().__init__()
    self.app = app
    # Panels and helper are created before the UI builder runs.
    self.checkboxPanel = CheckboxPanel()
    self.fileCheckboxPanel = FileCheckboxPanel()
    self.ui_helper = UIHelper(self)
    # Exports default to the "output" directory below the working directory.
    default_output_dir = os.path.join(os.getcwd(), 'output')
    self.outputFilePath = default_output_dir
    initialize_generate_report_window(self, app)
    self.updateCheckboxes()
def updateCheckboxes(self):
    """Refresh the entity and file checkbox panels from the database.

    Both panels receive the same session, obtained from ``session_scope``;
    the entity panel is updated first, then the file panel.
    """
    with session_scope() as db_session:
        entity_panel = self.checkboxPanel
        entity_panel.updateCheckboxes(db_session)
        file_panel = self.fileCheckboxPanel
        file_panel.updateCheckboxes(db_session)
def getSelectedCheckboxes(self):
    """Return all checked items of the entity checkbox tree.

    Each top-level item and its descendants are visited in pre-order;
    items whose column-0 check state equals ``Qt.Checked`` are returned in
    that visiting order.

    Returns:
        list: The checked tree items.
    """
    def walk(node):
        # Pre-order: the node itself, then each child subtree by index.
        yield node
        for position in range(node.childCount()):
            yield from walk(node.child(position))

    tree = self.checkboxPanel.treeWidget
    picked = []
    for row in range(tree.topLevelItemCount()):
        picked.extend(
            item for item in walk(tree.topLevelItem(row))
            if item.checkState(0) == Qt.Checked
        )
    return picked
def updateOutputFilePathLabel(self):
    """Write the current value of ``self.outputFilePath`` into its label."""
    current_path = self.outputFilePath
    self.outputFilePathLabel.setText(current_path)
def openOutputFilepath(self):
    """Open the export output location through the UI helper.

    If ``self.outputFilePath`` is an existing directory it is opened;
    otherwise its parent directory is opened. The stored output path is
    left untouched: the previous implementation overwrote
    ``self.outputFilePath`` with the parent directory as a side effect of
    merely opening the folder, so later exports were redirected without
    the path label being updated.
    """
    target = self.outputFilePath
    if not os.path.isdir(target):
        target = os.path.dirname(target)
    self.ui_helper.openFile(target)
def openCustomizeResultsDialog(self):
    """Run the customize-results dialog and read the chosen column names.

    When the dialog's ``exec_()`` result is truthy, the current text of
    every widget in the dialog's ``comboBoxLayout`` is collected.
    """
    customize_dialog = CustomizeResultsDialog()
    if not customize_dialog.exec_():
        return
    # NOTE(review): the collected column names are neither stored nor
    # returned here — confirm whether they were meant to feed the export.
    selected_columns = [
        customize_dialog.comboBoxLayout.itemAt(pos).widget().currentText()
        for pos in range(customize_dialog.comboBoxLayout.count())
    ]
def start_export_process(self):
    """Export the selected entities in the chosen output format.

    Reads the export options from the window's widgets, builds a
    non-clobbering output path from ``self.outputFilePath`` and the file
    name entered by the user (default ``entities_export``), and calls the
    exporter matching the selected format (``html``, ``interactive html``
    or ``xlsx``).

    A warning dialog is shown, and nothing is exported, when no output
    format or no context scope is selected, or when no entity or no file is
    checked. Success and any exception (including an unsupported format)
    are reported in the status label; exceptions are also logged. If the
    open-after-export box is checked and the output file exists, it is
    opened through the UI helper.
    """
    format_item = self.outputFormatList.currentItem()
    context_item = self.exportContextList.currentItem()
    # Both lists need a current item. Previously only the format was
    # checked, so a missing context selection surfaced as an
    # AttributeError text in the status label instead of this dialog.
    if format_item is None or context_item is None:
        self.message("Export nicht möglich", "Ausgabeformat und Umfang des Kontexts spezifizieren.")
        return

    output_format = format_item.text().lower()
    # Format name -> (file extension, exporter). All exporters are called
    # with the same argument list.
    exporters = {
        'html': ('.html', generate_html_file),
        'interactive html': ('.html', generate_niceoutput_file),
        'xlsx': ('.xlsx', generate_xlsx_file),
    }
    selected_extension, exporter = exporters.get(output_format, ('.html', None))

    only_crossmatches = self.crossmatchesCheckbox.isChecked()
    include_flagged = self.flaggedEntriesCheckbox.isChecked()
    only_flagged = self.flaggedRadioButton.isChecked()
    only_unflagged = self.notflaggedRadioButton.isChecked()

    # Custom file name from the line edit, or the default when left empty.
    custom_filename = self.setOutputFileNameLineEdit.text().strip() or "entities_export"
    initial_output_path = os.path.join(self.outputFilePath, f"{custom_filename}{selected_extension}")
    unique_output_path = self.get_unique_filename(initial_output_path)

    try:
        with session_scope() as session:
            selected_checkboxes = self.getSelectedCheckboxes()
            selected_files = self.fileCheckboxPanel.getCheckedFiles()
            if not selected_checkboxes:
                self.message("Export nicht möglich", "Keine Entitäten ausgewählt. Auf dem Panel eine Selektion vornehmen.")
                return
            if not selected_files:
                self.message("Export nicht möglich", "Keine Dateien ausgewählt. Auf dem Panel eine Selektion vornehmen.")
                return

            if self.timestampFilterCheckbox.isChecked():
                start_date = self.startDateEdit.date().toPyDate()
                end_date = self.endDateEdit.date().toPyDate()
            else:
                start_date = end_date = None

            if exporter is None:
                raise ValueError(f"Unsupported format: {output_format}")

            logging.debug(f"only_crossmatches: {only_crossmatches}")
            exporter(unique_output_path, session, selected_checkboxes, self.fileCheckboxPanel, context_item.text(), only_crossmatches, start_date, end_date, include_flagged, only_flagged, only_unflagged)

            self.statusLabel.setText(f" Export gespeichert unter: {unique_output_path}")
            if self.openAfterExportCheckbox.isChecked() and os.path.exists(unique_output_path):
                self.ui_helper.openFile(unique_output_path)
    except Exception as e:
        self.statusLabel.setText(f" Export Error: {str(e)}")
        logging.error(f"Export Error: {str(e)}")
def selectOutputFile(self):
    """Let the user choose the export output directory.

    Opens a directory dialog with the ``ShowDirsOnly`` option, starting at
    the current ``self.outputFilePath``. If a directory is chosen it
    becomes the new output path and the path label is refreshed; an empty
    result leaves everything unchanged.
    """
    dialog_options = QFileDialog.Options() | QFileDialog.ShowDirsOnly
    chosen_dir = QFileDialog.getExistingDirectory(
        self,
        "Select Output Directory",
        self.outputFilePath,
        options=dialog_options
    )
    if not chosen_dir:
        return
    self.outputFilePath = chosen_dir
    self.updateOutputFilePathLabel()
def get_unique_filename(self, base_path):
    """Return ``base_path`` or the first numbered variant that is unused.

    Candidates are ``base_path`` itself, then ``<name>_1<ext>``,
    ``<name>_2<ext>``, ... in the same directory; the first one for which
    ``os.path.exists`` is false is returned.

    Args:
        base_path: Desired output path.

    Returns:
        str: A path that did not exist at the time of the check.
    """
    parent, file_name = os.path.split(base_path)
    stem, ext = os.path.splitext(file_name)
    candidate = base_path
    attempt = 0
    while os.path.exists(candidate):
        attempt += 1
        candidate = os.path.join(parent, f"{stem}_{attempt}{ext}")
    return candidate
def message(self, title, text, extra_widget=None):
    """Display a warning message box styled for the dark theme.

    Args:
        title: Window title of the message box.
        text: Main message text.
        extra_widget: Optional widget; when truthy the informative text is
            set to an empty string and the widget is placed into the box's
            layout at row 1, column 1.
    """
    warning_box = QMessageBox()
    # The stylesheet text is kept flush-left so its bytes stay unchanged.
    warning_box.setStyleSheet("""
QMessageBox {
background-color: #282C34; /* Dark grey background */
}
QLabel {
color: white; /* White text */
}
QPushButton {
color: white; /* White text for buttons */
background-color: #4B5563; /* Dark grey background for buttons */
border-style: solid;
border-width: 2px;
border-radius: 5px;
border-color: #4A4A4A;
padding: 6px;
min-width: 80px;
min-height: 30px;
}
""")
    warning_box.setIcon(QMessageBox.Warning)
    warning_box.setWindowTitle(title)
    warning_box.setText(text)
    if extra_widget:
        warning_box.setInformativeText('')
        box_layout = warning_box.layout()
        box_layout.addWidget(extra_widget, 1, 1)
    warning_box.exec_()

View File

@ -0,0 +1,416 @@
import os
from PyQt5.QtWidgets import (QMessageBox, QWidget, QRadioButton,
QFileDialog, QLabel, QPushButton, QGridLayout, QGroupBox, QHBoxLayout, QVBoxLayout, QLineEdit, QDateTimeEdit)
from PyQt5.QtCore import Qt, QDate
from logline_leviathan.gui.checkbox_panel import *
from logline_leviathan.gui.ui_helper import UIHelper
from logline_leviathan.database.database_manager import session_scope
from logline_leviathan.database.database_operations import DatabaseOperations
from logline_leviathan.exporter.wordlist_export import generate_wordlist
from logline_leviathan.gui.checkbox_panel import *
import shutil
import glob
class GenerateWordlistWindow(QWidget):
def __init__(self, app):
    """Create the wordlist generator window and build its UI.

    Sets up the checkbox panel, database operations and UI helper, makes
    sure the default wordlist directory ``<cwd>/data/wordlist`` exists,
    builds the widgets and finally loads the checkbox data.

    Args:
        app: Application object; stored on the instance and passed on to
            ``DatabaseOperations`` and the UI builder.
    """
    super().__init__()
    self.app = app
    self.checkboxPanel = CheckboxPanel()
    self.database_operations = DatabaseOperations(self, app)
    self.ui_helper = UIHelper(self)
    wordlist_dir = os.path.join(os.getcwd(), 'data', 'wordlist')
    self.WordlistPath = wordlist_dir
    os.makedirs(wordlist_dir, exist_ok=True)
    self.initialize_generate_wordlist_window(app)
    self.updateCheckboxes()
def initialize_generate_wordlist_window(generate_wordlist_window, app):
    """Build all widgets and layouts of the wordlist generator window.

    This is invoked from ``__init__`` as
    ``self.initialize_generate_wordlist_window(app)``, so the first
    parameter receives the window instance (it plays the role of ``self``).

    From top to bottom the window gets: a status label, a "SELEKTION" group
    box with the check/uncheck/expand/collapse buttons, a filter line edit
    and the checkbox panel, the wordlist management buttons, and a bottom
    grid with export options, the path label and the export/exit buttons.

    Args:
        generate_wordlist_window: The window instance being populated.
        app: Application object; not referenced inside this method.
    """
    generate_wordlist_window.setWindowTitle('Logline Leviathan - Wordlist-Generator')
    generate_wordlist_window.mainLayout = QVBoxLayout(generate_wordlist_window)
    #generate_wordlist_window.extendedLayout = QHBoxLayout(generate_wordlist_window)
    generate_wordlist_window.db_session = None
    # Window-wide dark theme. The CSS text is kept flush-left so the string
    # contents stay exactly as they are.
    stylesheet = """
/* Style for the main window */
QWidget {
background-color: #282C34; /* Dark grey background */
color: white; /* White text */
}
/* Style for buttons */
QPushButton {
background-color: #4B5563; /* Dark grey background */
color: white; /* White text */
border-style: outset;
border-width: 2px;
border-radius: 1px; /* Rounded corners */
border-color: #4A4A4A;
padding: 6px;
min-width: 50px;
min-height: 15px;
}
QPushButton:hover {
background-color: #6E6E6E; /* Slightly lighter grey on hover */
}
QPushButton:pressed {
background-color: #484848; /* Even darker grey when pressed */
}
"""
    # Blue variant, applied only to the "Wordlist erstellen" button below.
    highlited_button_style = """
QPushButton {
background-color: #3C8CCE; /* Lighter blue background */
color: white; /* White text */
border-style: outset;
border-width: 2px;
border-radius: 1px; /* Rounded corners */
border-color: #4A4A4A;
padding: 6px;
min-width: 50px;
min-height: 15px;
}
QPushButton:hover {
background-color: #7EC0EE; /* Even lighter blue on hover */
}
QPushButton:pressed {
background-color: #4A86E8; /* Slightly darker blue when pressed */
}
"""
    generate_wordlist_window.setStyleSheet(stylesheet)
    # Status label at the top; other methods write success/error texts to it.
    generate_wordlist_window.statusLabel = QLabel(' Erwarte Selektion der Entitätentypen', generate_wordlist_window)
    generate_wordlist_window.statusLabel.setWordWrap(True)
    generate_wordlist_window.statusLabel.setMinimumHeight(40)
    generate_wordlist_window.statusLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
    generate_wordlist_window.mainLayout.addWidget(generate_wordlist_window.statusLabel)
    # Create a GroupBox for the CheckboxPanel
    exportOptionsGroupBox = QGroupBox("SELEKTION", generate_wordlist_window)
    exportOptionsLayout = QVBoxLayout(exportOptionsGroupBox)
    # NOTE(review): __init__ already assigns a CheckboxPanel to this
    # attribute; the instance created here replaces it.
    generate_wordlist_window.checkboxPanel = CheckboxPanel()
    # Create a horizontal layout
    filterLayout = QHBoxLayout()
    # Create the "Check All" button
    checkAllButton = QPushButton("Alle markieren", generate_wordlist_window)
    checkAllButton.clicked.connect(lambda: generate_wordlist_window.checkboxPanel.checkAllVisible())
    # Create the "Uncheck All" button
    uncheckAllButton = QPushButton("Keine markieren", generate_wordlist_window)
    uncheckAllButton.clicked.connect(lambda: generate_wordlist_window.checkboxPanel.uncheckAllVisible())
    # Expand / collapse buttons for the checkbox tree
    expandAllButton = QPushButton("Expandieren", generate_wordlist_window)
    expandAllButton.clicked.connect(lambda: generate_wordlist_window.checkboxPanel.expandAllTreeItems())
    collapseAllButton = QPushButton("Komprimieren", generate_wordlist_window)
    collapseAllButton.clicked.connect(lambda: generate_wordlist_window.checkboxPanel.collapseAllTreeItems())
    # Add buttons to the filter layout, to the left of the filter label
    filterLayout.addWidget(checkAllButton)
    filterLayout.addWidget(uncheckAllButton)
    filterLayout.addWidget(expandAllButton)
    filterLayout.addWidget(collapseAllButton)
    # Create the label for the filter
    filterLabel = QLabel("Filtern:")
    filterLayout.addWidget(filterLabel)  # Add label to the horizontal layout
    # Add Text Input for Filtering
    filterLineEdit = QLineEdit(generate_wordlist_window)
    filterLineEdit.setPlaceholderText(" nach Typ, Tooltip oder Kurzbezeichnung filtern...")
    filterLineEdit.setStyleSheet("""
QLineEdit {
background-color: #3C4043; /* Background color */
color: white; /* Text color */
min-height: 20px;
}
""")
    filterLayout.addWidget(filterLineEdit)  # Add line edit to the horizontal layout
    exportOptionsLayout.addLayout(filterLayout)  # Add the horizontal layout to the export options layout
    # Add CheckboxPanel to the GroupBox's Layout
    exportOptionsLayout.addWidget(generate_wordlist_window.checkboxPanel)
    # Every text change in the filter box is forwarded to the panel's filterCheckboxes
    filterLineEdit.textChanged.connect(generate_wordlist_window.checkboxPanel.filterCheckboxes)
    generate_wordlist_window.mainLayout.addWidget(exportOptionsGroupBox)
    # Wordlist management buttons: copy to parser dir, purge DB entries,
    # open / delete the active wordlist.
    copyWordlistToParserDirButton = QPushButton('Soeben generierte Wordlist zur Analyse hinzufügen (kopiert erzeugte Datei)', generate_wordlist_window)
    copyWordlistToParserDirButton.clicked.connect(generate_wordlist_window.copyWordlistToParserDir)
    generate_wordlist_window.mainLayout.addWidget(copyWordlistToParserDirButton)
    purgeWordlistEntriesButton = QPushButton('Alte Wordlist-Eintraege aus Datenbank entfernen (empfohlen, sofern neue Wordlist generiert)', generate_wordlist_window)
    purgeWordlistEntriesButton.clicked.connect(generate_wordlist_window.purgeWordlistEntries)
    generate_wordlist_window.mainLayout.addWidget(purgeWordlistEntriesButton)
    twoWordlistButtonsLayout = QHBoxLayout()
    openActiveWordlistButton = QPushButton('Bestehende Wordlist oeffnen', generate_wordlist_window)
    openActiveWordlistButton.clicked.connect(generate_wordlist_window.openActiveWordlist)
    twoWordlistButtonsLayout.addWidget(openActiveWordlistButton)
    deleteActiveWordlistButton = QPushButton('Bestehende Wordlist entfernen', generate_wordlist_window)
    deleteActiveWordlistButton.clicked.connect(generate_wordlist_window.deleteActiveWordlist)
    twoWordlistButtonsLayout.addWidget(deleteActiveWordlistButton)
    generate_wordlist_window.mainLayout.addLayout(twoWordlistButtonsLayout)
    # Bottom grid. Column 1 holds the option rows 0-2 and the path buttons
    # (rows 3-4); column 2 holds the path label (row 0) and the export/exit
    # buttons (rows 3-4).
    bottomLayout = QGridLayout()
    generate_wordlist_window.openWordlistPathButton = QPushButton('Wordlist-Dateipfad...', generate_wordlist_window)
    generate_wordlist_window.openWordlistPathButton.clicked.connect(generate_wordlist_window.openWordlistPath)
    bottomLayout.addWidget(generate_wordlist_window.openWordlistPathButton, 3, 1)
    # Start Export Button
    generate_wordlist_window.startExportButton = QPushButton('Wordlist erstellen', generate_wordlist_window)
    generate_wordlist_window.startExportButton.clicked.connect(generate_wordlist_window.start_export_process)
    generate_wordlist_window.startExportButton.setStyleSheet(highlited_button_style)
    bottomLayout.addWidget(generate_wordlist_window.startExportButton, 3, 2)
    # Output File Directory
    generate_wordlist_window.selectOutputFileButton = QPushButton('Wordlist-Ausgabepfad setzen...', generate_wordlist_window)
    generate_wordlist_window.selectOutputFileButton.clicked.connect(generate_wordlist_window.selectOutputFile)
    bottomLayout.addWidget(generate_wordlist_window.selectOutputFileButton, 4, 1)
    # Exit Button
    generate_wordlist_window.exitButton = QPushButton('Schließen', generate_wordlist_window)
    generate_wordlist_window.exitButton.clicked.connect(generate_wordlist_window.close)
    bottomLayout.addWidget(generate_wordlist_window.exitButton, 4, 2)
    # NOTE(review): QCheckBox is not in this file's explicit PyQt5 import
    # list; presumably it arrives via the star import from checkbox_panel —
    # confirm.
    generate_wordlist_window.crossmatchesCheckbox = QCheckBox('Nur Kreuztreffer (Entitäten, die in mehreren Dateien vorkommen)', generate_wordlist_window)
    bottomLayout.addWidget(generate_wordlist_window.crossmatchesCheckbox, 0, 1)
    # Timestamp filter row: checkbox plus start/end editors, both preset to
    # the current date.
    generate_wordlist_window.timestampFilterCheckbox = QCheckBox('Nach Zeitstempel filtern:', generate_wordlist_window)
    generate_wordlist_window.startDateEdit = QDateTimeEdit(generate_wordlist_window)
    generate_wordlist_window.startDateEdit.setCalendarPopup(True)
    generate_wordlist_window.startDateEdit.setDate(QDate.currentDate())
    generate_wordlist_window.endDateEdit = QDateTimeEdit(generate_wordlist_window)
    generate_wordlist_window.endDateEdit.setCalendarPopup(True)
    generate_wordlist_window.endDateEdit.setDate(QDate.currentDate())
    generate_wordlist_window.timestampFilterQHBoxLayout = QHBoxLayout()
    generate_wordlist_window.timestampFilterQHBoxLayout.addWidget(generate_wordlist_window.timestampFilterCheckbox)
    generate_wordlist_window.timestampFilterQHBoxLayout.addWidget(generate_wordlist_window.startDateEdit)
    generate_wordlist_window.timestampFilterQHBoxLayout.addWidget(generate_wordlist_window.endDateEdit)
    bottomLayout.addLayout(generate_wordlist_window.timestampFilterQHBoxLayout, 1, 1)
    # Flagged-entries row: checkbox plus two radio buttons; "only flagged"
    # is the initially checked radio button.
    generate_wordlist_window.flaggedEntriesLayout = QHBoxLayout()
    generate_wordlist_window.flaggedEntriesCheckbox = QCheckBox('Markierte Einträge berücksichtigen', generate_wordlist_window)
    generate_wordlist_window.flaggedEntriesLayout.addWidget(generate_wordlist_window.flaggedEntriesCheckbox)
    generate_wordlist_window.flaggedRadioButtonLayout = QHBoxLayout()
    generate_wordlist_window.flaggedRadioButton = QRadioButton('Nur markierte Einträge')
    generate_wordlist_window.notflaggedRadioButton = QRadioButton('Nur nicht markierte Einträge')
    generate_wordlist_window.flaggedRadioButtonLayout.addWidget(generate_wordlist_window.flaggedRadioButton)
    generate_wordlist_window.flaggedRadioButtonLayout.addWidget(generate_wordlist_window.notflaggedRadioButton)
    generate_wordlist_window.flaggedRadioButton.setChecked(True)
    generate_wordlist_window.flaggedEntriesLayout.addLayout(generate_wordlist_window.flaggedRadioButtonLayout)
    bottomLayout.addLayout(generate_wordlist_window.flaggedEntriesLayout, 2, 1)
    # Output File Path Label
    generate_wordlist_window.WordlistPathLabel = QLabel('', generate_wordlist_window)
    generate_wordlist_window.updateWordlistPathLabel()  # Call this method to set the initial text
    bottomLayout.addWidget(generate_wordlist_window.WordlistPathLabel, 0, 2)
    generate_wordlist_window.mainLayout.addLayout(bottomLayout)
    generate_wordlist_window.setLayout(generate_wordlist_window.mainLayout)
def updateCheckboxes(self):
    """Reload the checkbox panel's contents using a database session.

    The session comes from ``session_scope`` and is handed to the panel's
    ``updateCheckboxes`` method.
    """
    with session_scope() as active_session:
        entity_panel = self.checkboxPanel
        entity_panel.updateCheckboxes(active_session)
def getSelectedCheckboxes(self):
    """Collect the checked items of the checkbox panel's tree.

    The tree is traversed in pre-order, one top-level item after another;
    an item is included when its column-0 check state equals
    ``Qt.Checked``.

    Returns:
        list: Checked tree items in traversal order.
    """
    def collect(node):
        # The node itself (if checked) comes before anything found below it.
        found = [node] if node.checkState(0) == Qt.Checked else []
        for idx in range(node.childCount()):
            found += collect(node.child(idx))
        return found

    tree_widget = self.checkboxPanel.treeWidget
    selected = []
    for top_idx in range(tree_widget.topLevelItemCount()):
        selected += collect(tree_widget.topLevelItem(top_idx))
    return selected
def updateWordlistPathLabel(self):
    """Show the wordlist output directory in the path label.

    ``self.WordlistPath`` is the directory that exports are written to
    (``__init__`` creates it and the export joins it with the wordlist
    file name), so that directory itself is displayed rather than its
    parent. When the attribute does not name an existing directory (for
    example because it holds a file path), the parent directory is shown,
    which is what was displayed previously. A trailing ``/`` is always
    appended to the displayed text.
    """
    target = self.WordlistPath
    if not os.path.isdir(target):
        target = os.path.dirname(target)
    self.WordlistPathLabel.setText(f'{target}/')
def openWordlistPath(self):
    """Open the wordlist output directory through the UI helper.

    The directory stored in ``self.WordlistPath`` is opened directly. The
    previous implementation rebuilt the path as ``<parent>/wordlist``,
    which only matched the default ``data/wordlist`` location and pointed
    at a different (possibly missing) folder for any other directory.
    If ``self.WordlistPath`` is not an existing directory (e.g. it holds a
    file path), its parent directory is opened instead.
    """
    target = self.WordlistPath
    if not os.path.isdir(target):
        target = os.path.dirname(target)
    self.ui_helper.openFile(target)
def selectOutputFile(self):
    """Let the user pick the directory that wordlists are written to.

    The previous implementation read ``self.outputFormatList``, which this
    window's UI builder never creates, so the button raised an
    AttributeError. It would also have stored a file path with an
    html/xlsx extension in ``self.WordlistPath``, although the export
    (joins it with ``wordlist.txt``) and the copy-to-parser step (globs
    ``*.txt`` inside it) both treat that attribute as a directory.
    A directory dialog is used instead.

    On confirmation ``self.WordlistPath`` and ``self.outputDir`` are set to
    the chosen directory and the path label is refreshed; cancelling the
    dialog changes nothing.
    """
    options = QFileDialog.Options() | QFileDialog.ShowDirsOnly
    selected_directory = QFileDialog.getExistingDirectory(
        self,
        "Selektieren des Ziel-Dateipfads",
        self.WordlistPath,  # start browsing at the current output directory
        options=options
    )
    if not selected_directory:
        return
    self.WordlistPath = selected_directory
    self.outputDir = selected_directory
    self.updateWordlistPathLabel()
def get_unique_filename(self, base_path):
    """Find an output path that does not collide with an existing file.

    ``base_path`` is returned as-is when nothing exists there. Otherwise a
    counter starting at 1 is appended to the file name (before the
    extension) and increased until ``os.path.exists`` reports a free path.

    Args:
        base_path: Desired output path.

    Returns:
        str: ``base_path`` or its first free ``_<n>`` variant.
    """
    result = base_path
    if os.path.exists(result):
        directory, full_name = os.path.split(base_path)
        bare_name, extension = os.path.splitext(full_name)
        number = 1
        result = os.path.join(directory, f"{bare_name}_{number}{extension}")
        while os.path.exists(result):
            number += 1
            result = os.path.join(directory, f"{bare_name}_{number}{extension}")
    return result
def copyWordlistToParserDir(self):
    """Copy the most recently created wordlist to the parser directory.

    The ``*.txt`` file in ``self.WordlistPath`` with the greatest
    ``os.path.getctime`` value is copied to
    ``<cwd>/data/parser/generated_wordlist.txt``; the parser directory is
    created when missing and an existing destination file is overwritten.
    The status label reports success. Every exception, including the case
    of no ``.txt`` file being found, is shown through ``self.message``.
    """
    try:
        parser_folder = os.path.join(os.getcwd(), 'data', 'parser')
        os.makedirs(parser_folder, exist_ok=True)
        txt_files = glob.glob(os.path.join(self.WordlistPath, '*.txt'))
        if not txt_files:
            raise FileNotFoundError("No .txt files found in the WordlistPath directory.")
        source_file = max(txt_files, key=os.path.getctime)
        copied_to = os.path.join(parser_folder, 'generated_wordlist.txt')
        shutil.copy2(source_file, copied_to)
        self.statusLabel.setText(f" Wordlist erfolgreich kopiert nach {copied_to}")
    except Exception as exc:
        details = str(exc)
        self.message("Fehler beim kopieren", f"Fehler beim kopieren: {details}")
def openActiveWordlist(self):
    """Open the active wordlist file through the UI helper.

    The active wordlist is ``<cwd>/data/parser/generated_wordlist.txt``.
    If it does not exist, or opening it raises, the error text is shown
    through ``self.message``.
    """
    try:
        active_list = os.path.join(os.getcwd(), 'data', 'parser', 'generated_wordlist.txt')
        if not os.path.exists(active_list):
            raise FileNotFoundError("Wordlist file not found.")
        self.ui_helper.openFile(active_list)
    except Exception as error:
        self.message("Fehler beim Öffnen", f"Fehler beim Öffnen: {error}")
def deleteActiveWordlist(self):
    """Delete the active wordlist file.

    Removes ``<cwd>/data/parser/generated_wordlist.txt`` and reports
    success in the status label. If the file does not exist, or removing it
    raises, the error text is shown through ``self.message``.
    """
    try:
        active_list = os.path.join(os.getcwd(), 'data', 'parser', 'generated_wordlist.txt')
        if not os.path.exists(active_list):
            raise FileNotFoundError("Wordlist file not found.")
        os.remove(active_list)
        self.statusLabel.setText(" Wordlist erfolgreich gelöscht.")
    except Exception as error:
        self.message("Fehler beim Löschen", f"Fehler beim Löschen: {error}")
def start_export_process(self):
    """Generate a wordlist file from the checked entity types.

    The output goes to ``wordlist.txt`` inside ``self.WordlistPath``; if
    that name is taken, ``get_unique_filename`` supplies a numbered
    variant. The timestamp range is passed to the generator only when the
    timestamp filter checkbox is checked (otherwise both dates are
    ``None``), together with the crossmatch and flagged-entry options.
    When nothing is checked a warning dialog is shown and nothing is
    generated. Any exception is written to the status label and logged.
    """
    target_path = self.get_unique_filename(os.path.join(self.WordlistPath, "wordlist.txt"))

    # The date editors are only consulted when the timestamp filter is on.
    if self.timestampFilterCheckbox.isChecked():
        start_date = self.startDateEdit.date().toPyDate()
        end_date = self.endDateEdit.date().toPyDate()
    else:
        start_date = end_date = None

    include_flagged = self.flaggedEntriesCheckbox.isChecked()
    only_flagged = self.flaggedRadioButton.isChecked()
    only_unflagged = self.notflaggedRadioButton.isChecked()

    try:
        with session_scope() as db_session:
            chosen_items = self.getSelectedCheckboxes()
            if chosen_items:
                crossmatches_only = self.crossmatchesCheckbox.isChecked()
                generate_wordlist(target_path, db_session, chosen_items, crossmatches_only, start_date, end_date, include_flagged, only_flagged, only_unflagged)
                self.statusLabel.setText(f" Generierte Liste gespeichert unter {target_path}")
            else:
                self.message("Generieren nicht möglich", "Keine Typen selektiert. Auswahl vornehmen.")
    except Exception as error:
        failure_text = str(error)
        self.statusLabel.setText(f" Fehler beim speichern: {failure_text}")
        logging.error(f"Export Error: {failure_text}")
def purgeWordlistEntries(self):
    """Call the database operations' wordlist purge, then refresh the checkboxes."""
    db_ops = self.database_operations
    db_ops.purgeWordlistEntries()
    self.updateCheckboxes()
def message(self, title, text, extra_widget=None):
    """Pop up a warning box with the window's dark styling.

    Args:
        title: Window title of the box.
        text: Main message text.
        extra_widget: Optional widget; when truthy the informative text is
            set to an empty string and the widget is inserted into the
            box's layout at row 1, column 1.
    """
    # The stylesheet text is kept flush-left so its bytes stay unchanged.
    box_style = """
QMessageBox {
background-color: #282C34; /* Dark grey background */
}
QLabel {
color: white; /* White text */
}
QPushButton {
color: white; /* White text for buttons */
background-color: #4B5563; /* Dark grey background for buttons */
border-style: solid;
border-width: 2px;
border-radius: 5px;
border-color: #4A4A4A;
padding: 6px;
min-width: 80px;
min-height: 30px;
}
"""
    popup = QMessageBox()
    popup.setStyleSheet(box_style)
    popup.setIcon(QMessageBox.Warning)
    popup.setWindowTitle(title)
    popup.setText(text)
    if extra_widget:
        popup.setInformativeText('')
        popup.layout().addWidget(extra_widget, 1, 1)
    popup.exec_()

View File

@ -0,0 +1,306 @@
import os
from PyQt5.QtWidgets import (QGridLayout, QPushButton, QLabel, QHBoxLayout, QApplication,
QVBoxLayout, QProgressBar, QGroupBox)
from PyQt5.QtGui import QPixmap
from PyQt5.QtCore import Qt
import logline_leviathan.gui.versionvars as versionvars
from logline_leviathan.gui.query_window import QueryLineEdit
# Module-level call, executed when this module is imported: sets the
# Qt.AA_EnableHighDpiScaling application attribute on QApplication.
# NOTE(review): presumably this has to run before the QApplication instance
# is created — confirm against the Qt documentation and the import order.
QApplication.setAttribute(Qt.AA_EnableHighDpiScaling)
def initialize_main_window(main_window, app):
    """Build the main window's widgets, layouts and signal connections.

    Everything is attached to ``main_window``: widgets that are needed later
    are stored as attributes (e.g. ``statusLabel``, ``progressBar``,
    ``fileCountLabel``, ``databaseStatusLabel``), purely local widgets stay
    local to this function.

    Args:
        main_window: The widget to populate. It must already provide the
            slots connected below (``quickStartWorkflow``,
            ``openFileNameDialog``, ``processFiles``, ...) and a
            ``databaseTree`` attribute, because both are referenced here.
        app: Not used inside this function.
    """
    main_window.setWindowTitle('Logline Leviathan')
    # Passing main_window to the layout constructor installs it as the
    # window's top-level layout.
    main_window.mainLayout = QVBoxLayout(main_window)
    #main_window.extendedLayout = QHBoxLayout(main_window)
    main_window.db_session = None
    # Logo (relative path, so it depends on the current working directory)
    pixmap = QPixmap(os.path.join('logline_leviathan', 'gui', 'logo.png'))
    scaled_pixmap = pixmap.scaled(400, 400, Qt.KeepAspectRatio, Qt.SmoothTransformation)
    logoLabel = QLabel(main_window)
    logoLabel.setPixmap(scaled_pixmap)
    # Version label
    versionLabel = QLabel(versionvars.version_string, main_window)  # text is taken from versionvars.version_string
    versionLabel.setAlignment(Qt.AlignLeft | Qt.AlignVCenter)
    # Horizontal layout
    hbox = QHBoxLayout()
    hbox.addWidget(versionLabel)  # Add version label to the left
    hbox.addStretch()  # Add stretchable space between the version label and logo
    hbox.addWidget(logoLabel, alignment=Qt.AlignRight)  # Add logo label to the right
    # Add horizontal layout to the main layout
    main_window.mainLayout.addLayout(hbox)
    # Window-wide stylesheet (applied via setStyleSheet further below).
    stylesheet = """
/* Style for the main window */
QWidget {
background-color: #282C34; /* Dark grey background */
color: white; /* White text */
}
/* Style for buttons */
QPushButton {
background-color: #4B5563; /* Dark grey background */
color: white; /* White text */
border-style: outset;
border-width: 2px;
border-radius: 1px; /* Rounded corners */
border-color: #4A4A4A;
padding: 6px;
min-width: 60px;
min-height: 15px;
}
QPushButton:hover {
background-color: #6E6E6E; /* Slightly lighter grey on hover */
}
QPushButton:pressed {
background-color: #484848; /* Even darker grey when pressed */
}
"""
    # Blue variant used only for the "Quick Start" and "Verarbeitung beginnen" buttons.
    highlited_button_style = """
QPushButton {
background-color: #3C8CCE; /* Lighter blue background */
color: white; /* White text */
border-style: outset;
border-width: 2px;
border-radius: 1px; /* Rounded corners */
border-color: #4A4A4A;
padding: 6px;
min-width: 50px;
min-height: 15px;
}
QPushButton:hover {
background-color: #7EC0EE; /* Even lighter blue on hover */
}
QPushButton:pressed {
background-color: #4A86E8; /* Slightly darker blue when pressed */
}
"""
    main_window.setStyleSheet(stylesheet)
    # Data Ingestion Settings Label
    main_window.dataIngestionLabel = QLabel(' Willkommen beim LoglineLeviathan - Analyse/Export-Modul.\n Der Quick-Start-Button ermöglicht eine schnelle Selektion der zu analysierenden Daten.\n Nach Abschluss der Selektion über den Abbrechen-Button startet die Analyse sofort.')
    main_window.dataIngestionLabel.setWordWrap(True)
    main_window.dataIngestionLabel.setMinimumHeight(60)
    main_window.dataIngestionLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
    # Quick Start Button
    quickStartButton = QPushButton('Quick Start', main_window)
    quickStartButton.setStyleSheet(highlited_button_style)
    quickStartButton.setFixedSize(270, 55)
    quickStartButton.clicked.connect(main_window.quickStartWorkflow)
    # Horizontal layout for label and button
    hBoxLayout = QHBoxLayout()
    hBoxLayout.addWidget(quickStartButton)
    hBoxLayout.addWidget(main_window.dataIngestionLabel)
    # Add horizontal layout to the main layout
    main_window.mainLayout.addLayout(hBoxLayout)
    # Grid Layout for Top Buttons
    topButtonGridLayout = QGridLayout()
    # Create Buttons (stored on main_window so other code can reach them)
    main_window.openButton = QPushButton('Einzelne Dateien selektieren...', main_window)
    main_window.openButton.clicked.connect(main_window.openFileNameDialog)
    main_window.addDirButton = QPushButton('Pfad zur rekursiven Analyse selektieren...', main_window)
    main_window.addDirButton.clicked.connect(main_window.openDirNameDialog)
    main_window.openFileSettingsButton = QPushButton('Selektierte Dateien...', main_window)
    # The lambda looks the method up at click time instead of binding it now.
    main_window.openFileSettingsButton.clicked.connect(lambda: main_window.openFileSettingsWindow())
    main_window.createDbButton = QPushButton('Lokale Datenbank neu erstellen', main_window)
    main_window.createDbButton.clicked.connect(main_window.purgeDatabase)
    main_window.importDbButton = QPushButton('Existierende Datenbank importieren...', main_window)
    main_window.importDbButton.clicked.connect(main_window.importDatabase)
    main_window.exportDBButton = QPushButton('Lokale Datenbank speichern/exportieren...', main_window)
    main_window.exportDBButton.clicked.connect(main_window.exportDatabase)
    main_window.openAnalysisSettingsButton = QPushButton('Analyse-Einstellungen...', main_window)
    main_window.openAnalysisSettingsButton.clicked.connect(lambda: main_window.openAnalysisSettingsWindow())
    main_window.processButton = QPushButton('Verarbeitung beginnen', main_window)
    main_window.processButton.setStyleSheet(highlited_button_style)
    main_window.processButton.clicked.connect(main_window.processFiles)
    main_window.abortAnalysisButton = QPushButton('Verarbeitung abbrechen', main_window)
    main_window.abortAnalysisButton.clicked.connect(main_window.abortAnalysis)
    # Create GroupBoxes
    fileSelectionGroup = QGroupBox("Datenselektion")
    databaseGroup = QGroupBox("Datenbank - Management")
    analysisGroup = QGroupBox("Analyse - Management")
    # Create Layouts for each GroupBox
    fileSelectionLayout = QVBoxLayout()
    databaseLayout = QVBoxLayout()
    analysisLayout = QVBoxLayout()
    # Add Buttons to their respective Layout
    fileSelectionLayout.addWidget(main_window.openButton)
    fileSelectionLayout.addWidget(main_window.addDirButton)
    fileSelectionLayout.addWidget(main_window.openFileSettingsButton)
    databaseLayout.addWidget(main_window.createDbButton)
    databaseLayout.addWidget(main_window.importDbButton)
    databaseLayout.addWidget(main_window.exportDBButton)
    analysisLayout.addWidget(main_window.openAnalysisSettingsButton)
    analysisLayout.addWidget(main_window.processButton)
    analysisLayout.addWidget(main_window.abortAnalysisButton)
    # Set Layouts to GroupBoxes
    fileSelectionGroup.setLayout(fileSelectionLayout)
    databaseGroup.setLayout(databaseLayout)
    analysisGroup.setLayout(analysisLayout)
    # Add GroupBoxes to Grid (one row, three columns)
    topButtonGridLayout.addWidget(fileSelectionGroup, 0, 0)
    topButtonGridLayout.addWidget(databaseGroup, 0, 1)
    topButtonGridLayout.addWidget(analysisGroup, 0, 2)
    # Set uniform spacing
    topButtonGridLayout.setHorizontalSpacing(20)
    topButtonGridLayout.setVerticalSpacing(10)
    # Add the Grid Layout to the Main Layout
    main_window.mainLayout.addLayout(topButtonGridLayout)
    # Progress Bar, Status Label, Entity Rate Label, File Count Label
    main_window.progressBar = QProgressBar(main_window)
    main_window.mainLayout.addWidget(main_window.progressBar)
    main_window.statusLabel = QLabel(' Bereit // Analyse starten oder Export generieren', main_window)
    main_window.statusLabel.setWordWrap(True)
    main_window.statusLabel.setMinimumHeight(40)
    main_window.statusLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
    main_window.mainLayout.addWidget(main_window.statusLabel)
    # Starts with the same initial text as statusLabel.
    main_window.entityRateLabel = QLabel(' Bereit // Analyse starten oder Export generieren', main_window)
    main_window.mainLayout.addWidget(main_window.entityRateLabel)
    main_window.fileCountLabel = QLabel(' Keine Dateien selektiert', main_window)
    main_window.fileCountLabel.setMinimumHeight(40)
    main_window.fileCountLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
    main_window.mainLayout.addWidget(main_window.fileCountLabel)
    # Create the new QGroupBox for Database Query
    databaseQueryGroupBox = QGroupBox("Datensatz durchsuchen", main_window)
    databaseQueryLayout = QVBoxLayout(databaseQueryGroupBox)
    databaseQueryLayout.setAlignment(Qt.AlignTop)
    # Create the QueryLineEdit for text input
    databaseQueryLineEdit = QueryLineEdit(main_window)
    databaseQueryLineEdit.setPlaceholderText(" Suchbegriff...")
    databaseQueryLineEdit.setStyleSheet("""
QLineEdit {
background-color: #3C4043;
color: white;
min-height: 20px;
}
""")
    # Pressing Return and clicking the button both run the same query wrapper
    # with the current text of the line edit.
    databaseQueryLineEdit.returnPressed.connect(lambda: main_window.execute_query_wrapper(databaseQueryLineEdit.text()))
    databaseQueryLabel = QLabel("\nIntelligentes Durchsuchen der Datenbank nach jeglichem Suchbegriff. Die Nutzung von Suchoperatoren +, - und '' ist möglich. Als Suchbegriffe können jegliche Entitäten, aber auch Dateinamen oder Sätze im Kontext, sowie Entitätentyp-Kurzbezeichnungen (s. rechts) verwendet werden.\n\n", main_window)
    databaseQueryLabel.setWordWrap(True)
    # Create QPushButton for executing the query
    executeQueryButton = QPushButton("Ausführen", main_window)
    executeQueryButton.clicked.connect(lambda: main_window.execute_query_wrapper(databaseQueryLineEdit.text()))
    main_window.databaseStatusLabel = QLabel(" Datenbank noch nicht initialisiert", main_window)
    # Add line edit, description, button and status label to the QVBoxLayout
    databaseQueryLayout.addWidget(databaseQueryLineEdit)
    databaseQueryLayout.addWidget(databaseQueryLabel)
    databaseQueryLayout.addWidget(executeQueryButton)
    databaseQueryLayout.addWidget(main_window.databaseStatusLabel)
    # Set the QVBoxLayout as the layout for the QGroupBox
    # (the layout was already constructed with the group box as its parent)
    databaseQueryGroupBox.setLayout(databaseQueryLayout)
    # Group box showing the database tree next to expand/collapse buttons
    databaseContentsGroupBox = QGroupBox("Datensatz", main_window)
    databaseContentsLayout = QHBoxLayout(databaseContentsGroupBox)
    databaseContentSwitchLayout = QVBoxLayout()
    expandAllButton = QPushButton("Expandieren", main_window)
    expandAllButton.clicked.connect(lambda: main_window.databaseTree.expandAllTreeItems())
    collapseAllButton = QPushButton("Komprimieren", main_window)
    collapseAllButton.clicked.connect(lambda: main_window.databaseTree.collapseAllTreeItems())
    databaseContentSwitchLayout.addWidget(expandAllButton)
    databaseContentSwitchLayout.addWidget(collapseAllButton)
    databaseContentSwitchLayout.setAlignment(Qt.AlignTop)
    # main_window.databaseTree is read here, so it has to exist before this
    # function is called.
    databaseContentsLayout.addWidget(main_window.databaseTree)
    databaseContentsLayout.addLayout(databaseContentSwitchLayout)
    # Group box with the two generator entry points (report and wordlist)
    generationOptionsGroupBox = QGroupBox("Generator - Selektion", main_window)
    generationOptionsLayout = QVBoxLayout(generationOptionsGroupBox)
    generationOptionsLayout.setAlignment(Qt.AlignTop)
    # Button opening the report generator window
    openGenerateReportWindowButton = QPushButton("Report-Datei generieren", main_window)
    openGenerateReportWindowButton.clicked.connect(main_window.openGenerateReportWindow)
    openGenerateReportWindowButtonDescriptor = QLabel("REPORT-DATEI GENERIEREN:\nGeneriert eine Report-Datei, die einfach extern geteilt oder inspiziert werden kann. Im Auswahlfenster sind detaillierte Einstellungen verfügbar.\n", main_window)
    openGenerateReportWindowButtonDescriptor.setWordWrap(True)
    # Button opening the wordlist generator window
    openGenerateWordlistButton = QPushButton("Wortliste generieren", main_window)
    openGenerateWordlistButton.clicked.connect(main_window.openGenerateWordlistWindow)
    openGenerateWordlistButtonDescriptor = QLabel("WORTLISTE GENERIEREN:\nGeneriert eine Wortliste, die entweder für die Analyse (auch für weitere Datensätze) genutzt werden, oder extern weiterverwendet werden kann. Im Auswahlfenster sind detaillierte Einstellungen verfügbar.\n\n\n", main_window)
    openGenerateWordlistButtonDescriptor.setWordWrap(True)
    generationOptionsLayout.addWidget(openGenerateReportWindowButton)
    generationOptionsLayout.addWidget(openGenerateReportWindowButtonDescriptor)
    generationOptionsLayout.addWidget(openGenerateWordlistButton)
    generationOptionsLayout.addWidget(openGenerateWordlistButtonDescriptor)
    # Create a new QGridLayout for arranging QGroupBoxes
    groupBoxLayout = QGridLayout()
    databaseQueryGroupBox.setFixedWidth(300)
    databaseContentsGroupBox.setFixedWidth(500)
    generationOptionsGroupBox.setFixedWidth(300)
    # Row 0 of the grid: query box, database contents, generator options
    groupBoxLayout.addWidget(databaseQueryGroupBox, 0, 0)
    groupBoxLayout.addWidget(databaseContentsGroupBox, 0, 1)
    groupBoxLayout.addWidget(generationOptionsGroupBox, 0, 2)
    # Link to GitHub Repo (URL and link text come from versionvars)
    main_window.githubLink = QLabel(f'<a href="{versionvars.repo_link}">{versionvars.repo_link_text}</a>', main_window)
    main_window.githubLink.setOpenExternalLinks(True)
    main_window.openLogDirButton = QPushButton('Log-Verzeichnis', main_window)
    main_window.openLogDirButton.clicked.connect(main_window.openLogDir)
    # Exit Button
    main_window.exitButton = QPushButton('Beenden', main_window)
    main_window.exitButton.clicked.connect(main_window.close)
    # Row 1 of the grid: log directory button, repo link, exit button
    groupBoxLayout.addWidget(main_window.githubLink, 1, 1)
    groupBoxLayout.addWidget(main_window.openLogDirButton, 1, 0)
    groupBoxLayout.addWidget(main_window.exitButton, 1, 2)
    # Add this grid layout to the main layout of the main window
    main_window.mainLayout.addLayout(groupBoxLayout)
    main_window.update()

View File

@ -0,0 +1,339 @@
from PyQt5.QtWidgets import QRadioButton, QDateTimeEdit, QVBoxLayout, QCheckBox, QHBoxLayout, QGroupBox, QPushButton, QLineEdit, QGridLayout, QLabel, QListWidget, QGridLayout
from PyQt5.QtCore import QDate
import logline_leviathan.gui.versionvars as versionvars
from logline_leviathan.gui.checkbox_panel import *
def initialize_generate_report_window(generate_report_window, app):
    """Build the report generator window's widgets, layouts and signals.

    Widgets needed later are stored as attributes on
    ``generate_report_window`` (e.g. ``checkboxPanel``, ``fileCheckboxPanel``,
    ``outputFormatList``, ``exportContextList``, ``startDateEdit``).

    Args:
        generate_report_window: The widget to populate. It must already
            provide ``updateOutputFilePathLabel``, ``openOutputFilepath``,
            ``start_export_process`` and ``selectOutputFile``, which are
            called or connected here.
        app: Not used inside this function.
    """
    generate_report_window.setWindowTitle('Logline Leviathan - Report - Generator')
    # Passing the window to the layout constructor installs it as the
    # window's top-level layout.
    generate_report_window.mainLayout = QVBoxLayout(generate_report_window)
    #generate_report_window.extendedLayout = QHBoxLayout(generate_report_window)
    generate_report_window.db_session = None
    stylesheet = """
/* Style for the main window */
QWidget {
background-color: #282C34; /* Dark grey background */
color: white; /* White text */
}
/* Style for buttons */
QPushButton {
background-color: #4B5563; /* Dark grey background */
color: white; /* White text */
border-style: outset;
border-width: 2px;
border-radius: 1px; /* Rounded corners */
border-color: #4A4A4A;
padding: 6px;
min-width: 50px;
min-height: 15px;
}
QPushButton:hover {
background-color: #6E6E6E; /* Slightly lighter grey on hover */
}
QPushButton:pressed {
background-color: #484848; /* Even darker grey when pressed */
}
"""
    # Blue variant used only for the "Report generieren..." button.
    highlited_button_style = """
QPushButton {
background-color: #3C8CCE; /* Lighter blue background */
color: white; /* White text */
border-style: outset;
border-width: 2px;
border-radius: 1px; /* Rounded corners */
border-color: #4A4A4A;
padding: 6px;
min-width: 50px;
min-height: 15px;
}
QPushButton:hover {
background-color: #7EC0EE; /* Even lighter blue on hover */
}
QPushButton:pressed {
background-color: #4A86E8; /* Slightly darker blue when pressed */
}
"""
    # Description texts are built once here instead of on every
    # selection-change signal; the callbacks below only look them up.
    format_descriptions = {
        'HTML': " HTML\n Generiert eine einzelne HTML-Datei, die einfach\n geteilt und mit jedem Browser geöffnet werden kann.\n Geeignet für Übersichtsanalyse, sofern der Datensatz nicht zu\n umfangreich ist.",
        'Interactive HTML': " Interaktive HTML-Datei.\n Generiert eine einzelne HTML-Datei, die mit einem Webbrowser,\n der JavaScript unterstützt, angezeigt wird.\n Geeignet für umfangreichere Datensätze.",
        'XLSX': " XLSX\n Exportiert Daten in eine Excel-Datei.\n Schreibe sämtliche Entitätentypen in\n separate Sheets, unterstützt keine visuelle Hervorhebung.\n Geeignet für weitere Analyse über MS-Excel."
    }
    context_descriptions = {
        "Kontext - gleiche Zeile": " Kontext - gleiche Zeile\n Eine Zeile pro Fundstelle einer Entität.\n Der Kontext innerhalb der gleichen Zeile\n wird dargestellt.\n",
        "Kontext - mittelgroß": " Kontext - mittelgroß\n Eine Zeile pro Fundstelle einer Entität.\n Der Kontext von +/- 8 Zeilen wird dargestellt.\n",
        "Kontext - umfangreich": " Kontext - umfangreich\n Eine Zeile pro Fundstelle einer Entität.\n Der Kontext von +/- 15 Zeilen wird dargestellt.\n",
        "Kompakte Zusammenfassung ohne Kontext": " Kompakte Zusammenfassung ohne Kontext\n Listet Entitäten untereinander auf.\n Die jeweiligen Fundstellen werden komprimiert\n dargestellt.\n Kontext wird nicht unterstützt."
    }

    def update_output_format_label(current):
        """Show the description of the selected output format (no-op for None)."""
        if current is not None:
            # Unknown item texts fall back to an empty description.
            generate_report_window.outputFormatSelectionLabel.setText(format_descriptions.get(current.text(), ""))

    def update_export_context_label(current):
        """Show the description of the selected export context (no-op for None)."""
        if current is not None:
            generate_report_window.exportContextSelectionLabel.setText(context_descriptions.get(current.text(), ""))

    generate_report_window.setStyleSheet(stylesheet)
    generate_report_window.statusLabel = QLabel(' Erwarte Selektion von Entitätentypen, die im Export dargestellt werden.', generate_report_window)
    generate_report_window.statusLabel.setWordWrap(True)
    generate_report_window.statusLabel.setMinimumHeight(40)
    generate_report_window.statusLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
    generate_report_window.mainLayout.addWidget(generate_report_window.statusLabel)
    # Create a GroupBox for the CheckboxPanel
    exportOptionsGroupBox = QGroupBox("Typen - Selektion", generate_report_window)
    exportOptionsLayout = QVBoxLayout(exportOptionsGroupBox)
    generate_report_window.checkboxPanel = CheckboxPanel()
    # Checkbox Panel Filter Layout
    checkboxFilterLayout = QHBoxLayout()
    # Create the "Check All" button
    checkAllButton = QPushButton("Alle markieren", generate_report_window)
    checkAllButton.clicked.connect(lambda: generate_report_window.checkboxPanel.checkAllVisible())
    # Create the "Uncheck All" button
    uncheckAllButton = QPushButton("Keine markieren", generate_report_window)
    uncheckAllButton.clicked.connect(lambda: generate_report_window.checkboxPanel.uncheckAllVisible())
    expandAllButton = QPushButton("Expandieren", generate_report_window)
    expandAllButton.clicked.connect(lambda: generate_report_window.checkboxPanel.expandAllTreeItems())
    collapseAllButton = QPushButton("Komprimieren", generate_report_window)
    collapseAllButton.clicked.connect(lambda: generate_report_window.checkboxPanel.collapseAllTreeItems())
    checkboxFilterLayout.addWidget(checkAllButton)
    checkboxFilterLayout.addWidget(uncheckAllButton)
    checkboxFilterLayout.addWidget(expandAllButton)
    checkboxFilterLayout.addWidget(collapseAllButton)
    checkboxFilterLabel = QLabel("Filtern:")
    checkboxFilterLayout.addWidget(checkboxFilterLabel)
    checkboxFilterLineEdit = QLineEdit(generate_report_window)
    checkboxFilterLineEdit.setPlaceholderText(" nach Typ, Tooltip oder Kurzbezeichnung filtern...")
    checkboxFilterLineEdit.setStyleSheet("""
QLineEdit {
background-color: #3C4043;
color: white;
min-height: 20px;
}
""")
    checkboxFilterLayout.addWidget(checkboxFilterLineEdit)
    # Connect the textChanged signal of QLineEdit to the filter method
    checkboxFilterLineEdit.textChanged.connect(generate_report_window.checkboxPanel.filterCheckboxes)
    exportOptionsLayout.addLayout(checkboxFilterLayout)
    exportOptionsLayout.addWidget(generate_report_window.checkboxPanel)
    # Create a GroupBox for the FileCheckboxPanel
    fileSelectionGroupBox = QGroupBox("Dateien - Selektion", generate_report_window)
    fileExportOptionsLayout = QVBoxLayout(fileSelectionGroupBox)
    generate_report_window.fileCheckboxPanel = FileCheckboxPanel()
    # File Checkbox Panel Filter Layout
    fileCheckboxFilterLayout = QHBoxLayout()
    # Create the "Check All" button
    filCheckAllButton = QPushButton("Alle markieren", generate_report_window)
    filCheckAllButton.clicked.connect(lambda: generate_report_window.fileCheckboxPanel.checkAllVisible())
    # Create the "Uncheck All" button
    fileUncheckAllButton = QPushButton("Keine markieren", generate_report_window)
    fileUncheckAllButton.clicked.connect(lambda: generate_report_window.fileCheckboxPanel.uncheckAllVisible())
    fileCheckboxFilterLayout.addWidget(filCheckAllButton)
    fileCheckboxFilterLayout.addWidget(fileUncheckAllButton)
    fileFilterLabel = QLabel("Filtern:")
    fileCheckboxFilterLayout.addWidget(fileFilterLabel)
    fileFilterLineEdit = QLineEdit(generate_report_window)
    fileFilterLineEdit.setPlaceholderText(" nach Dateiname filtern...")
    fileFilterLineEdit.setStyleSheet("""
QLineEdit {
background-color: #3C4043;
color: white;
min-height: 20px;
}
""")
    fileCheckboxFilterLayout.addWidget(fileFilterLineEdit)
    # Connect the textChanged signal of QLineEdit to the filter method
    fileFilterLineEdit.textChanged.connect(generate_report_window.fileCheckboxPanel.filterCheckboxes)
    fileExportOptionsLayout.addLayout(fileCheckboxFilterLayout)
    fileExportOptionsLayout.addWidget(generate_report_window.fileCheckboxPanel)
    # First horizontal layout: type selection next to file selection
    topHBoxLayout = QHBoxLayout()
    topHBoxLayout.addWidget(exportOptionsGroupBox)
    topHBoxLayout.addWidget(fileSelectionGroupBox)
    generate_report_window.mainLayout.addLayout(topHBoxLayout)
    # Export Settings as a Grid Layout
    exportCustomizationLayout = QGridLayout()
    # Both list widgets get a fixed height of item_height * visible_items.
    item_height = 20
    visible_items = 3
    outputFormatGroupBox = QGroupBox("Ausgabeformat - Selektion", generate_report_window)
    outputFormatGroupBox.setFixedHeight(200)
    outputFormatLayout = QVBoxLayout(outputFormatGroupBox)
    generate_report_window.outputFormatList = QListWidget()
    generate_report_window.outputFormatList.addItems(['HTML', 'Interactive HTML', 'XLSX'])
    generate_report_window.outputFormatList.setCurrentRow(0)
    generate_report_window.outputFormatList.setFixedHeight(item_height * visible_items)
    outputFormatLayout.addWidget(generate_report_window.outputFormatList)
    # Label to display current selection of output format
    generate_report_window.outputFormatSelectionLabel = QLabel('')
    generate_report_window.outputFormatSelectionLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
    generate_report_window.outputFormatSelectionLabel.setWordWrap(True)
    generate_report_window.outputFormatSelectionLabel.setFixedHeight(80)
    outputFormatLayout.addWidget(generate_report_window.outputFormatSelectionLabel)
    exportCustomizationLayout.addWidget(outputFormatGroupBox, 0, 0)
    # Export Context Group Box
    exportContextGroupBox = QGroupBox("Ausgabedatei - Aufbau", generate_report_window)
    exportContextGroupBox.setFixedHeight(200)
    exportContextLayout = QVBoxLayout(exportContextGroupBox)
    generate_report_window.exportContextList = QListWidget()
    generate_report_window.exportContextList.addItems(['Kontext - gleiche Zeile', 'Kontext - mittelgroß', 'Kontext - umfangreich', 'Kompakte Zusammenfassung ohne Kontext'])
    generate_report_window.exportContextList.setCurrentRow(0)
    generate_report_window.exportContextList.setFixedHeight(item_height * visible_items)
    exportContextLayout.addWidget(generate_report_window.exportContextList)
    # Label to display current selection of export context
    generate_report_window.exportContextSelectionLabel = QLabel('')
    generate_report_window.exportContextSelectionLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
    generate_report_window.exportContextSelectionLabel.setWordWrap(True)
    generate_report_window.exportContextSelectionLabel.setFixedHeight(80)
    exportContextLayout.addWidget(generate_report_window.exportContextSelectionLabel)
    exportCustomizationLayout.addWidget(exportContextGroupBox, 0, 1)
    # Connect signals to the update functions
    generate_report_window.outputFormatList.currentItemChanged.connect(update_output_format_label)
    generate_report_window.exportContextList.currentItemChanged.connect(update_export_context_label)
    # Populate both description labels once for the preselected rows; the
    # signals were connected after setCurrentRow(0), so they have not fired.
    update_output_format_label(generate_report_window.outputFormatList.currentItem())
    update_export_context_label(generate_report_window.exportContextList.currentItem())
    #exportLayout.addLayout(exportCustomizationLayout)
    generate_report_window.mainLayout.addLayout(exportCustomizationLayout)
    exportSettingsLayout = QGridLayout()
    # Add a checkbox for Crossmatches
    generate_report_window.crossmatchesCheckbox = QCheckBox('Nur Kreuztreffer (Gibt Entitäten an, die in mehreren Dateien gefunden wurden)', generate_report_window)
    exportSettingsLayout.addWidget(generate_report_window.crossmatchesCheckbox, 0, 0)
    # Timestamp filter: checkbox plus start/end editors, both preset to today
    generate_report_window.timestampFilterCheckbox = QCheckBox('Nach Zeitstempel filtern:', generate_report_window)
    generate_report_window.startDateEdit = QDateTimeEdit(generate_report_window)
    generate_report_window.startDateEdit.setCalendarPopup(True)
    generate_report_window.startDateEdit.setDate(QDate.currentDate())
    generate_report_window.endDateEdit = QDateTimeEdit(generate_report_window)
    generate_report_window.endDateEdit.setCalendarPopup(True)
    generate_report_window.endDateEdit.setDate(QDate.currentDate())
    generate_report_window.timestampFilterQHBoxLayout = QHBoxLayout()
    generate_report_window.timestampFilterQHBoxLayout.addWidget(generate_report_window.timestampFilterCheckbox)
    generate_report_window.timestampFilterQHBoxLayout.addWidget(generate_report_window.startDateEdit)
    generate_report_window.timestampFilterQHBoxLayout.addWidget(generate_report_window.endDateEdit)
    exportSettingsLayout.addLayout(generate_report_window.timestampFilterQHBoxLayout, 1, 0)
    # Flag filter: checkbox plus two radio buttons ("only flagged" preselected)
    generate_report_window.flaggedEntriesLayout = QHBoxLayout()
    generate_report_window.flaggedEntriesCheckbox = QCheckBox('Markierte Einträge berücksichtigen', generate_report_window)
    generate_report_window.flaggedEntriesLayout.addWidget(generate_report_window.flaggedEntriesCheckbox)
    generate_report_window.flaggedRadioButtonLayout = QHBoxLayout()
    generate_report_window.flaggedRadioButton = QRadioButton('Nur markierte Einträge')
    generate_report_window.notflaggedRadioButton = QRadioButton('Nur nicht markierte Einträge')
    generate_report_window.flaggedRadioButtonLayout.addWidget(generate_report_window.flaggedRadioButton)
    generate_report_window.flaggedRadioButtonLayout.addWidget(generate_report_window.notflaggedRadioButton)
    generate_report_window.flaggedRadioButton.setChecked(True)
    generate_report_window.flaggedEntriesLayout.addLayout(generate_report_window.flaggedRadioButtonLayout)
    exportSettingsLayout.addLayout(generate_report_window.flaggedEntriesLayout, 2, 0)
    generate_report_window.openAfterExportCheckbox = QCheckBox('Datei nach dem Export oeffnen', generate_report_window)
    exportSettingsLayout.addWidget(generate_report_window.openAfterExportCheckbox, 3, 0)
    # Output File Path Label
    generate_report_window.outputFilePathLabel = QLabel('', generate_report_window)
    generate_report_window.updateOutputFilePathLabel()  # Call this method to set the initial text
    exportSettingsLayout.addWidget(generate_report_window.outputFilePathLabel, 0, 1)
    generate_report_window.setOutputFileNameLineEdit = QLineEdit(generate_report_window)
    generate_report_window.setOutputFileNameLineEdit.setPlaceholderText(' Eigenen Dateinamen spezifizieren...')
    exportSettingsLayout.addWidget(generate_report_window.setOutputFileNameLineEdit, 3, 1)
    generate_report_window.mainLayout.addLayout(exportSettingsLayout)
    # Bottom button grid
    bottomLayout = QGridLayout()
    #generate_report_window.customizeResultsButton = QPushButton('Customize Results (WiP)', generate_report_window)
    #generate_report_window.customizeResultsButton.setDisabled(True)
    #generate_report_window.customizeResultsButton.clicked.connect(generate_report_window.openCustomizeResultsDialog)
    #bottomLayout.addWidget(generate_report_window.customizeResultsButton, 0, 0)
    generate_report_window.openOutputFilepathButton = QPushButton('Ausgabeverzeichnis öffnen...', generate_report_window)
    generate_report_window.openOutputFilepathButton.clicked.connect(generate_report_window.openOutputFilepath)
    bottomLayout.addWidget(generate_report_window.openOutputFilepathButton, 0, 1)
    # Start Export Button
    generate_report_window.startExportButton = QPushButton('Report generieren...', generate_report_window)
    generate_report_window.startExportButton.clicked.connect(generate_report_window.start_export_process)
    generate_report_window.startExportButton.setStyleSheet(highlited_button_style)
    bottomLayout.addWidget(generate_report_window.startExportButton, 0, 2)
    # Output File Directory
    generate_report_window.selectOutputFileButton = QPushButton('Ausgabeverzeichnis setzen...', generate_report_window)
    generate_report_window.selectOutputFileButton.clicked.connect(generate_report_window.selectOutputFile)
    bottomLayout.addWidget(generate_report_window.selectOutputFileButton, 1, 1)
    # Exit Button
    generate_report_window.exitButton = QPushButton('Schließen', generate_report_window)
    generate_report_window.exitButton.clicked.connect(generate_report_window.close)
    bottomLayout.addWidget(generate_report_window.exitButton, 1, 2)
    generate_report_window.mainLayout.addLayout(bottomLayout)
    #Easteregg
    #generate_report_window.extendedLayout.addLayout(generate_report_window.mainLayout)
    #generate_report_window.terminalEasterEgg = TerminalEasterEgg(generate_report_window)
    #generate_report_window.terminalEasterEgg.hide()
    #logoLabel.clicked.connect(generate_report_window.terminalEasterEgg.show)
    # NOTE(review): mainLayout was already constructed with the window as its
    # parent, so this call looks redundant - kept to leave behavior untouched.
    generate_report_window.setLayout(generate_report_window.mainLayout)

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

View File

@ -0,0 +1,372 @@
import sys
import os
import logging
import shutil
import multiprocessing
import logline_leviathan.gui.versionvars as versionvars
from PyQt5.QtWidgets import QApplication, QWidget, QMessageBox, QLabel
from PyQt5.QtCore import QTimer
from logline_leviathan.file_processor.file_processor_thread import FileProcessorThread
from logline_leviathan.database.database_manager import EntityTypesTable, EntitiesTable, session_scope
from logline_leviathan.database.database_utility import DatabaseUtility
from logline_leviathan.database.database_operations import DatabaseOperations
from logline_leviathan.gui.checkbox_panel import *
from logline_leviathan.gui.initui_mainwindow import initialize_main_window
from logline_leviathan.gui.generate_report import GenerateReportWindow
from logline_leviathan.gui.generate_wordlist import GenerateWordlistWindow
from logline_leviathan.gui.ui_helper import UIHelper, format_time
from logline_leviathan.gui.settings_gui import FileSettingsWindow, AnalysisSettingsWindow
from logline_leviathan.gui.query_window import ResultsWindow
from logline_leviathan.database.query import DatabaseGUIQuery
from sqlalchemy import func
from datetime import datetime
class MainWindow(QWidget):
def __init__(self, app, db_init_func, directory=""):
    """Set up database access, helper windows, the UI and the periodic update timer.

    Args:
        app: Stored as ``self.app`` and passed on to GenerateReportWindow
            and to the UI builder.
        db_init_func: Callable that is invoked once here without arguments
            and also handed to DatabaseOperations.
        directory: Optional directory; when it is non-empty and exists, all
            files below it are added to the file selection.
    """
    super().__init__()
    # Resolve the configured level name to the numeric constant of the
    # logging module; anything that is not an int is reported as invalid.
    logging_level = getattr(logging, versionvars.loglevel, None)
    if isinstance(logging_level, int):
        logging.basicConfig(level=logging_level)
    else:
        logging.warning(f"Invalid log level: {versionvars.loglevel}")
    self.app = app
    # NOTE(review): ui_helper is created again after initUI() below, which
    # replaces this instance - confirm whether both constructions are needed.
    self.ui_helper = UIHelper(self)
    self.db_init_func = db_init_func
    db_init_func()
    self.database_operations = DatabaseOperations(self, db_init_func)
    self.current_db_path = 'entities.db' # Default database path
    self.directory = directory
    self.filePaths = []
    # Log directory is derived from the current working directory and
    # created if it does not exist yet.
    self.log_dir = os.path.join(os.getcwd(), 'output', 'entities_export', 'log')
    os.makedirs(self.log_dir, exist_ok=True)
    self.external_db_path = None
    self.processing_thread = None
    # Placeholder only; the real window is assigned a few lines further down.
    self.generate_report_window = None
    # databaseTree is created before initUI() because the UI builder adds
    # main_window.databaseTree to a layout.
    self.databaseTree = DatabasePanel()
    self.db_query_instance = DatabaseGUIQuery()
    self.results_window = ResultsWindow(self.db_query_instance, parent=self)
    self.generate_wordlist_window = GenerateWordlistWindow(self.db_query_instance)
    self.generate_report_window = GenerateReportWindow(self.app)
    self.analysis_settings_window = AnalysisSettingsWindow(self)
    # Refresh the whole application state whenever the settings window
    # emits parsersUpdated.
    self.analysis_settings_window.parsersUpdated.connect(self.refreshApplicationState)
    # The settings window receives the same list object as self.filePaths.
    self.file_selection_window = FileSettingsWindow(self.filePaths, self)
    self.database_operations.ensureDatabaseExists()
    self.initUI()
    self.ui_helper = UIHelper(self)
    self.database_utility = DatabaseUtility(self)
    # Load the entity definitions from YAML and sync them into the database.
    yaml_data = self.database_operations.loadRegexFromYAML()
    self.database_operations.populate_and_update_entities_from_yaml(yaml_data)
    # Load data and update checkboxes
    self.refreshApplicationState()
    self.database_operations.checkScriptPresence()
    # Load files from the directory if specified
    if self.directory and os.path.isdir(self.directory):
        self.loadFilesFromDirectory(self.directory)
    # Timer calling performPeriodicUpdate every ui_update_interval
    # milliseconds; the two flags start out cleared.
    self.ui_update_interval = 500
    self.needs_tree_update = False
    self.needs_checkbox_update = False
    self.update_timer = QTimer(self)
    self.update_timer.timeout.connect(self.performPeriodicUpdate)
    self.update_timer.start(self.ui_update_interval)
def loadFilesFromDirectory(self, directory):
    """Add every file found below *directory* to the file selection.

    The directory tree is walked recursively with ``os.walk``; each file's
    joined path is appended to ``self.filePaths`` and the file count label
    is refreshed afterwards.

    Args:
        directory: Root directory to walk.
    """
    for current_dir, _subdirs, names in os.walk(directory):
        self.filePaths.extend(os.path.join(current_dir, name) for name in names)
    self.updateFileCountLabel()
def initUI(self):
    """Build the window's widgets by delegating to initialize_main_window."""
    initialize_main_window(self, self.app)
def openFileNameDialog(self):
    """Run the UI helper's file dialog, then refresh the file table and count label."""
    self.ui_helper.openFileNameDialog()
    # Keep the file settings window and the count label in sync afterwards.
    self.file_selection_window.populateTable()
    self.updateFileCountLabel()
def openDirNameDialog(self):
    """Let the user pick a directory, then refresh the selection table and counter."""
    helper = self.ui_helper
    helper.openDirNameDialog()
    # Both views read the selection, so they are refreshed after the dialog.
    selection_window = self.file_selection_window
    selection_window.populateTable()
    self.updateFileCountLabel()
def clearFileSelection(self):
    """Empty the file selection, then refresh the selection table and counter."""
    helper = self.ui_helper
    helper.clearFileSelection()
    selection_window = self.file_selection_window
    selection_window.populateTable()
    self.updateFileCountLabel()
def removeSingleFile(self, file):
    """Remove one entry from the selection, then refresh the table and counter.

    Args:
        file: The entry to remove; passed unchanged to the UI helper.
    """
    helper = self.ui_helper
    helper.removeSingleFile(file)
    selection_window = self.file_selection_window
    selection_window.populateTable()
    self.updateFileCountLabel()
def refreshApplicationState(self):
    """Bring every dependent view in line with the current selection and database.

    A new ``FileProcessorThread`` for the current file selection is stored in
    ``self.processing_thread`` and its checkbox-update signal is connected to
    the report and wordlist windows. The thread is only constructed here; it
    is not started.
    """
    # NOTE(review): this replaces whatever thread object was stored before;
    # callers appear to invoke it only while no analysis is running - confirm.
    worker = FileProcessorThread(self.filePaths)
    self.processing_thread = worker
    option_windows = (self.generate_report_window, self.generate_wordlist_window)
    for window in option_windows:
        worker.update_checkboxes_signal.connect(window.updateCheckboxes)
    for window in option_windows:
        window.updateCheckboxes()
    self.updateDatabaseStatusLabel()
    self.updateTree()
    self.updateFileCountLabel()
def updateFileCountLabel(self):
    """Show the number of selected files and their combined size in the counter label."""
    selection = self.filePaths
    selected = len(selection)
    total_size = self.ui_helper.calculate_total_size(selection)
    self.fileCountLabel.setText(f" {selected} Dateien selektiert // {total_size}")
def updateTree(self):
    """Redraw the database tree panel using a fresh database session."""
    with session_scope() as db_session:
        tree_panel = self.databaseTree
        tree_panel.updateTree(db_session)
def updateDatabaseStatusLabel(self):
    """Display the stored entity count and the database file size in the status label."""
    with session_scope() as db_session:
        entity_count = db_session.query(EntitiesTable).count()
    # The size is read from the file referenced by ``current_db_path``.
    size_bytes = os.path.getsize(self.current_db_path)
    size_mb = size_bytes / (1024 * 1024)
    self.databaseStatusLabel.setText(
        f"Anzahl Entitäten: {entity_count}\nDatenbank-Größe: {size_mb:.2f} MB"
    )
def onTreeUpdateSignalReceived(self):
    """Mark the tree as stale; ``performPeriodicUpdate`` redraws it on its next run."""
    self.needs_tree_update = True
def onCheckboxUpdateSignalReceived(self):
    """Mark the checkboxes as stale; ``performPeriodicUpdate`` refreshes them on its next run."""
    self.needs_checkbox_update = True
def performPeriodicUpdate(self):
    """Apply the UI refreshes that were requested since the previous call.

    ``needs_tree_update`` and ``needs_checkbox_update`` are set by the signal
    handlers. A pending checkbox refresh also redraws the tree, so when both
    flags are set the tree is redrawn once instead of twice.
    """
    tree_pending = self.needs_tree_update
    checkboxes_pending = self.needs_checkbox_update

    if checkboxes_pending:
        self.generate_report_window.updateCheckboxes()
        self.generate_wordlist_window.updateCheckboxes()
    if tree_pending or checkboxes_pending:
        self.updateTree()

    # Flags are cleared only after the work succeeded, so a failed refresh
    # is retried on the next call.
    if tree_pending:
        self.needs_tree_update = False
    if checkboxes_pending:
        self.needs_checkbox_update = False
def execute_query_wrapper(self, query_text):
    """Open the results window and run ``query_text`` in it.

    Args:
        query_text: Search expression handed to the results window.
    """
    window = self.results_window
    window.show()
    window.set_query_and_execute(query_text)
def quickStartWorkflow(self):
    """Run the one-click workflow from a clean state.

    Steps, in order: clear the file selection, purge the database, reload the
    entity definitions from YAML, ask for a directory, start the analysis.
    """
    self.clearFileSelection()
    self.purgeDatabase()

    entity_definitions = self.database_operations.loadRegexFromYAML()
    self.database_operations.populate_and_update_entities_from_yaml(entity_definitions)

    self.openDirNameDialog()
    self.processFiles()
def purgeDatabase(self):
    """Forward the purge request to the database utility."""
    utility = self.database_utility
    utility.purgeDatabase()
def importDatabase(self):
    """Forward the import request to the database utility."""
    utility = self.database_utility
    utility.importDatabase()
def exportDatabase(self):
    """Forward the export request to the database utility."""
    utility = self.database_utility
    utility.exportDatabase()
def processFiles(self):
    """Start analysing the selected files on a background ``FileProcessorThread``.

    Nothing is started when an analysis is already running (the standard
    "operation impossible" warning is shown instead) or when no files are
    selected (an information box is shown). Any exception raised while
    setting up the worker is logged and not propagated.
    """
    try:
        if self.isProcessing():
            # Overwriting the reference to a running worker would start a
            # second analysis and drop the only handle to the first thread.
            self.showProcessingWarning()
            return

        fileCount = len(self.filePaths)
        if fileCount == 0:
            self.message("Information", "Keine Dateien Selektiert. Selektion vornehmen.")
            return

        self.progressBar.setMaximum(fileCount)
        self.db_init_func()

        worker = FileProcessorThread(self.filePaths)
        self.processing_thread = worker
        worker.finished.connect(self.onProcessingFinished)
        worker.update_progress.connect(self.progressBar.setValue)
        worker.update_status.connect(self.statusLabel.setText)
        worker.update_rate.connect(self.updateEntityRate)
        # Tree and checkbox refreshes are only flagged here; the flags are
        # consumed by performPeriodicUpdate rather than acted on per signal.
        worker.update_tree_signal.connect(self.onTreeUpdateSignalReceived)
        worker.update_checkboxes_signal.connect(self.onCheckboxUpdateSignalReceived)
        worker.start()
        logging.debug("Thread started, isRunning: %s", worker.isRunning())
    except Exception as e:
        logging.error("Error processing files: %s", e)
def abortAnalysis(self):
    """Stop a running analysis on the user's request and refresh the UI.

    Does nothing when there is no worker or the worker is not running.
    """
    worker = self.processing_thread
    if worker and self.isProcessing():
        logging.debug("Abort Analysis initiated.")
        worker.abort()
        # wait() is called so the worker is no longer running when the UI is refreshed.
        worker.wait()
        self.statusLabel.setText(" Verarbeitung durch User unterbrochen.")
        logging.info("Analysis aborted manually.")
        self.refreshApplicationState()
def isProcessing(self):
    """Return whether a worker thread exists and reports that it is running."""
    worker = self.processing_thread
    if worker is None:
        return False
    return worker.isRunning()
def onProcessingFinished(self):
    """Summarise a finished analysis, write the file logs and export the database.

    Steps:
      1. Write two timestamped CSV logs into ``self.log_dir``: the unsupported
         files and the processed files (selection minus unsupported files,
         in selection order, without duplicates).
      2. Show the summary dialog, with a link to the unsupported-files log
         when at least one file was skipped.
      3. If ``self.external_db_path`` is set, copy the database file referenced
         by ``self.current_db_path`` to that location.
      4. Refresh the UI and drop the reference to the worker.

    Does nothing when no worker is stored in ``self.processing_thread``.
    """
    worker = self.processing_thread
    if not worker:
        return

    summary = self.getProcessingSummary()
    unsupported_files_count = worker.getUnsupportedFilesCount()

    # Generate CSV files for unprocessed and processed files.
    current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    unprocessed_files_log = os.path.join(self.log_dir, f"{current_timestamp}_unprocessed_files_log.csv")
    processed_files_log = os.path.join(self.log_dir, f"{current_timestamp}_processed_files_log.csv")

    unsupported_files = worker.all_unsupported_files
    self.ui_helper.generate_files_log(unprocessed_files_log, unsupported_files)

    # dict.fromkeys de-duplicates while keeping the selection order, so the
    # processed-files log is reproducible instead of following set ordering.
    skipped = set(unsupported_files)
    processed_files = list(dict.fromkeys(
        path for path in worker.file_paths if path not in skipped
    ))
    self.ui_helper.generate_files_log(processed_files_log, processed_files)

    if unsupported_files_count > 0:
        summary += f"\n{unsupported_files_count} nicht unterstützte Dateien übersprungen."
        link_label = QLabel('<a href="#">Open list of all unsupported files...</a>')
        link_label.linkActivated.connect(lambda: self.ui_helper.openFile(unprocessed_files_log))
        self.message("Analyse-Zusammenfassung", summary, link_label)
    else:
        self.message("Analyse-Zusammenfassung", summary)

    if self.external_db_path:
        try:
            # Copy the database that this window tracks rather than a
            # hard-coded file name, matching updateDatabaseStatusLabel.
            shutil.copy(self.current_db_path, self.external_db_path)
            self.statusLabel.setText(f" Datenbank gespeichert unter: {self.external_db_path}")
        except Exception as e:
            logging.error(f"Error exporting database: {e}")
            self.statusLabel.setText(f" Fehler beim Exportieren der Datenbank: {e}")

    self.refreshApplicationState()
    self.processing_thread = None
def openLogDir(self):
    """Open the log directory through the UI helper."""
    target = self.log_dir
    self.ui_helper.openFile(target)
def getProcessingSummary(self):
    """Return a text summary with the number of stored entities per entity type.

    Returns:
        A header line followed by one ``"<gui name>: <count> gefunden"`` line
        for each entity type returned by the grouped query.
    """
    with session_scope() as session:
        counts_by_type = (
            session.query(EntityTypesTable.gui_name, func.count(EntitiesTable.entities_id))
            .join(EntityTypesTable, EntitiesTable.entity_types_id == EntityTypesTable.entity_type_id)
            .group_by(EntityTypesTable.gui_name)
            .all()
        )
        lines = [f"{gui_name}: {count} gefunden\n" for gui_name, count in counts_by_type]
    return "Analyse-Zusammenfassung:\n\n" + "".join(lines)
def getUnsupportedFilesCount(self):
    """Return the worker's unsupported-file count, or 0 when there is no worker."""
    worker = self.processing_thread
    return worker.getUnsupportedFilesCount() if worker else 0
def showProcessingWarning(self):
    """Show the message box used when an action is requested during a running analysis."""
    title = "Operation unmöglich"
    text = "Diese Operation kann nicht durchgeführt werden, während Dateien analysiert werden. Warten oder abbrechen."
    self.message(title, text)
def updateEntityRate(self, entity_rate, total_entities, file_rate, total_files_processed, estimated_time, data_rate_kibs):
    """Render the worker's throughput figures into the rate label.

    Args:
        entity_rate: Entities per second, shown with two decimals.
        total_entities: Total entity count shown next to the entity rate.
        file_rate: Files per second, shown with two decimals.
        total_files_processed: Total file count shown next to the file rate.
        estimated_time: Value passed to ``format_time`` for the "ETC" field.
        data_rate_kibs: Data rate shown as KiB/s with two decimals.
    """
    formatted_time = format_time(estimated_time)
    total_cpu_cores = multiprocessing.cpu_count()
    segments = (
        f"{entity_rate:.2f} entities/second, Total: {total_entities}",
        f"{file_rate:.2f} files/second, Total: {total_files_processed}",
        f"{data_rate_kibs:.2f} KiB/s",
        f"ETC: {formatted_time}",
        f"CPU Cores: {total_cpu_cores}",
    )
    self.entityRateLabel.setText(" // ".join(segments))
def openGenerateReportWindow(self):
    """Show the report window, unless an analysis is currently running."""
    if self.isProcessing():
        self.showProcessingWarning()
        return
    window = self.generate_report_window
    if not window:
        # Create the window on demand if none is stored.
        window = GenerateReportWindow(self.app)
        self.generate_report_window = window
    window.show()
def openGenerateWordlistWindow(self):
    """Show the wordlist window, unless an analysis is currently running.

    If no window is stored, one is created with ``self.db_query_instance``,
    the same argument ``__init__`` uses for this window type.
    """
    if self.isProcessing():
        self.showProcessingWarning()
        return
    if not self.generate_wordlist_window:
        # Pass the query object, not the application object.
        self.generate_wordlist_window = GenerateWordlistWindow(self.db_query_instance)
    self.generate_wordlist_window.show()
def openFileSettingsWindow(self):
    """Show the file-selection window, unless an analysis is currently running."""
    if self.isProcessing():
        self.showProcessingWarning()
        return
    window = self.file_selection_window
    if not window:
        # Create the window on demand if none is stored.
        window = FileSettingsWindow(self.filePaths, self)
        self.file_selection_window = window
    window.show()
def openAnalysisSettingsWindow(self):
    """Show the analysis-settings window, unless an analysis is currently running."""
    if self.isProcessing():
        self.showProcessingWarning()
        return
    window = self.analysis_settings_window
    if not window:
        # Create the window on demand if none is stored.
        window = AnalysisSettingsWindow(self)
        self.analysis_settings_window = window
    window.show()
def message(self, title, text, extra_widget=None):
    """Show a modal, dark-styled message box.

    Args:
        title: Window title of the box.
        text: Main message text.
        extra_widget: Optional widget that is added to the box's layout at
            grid position (1, 1).
    """
    dark_style = """
QMessageBox {
background-color: #282C34; /* Dark grey background */
}
QLabel {
color: white; /* White text */
}
QPushButton {
color: white; /* White text for buttons */
background-color: #4B5563; /* Dark grey background for buttons */
border-style: solid;
border-width: 2px;
border-radius: 5px;
border-color: #4A4A4A;
padding: 6px;
min-width: 80px;
min-height: 30px;
}
"""
    dialog = QMessageBox()
    dialog.setStyleSheet(dark_style)
    dialog.setIcon(QMessageBox.Warning)
    dialog.setWindowTitle(title)
    dialog.setText(text)
    if extra_widget:
        dialog.setInformativeText('')
        dialog.layout().addWidget(extra_widget, 1, 1)
    dialog.exec_()
def main():
    """Create the Qt application, show the main window and run the event loop."""
    application = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,32 @@
from PyQt5.QtWidgets import QWidget, QVBoxLayout, QTextEdit
from PyQt5.QtCore import QTimer
import random
class TerminalEasterEgg(QWidget):
    """Read-only, terminal-styled text area that appends a random fake status line every second."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.initUI()

    def initUI(self):
        """Create the text area and start the timer that feeds it."""
        container = QVBoxLayout(self)

        terminal = QTextEdit(self)
        terminal.setStyleSheet("background-color: black; color: green;")
        terminal.setReadOnly(True)
        self.terminal_widget = terminal
        container.addWidget(terminal)

        # Timer for fake prompts; the interval is 1000 ms.
        ticker = QTimer(self)
        ticker.timeout.connect(self.update_terminal)
        self.terminal_timer = ticker
        ticker.start(1000)

    def update_terminal(self):
        """Append one randomly chosen fake prompt to the text area."""
        fake_prompts = (
            "Decrypting data...",
            "Accessing secure server...",
            "Running diagnostics...",
            "Analyzing patterns...",
            "Compiling code...",
            "Scanning network...",
            # Add more fake prompts as desired
        )
        line = random.choice(fake_prompts)
        self.terminal_widget.append(line)

View File

@ -0,0 +1,666 @@
from PyQt5.QtWidgets import QMessageBox, QCheckBox, QGroupBox, QDateTimeEdit,QProgressBar, QMainWindow, QTableWidget, QTableWidgetItem, QLineEdit, QStyledItemDelegate, QTextEdit, QWidget, QVBoxLayout, QHBoxLayout, QPushButton, QComboBox, QLabel
from PyQt5.QtCore import pyqtSignal, Qt, pyqtSignal, QDateTime, QThread
from PyQt5.QtGui import QTextDocument, QTextOption
import re
import logging
import html
import datetime
from logline_leviathan.database.query import DatabaseGUIQuery, QueryThread
from logline_leviathan.database.database_manager import EntitiesTable, session_scope
# Column headers of the results table, in display order.
COLUMN_NAMES = [
    'Distinct Entity',
    'Entity Type',
    'File Name',
    'Line Number',
    'Timestamp',
    'Context',
    'Match Score',
    'Flag',
    'Identifier',
]
# Pixel width per column, positionally matching COLUMN_NAMES. Adjust as needed.
COLUMN_WIDTHS = [
    200,  # Distinct Entity
    100,  # Entity Type
    250,  # File Name
    100,  # Line Number
    120,  # Timestamp
    600,  # Context
    80,   # Match Score
    100,  # Flag
    40,   # Identifier
]
DEFAULT_ROW_HEIGHT = 120
FILTER_EDIT_WIDTH = 150
class DataProcessor(QThread):
    """Background thread that hands ``total_data`` to the GUI in sorted chunks.

    Each chunk is a slice of ``chunk_size`` consecutive entries, sorted by the
    entry's second element in descending order, and is emitted through
    ``dataProcessed``.
    """

    dataProcessed = pyqtSignal(list)

    def __init__(self, total_data, search_terms, chunk_size=50):
        """Store the data to be chunked.

        Args:
            total_data: Sequence of entries; each entry must support ``entry[1]``.
            search_terms: Stored on the instance; not used by ``run``.
            chunk_size: Number of entries per emitted chunk.

        Raises:
            ValueError: If ``chunk_size`` is not an integer or is not positive.
        """
        super().__init__()
        self.total_data = total_data
        self.search_terms = search_terms
        # Ensure chunk_size is an integer
        if not isinstance(chunk_size, int):
            raise ValueError(f"chunk_size must be an integer, got {type(chunk_size)}")
        # A zero step would make range() fail inside run(); reject it up front.
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be positive, got {chunk_size}")
        self.chunk_size = chunk_size

    def run(self):
        """Emit every chunk exactly once, in input order."""
        # A single pass covers the first chunk and all remaining data; a second
        # pass over the data after the first chunk would emit those rows twice.
        for start in range(0, len(self.total_data), self.chunk_size):
            window = self.total_data[start:start + self.chunk_size]
            chunk = sorted(window, key=lambda x: x[1], reverse=True)
            self.dataProcessed.emit(chunk)
class ResultsWindow(QMainWindow):
    """Search window that runs a query in the background and lists the best hits.

    At most ``MAX_DISPLAYED_RESULTS`` rows are shown, ordered by match score.
    All filter widgets act together: a row is visible only if it satisfies
    every active filter (text fields, entity type and date range).
    """

    MAX_DISPLAYED_RESULTS = 512
    # Item-backed text columns that are painted by the highlighting delegate.
    _HIGHLIGHTED_COLUMNS = (0, 1, 2, 3, 4)
    _TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, db_query_instance, parent=None):
        """Build the window.

        Args:
            db_query_instance: Query object stored on the window and handed to
                the initial (never started) ``QueryThread``.
            parent: Optional parent widget.
        """
        super(ResultsWindow, self).__init__(parent)
        self.db_query_instance = db_query_instance
        self.total_data = []
        # Active text filters: column index -> text typed into the filter field.
        self.current_filters = {}
        self.query_text = None
        # NOTE(review): searches run on this separate instance, not on
        # db_query_instance - confirm that this is intended.
        self.database_query = DatabaseGUIQuery()
        self.query_thread = QueryThread(self.db_query_instance, self.query_text)
        self.sorted_results = []
        self.top_results = []
        self.highlight_delegate = None
        self._highlight_terms = None

        self.setWindowTitle("Suchergebnis")
        self.setGeometry(800, 600, 1600, 700)  # Adjust size as needed

        # Create central widget and set layout
        centralWidget = QWidget(self)
        self.setCentralWidget(centralWidget)
        mainLayout = QVBoxLayout(centralWidget)

        mainLayout.addLayout(self._buildQueryRow())
        mainLayout.addWidget(QLabel(' Die Suche nach mehreren bestimmten Begriffen, Entitätentypen (Kurzform z.B. <ipv4>), Dateinamen und Timestamps ist möglich.\n Nach erfolgreicher Abfrage der Datenbank werden die Ergebnisse tabellarisch dargestellt. Sollte die Anzahl der Suchergebnisse sehr hoch sein, dauert der Prozess einige Sekunden. Die Anzahl der Ergebnisse ist aus Performancegründen auf die besten 512 beschränkt.\n Groß- und Kleinschreibung wird bei Zitatsuchen berücksichtigt; Werden mehrere Suchbegriffe eingegeben, fließt deren Abstand und Reihenfolge ins Ergebnis mit ein.', self))

        # Stylesheet for the entire ResultsWindow
        stylesheet = """
/* Styles for QTableWidget and headers */
QTableWidget, QHeaderView::section {
background-color: #2A2F35;
color: white;
border: 1px solid #4A4A4A;
}
/* Style for QLineEdit */
QLineEdit {
background-color: #3A3F44;
color: white;
border: 1px solid #4A4A4A;
}
/* Style for QPushButton */
QPushButton {
background-color: #4B5563;
color: white;
border-radius: 4px;
padding: 5px;
margin: 5px;
}
QPushButton:hover {
background-color: #5C677D;
}
QPushButton:pressed {
background-color: #2A2F35;
}
/* Style for empty rows and other areas */
QWidget {
background-color: #2A2F35;
color: white;
}
"""
        self.setStyleSheet(stylesheet)

        self.resultsTable = QTableWidget(self)
        mainLayout.addLayout(self._buildFilterRow())

        self.resultsTable.setColumnCount(len(COLUMN_NAMES))
        self.resultsTable.setHorizontalHeaderLabels(COLUMN_NAMES)
        self.resultsTable.setSortingEnabled(True)
        mainLayout.addWidget(self.resultsTable)

        mainLayout.addLayout(self._buildBottomRow())

        # Create and add the Dismiss button
        self.dismissButton = QPushButton("Schließen", self)
        self.dismissButton.clicked.connect(self.close)
        mainLayout.addWidget(self.dismissButton)

        self.populate_entity_type_combobox()

    # ------------------------------------------------------------------ UI

    def _buildQueryRow(self):
        """Create the row with the query field, the progress bar and the search button."""
        row = QHBoxLayout()

        self.databaseQueryLineEdit = QueryLineEdit(self)
        self.databaseQueryLineEdit.setPlaceholderText(" Suchbegriff eingeben...")
        self.databaseQueryLineEdit.returnPressed.connect(self.execute_query_from_results_window)
        self.databaseQueryLineEdit.setStyleSheet("""
QLineEdit {
background-color: #3C4043;
color: white;
min-height: 20px;
}
""")
        row.addWidget(self.databaseQueryLineEdit)

        # Range (0, 1) is the idle state; execute_query_from_results_window
        # switches the bar to range (0, 0) while a query is running.
        self.query_progress_bar = QProgressBar(self)
        self.query_progress_bar.setRange(0, 1)
        self.query_progress_bar.setFixedWidth(100)
        row.addWidget(self.query_progress_bar)

        executeQueryButton = QPushButton("Suche ausführen", self)
        executeQueryButton.clicked.connect(self.execute_query_from_results_window)
        row.addWidget(executeQueryButton)
        return row

    def _buildFilterRow(self):
        """Create the horizontal row holding all filter widgets."""
        filterLayout = QHBoxLayout()

        self.clearAllButton = QPushButton("Alle Filteroptionen loeschen", self)
        self.clearAllButton.clicked.connect(self.clearAllFilters)
        filterLayout.addWidget(self.clearAllButton)

        # GroupBox for Entitätenfilter
        entitaten_filter_groupbox = QGroupBox("Entitätenfilter", self)
        entitaten_filter_layout = QVBoxLayout()
        entitaten_filter_groupbox.setLayout(entitaten_filter_layout)
        self.distinct_entity_edit = self._makeFilterEdit(
            " Enter distinct entity...", self.applyDistinctEntityTextFilter)
        entitaten_filter_layout.addWidget(self.distinct_entity_edit)
        self.entityTypeComboBox = QComboBox()
        entitaten_filter_layout.addWidget(self.entityTypeComboBox)
        filterLayout.addWidget(entitaten_filter_groupbox)

        # GroupBox for Fundstelle
        fundstelle_groupbox = QGroupBox("Fundstelle", self)
        fundstelle_layout = QVBoxLayout()
        fundstelle_groupbox.setLayout(fundstelle_layout)
        self.file_name_edit = self._makeFilterEdit(
            " Enter file name...", self.applyFileNameTextFilter)
        fundstelle_layout.addWidget(self.file_name_edit)
        self.line_number_edit = self._makeFilterEdit(
            " Enter line number...", self.applyLineNumberTextFilter)
        fundstelle_layout.addWidget(self.line_number_edit)
        filterLayout.addWidget(fundstelle_groupbox)

        # GroupBox for timestamp filtering
        self.timestampFilterGroupbox = QGroupBox("Zeitrahmen", self)
        timestampFilterLayout = QVBoxLayout()
        self.timestampFilterGroupbox.setLayout(timestampFilterLayout)
        filterLayout.addWidget(self.timestampFilterGroupbox)

        dateedit_layout = QHBoxLayout()
        self.startDateEdit = QDateTimeEdit(self)
        self.endDateEdit = QDateTimeEdit(self)
        for date_edit in (self.startDateEdit, self.endDateEdit):
            date_edit.setDisplayFormat("yyyy-MM-dd HH:mm:ss")
            date_edit.setCalendarPopup(True)
            dateedit_layout.addWidget(date_edit)
        # Defaults are set before the signals are connected, so building the
        # window does not trigger a filter pass.
        self._resetDateRange()
        self.startDateEdit.dateTimeChanged.connect(self.applyTimestampFilter)
        self.endDateEdit.dateTimeChanged.connect(self.applyTimestampFilter)

        self.timestamp_edit = self._makeFilterEdit(
            " Enter timestamp...", self.applyTimestampTextFilter)
        timestampFilterLayout.addWidget(self.timestamp_edit)
        timestampFilterLayout.addLayout(dateedit_layout)

        # GroupBox for Match Score, Flags, and Identifier
        more_filters_groupbox = QGroupBox("Weitere Filter", self)
        more_filters_layout = QHBoxLayout()
        more_filters_groupbox.setLayout(more_filters_layout)
        filterLayout.addWidget(more_filters_groupbox)

        # NOTE(review): these two checkboxes are displayed but not connected
        # to any filter logic.
        flag_layout = QVBoxLayout()
        flag_true_checkbox = QCheckBox("Flagged", self)
        flag_layout.addWidget(flag_true_checkbox)
        flag_false_checkbox = QCheckBox("Not Flagged", self)
        flag_layout.addWidget(flag_false_checkbox)
        more_filters_layout.addLayout(flag_layout)

        meta_layout = QVBoxLayout()
        self.match_score_edit = self._makeFilterEdit(
            " Enter match score...", self.applyMatchScoreTextFilter)
        meta_layout.addWidget(self.match_score_edit)
        self.identifier_edit = self._makeFilterEdit(
            " Enter identifier...", self.applyIdentifierTextFilter)
        meta_layout.addWidget(self.identifier_edit)
        more_filters_layout.addLayout(meta_layout)

        return filterLayout

    def _makeFilterEdit(self, placeholder, on_change):
        """Create a filter line edit whose ``textChanged`` signal calls ``on_change``."""
        edit = QLineEdit(self)
        edit.setPlaceholderText(placeholder)
        edit.textChanged.connect(on_change)
        return edit

    def _buildBottomRow(self):
        """Create the status label and the three flag buttons below the table."""
        bottomLayout = QHBoxLayout()
        self.query_status_label = QLabel(" Hintergrundsuchprozess läuft...")
        bottomLayout.addWidget(self.query_status_label)

        self.bottomButtonLayout = QHBoxLayout()
        buttons = (
            ("Sichtbare Objekte markieren", self.flagVisibleItems),
            ("Sichtbare Objekte demarkieren", self.unflagVisibleItems),
            ("Sämtliche Markierungen entfernen", self.clearAllFlags),
        )
        for caption, handler in buttons:
            button = QPushButton(caption, self)
            button.clicked.connect(handler)
            self.bottomButtonLayout.addWidget(button)
        bottomLayout.addLayout(self.bottomButtonLayout)
        return bottomLayout

    def _resetDateRange(self):
        """Set the date filter to its default span: 10000 days back until tomorrow."""
        now = QDateTime.currentDateTime()
        self.startDateEdit.setDateTime(now.addDays(-10000))
        self.endDateEdit.setDateTime(now.addDays(1))

    def populate_entity_type_combobox(self):
        """Fill the entity-type combo box and connect its filter slot."""
        entity_types = self.database_query.get_entity_types()
        self.entityTypeComboBox.addItem("Alle verfügbaren Typen", None)  # Default option
        for entity_type in entity_types:
            self.entityTypeComboBox.addItem(entity_type, entity_type)
        # Connected after filling so adding items does not trigger filtering.
        self.entityTypeComboBox.currentIndexChanged.connect(self.applyEntityTypeFilter)

    # --------------------------------------------------------------- query

    def execute_query_from_results_window(self):
        """Start a background search for the text in the query field.

        Does nothing for an empty query. While a search is running, a new
        request is rejected with a status message.
        """
        if self.query_thread is not None and self.query_thread.isRunning():
            # Rebinding self.query_thread here would drop the only reference
            # to a thread that is still running.
            self.query_status_label.setText(" Es läuft bereits eine Suche. Bitte warten, bis sie abgeschlossen ist.")
            return

        # clearContents() keeps the header labels, unlike clear().
        self.resultsTable.clearContents()
        self.resultsTable.setRowCount(0)
        self.query_text = self.databaseQueryLineEdit.text().strip()
        if not self.query_text:
            # Handle empty query case
            return
        self.query_status_label.setText(" Suche wird mit Suchbegriffen " + self.query_text + " durchgeführt...")
        self.query_thread = QueryThread(self.database_query, self.query_text)
        self.query_thread.queryCompleted.connect(self.on_query_completed)
        self.query_thread.start()
        self.query_progress_bar.setRange(0, 0)

    def set_query_and_execute(self, query_text):
        """Put ``query_text`` into the query field and run the search."""
        self.databaseQueryLineEdit.setText(query_text)
        self.execute_query_from_results_window()

    def on_query_completed(self, results_dict):
        """Fill the table with the best-scoring results of a finished query.

        Args:
            results_dict: Mapping of entity id to match score.
        """
        table = self.resultsTable
        total_hits = str(len(results_dict))
        table.setUpdatesEnabled(False)
        table.setSortingEnabled(False)  # Rows must not move while they are filled
        try:
            table.setRowCount(0)
            table.setColumnCount(len(COLUMN_NAMES))
            self.sorted_results = sorted(results_dict.items(), key=lambda x: x[1], reverse=True)
            self.top_results = self.sorted_results[:self.MAX_DISPLAYED_RESULTS]
            self.query_status_label.setText(" Suche abgeschlossen // Anzahl Suchergebnisse: " + total_hits + " // Tabelle wird befüllt (das kann einige Zeit dauern)")
            for entities_id, score in self.top_results:
                self.insert_row(entities_id, score)
            self.query_status_label.setText(" Suche abgeschlossen // Anzahl Suchergebnisse: " + total_hits + " (begrenzt auf die besten 512)")
            table.setHorizontalHeaderLabels(COLUMN_NAMES)
            self.adjust_column_widths()
        finally:
            # Restore the interactive state even if filling the table failed.
            self.query_progress_bar.setRange(0, 1)
            table.setSortingEnabled(True)
            table.sortByColumn(COLUMN_NAMES.index('Match Score'), Qt.DescendingOrder)
            table.setUpdatesEnabled(True)
        table.update()
        self.applyAllFilters()

    def insert_row(self, entities_id, score):
        """Append one result row for ``entities_id``; errors are logged, not raised.

        Args:
            entities_id: Primary key of the entity to display.
            score: Match score shown in the 'Match Score' column.
        """
        try:
            with session_scope() as session:
                # Fetch the entity from the database
                entity = session.query(EntitiesTable).filter(EntitiesTable.entities_id == entities_id).first()
                if not entity:
                    return  # Skip if the entity is not found
                # Copy everything into plain values while the session is open.
                distinct_entity = entity.entity.distinct_entity if entity.entity else ""
                entity_type = entity.regex_library.gui_name if entity.regex_library else ""
                file_name = entity.file.file_name if entity.file else ""
                line_number = str(entity.line_number) if entity.line_number is not None else ""
                entry_timestamp = entity.entry_timestamp.strftime(self._TIMESTAMP_FORMAT) if entity.entry_timestamp else ""
                context_large = entity.context.context_large if entity.context else ""
                flagged = entity.flag

            table = self.resultsTable
            search_terms = (self.query_text or "").split()
            self._ensureHighlightDelegate(search_terms)

            # Insert a new row in the table
            row_position = table.rowCount()
            table.insertRow(row_position)

            # Set the values for each column
            plain_cells = (distinct_entity, entity_type, file_name, line_number, entry_timestamp)
            for column, value in enumerate(plain_cells):
                table.setItem(row_position, column, QTableWidgetItem(value))
            table.setCellWidget(row_position, COLUMN_NAMES.index('Context'),
                                ScrollableTextWidget(context_large, search_terms, distinct_entity))
            table.setItem(row_position, COLUMN_NAMES.index('Match Score'),
                          NumericTableWidgetItem(str(score)))
            table.setCellWidget(row_position, COLUMN_NAMES.index('Flag'),
                                FlagButton(entities_id, flagged))
            table.setItem(row_position, COLUMN_NAMES.index('Identifier'),
                          QTableWidgetItem(str(entities_id)))
            table.setRowHeight(row_position, DEFAULT_ROW_HEIGHT)
        except Exception as e:
            logging.error(f"Error inserting row: {e}")

    def _ensureHighlightDelegate(self, search_terms):
        """Install one highlighting delegate per set of search terms.

        The delegate is shared by all rows; it is only replaced when the
        search terms differ from those of the installed delegate.
        """
        if self.highlight_delegate is not None and self._highlight_terms == search_terms:
            return
        previous = self.highlight_delegate
        self.highlight_delegate = HighlightDelegate(self.resultsTable, search_terms)
        self._highlight_terms = list(search_terms)
        for column_index in self._HIGHLIGHTED_COLUMNS:
            self.resultsTable.setItemDelegateForColumn(column_index, self.highlight_delegate)
        if previous is not None:
            # The old delegate is parented to the table; release it explicitly.
            previous.deleteLater()

    def adjust_column_widths(self):
        """Apply the widths from ``COLUMN_WIDTHS`` to the table columns."""
        for i, width in enumerate(COLUMN_WIDTHS):
            self.resultsTable.setColumnWidth(i, width)

    # --------------------------------------------------------------- flags

    def _setFlagOnVisibleRows(self, flagged):
        """Toggle every visible row's flag button whose state differs from ``flagged``."""
        flag_column = COLUMN_NAMES.index('Flag')
        for row in range(self.resultsTable.rowCount()):
            if self.resultsTable.isRowHidden(row):
                continue
            flag_button_widget = self.resultsTable.cellWidget(row, flag_column)
            if flag_button_widget and bool(flag_button_widget.flag) != flagged:
                flag_button_widget.toggle_flag()

    def flagVisibleItems(self):
        """Flag all visible items in the table."""
        self._setFlagOnVisibleRows(True)

    def unflagVisibleItems(self):
        """Unflag all visible items in the table."""
        self._setFlagOnVisibleRows(False)

    def clearAllFlags(self):
        """Ask for confirmation and clear all flags in the table if confirmed."""
        reply = QMessageBox.question(self, 'Confirm Action',
                                     "Sollen tatsächlich alle Markierungen entfernt werden?\nDas wirkt sich auf alle Einträge in der Datenbank aus!",
                                     QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
        if reply != QMessageBox.Yes:
            return
        # Clear all flags in the database
        FlagButton.clearAllFlagsInDatabase()
        # Update the flag buttons in the table to match the database.
        flag_column = COLUMN_NAMES.index('Flag')
        for row in range(self.resultsTable.rowCount()):
            flag_button_widget = self.resultsTable.cellWidget(row, flag_column)
            if flag_button_widget and flag_button_widget.flag:
                flag_button_widget.flag = False
                flag_button_widget.button.setText("_")
                flag_button_widget.update_button_style()

    # ------------------------------------------------------------- filters

    def _textFilterEdits(self):
        """Return ``(column index, line edit)`` pairs for all text filter fields."""
        return (
            (COLUMN_NAMES.index('Distinct Entity'), self.distinct_entity_edit),
            (COLUMN_NAMES.index('File Name'), self.file_name_edit),
            (COLUMN_NAMES.index('Line Number'), self.line_number_edit),
            (COLUMN_NAMES.index('Timestamp'), self.timestamp_edit),
            (COLUMN_NAMES.index('Match Score'), self.match_score_edit),
            (COLUMN_NAMES.index('Identifier'), self.identifier_edit),
        )

    def _rememberTextFilter(self, column_index, filter_text):
        """Store or remove the text filter for one column."""
        if filter_text:
            self.current_filters[column_index] = filter_text
        else:
            # An empty field means "no filter" for that column.
            self.current_filters.pop(column_index, None)

    def _rowPassesTextFilters(self, row, text_filters):
        """Return True if the row contains every needle (case-insensitive substring)."""
        for column_index, needle in text_filters:
            item = self.resultsTable.item(row, column_index)
            if item is None or needle not in item.text().lower():
                return False
        return True

    def _rowPassesDateRange(self, row, timestamp_column, start_date, end_date):
        """Return True if the row has no parseable timestamp or one inside the range."""
        item = self.resultsTable.item(row, timestamp_column)
        if item is None or item.text() == "":
            return True
        try:
            row_timestamp = datetime.datetime.strptime(item.text(), self._TIMESTAMP_FORMAT)
        except ValueError:
            return True
        return start_date <= row_timestamp <= end_date

    def _refreshRowVisibility(self):
        """Show exactly those rows that satisfy every active filter."""
        table = self.resultsTable
        text_filters = [(column, needle.lower())
                        for column, needle in self.current_filters.items() if needle]
        selected_type = self.entityTypeComboBox.currentData()
        entity_type_column = COLUMN_NAMES.index('Entity Type')
        timestamp_column = COLUMN_NAMES.index('Timestamp')
        start_date = self.startDateEdit.dateTime().toPyDateTime()
        end_date = self.endDateEdit.dateTime().toPyDateTime()

        for row in range(table.rowCount()):
            show_row = self._rowPassesTextFilters(row, text_filters)
            if show_row and selected_type is not None:
                type_item = table.item(row, entity_type_column)
                show_row = type_item is not None and type_item.text() == selected_type
            if show_row:
                show_row = self._rowPassesDateRange(row, timestamp_column, start_date, end_date)
            table.setRowHidden(row, not show_row)

    def applyTextFilter(self, column_index, filter_text):
        """Set the text filter of one column and re-evaluate all rows.

        Args:
            column_index: Column the filter applies to.
            filter_text: Case-insensitive substring; empty removes the filter.
        """
        self._rememberTextFilter(column_index, filter_text)
        self._refreshRowVisibility()

    def applyDistinctEntityTextFilter(self):
        """Apply the 'Distinct Entity' filter field."""
        self.applyTextFilter(COLUMN_NAMES.index('Distinct Entity'), self.distinct_entity_edit.text())

    def applyFileNameTextFilter(self):
        """Apply the 'File Name' filter field."""
        self.applyTextFilter(COLUMN_NAMES.index('File Name'), self.file_name_edit.text())

    def applyLineNumberTextFilter(self):
        """Apply the 'Line Number' filter field."""
        self.applyTextFilter(COLUMN_NAMES.index('Line Number'), self.line_number_edit.text())

    def applyMatchScoreTextFilter(self):
        """Apply the 'Match Score' filter field."""
        self.applyTextFilter(COLUMN_NAMES.index('Match Score'), self.match_score_edit.text())

    def applyTimestampTextFilter(self):
        """Apply the 'Timestamp' text filter field."""
        self.applyTextFilter(COLUMN_NAMES.index('Timestamp'), self.timestamp_edit.text())

    def applyIdentifierTextFilter(self):
        """Apply the 'Identifier' filter field."""
        self.applyTextFilter(COLUMN_NAMES.index('Identifier'), self.identifier_edit.text())

    def applyEntityTypeFilter(self):
        """Re-evaluate all rows after the entity-type selection changed."""
        self._refreshRowVisibility()

    def applyTimestampFilter(self):
        """Re-evaluate all rows after the date range changed."""
        self._refreshRowVisibility()

    def applyAllFilters(self):
        """Read every filter widget and update row visibility in a single pass."""
        for column_index, edit in self._textFilterEdits():
            self._rememberTextFilter(column_index, edit.text())
        self._refreshRowVisibility()

    def clearAllFilters(self):
        """Reset every filter widget to its initial state and show the matching rows."""
        self.entityTypeComboBox.setCurrentIndex(0)
        # Use the same default span as at start-up.
        self._resetDateRange()
        for _column_index, edit in self._textFilterEdits():
            edit.clear()
        self.applyAllFilters()
class QueryLineEdit(QLineEdit):
    """Line edit that emits ``returnPressed`` when Return or keypad Enter is pressed."""

    returnPressed = pyqtSignal()

    def keyPressEvent(self, event):
        """Emit ``returnPressed`` for both Enter keys; handle every other key normally."""
        # Qt reports the main Return key and the keypad Enter key as two
        # different key codes; both should start the search.
        if event.key() in (Qt.Key_Return, Qt.Key_Enter):
            self.returnPressed.emit()
        else:
            super().keyPressEvent(event)
class HighlightDelegate(QStyledItemDelegate):
    """Item delegate that paints cell text as rich text with highlighted search terms."""

    def __init__(self, parent=None, search_terms=None):
        """Store the search terms.

        Args:
            parent: Optional parent object.
            search_terms: Raw query terms; only terms starting with ``+`` are
                highlighted by ``get_highlighted_text``.
        """
        super().__init__(parent)
        self.search_terms = search_terms or []

    def paint(self, painter, option, index):
        """Draw the cell's text through a ``QTextDocument`` so it can contain markup."""
        painter.save()
        # Set text color and other options
        options = QTextOption()
        options.setWrapMode(QTextOption.WrapAtWordBoundaryOrAnywhere)
        document = QTextDocument()
        document.setDefaultTextOption(options)
        document.setDefaultFont(option.font)
        # Prepare highlighted text
        text = index.model().data(index)
        highlighted_text = self.get_highlighted_text(text)
        document.setHtml(highlighted_text)
        # Set the width of the document to the cell width
        document.setTextWidth(option.rect.width())
        # Draw the contents
        painter.translate(option.rect.topLeft())
        document.drawContents(painter)
        painter.restore()

    def get_highlighted_text(self, text):
        """Return HTML for ``text`` with every ``+`` search term highlighted.

        The cell text is HTML-escaped so that characters such as ``<`` or
        ``&`` are displayed literally, and the original spelling of each match
        is kept. Terms that are empty after removing punctuation are ignored.

        Args:
            text: Cell text, or ``None`` (treated as an empty string).

        Returns:
            An HTML fragment with newlines converted to ``<br>``.
        """
        if text is None:
            text = ""

        # Collect the cleaned '+' terms, de-duplicated case-insensitively.
        wanted = {}
        for term in self.search_terms:
            if not term.startswith('+'):
                continue
            clean_term = re.sub(r'[^\w\s]', '', term.lstrip('+-')).strip()
            if clean_term:
                wanted.setdefault(clean_term.lower(), clean_term)

        pieces = []
        position = 0
        if wanted:
            # One pattern and a single pass over the raw text, so a term can
            # never match inside the markup generated for another term.
            # Longer terms come first so they win over their own prefixes.
            ordered = sorted(wanted.values(), key=len, reverse=True)
            pattern = re.compile("|".join(re.escape(term) for term in ordered), re.IGNORECASE)
            for match in pattern.finditer(text):
                pieces.append(html.escape(text[position:match.start()]))
                pieces.append(
                    "<span style='background-color: yellow; color: black;'>"
                    f"{html.escape(match.group())}</span>"
                )
                position = match.end()
        pieces.append(html.escape(text[position:]))

        text_with_color = f"<span style='color: white;'>{''.join(pieces)}</span>"
        return text_with_color.replace("\n", "<br>")
class ScrollableTextWidget(QWidget):
def __init__(self, text, search_terms, distinct_entity, parent=None):
super().__init__(parent)
layout = QVBoxLayout(self)
layout.setContentsMargins(0, 0, 0, 0)
self.text_edit = CustomTextEdit(self)
self.text_edit.setReadOnly(True)
# Apply styles including scrollbar styles
self.text_edit.setStyleSheet("""
QTextEdit {
background-color: #2A2F35; /* Dark blue-ish background */
color: white; /* White text */
}
QTextEdit QScrollBar:vertical {
border: none;
background-color: #3A3F44; /* Dark scrollbar background */
width: 8px; /* Width of the scrollbar */
}
QTextEdit QScrollBar::handle:vertical {
background-color: #6E6E6E; /* Scroll handle color */
border-radius: 4px; /* Rounded corners for the handle */
}
QTextEdit QScrollBar::add-line:vertical, QTextEdit QScrollBar::sub-line:vertical {
background: none;
}
""")
# Set the text with highlighting
self.setHighlightedText(text, search_terms, distinct_entity)
layout.addWidget(self.text_edit)
# Scroll to the distinct entity
self.scroll_to_text(distinct_entity)
def setHighlightedText(self, text, search_terms, distinct_entity):
if text is None:
text = ""
# Wrap the original text in a span to maintain color
text_with_color = f"<span style='color: white;'>{text}</span>"
# Highlight distinct entity in a different color
if distinct_entity:
distinct_entity_escaped = html.escape(distinct_entity)
text_with_color = re.sub(
re.escape(distinct_entity_escaped),
lambda match: f"<span style='background-color: blue; color: white;'>{match.group()}</span>",
text_with_color,
flags=re.IGNORECASE
)
for term in search_terms:
# Check if the term starts with '+'
is_positive = term.startswith('+')
clean_term = re.sub(r'[^\w\s]', '', term.lstrip('+-'))
# If the term starts with '+', highlight all matches regardless of case
if is_positive or clean_term.lower() in text.lower():
regex = re.compile(re.escape(clean_term), re.IGNORECASE)
highlighted_term = f"<span style='background-color: yellow; color: black;'>{clean_term}</span>"
text_with_color = regex.sub(highlighted_term, text_with_color)
self.text_edit.setHtml(text_with_color.replace("\n", "<br>"))
def scroll_to_text(self, text):
    """Place the text cursor on the first occurrence of ``text``.

    Does nothing when ``text`` is falsy or is not found in the document.

    Args:
        text: Plain-text string to look up in the text edit's document.
    """
    if not text:
        return
    cursor = self.text_edit.document().find(text)
    # find() yields a null cursor when nothing matches; applying that would
    # only replace the current cursor with an invalid one.
    if not cursor.isNull():
        self.text_edit.setTextCursor(cursor)
class CustomTextEdit(QTextEdit):
    """QTextEdit that decides per wheel event whether the parent may see it."""

    def __init__(self, parent=None):
        super().__init__(parent)
        # Show the vertical scrollbar only when it is needed.
        self.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)

    def wheelEvent(self, event):
        """Handle the wheel event here, then accept or ignore it.

        The event is accepted while the vertical scrollbar is visible and
        ignored otherwise.
        """
        super().wheelEvent(event)
        scrollbar_shown = self.verticalScrollBar().isVisible()
        if not scrollbar_shown:
            event.ignore()
            return
        event.accept()
class FlagButton(QWidget):
    """Button widget that shows and toggles the ``flag`` of one entity row.

    Each toggle is written to the ``EntitiesTable`` row identified by
    ``entities_id``.
    """

    def __init__(self, entities_id, flag, parent=None):
        """Build the button for the given entity.

        Args:
            entities_id: Value matched against ``EntitiesTable.entities_id``.
            flag: Initial flag state; truthy shows "FLAG", falsy shows "_".
            parent: Optional parent widget.
        """
        super().__init__(parent)
        self.entities_id = entities_id
        self.flag = flag
        self.layout = QHBoxLayout(self)
        self.button = QPushButton("FLAG" if flag else "_", self)
        self.update_button_style()
        self.button.clicked.connect(self.toggle_flag)
        self.layout.addWidget(self.button)
        self.layout.setContentsMargins(0, 0, 0, 0)
        self.setLayout(self.layout)

    def toggle_flag(self):
        """Invert the flag, update the button and persist the new value."""
        self.flag = not self.flag
        self.button.setText("FLAG" if self.flag else "_")
        self.update_button_style()
        # Update the flag in the database
        with session_scope() as session:
            entity = session.query(EntitiesTable).filter(
                EntitiesTable.entities_id == self.entities_id
            ).first()
            if entity:
                entity.flag = self.flag
                session.commit()

    @staticmethod
    def clearAllFlagsInDatabase():
        """Set ``flag`` to False on every row of ``EntitiesTable``.

        Issues a single bulk UPDATE instead of loading every entity into
        the session, so memory use does not grow with the table size.
        """
        with session_scope() as session:
            session.query(EntitiesTable).update(
                {EntitiesTable.flag: False}, synchronize_session=False
            )
            session.commit()

    def update_button_style(self):
        """Color the button yellow while flagged; otherwise clear its style."""
        if self.flag:
            self.button.setStyleSheet("QPushButton { background-color: yellow; color: black; }")
        else:
            self.button.setStyleSheet("")
class NumericTableWidgetItem(QTableWidgetItem):
    """Table item that compares by numeric value instead of by text."""

    def __lt__(self, other):
        """Return whether this item is less than ``other``.

        Both texts are compared as floats. If either text is not a valid
        number (for example an empty cell), the base class comparison is
        used instead of raising.
        """
        try:
            return float(self.text()) < float(other.text())
        except (TypeError, ValueError):
            return super().__lt__(other)

View File

@ -0,0 +1,255 @@
from PyQt5.QtWidgets import QGroupBox, QCheckBox, QLineEdit, QDialog, QTableWidget, QVBoxLayout, QTableWidgetItem, QPushButton, QHBoxLayout, QFileDialog
from PyQt5.QtCore import Qt, pyqtSignal
from PyQt5.QtGui import QPalette, QColor
from logline_leviathan.gui.ui_helper import UIHelper
from logline_leviathan.file_processor.file_processor_thread import FileProcessorThread
from logline_leviathan.database.database_manager import EntityTypesTable, session_scope
import os
class FileSettingsWindow(QDialog):
    """Dialog that lists the selected files and lets the user edit the selection.

    Every change is forwarded to ``main_window``. The table is rebuilt from
    ``self.file_paths`` after each operation that may alter the selection.
    """

    def __init__(self, file_paths, main_window=None):
        """Store the file list and the owning window, then build the widgets.

        Args:
            file_paths: List of file path strings shown in the table.
            main_window: Window that owns the selection; also the Qt parent.
        """
        super().__init__(main_window)
        self.file_paths = file_paths
        self.main_window = main_window  # Reference to MainWindow
        self.ui_helper = UIHelper(self)
        self.processing_thread = None
        self.initUI()

    def initUI(self):
        """Create the button row, the filter field, the table and the close button."""
        self.layout = QVBoxLayout(self)

        # Button layout
        self.buttonLayout = QHBoxLayout()
        self.removeSelectedButton = QPushButton("Remove Selected Files")
        self.removeAllButton = QPushButton("Remove All")
        self.addFilesButton = QPushButton("Add Files to Selection")
        self.addDirButton = QPushButton("Add Directory to Selection")
        self.buttonLayout.addWidget(self.removeSelectedButton)
        self.buttonLayout.addWidget(self.removeAllButton)
        self.buttonLayout.addWidget(self.addFilesButton)
        self.buttonLayout.addWidget(self.addDirButton)

        # Connect buttons to functions
        self.removeSelectedButton.clicked.connect(self.removeSelected)
        self.removeAllButton.clicked.connect(self.removeAll)
        self.addFilesButton.clicked.connect(self.openFileNameDialog)
        self.addDirButton.clicked.connect(self.openDirNameDialog)
        self.layout.addLayout(self.buttonLayout)

        self.filterLineEdit = QLineEdit()
        self.filterLineEdit.setPlaceholderText("Filter files...")
        self.filterLineEdit.textChanged.connect(self.filterTableItems)
        self.layout.insertWidget(1, self.filterLineEdit)  # Inserting QLineEdit above the table

        # Table widget
        self.tableWidget = QTableWidget()
        self.tableWidget.setColumnCount(1)
        self.tableWidget.setHorizontalHeaderLabels(["File Path"])
        self.layout.addWidget(self.tableWidget)
        self.populateTable()

        # Close button
        self.closeButton = QPushButton("Close")
        self.layout.addWidget(self.closeButton)
        self.closeButton.clicked.connect(self.close)

    def populateTable(self):
        """Rebuild the table from ``self.file_paths`` and re-apply the filter."""
        self.tableWidget.clearContents()
        self.tableWidget.setRowCount(len(self.file_paths))
        for row, file_path in enumerate(self.file_paths):
            self.tableWidget.setItem(row, 0, QTableWidgetItem(file_path))
        self.tableWidget.resizeColumnsToContents()
        # Row visibility is tracked per row index, so it must be recomputed
        # whenever the rows are replaced.
        self.filterTableItems(self.filterLineEdit.text())

    def filterTableItems(self, text):
        """Hide every row whose path does not contain ``text`` (case-insensitive)."""
        needle = text.lower()
        for row in range(self.tableWidget.rowCount()):
            item = self.tableWidget.item(row, 0)
            # A cell without an item has no text to match against.
            cell_text = item.text().lower() if item is not None else ""
            self.tableWidget.setRowHidden(row, needle not in cell_text)

    def removeSelected(self):
        """Remove the selected rows' files locally and via the main window."""
        if self.isProcessing():
            self.main_window.showProcessingWarning()
            return
        # Get the selected rows from the table
        selected_rows = self.tableWidget.selectionModel().selectedRows()
        # Extract file paths from the selected rows
        selected_files = [self.tableWidget.item(row.row(), 0).text() for row in selected_rows]
        # Remove each selected file
        for file_path in selected_files:
            if file_path in self.file_paths:
                self.file_paths.remove(file_path)  # Remove from local file_paths list
            self.main_window.removeSingleFile(file_path)  # Call method in main_window
        # Refresh the table and update the file count
        self.populateTable()
        self.main_window.updateFileCountLabel()

    def removeAll(self):
        """Ask the main window to clear the selection, then refresh the table."""
        if self.isProcessing():
            self.main_window.showProcessingWarning()
            return
        self.main_window.clearFileSelection()
        # Refresh like removeSelected() does, so the table does not keep
        # showing entries if the main window changed self.file_paths.
        self.populateTable()
        self.main_window.updateFileCountLabel()

    def openFileNameDialog(self):
        """Let the main window add files, then refresh the table."""
        if self.isProcessing():
            self.main_window.showProcessingWarning()
            return
        self.main_window.openFileNameDialog()
        self.populateTable()
        self.main_window.updateFileCountLabel()

    def openDirNameDialog(self):
        """Let the main window add a directory, then refresh the table."""
        if self.isProcessing():
            self.main_window.showProcessingWarning()
            return
        self.main_window.openDirNameDialog()
        self.populateTable()
        self.main_window.updateFileCountLabel()

    def isProcessing(self):
        """Return True while ``self.processing_thread`` is set and running."""
        return bool(self.processing_thread and self.processing_thread.isRunning())
class AnalysisSettingsWindow(QDialog):
    """Dialog for enabling/disabling parsers and opening the config locations.

    Emits ``parsersUpdated`` every time the parser checkbox list is rebuilt.
    """

    parsersUpdated = pyqtSignal()

    def __init__(self, main_window=None):
        """Store the owning window (also the Qt parent) and build the widgets."""
        super().__init__(main_window)
        self.main_window = main_window
        self.ui_helper = UIHelper(self)
        self.initUI()

    def initUI(self):
        """Create the parser settings box, the config directory box and the close button."""
        self.layout = QHBoxLayout(self)

        # Create the QGroupBox
        self.parserSettingsGroupBox = QGroupBox("Parser Settings")
        self.parserSettingsGroupBoxLayout = QVBoxLayout()
        self.toggleAllButtonsLayout = QHBoxLayout()
        self.parserSettingsGroupBoxLayout.addLayout(self.toggleAllButtonsLayout)
        self.enableAllButton = QPushButton("Enable All")
        self.disableAllButton = QPushButton("Disable All")
        self.toggleAllButtonsLayout.addWidget(self.enableAllButton)
        self.enableAllButton.clicked.connect(self.enableAllCheckboxes)
        self.toggleAllButtonsLayout.addWidget(self.disableAllButton)
        self.disableAllButton.clicked.connect(self.disableAllCheckboxes)
        self.parserSettingsGroupBox.setLayout(self.parserSettingsGroupBoxLayout)
        self.layout.addWidget(self.parserSettingsGroupBox)

        # Populate the QGroupBox
        self.populateGroupBox()

        self.configDirectoriesGroupBox = QGroupBox("Config Directories")
        self.configDirectoriesGroupBoxLayout = QVBoxLayout()
        self.configDirectoriesGroupBox.setLayout(self.configDirectoriesGroupBoxLayout)
        self.configDirectoriesGroupBoxLayout.setAlignment(Qt.AlignTop)
        self.layout.addWidget(self.configDirectoriesGroupBox)

        # Inspect Regex Button
        self.inspectRegexButton = QPushButton('YAML-Konfigurationsdatei inspizieren...', self)
        self.inspectRegexButton.clicked.connect(self.openRegexLibrary)
        self.configDirectoriesGroupBoxLayout.addWidget(self.inspectRegexButton)

        self.openWordlistPathButton = QPushButton('Wordlist-Verzeichnis oeffnen...', self)
        self.openWordlistPathButton.clicked.connect(self.openWordlistPath)
        self.configDirectoriesGroupBoxLayout.addWidget(self.openWordlistPathButton)

        self.openScriptsPathButton = QPushButton('Scripts-Verzeichnis oeffnen...', self)
        self.openScriptsPathButton.clicked.connect(self.openScriptsPath)
        self.configDirectoriesGroupBoxLayout.addWidget(self.openScriptsPathButton)

        # Close Button
        self.closeButton = QPushButton("Close")
        self.layout.addWidget(self.closeButton)
        self.closeButton.clicked.connect(self.close)

    def populateGroupBox(self):
        """Add one checkbox per entity type, reflecting its ``parser_enabled`` state.

        Entity types whose name starts with "category_" are omitted. The
        created ``(checkbox, entity_type_id)`` pairs are stored in
        ``self.checkboxes``.
        """
        self.parserSettingsGroupBoxLayout.setAlignment(Qt.AlignTop)
        with session_scope() as db_session:
            entity_types = db_session.query(EntityTypesTable).all()
            self.checkboxes = []
            for et in entity_types:
                # Omit entries that start with "category_"
                if et.entity_type.startswith("category_"):
                    continue

                parser_info = []
                if et.regex_pattern:
                    parser_info.append("regex")
                if et.script_parser:
                    parser_info.append("script")
                checkBoxText = f"{et.gui_name} ({', '.join(parser_info)})" if parser_info else et.gui_name
                checkBox = QCheckBox(checkBoxText)

                # The row was just loaded in this session, so its attribute
                # is current; no extra query per entity type is needed.
                parser_enabled = bool(et.parser_enabled)
                checkBox.setChecked(parser_enabled)
                color = 'green' if parser_enabled else 'red'
                checkBox.setStyleSheet(f"QCheckBox {{ color: {color}; }}")

                self.parserSettingsGroupBoxLayout.addWidget(checkBox)
                self.checkboxes.append((checkBox, et.entity_type_id))
                checkBox.toggled.connect(
                    lambda checked, et_id=et.entity_type_id: self.updateParserEnabled(checked, et_id)
                )

    def enableAllCheckboxes(self):
        """Enable every listed parser."""
        self._setAllParsersEnabled(True)

    def disableAllCheckboxes(self):
        """Disable every listed parser."""
        self._setAllParsersEnabled(False)

    def _setAllParsersEnabled(self, enabled):
        """Set ``parser_enabled`` for all listed entity types in one pass.

        Toggling the checkboxes one by one would run a database write and a
        full rebuild of the checkbox list per checkbox. Instead, the rows
        whose state differs are updated with one statement and the UI is
        refreshed once.
        """
        changed_ids = [et_id for checkBox, et_id in self.checkboxes
                       if checkBox.isChecked() != enabled]
        if not changed_ids:
            return
        with session_scope() as db_session:
            db_session.query(EntityTypesTable).filter(
                EntityTypesTable.entity_type_id.in_(changed_ids)
            ).update({EntityTypesTable.parser_enabled: enabled}, synchronize_session=False)
            db_session.commit()
        if self.main_window:
            self.main_window.refreshApplicationState()
        self.refreshGroupBox()

    def updateParserEnabled(self, checked, entity_type_id):
        """Persist one checkbox change and refresh the dependent UI."""
        with session_scope() as db_session:  # Using a session context manager
            et = db_session.query(EntityTypesTable).filter_by(entity_type_id=entity_type_id).first()
            if et:
                et.parser_enabled = checked
                db_session.commit()  # Commit changes
        if self.main_window:
            self.main_window.refreshApplicationState()  # Refresh the UI state
        self.refreshGroupBox()  # Refresh the group box to reflect changes

    def refreshGroupBox(self):
        """Discard the current checkboxes, rebuild them and emit ``parsersUpdated``."""
        for checkBox, _ in self.checkboxes:
            checkBox.deleteLater()  # Properly delete the checkbox
        self.populateGroupBox()
        self.parsersUpdated.emit()

    def _openDataPath(self, *parts):
        """Open ``<cwd>/data/<parts...>`` via the UI helper, or log that it is missing."""
        import logging  # local import: this module has no file-level logging import

        target = os.path.join(os.getcwd(), 'data', *parts)
        if os.path.exists(target):
            self.ui_helper.openFile(target)  # Call openFile method on the UIHelper instance
        else:
            logging.warning("Path not found, nothing to open: %s", target)

    def openWordlistPath(self):
        """Open the ``data/wordlist`` directory."""
        self._openDataPath('wordlist')

    def openRegexLibrary(self):
        """Open the ``data/entities.yaml`` configuration file."""
        self._openDataPath('entities.yaml')

    def openScriptsPath(self):
        """Open the ``data/parser`` directory."""
        self._openDataPath('parser')

View File

@ -0,0 +1,119 @@
from PyQt5.QtWidgets import QFileDialog
from PyQt5.QtGui import QDesktopServices
from PyQt5.QtCore import QUrl
import os
import math
import logging
import csv
import sys
import subprocess
class UIHelper():
    """File-selection and formatting helpers operating on a window object.

    The window passed in must provide ``isProcessing()`` and
    ``showProcessingWarning()``; the selection methods additionally use its
    ``filePaths`` list, ``updateFileCountLabel()`` and ``fileCountLabel``.
    """

    def __init__(self, main_window):
        """Remember the window whose state these helpers read and modify."""
        self.main_window = main_window

    def openFileNameDialog(self):
        """Let the user pick files and append the new ones to the selection."""
        if self.main_window.isProcessing():
            self.main_window.showProcessingWarning()
            return
        options = QFileDialog.Options()
        files, _ = QFileDialog.getOpenFileNames(self.main_window, "Dateien selektieren", "", "All Files (*)", options=options)
        if files:
            for file in files:
                if file not in self.main_window.filePaths:
                    self.main_window.filePaths.append(file)
            self.main_window.updateFileCountLabel()

    def openDirNameDialog(self):
        """Let the user pick directories repeatedly and add all files below them.

        The dialog is shown again after each accepted directory and the loop
        ends once the user cancels.
        """
        if self.main_window.isProcessing():
            self.main_window.showProcessingWarning()
            return
        options = QFileDialog.Options()
        options |= QFileDialog.DontUseNativeDialog
        fileDialog = QFileDialog(self.main_window, "Ordner selektieren", "", options=options)
        fileDialog.setFileMode(QFileDialog.Directory)
        fileDialog.setOption(QFileDialog.ShowDirsOnly, True)
        fileDialog.setOption(QFileDialog.DontResolveSymlinks, True)

        # Store previously selected directories
        selected_directories = []
        while True:
            if fileDialog.exec_() == QFileDialog.Accepted:
                directory = fileDialog.selectedFiles()[0]
                if directory and directory not in selected_directories:
                    selected_directories.append(directory)
                    self.addAllFilesFromDirectory(directory)
            else:
                break  # Exit loop if user cancels
        self.main_window.updateFileCountLabel()

    def calculate_total_size(self, file_paths):
        """Return the formatted total size of all existing paths in ``file_paths``."""
        total_size = sum(os.path.getsize(f) for f in file_paths if os.path.exists(f))
        return self.format_size(total_size)

    def format_size(self, size_bytes):
        """Format a byte count using 1024-based units.

        Args:
            size_bytes: Size in bytes.

        Returns:
            ``"0B"`` for zero; otherwise the value rounded to two decimals,
            a space and the unit, e.g. ``"1.5 KB"``. Values below 1024 stay
            in ``B`` and values beyond the largest unit stay in ``YB``.
        """
        if size_bytes == 0:
            return "0B"
        size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
        # Divide step by step instead of taking a logarithm: this also works
        # for negative and sub-byte values and never indexes past the last unit.
        value = float(size_bytes)
        unit_index = 0
        while abs(value) >= 1024 and unit_index < len(size_name) - 1:
            value /= 1024
            unit_index += 1
        return f"{round(value, 2)} {size_name[unit_index]}"

    def addAllFilesFromDirectory(self, directory):
        """Append every file below ``directory`` that is not yet selected."""
        for root, dirs, files in os.walk(directory):
            for filename in files:
                file_path = os.path.join(root, filename)
                if file_path not in self.main_window.filePaths:
                    self.main_window.filePaths.append(file_path)

    def clearFileSelection(self):
        """Empty the selection and reset the file count label."""
        if self.main_window.isProcessing():
            self.main_window.showProcessingWarning()
            return
        self.main_window.filePaths.clear()
        self.main_window.updateFileCountLabel()
        self.main_window.fileCountLabel.setText('   Keine Dateien selektiert')

    def removeSingleFile(self, file):
        """Remove ``file`` from the selection if present and update the label."""
        if self.main_window.isProcessing():
            self.main_window.showProcessingWarning()
            return
        if self.main_window.filePaths:
            # Remove the file by value
            if file in self.main_window.filePaths:
                self.main_window.filePaths.remove(file)
                self.main_window.updateFileCountLabel()

    def generate_files_log(self, file_path, files_list):
        """Write one CSV row per entry of ``files_list`` to ``file_path``.

        Errors are logged and not raised.
        """
        try:
            with open(file_path, mode='w', newline='', encoding='utf-8') as log_file:
                writer = csv.writer(log_file)
                # Distinct loop name so the open file handle is not shadowed.
                for listed_file in files_list:
                    writer.writerow([listed_file])
        except Exception as e:
            # Best effort: kept broad so a failed log write never propagates.
            logging.error(f"Error generating log file {file_path}: {e}")

    def openFile(self, file_path):
        """Open ``file_path`` with the platform's default handler."""
        if sys.platform == 'win32':
            os.startfile(file_path)
        elif sys.platform == 'darwin':  # macOS
            subprocess.Popen(['open', file_path])
        else:  # Linux and other Unix-like systems
            subprocess.Popen(['xdg-open', file_path])
def format_time(seconds):
    """Format a duration in seconds as ``"<m> min <s> sec"``.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        ``"N/A"`` for NaN and for positive or negative infinity; otherwise
        the whole minutes and the remaining whole seconds.
    """
    # isfinite() rejects NaN and both infinities; negative infinity would
    # otherwise reach int() and raise.
    if not math.isfinite(seconds):
        return "N/A"
    minutes, remainder = divmod(seconds, 60)
    return f"{int(minutes)} min {int(remainder)} sec"

View File

@ -0,0 +1,4 @@
# URL that belongs to repo_link_text below; presumably rendered as a
# feedback/support link in the GUI. TODO confirm where it is used.
repo_link = "https://cloud.mikoshi.de/call/qhtkcnmn#/"
# Label for the link above (German: "opens external link").
repo_link_text = "Feedback // Support (öffnet externen Link)"
# Release date and version in one string.
version_string = "2024-02-08 - Version: 0.4.4 // TESTING // UPDATE REGULARLY"
# Log level name; presumably read by the application's logging setup. Verify against the caller.
loglevel = "INFO"

View File

13
requirements.txt Normal file
View File

@ -0,0 +1,13 @@
sqlalchemy
pyYAML
PyQt5
odfpy
pandas
python-magic
openpyxl
PyMuPDF
tldextract
fuzzywuzzy
python-Levenshtein
phonenumbers
python-docx

8
run.py Normal file
View File

@ -0,0 +1,8 @@
from logline_leviathan import main
import multiprocessing
# Script entry point: runs only when this file is executed directly.
if __name__ == "__main__":
    # Called straight after the main guard, as the multiprocessing docs
    # require for frozen Windows executables; it has no effect elsewhere.
    multiprocessing.freeze_support()
    # Disabled: would force the 'spawn' start method.
    #multiprocessing.set_start_method('spawn')
    main()