initial commit
This commit is contained in:
parent
13855a70ae
commit
759acc855d
54
README.md
54
README.md
@ -1,3 +1,53 @@
|
||||
# LoglineLeviathan
|
||||
## LoglineLeviathan // Analyze/Export-Module
|
||||
|
||||
Large-Scale Text Parser which extracts modular configurable artifacts with context
|
||||
## **Installation**
|
||||
|
||||
### Windows:
|
||||
|
||||
Currently no .exe available yet. Follow the below Linux Instructions and adapt to your Windows shell.
|
||||
If you installed via pip install -r requirements.txt, you should run
|
||||
|
||||
```
|
||||
pip uninstall python-magic
|
||||
pip install python-magic-bin==0.4.14
|
||||
```
|
||||
|
||||
afterwards.
|
||||
|
||||
> Important: The directories "data" with the entities.yaml and "output" need to be present.
|
||||
|
||||
### Linux / Python-Sourcecode:
|
||||
|
||||
This guide applies to building the application from source on a Linux host.
|
||||
|
||||
1. Required prerequisites: python3 (3.11 or newer), python3-pip and python3.11-venv (or whatever version you have), git.
|
||||
2. Clone the git repository:
|
||||
|
||||
git clone https://github.com/overcuriousity/LoglineLeviathan
|
||||
3. Shell:
|
||||
|
||||
```
|
||||
cd LoglineLeviathan && python3 -m venv venv && source venv/bin/activate && pip install -r requirements.txt
|
||||
```
|
||||
4. Start:
|
||||
|
||||
```
|
||||
python3 run.py
|
||||
```
|
||||
|
||||
## **Usage**
|
||||
|
||||
### Analysis:
|
||||
|
||||
> On startup, a new database will be created by default and populated with the available entities. If a database from a prior session is present, it will be used.
|
||||
|
||||
After startup, no files are selected for ingestion. Starting from there, you have the following possibilities:
|
||||
|
||||
- Select files with "Add Files to Selection": Opens a file browser and lets you select one or more files.
|
||||
- Choose directory with "Add Directory and Subdirectories": Recursively adds all files in all subdirectories of the chosen directory.
|
||||
|
||||
> Resetting the file selection is only possible via the "Clear Files from Selection"-Button.
|
||||
|
||||
- Choose existing database.
|
||||
|
||||
Button "Start/Resume File Analysis" starts the file ingestion and database population.
|
226
data/entities.yaml
Normal file
226
data/entities.yaml
Normal file
@ -0,0 +1,226 @@
|
||||
bip39:
|
||||
entity_type: bip39
|
||||
gui_name: BIP39 Wordlist
|
||||
gui_tooltip: Outputs BIP39 wordlists, which is parsed from the text by the required
|
||||
length, with 0-5 characters in between the words.
|
||||
parent_type: category_cryptocurrency
|
||||
parser_enabled: true
|
||||
regex_pattern: null
|
||||
script_parser: bip39.py
|
||||
btcaddr:
|
||||
entity_type: btcaddr
|
||||
gui_name: Bitcoin Address
|
||||
gui_tooltip: Outputs BTC addresses of the common formats P2PKH, P2SH and Bech32.
|
||||
parent_type: category_bitcoin
|
||||
parser_enabled: true
|
||||
regex_pattern: \b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b
|
||||
script_parser: btcaddr.py
|
||||
btctxid:
|
||||
entity_type: btctxid
|
||||
gui_name: Bitcoin TXID
|
||||
gui_tooltip: Outputs BTC TXIDs.
|
||||
parent_type: category_bitcoin
|
||||
parser_enabled: true
|
||||
regex_pattern: \b[a-fA-F0-9]{64}\b
|
||||
script_parser: null
|
||||
category_bitcoin:
|
||||
entity_type: category_bitcoin
|
||||
gui_name: Bitcoin
|
||||
gui_tooltip: Bitcoin related entities.
|
||||
parent_type: category_cryptocurrency
|
||||
parser_enabled: true
|
||||
regex_pattern: null
|
||||
script_parser: null
|
||||
category_communication:
|
||||
entity_type: category_communication
|
||||
gui_name: Communication
|
||||
gui_tooltip: Communication related entities.
|
||||
parent_type: root
|
||||
parser_enabled: true
|
||||
regex_pattern: null
|
||||
script_parser: null
|
||||
category_cryptocurrency:
|
||||
entity_type: category_cryptocurrency
|
||||
gui_name: Cryptocurrency
|
||||
gui_tooltip: Cryptocurrency related entities.
|
||||
parent_type: root
|
||||
parser_enabled: true
|
||||
regex_pattern: null
|
||||
script_parser: null
|
||||
category_cybersecurity:
|
||||
entity_type: category_cybersecurity
|
||||
gui_name: Cybersecurity
|
||||
gui_tooltip: Cybersecurity related entities.
|
||||
parent_type: root
|
||||
parser_enabled: true
|
||||
regex_pattern: null
|
||||
script_parser: null
|
||||
category_internet:
|
||||
entity_type: category_internet
|
||||
gui_name: Internet
|
||||
gui_tooltip: Internet related entities.
|
||||
parent_type: root
|
||||
parser_enabled: true
|
||||
regex_pattern: null
|
||||
script_parser: null
|
||||
category_monero:
|
||||
entity_type: category_monero
|
||||
gui_name: Monero
|
||||
gui_tooltip: Monero related entities.
|
||||
parent_type: category_cryptocurrency
|
||||
parser_enabled: true
|
||||
regex_pattern: null
|
||||
script_parser: null
|
||||
category_networking:
|
||||
entity_type: category_networking
|
||||
gui_name: Networking
|
||||
gui_tooltip: Networking related entities.
|
||||
parent_type: root
|
||||
parser_enabled: true
|
||||
regex_pattern: null
|
||||
script_parser: null
|
||||
category_special:
|
||||
entity_type: category_special
|
||||
gui_name: Special Parsers
|
||||
gui_tooltip: Special parsers, e.g. created wordlists.
|
||||
parent_type: root
|
||||
parser_enabled: true
|
||||
regex_pattern: null
|
||||
script_parser: null
|
||||
gdocurl:
|
||||
entity_type: gdocurl
|
||||
gui_name: Google Docs URL
|
||||
gui_tooltip: Outputs any possible Google Docs URLs.
|
||||
parent_type: url
|
||||
parser_enabled: true
|
||||
regex_pattern: \bhttps:\/\/docs\.google\.com\/[\w\/.-]*\/d\/[a-zA-Z0-9_-]+(?:\/\S*)?
|
||||
script_parser: null
|
||||
generated_wordlist_match:
|
||||
entity_type: generated_wordlist_match
|
||||
gui_name: Generated Wordlist Match
|
||||
gui_tooltip: Outputs any wordlist matches which are specified by the generated wordlist
|
||||
present in the parser directory.
|
||||
parent_type: category_special
|
||||
parser_enabled: true
|
||||
regex_pattern: null
|
||||
script_parser: generated_wordlist.py
|
||||
github:
|
||||
entity_type: github
|
||||
gui_name: GitHub
|
||||
gui_tooltip: Outputs any possible GitHub repositories.
|
||||
parent_type: url
|
||||
parser_enabled: true
|
||||
regex_pattern: \bhttps?:\/\/github\.com\/[A-Za-z0-9_.-]+\/[A-Za-z0-9_.-]+\/?\S*
|
||||
script_parser: null
|
||||
ipv4:
|
||||
entity_type: ipv4
|
||||
gui_name: IPv4 Address
|
||||
gui_tooltip: Outputs any IPv4 addresses.
|
||||
parent_type: category_networking
|
||||
parser_enabled: true
|
||||
regex_pattern: \b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b
|
||||
script_parser: ipv4.py
|
||||
ipv4pr:
|
||||
entity_type: ipv4pr
|
||||
gui_name: Private Address Range
|
||||
gui_tooltip: Outputs any IPv4 addresses of the private address range.
|
||||
parent_type: ipv4
|
||||
parser_enabled: true
|
||||
regex_pattern: \b(10\.\d{1,3}\.\d{1,3}\.\d{1,3}|172\.(1[6-9]|2[0-9]|3[0-1])\.\d{1,3}\.\d{1,3}|192\.168\.\d{1,3}\.\d{1,3})\b
|
||||
script_parser: ipv4pr.py
|
||||
ipv4pu:
|
||||
entity_type: ipv4pu
|
||||
gui_name: Public Address Range
|
||||
gui_tooltip: Outputs any IPv4 addresses of the public address range.
|
||||
parent_type: ipv4
|
||||
parser_enabled: true
|
||||
regex_pattern: \b((?!10\.)(?!172\.(1[6-9]|2[0-9]|3[0-1]))(?!192\.168)(?:[0-9]{1,3}\.){3}[0-9]{1,3})\b
|
||||
script_parser: ipv4pu.py
|
||||
ipv6:
|
||||
entity_type: ipv6
|
||||
gui_name: IPv6 Address
|
||||
gui_tooltip: Outputs any IPv6 addresses.
|
||||
parent_type: category_networking
|
||||
parser_enabled: true
|
||||
regex_pattern: (([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))
|
||||
script_parser: ipv6.py
|
||||
macaddr:
|
||||
entity_type: macaddr
|
||||
gui_name: MAC Address
|
||||
gui_tooltip: Outputs any possible MAC addresses.
|
||||
parent_type: category_networking
|
||||
parser_enabled: true
|
||||
regex_pattern: \b(?:[0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}\b
|
||||
script_parser: null
|
||||
mailaddr:
|
||||
entity_type: mailaddr
|
||||
gui_name: EMail Address
|
||||
gui_tooltip: Outputs any possible email-addresses.
|
||||
parent_type: category_communication
|
||||
parser_enabled: true
|
||||
regex_pattern: \b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b
|
||||
script_parser: null
|
||||
onionurl:
|
||||
entity_type: onionurl
|
||||
gui_name: Onion URL
|
||||
gui_tooltip: Outputs any possible onion URL.
|
||||
parent_type: category_internet
|
||||
parser_enabled: true
|
||||
regex_pattern: \bhttps?:\/\/[a-z2-7]{16,56}\.onion(?:\/\S*)?
|
||||
script_parser: null
|
||||
telnum:
|
||||
entity_type: telnum
|
||||
gui_name: Possible Telephone Number
|
||||
gui_tooltip: Outputs any possible telephone numbers, this may have some false positives.
|
||||
parent_type: category_communication
|
||||
parser_enabled: true
|
||||
regex_pattern: \b(?:\+\d{1,4}\s?)?\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b
|
||||
script_parser: telnum.py
|
||||
toxid:
|
||||
entity_type: toxid
|
||||
gui_name: Tox ID
|
||||
gui_tooltip: Outputs any possible tox ID, including QTOX. Unverified Regex Pattern.
|
||||
parent_type: category_communication
|
||||
parser_enabled: true
|
||||
regex_pattern: (?<![0-9a-fA-F])[0-9a-fA-F]{76}(?![0-9a-fA-F])
|
||||
script_parser: null
|
||||
url:
|
||||
entity_type: url
|
||||
gui_name: URL
|
||||
gui_tooltip: Outputs any possible URL.
|
||||
parent_type: category_internet
|
||||
parser_enabled: true
|
||||
regex_pattern: \b(?:https?|s?ftp):\/\/[\w\/.-]+(?:\.[a-z]{2,})+\S*
|
||||
script_parser: url.py
|
||||
vulnerability_CVE:
|
||||
entity_type: vulnerability_CVE
|
||||
gui_name: CVE String
|
||||
gui_tooltip: Outputs any possible CVE Vulnerability Identifier.
|
||||
parent_type: category_cybersecurity
|
||||
parser_enabled: true
|
||||
regex_pattern: cve-\d{4}-\d+
|
||||
script_parser: null
|
||||
xmraddr:
|
||||
entity_type: xmraddr
|
||||
gui_name: Monero Address
|
||||
gui_tooltip: Outputs Monero addresses.
|
||||
parent_type: category_monero
|
||||
parser_enabled: true
|
||||
regex_pattern: \b4[0-9AB][1-9A-HJ-NP-Za-km-z]{93}\b
|
||||
script_parser: xmraddr.py
|
||||
category_metadata:
|
||||
entity_type: category_metadata
|
||||
gui_name: Metadata
|
||||
gui_tooltip: Metadata related entities.
|
||||
parent_type: root
|
||||
parser_enabled: false
|
||||
regex_pattern: null
|
||||
script_parser: null
|
||||
timestamp:
|
||||
entity_type: timestamp
|
||||
gui_name: Timestamp
|
||||
gui_tooltip: Timestamp-like entities.
|
||||
parent_type: category_metadata
|
||||
parser_enabled: false
|
||||
regex_pattern: \b\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z\b
|
||||
script_parser: timestamp.py
|
0
data/parser/__init__.py
Normal file
0
data/parser/__init__.py
Normal file
26
data/parser/bip39 copy.py
Normal file
26
data/parser/bip39 copy.py
Normal file
File diff suppressed because one or more lines are too long
28
data/parser/bip39.py
Normal file
28
data/parser/bip39.py
Normal file
File diff suppressed because one or more lines are too long
24
data/parser/btcaddr.py
Normal file
24
data/parser/btcaddr.py
Normal file
@ -0,0 +1,24 @@
|
||||
import re
|
||||
|
||||
def parse(text):
    """Extract Bitcoin addresses (P2PKH, P2SH, Bech32) from *text*.

    Returns a list of ``(address, start_pos, end_pos)`` tuples, one per match.
    """
    # Regular expressions for the common Bitcoin address formats.
    p2pkh_regex = r'\b1[1-9A-HJ-NP-Za-km-z]{25,34}\b'
    p2sh_regex = r'\b3[1-9A-HJ-NP-Za-km-z]{25,34}\b'
    # Bech32 (SegWit) prefixes. Fixed: the original class "[q,p,z]" also
    # matched a literal comma after "bc1".
    bech32_regex = r'\bbc1[qpz][0-9a-z]{39,59}\b'
    bech32_regex1 = r'\bbc1[qpz0-9ac-hj-np-z]{38,58}\b'
    less_common_regex = r'\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b'

    # Combine all regexes. Fixed: the original joined the last two
    # alternatives with " | ", which made the literal spaces part of the
    # pattern, so those alternatives only matched when padded by spaces.
    combined_regex = (
        f'({p2pkh_regex})|({p2sh_regex})|({bech32_regex})'
        f'|({less_common_regex})|({bech32_regex1})'
    )

    matches = []
    for match in re.finditer(combined_regex, text):
        for addr in match.groups():
            if addr:  # only the alternative that matched captured a group
                start_pos, end_pos = match.span()
                matches.append((addr, start_pos, end_pos))

    return matches


# TODO: integrate regexes for xpub, ypub, zpub extended public keys
# TODO: validate the address checksum to reduce false positives
|
22
data/parser/generated_wordlist.py
Normal file
22
data/parser/generated_wordlist.py
Normal file
@ -0,0 +1,22 @@
|
||||
import re
|
||||
import os
|
||||
|
||||
def load_wordlist(file_path):
    """Read *file_path* and return each line with surrounding whitespace stripped."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        return [entry.strip() for entry in handle]
|
||||
|
||||
def parse(text):
    """Find occurrences of generated-wordlist entries in *text*.

    Loads ``generated_wordlist.txt`` (expected to sit next to this script)
    on every call, then returns ``(word, start, end)`` tuples for each
    case-insensitive hit.
    """
    wordlist_path = os.path.join(os.path.dirname(__file__), 'generated_wordlist.txt')
    wordlist = load_wordlist(wordlist_path)

    # Create a regex pattern that matches any word in the wordlist; spaces
    # inside multi-word entries match any run of whitespace.
    # NOTE(review): no \b word boundaries -- entries also match inside longer
    # words (e.g. "cat" inside "concatenate"); confirm substring hits are intended.
    pattern = '(' + '|'.join(re.escape(word).replace(' ', r'\s+') for word in wordlist) + ')'

    matches = []
    for match in re.finditer(pattern, text, re.IGNORECASE):
        matched_word = match.group()
        start_pos, end_pos = match.span()
        matches.append((matched_word, start_pos, end_pos))

    return matches
|
||||
|
22
data/parser/ipv4.py
Normal file
22
data/parser/ipv4.py
Normal file
@ -0,0 +1,22 @@
|
||||
import re
|
||||
import ipaddress
|
||||
|
||||
def is_valid_ipv4_address(ip_addr):
    """Return True when *ip_addr* parses as an IPv4 address (public or private)."""
    try:
        parsed = ipaddress.ip_address(ip_addr)
    except ValueError:
        return False
    return isinstance(parsed, ipaddress.IPv4Address)


def parse(text):
    """Scan *text* for IPv4 addresses; return (address, start, end) tuples."""
    ipv4_regex = r'(?<!\d)(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?!\d)'
    found = []

    for hit in re.finditer(ipv4_regex, text):
        candidate = hit.group()
        if not is_valid_ipv4_address(candidate):
            continue
        begin, finish = hit.span()
        found.append((candidate, begin, finish))

    return found
|
||||
|
22
data/parser/ipv4pr.py
Normal file
22
data/parser/ipv4pr.py
Normal file
@ -0,0 +1,22 @@
|
||||
import re
|
||||
import ipaddress
|
||||
|
||||
def is_private_ip(ip_addr):
    """Return True when *ip_addr* is a valid address in a private range."""
    try:
        return ipaddress.ip_address(ip_addr).is_private
    except ValueError:
        return False


def parse(text):
    """Scan *text* for private-range IPv4 addresses; return (ip, start, end) tuples."""
    ipv4_regex = r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b'
    results = []

    for hit in re.finditer(ipv4_regex, text):
        candidate = hit.group()
        if is_private_ip(candidate):
            begin, finish = hit.span()
            results.append((candidate, begin, finish))

    return results
|
||||
|
||||
|
21
data/parser/ipv4pu.py
Normal file
21
data/parser/ipv4pu.py
Normal file
@ -0,0 +1,21 @@
|
||||
import re
|
||||
import ipaddress
|
||||
|
||||
def is_public_ip(ip_addr):
    """Return True when *ip_addr* is a valid, non-private, non-reserved, non-loopback address."""
    try:
        candidate = ipaddress.ip_address(ip_addr)
    except ValueError:
        return False
    return not (candidate.is_private or candidate.is_reserved or candidate.is_loopback)


def parse(text):
    """Scan *text* for public-range IPv4 addresses; return (ip, start, end) tuples."""
    ipv4_regex = r'\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b'
    results = []

    for hit in re.finditer(ipv4_regex, text):
        candidate = hit.group()
        if is_public_ip(candidate):
            begin, finish = hit.span()
            results.append((candidate, begin, finish))

    return results
|
20
data/parser/ipv6.py
Normal file
20
data/parser/ipv6.py
Normal file
@ -0,0 +1,20 @@
|
||||
import re
|
||||
import ipaddress
|
||||
|
||||
def is_valid_ipv6_address(ip_addr):
    """Return True when *ip_addr* parses as an IPv6 address."""
    try:
        parsed = ipaddress.ip_address(ip_addr)
    except ValueError:
        return False
    return isinstance(parsed, ipaddress.IPv6Address)


def parse(text):
    """Scan *text* for IPv6 addresses; return (address, start, end) tuples."""
    ipv6_regex = r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))'
    results = []

    for hit in re.finditer(ipv6_regex, text, re.IGNORECASE):
        candidate = hit.group()
        if not is_valid_ipv6_address(candidate):
            continue
        begin, finish = hit.span()
        results.append((candidate, begin, finish))

    return results
|
100
data/parser/telnum.py
Normal file
100
data/parser/telnum.py
Normal file
@ -0,0 +1,100 @@
|
||||
import phonenumbers
|
||||
import logging
|
||||
import re
|
||||
|
||||
|
||||
# Default ISO region codes tried (in order) when validating candidate numbers.
# Hoisted to a module-level *tuple*: the original passed a huge mutable list
# as a default argument, and carried duplicate entries ('MQ', 'NG').
_DEFAULT_REGIONS = (
    'US', 'GB', 'DE', 'FR', 'ES', 'IT', 'RU', 'CN', 'IN', 'JP',
    'BR', 'ZA', 'NG', 'EG', 'TR', 'ID', 'AU', 'CA', 'MX', 'AR',
    'KR', 'TH', 'VN', 'PH', 'MY', 'SA', 'IR', 'PK', 'BD', 'UA',
    'PL', 'NL', 'BE', 'CH', 'AT', 'SE', 'NO', 'DK', 'FI', 'IL',
    'SG', 'HK', 'NZ', 'AE', 'KE', 'CO', 'VE', 'PE', 'CL', 'GR',
    'PT', 'CZ', 'RO', 'HU', 'BG', 'SK', 'SI', 'HR', 'RS', 'LT',
    'LV', 'EE', 'CY', 'LU', 'MT', 'IS', 'KZ', 'UZ', 'AM', 'AZ',
    'GE', 'MN', 'KG', 'TJ', 'TM', 'BT', 'NP', 'LK', 'MM', 'KH',
    'LA', 'BN', 'FJ', 'PW', 'SB', 'VU', 'FM', 'WS', 'TO', 'TV',
    'KI', 'NR', 'MQ', 'GF', 'RE', 'YT', 'PF', 'NC', 'WF', 'TF',
    'AI', 'AG', 'AW', 'BS', 'BB', 'BZ', 'BM', 'VG', 'KY', 'CU',
    'CW', 'DM', 'DO', 'GD', 'GP', 'HT', 'JM', 'MS', 'PR',
    'KN', 'LC', 'VC', 'SX', 'TT', 'TC', 'VI', 'BO', 'BQ', 'EC',
    'GY', 'PY', 'SR', 'UY', 'DZ', 'AO', 'BJ', 'BW', 'BF', 'BI',
    'CV', 'CM', 'CF', 'TD', 'KM', 'CG', 'CD', 'DJ', 'GQ', 'ER',
    'SZ', 'ET', 'GA', 'GM', 'GH', 'GN', 'GW', 'CI', 'LS', 'LR',
    'LY', 'MG', 'MW', 'ML', 'MR', 'MU', 'MA', 'MZ', 'NA', 'NE',
    'RW', 'ST', 'SN', 'SC', 'SL', 'SO', 'SS', 'SD', 'TZ',
    'TG', 'TN', 'UG', 'ZM', 'ZW',
)

# Patterns whose presence near a candidate marks it as something other than a
# phone number. Compiled once at import time; the original rebuilt (and
# re-searched uncompiled) every pattern on every parse() call. The timestamp
# strftime format strings carried by the original tuples were never used here
# and have been dropped.
_NON_PHONE_CONTEXT_PATTERNS = [re.compile(p) for p in (
    # Timestamp shapes -- digit runs inside timestamps are not phone numbers.
    r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}',          # ISO 8601 Extended
    r'\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}',          # ISO 8601 with slashes
    r'\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}',          # European Date Format
    r'\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}',          # US Date Format
    r'\d{8}_\d{6}',                                  # Compact Format
    r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}',          # ISO 8601 Basic
    r'\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2}',        # German Date Format
    r'\d{4}\d{2}\d{2} \d{2}:\d{2}:\d{2}',            # Basic Format without Separators
    r'\d{1,2}-[A-Za-z]{3}-\d{4} \d{2}:\d{2}:\d{2}',  # English Date Format with Month Name
    r'(?:19|20)\d{10}',                              # Compact Numeric Format
    # Other contexts in which digit runs are unlikely to be phone numbers.
    r'\d{5,}\s?bytes',                               # File size in bytes
    r'https?://\S+',                                 # URLs
    r'\bversion \d+',                                # 'version' followed by numbers
    r'cve-\d{4}-\d+',                                # CVE identifiers
    r'\S+\.onion\S*',                                # Onion addresses
    r'Product ID: \S+',                              # Product IDs
    r'\|\s*[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\s*\|',  # UUIDs
    r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b',       # IP addresses
    r'Mem: \d+\s+\d+\s+\d+',                         # Memory sizes
    r'Total: \d+\s+\d+\s+\d+',                       # Total memory
    r'block_size=\d+',                               # Block size
    r'-rw-------\s+\d+\s+\S+\s+\S+\s+\d+\s+\S+\s+\d{1,2}\s+\d{1,2}:\d{2}',  # File details
    r'\d+\.\d+\.\d+\.\d+\s+\d+\s+\S+\s+\d+',         # IP and port patterns
    # Add more patterns as needed
)]

# Candidate phone-number shape; compiled once.
_PHONE_REGEX = re.compile(r'\b(\+?\d{1,3}[\s-]?)?(\(?\d{1,4}\)?[\s-]?)?\d{3,5}[\s-]?\d{3,5}\b')


def parse(text, default_regions=_DEFAULT_REGIONS):
    """Extract likely phone numbers from *text*.

    Candidates matched by a loose regex are first rejected when their
    surrounding +/-50-character window looks like a timestamp, URL, UUID,
    file listing, etc.; survivors are validated with ``phonenumbers``
    against each region in *default_regions* until one accepts.

    Returns a list of ``(raw_number, start_pos, end_pos)`` tuples.
    """
    matches = []
    for number_match in _PHONE_REGEX.finditer(text):
        raw_number = number_match.group()
        start_pos, end_pos = number_match.span()

        # Extend the search window for additional context.
        window = text[max(0, start_pos - 50):min(len(text), end_pos + 50)]
        if any(p.search(window) for p in _NON_PHONE_CONTEXT_PATTERNS):
            continue  # Skip if the context indicates it's not a phone number

        for region in default_regions:
            try:
                parsed_number = phonenumbers.parse(raw_number, region)
            except phonenumbers.NumberParseException:
                continue
            if phonenumbers.is_valid_number(parsed_number):
                matches.append((raw_number, start_pos, end_pos))
                break
        else:
            logging.debug(f"Failed to parse number: {raw_number}")

    return matches
|
26
data/parser/timestamp.py
Normal file
26
data/parser/timestamp.py
Normal file
@ -0,0 +1,26 @@
|
||||
import re
|
||||
|
||||
# List of timestamp patterns
|
||||
# Recognized timestamp shapes as (regex, strftime-format) pairs.
timestamp_patterns = [
    (r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', '%Y-%m-%d %H:%M:%S'),          # ISO 8601 Extended
    (r'\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}', '%Y/%m/%d %H:%M:%S'),          # ISO 8601 with slashes
    (r'\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}', '%d/%m/%Y %H:%M:%S'),          # European date format
    (r'\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}', '%m-%d-%Y %H:%M:%S'),          # US date format
    (r'\d{8}_\d{6}', '%Y%m%d_%H%M%S'),                                      # Compact format
    (r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}', '%Y-%m-%dT%H:%M:%S'),          # ISO 8601 Basic
    (r'\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2}', '%d.%m.%Y %H:%M:%S'),        # German date format
    (r'\d{4}\d{2}\d{2} \d{2}:\d{2}:\d{2}', '%Y%m%d %H:%M:%S'),              # No date separators
    (r'\d{1,2}-[A-Za-z]{3}-\d{4} \d{2}:\d{2}:\d{2}', '%d-%b-%Y %H:%M:%S'),  # Month-name format
    (r'(?:19|20)\d{10}', '%Y%m%d%H%M'),                                     # Compact numeric format
    # Add more patterns as needed
]


def parse(text):
    """Return (timestamp_string, start, end) tuples for every pattern hit in *text*."""
    results = []
    for regex, _fmt in timestamp_patterns:
        results.extend((m.group(), *m.span()) for m in re.finditer(regex, text))
    return results
|
21
data/parser/url.py
Normal file
21
data/parser/url.py
Normal file
@ -0,0 +1,21 @@
|
||||
import tldextract
|
||||
import re
|
||||
|
||||
def parse(text):
    """Extract URLs from *text*, keeping only those with a registrable domain+suffix."""
    # Loose pre-filter for URL-shaped strings.
    url_regex = r'\b(?:https?|ftp):\/\/[^\s]+'
    results = []

    for candidate in re.finditer(url_regex, text):
        url_text = candidate.group()

        # tldextract confirms that the host has both a domain and a public suffix.
        parts = tldextract.extract(url_text)
        if not (parts.domain and parts.suffix):
            continue

        begin, finish = candidate.span()
        results.append((url_text, begin, finish))

    return results
|
||||
|
||||
|
12
data/parser/xmraddr.py
Normal file
12
data/parser/xmraddr.py
Normal file
@ -0,0 +1,12 @@
|
||||
import re
|
||||
|
||||
def parse(text):
    """Find Monero (XMR) addresses: 95-character base58 strings starting with 4 or 8.

    Returns a list of ``(address, start, end)`` tuples.
    """
    xmr_regex = r'\b4[123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz]{94}\b|\b8[123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz]{94}\b'
    return [(hit.group(), *hit.span()) for hit in re.finditer(xmr_regex, text)]
|
1
logline_leviathan/__init__.py
Normal file
1
logline_leviathan/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
from .__main__ import main
|
32
logline_leviathan/__main__.py
Normal file
32
logline_leviathan/__main__.py
Normal file
@ -0,0 +1,32 @@
|
||||
"""
|
||||
You're welcome! I'm glad you like the name "Logline Leviathan". It's a fitting name for a program that can delve into the depths of unstructured text data like a leviathan, extracting valuable insights from the chaotic ocean of information. I hope your program is successful in its mission to help investigators navigate the dark, digital realm of cyberpunk."""
|
||||
|
||||
import sys
|
||||
from PyQt5.QtWidgets import QApplication
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
|
||||
# Add the parent directory of 'logline_leviathan' to sys.path
|
||||
parent_dir = str(Path(__file__).resolve().parent.parent)
|
||||
if parent_dir not in sys.path:
|
||||
sys.path.append(parent_dir)
|
||||
|
||||
from logline_leviathan.gui.mainwindow import MainWindow
|
||||
from logline_leviathan.database.database_manager import create_database
|
||||
|
||||
def initialize_database():
    """Create the SQLite database and its tables (delegates to create_database)."""
    create_database()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, start the Qt application, show the main window."""
    parser = argparse.ArgumentParser(description='Analyze Export')
    # Optional positional directory to pre-select for analysis; defaults to ''.
    parser.add_argument('directory', nargs='?', default='', help='Directory to analyze')
    args = parser.parse_args()

    app = QApplication(sys.argv)
    # initialize_database is passed as a callable so the window controls when
    # the database gets created.
    main_window = MainWindow(app, initialize_database, args.directory)
    main_window.show()
    # Enter the Qt event loop; exit the process with its return code.
    sys.exit(app.exec_())

if __name__ == "__main__":
    main()
|
0
logline_leviathan/database/__init__.py
Normal file
0
logline_leviathan/database/__init__.py
Normal file
107
logline_leviathan/database/database_manager.py
Normal file
107
logline_leviathan/database/database_manager.py
Normal file
@ -0,0 +1,107 @@
|
||||
from sqlalchemy import create_engine, Column, Integer, String, ForeignKey, Text, DateTime, Boolean
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from contextlib import contextmanager
|
||||
import logging
|
||||
|
||||
|
||||
|
||||
# Session factory bound to the default on-disk SQLite database.
# NOTE(review): this path is hard-coded; sessions always hit 'entities.db'
# even when create_database() below is called with a different db_path -- confirm.
SessionFactory = sessionmaker(bind=create_engine('sqlite:///entities.db'))

# Declarative base class shared by all ORM models in this module.
Base = declarative_base()
|
||||
|
||||
class DistinctEntitiesTable(Base):
    """One row per distinct entity value (e.g. one particular IP address)."""
    __tablename__ = 'distinct_entities_table'
    distinct_entities_id = Column(Integer, primary_key=True)  # primary key of the distinct_entities_table
    distinct_entity = Column(String, index=True)  # the distinct entity value itself, e.g. 192.168.1.1, bc1qy3h5l8n9, etc.
    entity_types_id = Column(Integer, ForeignKey('entity_types_table.entity_type_id'))  # foreign key into entity_types_table
    regex_library = relationship("EntityTypesTable")  # type/regex metadata for this entity
    individual_entities = relationship("EntitiesTable", back_populates="entity")  # every individual occurrence of this value
|
||||
|
||||
class EntitiesTable(Base):
    """One row per individual occurrence of an entity inside a source file."""
    __tablename__ = 'entities_table'
    entities_id = Column(Integer, primary_key=True)  # primary key of the entities_table
    distinct_entities_id = Column(Integer, ForeignKey('distinct_entities_table.distinct_entities_id'))  # foreign key into distinct_entities_table
    entity_types_id = Column(Integer, ForeignKey('entity_types_table.entity_type_id'))  # foreign key into entity_types_table
    regex_library = relationship("EntityTypesTable")
    file_id = Column(Integer, ForeignKey('file_metadata.file_id'))  # foreign key into file_metadata
    line_number = Column(Integer)  # line inside the source file (file described in file_metadata)
    entry_timestamp = Column(DateTime)  # timestamp obtained via regex from the original input file
    flag = Column(Boolean, default=False, index=True)  # user-set flag to customize data inspection by flag presence

    entity = relationship("DistinctEntitiesTable", back_populates="individual_entities")
    file = relationship("FileMetadata")
    context = relationship("ContextTable", uselist=False, back_populates="individual_entity")
|
||||
|
||||
class ContextTable(Base):
    """Surrounding text captured for one entity occurrence, at three sizes."""
    __tablename__ = 'context_table'
    context_id = Column(Integer, primary_key=True)  # primary key of the context_table
    entities_id = Column(Integer, ForeignKey('entities_table.entities_id'))  # foreign key into entities_table
    context_small = Column(Text)  # context parsed from the original file: a small number of lines around the entity
    context_medium = Column(Text)  # same, with a wider line window
    context_large = Column(Text, index=True)  # widest window; indexed for searching
    #context_indexed = Column(Text, index=True) # is the context of the entity which was parsed from the original file, by a specific number of lines before and after the entity
    individual_entity = relationship("EntitiesTable", back_populates="context")
|
||||
|
||||
class FileMetadata(Base):
    """Descriptive metadata for each ingested input file."""
    __tablename__ = 'file_metadata'
    file_id = Column(Integer, primary_key=True)  # primary key of the file_metadata table
    file_name = Column(String, index=True)  # name of the original input file
    file_path = Column(String)  # path of the original input file
    file_mimetype = Column(String)  # MIME type of the original input file
|
||||
|
||||
class EntityTypesTable(Base):
    """Catalog of entity types, mirroring the specs in data/entities.yaml."""
    __tablename__ = 'entity_types_table'
    entity_type_id = Column(Integer, primary_key=True)  # primary key of the entity_types_table
    entity_type = Column(String)  # entity type short form, e.g. ipv4, ipv6, btcaddr, etc.
    regex_pattern = Column(String)  # regex pattern usable for parsing the files (may be null)
    script_parser = Column(String)  # name of the Python script usable for parsing the files (may be null)
    gui_tooltip = Column(String)  # tooltip shown in the GUI
    gui_name = Column(String)  # GUI display name, more descriptive than entity_type
    parent_type = Column(String, default='root')  # hierarchical structure from the YAML specs
    parser_enabled = Column(Boolean, default=True)  # whether this parser is enabled
|
||||
|
||||
|
||||
def create_database(db_path='sqlite:///entities.db'):
    """Create all ORM tables in the database at *db_path* (idempotent).

    NOTE: sessions created later via SessionFactory are bound to the default
    'sqlite:///entities.db' regardless of *db_path* -- the check below always
    queries the default database.
    """
    engine = create_engine(db_path)
    logging.debug("Create Database Engine")  # constant messages need no f-string
    Base.metadata.create_all(engine)
    logging.debug("Created all Metadata")
    engine.dispose()
    logging.debug("Disposed Engine")

    # Start a new session to check whether the type catalog is populated.
    session = SessionFactory()
    logging.debug("Started new session with session factory")
    try:
        # Check if EntityTypesTable is empty.
        if not session.query(EntityTypesTable).first():
            # Population from the YAML file is currently disabled; the call
            # below is intentionally commented out.
            logging.debug("EntityTypesTable is empty; population step is disabled")
            #populate_entity_types_table(session)
    finally:
        # Always release the session, even if the query raises.
        session.close()
|
||||
|
||||
|
||||
def get_db_session():
    """Open and return a fresh session from the module-level factory."""
    new_session = SessionFactory()
    return new_session
if __name__ == "__main__":
    # Allow running this module directly to (re)create the database schema.
    create_database()
@contextmanager
def session_scope():
    """Provide a transactional scope around a series of operations.

    Commits on clean exit, rolls back on any exception, and always closes
    the session. Fix: use a bare ``raise`` instead of ``raise e`` so the
    original traceback is preserved unmodified.
    """
    session = SessionFactory()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise  # bare re-raise keeps the original traceback (was: raise e)
    finally:
        session.close()
357
logline_leviathan/database/database_operations.py
Normal file
357
logline_leviathan/database/database_operations.py
Normal file
@ -0,0 +1,357 @@
|
||||
import logging
|
||||
import os
|
||||
import yaml
|
||||
from PyQt5.QtWidgets import QDialog, QVBoxLayout, QMessageBox, QLabel, QRadioButton, QPushButton
|
||||
from logline_leviathan.gui.ui_helper import UIHelper
|
||||
|
||||
from logline_leviathan.database.database_manager import *
|
||||
|
||||
|
||||
|
||||
|
||||
class DatabaseOperations:
    """Synchronises entity-type definitions between ./data/entities.yaml and the DB.

    Also provides maintenance helpers (parser-script presence check, wordlist
    purge) and drives the duplicate / inconsistency resolution dialogs.
    """

    def __init__(self, main_window, db_init_func):
        self.main_window = main_window
        self.db_init_func = db_init_func  # callable that creates the database schema
        self.selected_resolutions = []

    def ensureDatabaseExists(self):
        """Create the database via db_init_func if entities.db is missing."""
        db_path = 'entities.db'
        db_exists = os.path.exists(db_path)
        if not db_exists:
            logging.info("Database does not exist. Creating new database...")
            self.db_init_func()  # This should call create_database
        else:
            logging.info("Database exists.")

    def loadRegexFromYAML(self):
        """Load entity definitions from ./data/entities.yaml, aborting on duplicates."""
        with open('./data/entities.yaml', 'r') as file:
            yaml_data = yaml.safe_load(file)
        clean_yaml_data = self.notify_duplicates_from_yaml(yaml_data)
        return clean_yaml_data

    def notify_duplicates_from_yaml(self, yaml_data):
        """Return yaml_data unchanged; show a dialog and raise ValueError if any
        identifying field value is shared by more than one entity."""
        duplicates = []
        seen_fields = {'entity_type': {}, 'gui_name': {}, 'gui_tooltip': {}, 'regex_pattern': {}, 'script_parser': {}}

        for entity_name, entity_data in yaml_data.items():
            # Iterate through each field and check for duplicates
            for field in seen_fields:
                value = entity_data.get(field)
                if value:  # Only check non-empty values
                    if value in seen_fields[field]:
                        duplicates.append({
                            "duplicate_field": field,
                            "entity_name": entity_name,
                            "original_entity_name": seen_fields[field][value]
                        })
                    seen_fields[field][value] = entity_name

        if duplicates:
            self.show_duplicate_error_dialog(duplicates)
            raise ValueError("Duplicate entries found in YAML file. Aborting.")
        return yaml_data

    def show_duplicate_error_dialog(self, duplicates):
        """Show the modal duplicate-error dialog."""
        dialog = DuplicateErrorDialog(duplicates)
        dialog.exec_()

    def show_resolve_inconsistencies_dialog(self, db_entity, yaml_entity):
        """Ask the user to keep DB or YAML version; return the chosen resolution or None."""
        dialog = ResolveInconsistenciesDialog([(db_entity, yaml_entity)])
        result = dialog.exec_()
        if result == QDialog.Accepted:
            resolutions = dialog.getSelectedResolutions()
            if resolutions:
                return resolutions[0]  # Return the first (and only) resolution
        return None

    def populate_and_update_entities_from_yaml(self, yaml_data):
        """Insert new entity types and reconcile existing ones with YAML data."""
        with session_scope() as session:
            db_entities = session.query(EntityTypesTable).all()
            db_entity_dict = {entity.entity_type: entity for entity in db_entities}

            for entity_name, entity_data in yaml_data.items():
                entity_type = entity_data['entity_type']
                db_entity = db_entity_dict.get(entity_type)

                if db_entity is None:
                    # The YAML key may have been renamed; try to match on any field.
                    db_entity = self.find_potentially_modified_entity(db_entities, entity_data)

                if db_entity:
                    # Preserve the user's enable/disable toggle from the database.
                    parser_enabled_db = db_entity.parser_enabled
                    entity_data['parser_enabled'] = parser_enabled_db
                    if self.is_duplicate_or_inconsistent(db_entity, entity_data, db_entities):
                        logging.warning(f"Issue found with entity {db_entity} and {entity_data}. Handling resolution.")
                        resolution = self.show_resolve_inconsistencies_dialog(db_entity, entity_data)
                        if resolution:
                            self.apply_resolution([(resolution, db_entity)], session)  # Pass db_entity as part of the resolution
                    else:
                        for key, value in entity_data.items():
                            setattr(db_entity, key, value)
                else:
                    new_entity = EntityTypesTable(**entity_data)
                    session.add(new_entity)

            session.commit()

    def find_potentially_modified_entity(self, db_entities, yaml_entity):
        """Return the first DB entity sharing any identifying field value with
        *yaml_entity*, or None if nothing matches."""
        for db_ent in db_entities:
            if any(
                getattr(db_ent, key) == yaml_entity.get(key)
                # BUGFIX: use .get() — a YAML entry may omit fields such as
                # 'parser_enabled'; the previous yaml_entity[key] raised KeyError.
                for key in ['entity_type', 'gui_name', 'gui_tooltip', 'regex_pattern', 'script_parser', 'parser_enabled']
                if yaml_entity.get(key)
            ):
                return db_ent
        return None

    def is_duplicate_or_inconsistent(self, db_entity, yaml_entity, db_entities):
        """True if db_entity disagrees with yaml_entity on any identifying field,
        or if another DB entity already uses one of yaml_entity's values."""
        if db_entity:
            # Exclude 'parser_enabled' from the inconsistency check
            keys_to_check = ['entity_type', 'gui_name', 'gui_tooltip', 'regex_pattern', 'script_parser']
            for key in keys_to_check:
                if getattr(db_entity, key, None) != yaml_entity.get(key) and yaml_entity.get(key) is not None:
                    logging.debug(f"Found inconsistent entity: DB-Entity: {db_entity} YAML-Entity: {yaml_entity}")
                    return True

        # Check for duplicate across all entities
        for db_ent in db_entities:
            if db_ent.entity_type == yaml_entity['entity_type']:
                continue

            if any(
                # BUGFIX: .get() tolerates fields absent from the YAML entry
                # (was yaml_entity[key], which raised KeyError).
                getattr(db_ent, key) == yaml_entity.get(key) and yaml_entity.get(key) is not None
                for key in ['entity_type', 'gui_name', 'gui_tooltip', 'regex_pattern', 'script_parser']
            ):
                logging.debug(f"Found duplicate entity: {db_ent}")
                return True

        return False

    def update_database_entry(self, db_entity, yaml_entity):
        """Overwrite db_entity's attributes with the YAML values."""
        for key, value in yaml_entity.items():
            setattr(db_entity, key, value)

    def apply_resolution(self, resolutions, session):
        """Apply user-chosen resolutions.

        'yaml' replaces the DB row with the YAML definition (re-linking foreign
        keys to the new row); 'db' writes the DB row back into entities.yaml.
        """
        with open('./data/entities.yaml', 'r') as file:
            yaml_data = yaml.safe_load(file)

        for (resolution, entity), db_entity in resolutions:
            if resolution == 'yaml':
                logging.debug(f"Resolving YAML entity: {entity} with resolution: yaml and db_entity: {db_entity}")
                # BUGFIX: initialise so reassign_foreign_keys below cannot hit a
                # NameError when db_entity is falsy.
                foreign_keys = {}
                if db_entity:
                    foreign_keys = self.capture_foreign_keys(db_entity.entity_type_id, session)
                    session.delete(db_entity)

                new_entity = EntityTypesTable(**entity)
                session.add(new_entity)
                session.flush()  # flush so the new row receives its primary key

                self.reassign_foreign_keys(new_entity, foreign_keys, session)

            elif resolution == 'db':
                if entity:  # Existing database entity is chosen
                    yaml_data[entity.entity_type] = {
                        'entity_type': entity.entity_type,
                        'gui_name': entity.gui_name,
                        'gui_tooltip': entity.gui_tooltip,
                        'parent_type': entity.parent_type,
                        'regex_pattern': entity.regex_pattern,
                        'script_parser': entity.script_parser,
                        'parser_enabled': entity.parser_enabled
                    }

        with open('./data/entities.yaml', 'w') as file:
            yaml.dump(yaml_data, file)

    def capture_foreign_keys(self, entity_id, session):
        """Collect the ids of rows referencing *entity_id*, keyed by table."""
        foreign_keys = {}

        # Use entity_id to capture references
        distinct_entities_refs = session.query(DistinctEntitiesTable).filter_by(entity_types_id=entity_id).all()
        foreign_keys['distinct_entities'] = [ref.distinct_entities_id for ref in distinct_entities_refs]

        entities_refs = session.query(EntitiesTable).filter_by(entity_types_id=entity_id).all()
        foreign_keys['entities'] = [ref.entities_id for ref in entities_refs]

        return foreign_keys

    def reassign_foreign_keys(self, new_entity, foreign_keys, session):
        """Point previously captured references at new_entity's primary key."""
        # Reassigning references in DistinctEntitiesTable
        for distinct_id in foreign_keys.get('distinct_entities', []):
            distinct_entity = session.query(DistinctEntitiesTable).get(distinct_id)
            distinct_entity.entity_types_id = new_entity.entity_type_id

        # Reassigning references in EntitiesTable
        for entity_id in foreign_keys.get('entities', []):
            entity = session.query(EntitiesTable).get(entity_id)
            entity.entity_types_id = new_entity.entity_type_id

    def checkScriptPresence(self):
        """Warn about parser scripts referenced in the DB but missing from
        ./data/parser; return the list of missing script names."""
        parser_directory = './data/parser'
        missing_scripts = []

        with session_scope() as session:
            all_entities = session.query(EntityTypesTable).all()
            for entity in all_entities:
                script_name = entity.script_parser
                if script_name:
                    script_path = os.path.join(parser_directory, script_name)
                    if not os.path.exists(script_path):
                        missing_scripts.append(script_name)

        if missing_scripts:
            missing_scripts_str = "\n".join(missing_scripts)
            msg = QMessageBox()
            msg.setIcon(QMessageBox.Warning)
            msg.setWindowTitle("Fehlende Skripte")
            msg.setText(".\nDas ist nicht zwingend ein Problem, aber falls nötig,\nsollten die Skritpte in ./data/parser/ ergänzt werden.\nListe der erwarteten Skripte:")
            msg.setInformativeText(missing_scripts_str)
            msg.exec_()  # Display the message box

        return missing_scripts

    def purgeWordlistEntries(self):
        """Delete all entities (and their context rows) of type
        'generated_wordlist_match'; no-op if that type does not exist."""
        try:
            with session_scope() as session:
                # Identify the entity_type_id for 'generated_wordlist_match'
                wordlist_entity_type = session.query(EntityTypesTable).filter_by(entity_type='generated_wordlist_match').one_or_none()

                if not wordlist_entity_type:
                    logging.info("No 'generated_wordlist_match' entity type found. No action taken.")
                    return

                # Find all distinct entities associated with the wordlist entity type
                distinct_entities_to_remove = session.query(DistinctEntitiesTable).filter_by(entity_types_id=wordlist_entity_type.entity_type_id).all()

                for distinct_entity in distinct_entities_to_remove:
                    # Remove all related entities entries and their context
                    entities_to_remove = session.query(EntitiesTable).filter_by(distinct_entities_id=distinct_entity.distinct_entities_id).all()
                    for entity in entities_to_remove:
                        # Remove related context entries
                        session.query(ContextTable).filter_by(entities_id=entity.entities_id).delete()
                        # Remove the entity itself
                        session.delete(entity)

                # Commit the changes
                session.commit()
        except Exception as e:
            logging.error(f"Error during wordlist entries purge: {str(e)}")
            raise
class ResolveInconsistenciesDialog(QDialog):
    """Modal dialog letting the user choose, per inconsistency, whether to keep
    the database entry or the YAML entry."""

    def __init__(self, inconsistencies, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Inkonsistenzen auflösen")
        self.inconsistencies = inconsistencies  # list of (db_entity, yaml_entity) pairs
        self.resolution_choices = []            # parallel list of (db_radio, yaml_radio)
        self.selected_entity = None
        self.selected_entities = []
        self.selected_resolutions = []          # filled in on_ok()
        self.initUI()

    def initUI(self):
        """Build one label/radio pair per inconsistency plus OK/Cancel buttons."""
        layout = QVBoxLayout(self)

        for db_entity, yaml_entity in self.inconsistencies:
            db_entity_str = self.format_entity_for_display(db_entity)
            yaml_entity_str = self.format_entity_for_display(yaml_entity)

            # Create labels and radio buttons for each inconsistency
            db_label = QLabel(f"Datenbank-Eintrag: {db_entity_str}")
            yaml_label = QLabel(f"YAML-Eintrag: {yaml_entity_str}")
            db_radio = QRadioButton("Datenbank-Eintrag behalten")
            yaml_radio = QRadioButton("YAML-Eintrag behalten")

            layout.addWidget(db_label)
            layout.addWidget(db_radio)
            layout.addWidget(yaml_label)
            layout.addWidget(yaml_radio)

            self.resolution_choices.append((db_radio, yaml_radio))

        # Buttons for OK and Cancel
        btn_ok = QPushButton("OK", self)
        btn_ok.clicked.connect(self.on_ok)
        btn_cancel = QPushButton("Abbruch", self)
        btn_cancel.clicked.connect(self.reject)
        layout.addWidget(btn_ok)
        layout.addWidget(btn_cancel)

    def on_ok(self):
        """Record the chosen resolution per inconsistency, then accept the dialog.

        Each entry is ('db', db_entity), ('yaml', yaml_entity), or (None, None)
        when neither radio button was selected.
        """
        self.selected_resolutions = []  # Reset the list before storing new selections
        for (db_radio, yaml_radio), (db_entity, yaml_entity) in zip(self.resolution_choices, self.inconsistencies):
            if db_radio.isChecked():
                self.selected_resolutions.append(('db', db_entity))
            elif yaml_radio.isChecked():
                self.selected_resolutions.append(('yaml', yaml_entity))
            else:
                self.selected_resolutions.append((None, None))

        self.accept()

    def getSelectedResolutions(self):
        """Return the resolutions recorded by on_ok()."""
        return self.selected_resolutions

    def format_entity_for_display(self, entity):
        """Render an entity (dict from YAML or ORM object) as 'key: value' lines."""
        if isinstance(entity, dict):
            # YAML entity is already a dictionary
            return "\n".join(f"{key}: {value}" for key, value in entity.items())
        else:
            # Database entity needs to be formatted
            return "\n".join(f"{attr}: {getattr(entity, attr)}" for attr in ['entity_type', 'gui_name', 'gui_tooltip', 'parent_type', 'regex_pattern', 'script_parser', 'parser_enabled'])
class DuplicateErrorDialog(QDialog):
    """Modal dialog listing duplicate entries found in ./data/entities.yaml."""

    def __init__(self, duplicates, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Duplikate gefunden")
        self.duplicates = duplicates  # list of dicts describing each duplicate
        self.initUI()

    def initUI(self):
        """Build the dialog: explanation, one label per duplicate, action buttons."""
        layout = QVBoxLayout(self)

        # Display duplicate entries
        error_label = QLabel("Duplikate wurden in ./data/entities.yaml gefunden. Diese sollten manuell aufgelöst werden:")
        layout.addWidget(error_label)

        for dup in self.duplicates:
            dup_str = self.format_entity_for_display(dup)
            dup_label = QLabel(dup_str)
            layout.addWidget(dup_label)

        # Buttons
        open_button = QPushButton("YAML-Datei oeffnen", self)
        open_button.clicked.connect(self.openYAML)
        exit_button = QPushButton("Abbruch", self)
        exit_button.clicked.connect(self.close)

        layout.addWidget(open_button)
        layout.addWidget(exit_button)

    def format_entity_for_display(self, entity):
        """Render a duplicate entry as 'key: value' lines."""
        if isinstance(entity, dict):
            return "\n".join(f"{key}: {value}" for key, value in entity.items())
        # BUGFIX: previously fell through and returned None for non-dict input,
        # which would crash QLabel(dup_str); fall back to a plain string.
        return str(entity)

    def openYAML(self):
        """Open data/entities.yaml via the platform file-open helper."""
        ui_helper = UIHelper(main_window=self)
        ui_helper.openFile('data/entities.yaml')
89
logline_leviathan/database/database_utility.py
Normal file
89
logline_leviathan/database/database_utility.py
Normal file
@ -0,0 +1,89 @@
|
||||
import shutil
|
||||
import logging
|
||||
import time
|
||||
import os
|
||||
from PyQt5.QtWidgets import QFileDialog
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from logline_leviathan.database.database_manager import session_scope
|
||||
from logline_leviathan.database.database_operations import DatabaseOperations
|
||||
|
||||
class DatabaseUtility():
    """File-level database maintenance: purge/recreate, import, and export of
    the entities.db SQLite file, driven from the main window."""

    def __init__(self, main_window):
        self.main_window = main_window
        self.database_operations = DatabaseOperations(self, main_window.db_init_func)

    def purgeDatabase(self):
        """Delete entities.db, recreate it, and repopulate from entities.yaml.

        Refuses to run while processing is in progress.
        """
        if self.main_window.isProcessing():
            self.main_window.showProcessingWarning()
            return

        try:
            with session_scope() as db_session:
                # Close and dispose of any existing database session
                # NOTE(review): the session is closed (and its bind disposed)
                # inside its own session_scope, whose exit will still call
                # commit()/close() on it — looks fragile, confirm intended.
                if db_session:
                    db_session.close()
                    db_session.bind.dispose()

                # Attempt to delete the database file with retries
                # (Windows may briefly hold the file open after dispose).
                retries = 3
                for attempt in range(retries):
                    try:
                        if os.path.exists('entities.db'):
                            os.remove('entities.db')
                        break
                    except OSError as e:
                        if attempt < retries - 1:
                            time.sleep(0.1)
                        else:
                            raise e

                # Reinitialize the database
                self.main_window.db_init_func()
                self.main_window.statusLabel.setText(" Leere Datenbank initalisiert. Mit der Analyse fortfahren.")
                logging.debug("Database created.")
                yaml_data = self.database_operations.loadRegexFromYAML()
                self.database_operations.populate_and_update_entities_from_yaml(yaml_data)
                self.main_window.refreshApplicationState()
                self.main_window.generate_report_window.updateCheckboxes()  # Add this line to update the checkboxes
                self.main_window.generate_wordlist_window.updateCheckboxes()
        except SQLAlchemyError as e:
            logging.error(f"Error creating database: {e}")
        except Exception as e:
            logging.error(f"General error: {e}")

    def importDatabase(self):
        """Let the user pick an external .db file and copy it over entities.db."""
        if self.main_window.isProcessing():
            self.main_window.showProcessingWarning()
            return
        options = QFileDialog.Options()
        db_file, _ = QFileDialog.getOpenFileName(self.main_window, "Select External Database", "", "Database Files (*.db);;All Files (*)", options=options)
        if db_file and db_file.endswith(".db"):
            try:
                shutil.copy(db_file, 'entities.db')
                self.main_window.current_db_path = db_file
                self.main_window.statusLabel.setText(" Bestehende Datenbank für diese Sitzung ausgewählt.")
                self.main_window.refreshApplicationState()
                self.main_window.generate_report_window.updateCheckboxes()  # Add this line to update the checkboxes
                self.main_window.generate_wordlist_window.updateCheckboxes()
            except Exception as e:
                logging.error(f"Error selecting external database: {e}")
                self.main_window.statusLabel.setText(f" Fehler bei der Auswahl der Datenbank: {e}")
        else:
            self.main_window.statusLabel.setText(" Keine gueltige Datenbank ausgewählt.")

    def exportDatabase(self):
        """Copy entities.db to a user-chosen path (timestamped default name)."""
        if self.main_window.isProcessing():
            self.main_window.showProcessingWarning()
            return
        options = QFileDialog.Options()
        default_filename = "entities_" + time.strftime('%Y%m%d_%H%M%S') + ".db"
        save_path, _ = QFileDialog.getSaveFileName(self.main_window, "Save Database File", default_filename, "Database Files (*.db);;All Files (*)", options=options)
        if save_path:
            try:
                shutil.copy('entities.db', save_path)
                self.main_window.statusLabel.setText(f" Datenbank erfolgreich exportiert nach {save_path}")
            except Exception as e:
                logging.error(f"Error exporting database: {e}")
                self.main_window.statusLabel.setText(f" Fehler beim Exportieren der Datenbank: {e}")
816
logline_leviathan/database/query copy.py
Normal file
816
logline_leviathan/database/query copy.py
Normal file
@ -0,0 +1,816 @@
|
||||
from sqlalchemy import or_, and_, not_, String
|
||||
from PyQt5.QtWidgets import QProgressBar, QMainWindow, QTableWidget, QTableWidgetItem, QLineEdit, QStyledItemDelegate, QTextEdit, QWidget, QVBoxLayout, QHBoxLayout, QPushButton, QComboBox, QStyle, QLabel
|
||||
from logline_leviathan.database.database_manager import get_db_session, EntitiesTable, DistinctEntitiesTable, EntityTypesTable, ContextTable, FileMetadata, session_scope
|
||||
from PyQt5.QtCore import pyqtSignal, Qt, QThread, pyqtSignal, QTimer
|
||||
from PyQt5.QtGui import QTextDocument, QTextOption
|
||||
from fuzzywuzzy import fuzz
|
||||
import re
|
||||
import logging
|
||||
import html
|
||||
|
||||
|
||||
class QueryThread(QThread):
    """Background thread that runs a database search and scores the hits."""

    queryCompleted = pyqtSignal(list, list)  # Signal to indicate completion

    def __init__(self, db_query_instance, query_text):
        super(QueryThread, self).__init__()
        self.db_query_instance = db_query_instance  # DatabaseGUIQuery instance
        self.query_text = query_text

    def run(self):
        """Execute the query off the GUI thread and emit (scored_results, search_terms)."""
        base_query, search_terms = self.db_query_instance.prepare_query(self.query_text)
        query_lambda = self.db_query_instance.parse_query(self.query_text)

        # Pass the lambda function directly to filter
        # NOTE(review): parse_query returns a lambda; it is handed to .filter()
        # without being called, which SQLAlchemy does not evaluate as a clause —
        # this looks like a bug, confirm whether query_lambda() was intended.
        results = base_query.filter(query_lambda).all()

        # Calculate scored results
        scored_results = [(result, self.db_query_instance.calculate_match_score(result, self.query_text)) for result in results]
        self.queryCompleted.emit(scored_results, search_terms)
class DatabaseGUIQuery:
    """Builds and heuristically scores free-text searches across the entity tables."""

    def __init__(self):
        # One long-lived session plus shortcuts to the mapped table classes.
        self.db_session = get_db_session()
        self.entity_types = EntityTypesTable
        self.entities = EntitiesTable
        self.distinct_entities = DistinctEntitiesTable
        self.context = ContextTable
        self.file_metadata = FileMetadata

    def parse_query(self, query):
        """Translate the raw query string into a callable producing an OR filter.

        Tokens may be quoted and prefixed with '+'/'-'; '*' acts as a wildcard.
        NOTE(review): the empty-query branch returns a one-argument lambda while
        the normal branch returns a zero-argument one, and QueryThread passes the
        lambda uncalled to Query.filter — confirm intended behavior.
        """
        if not query.strip():
            return lambda _: False

        # Split and strip special characters for database query
        tokens = re.findall(r'"[^"]+"|\S+', query)
        stripped_tokens = [token.strip('+-"') for token in tokens]

        filters = []
        for token in stripped_tokens:
            # '*' is mapped to SQL's '%' wildcard, with implicit substring match.
            search_condition = f'%{token.replace("*", "%")}%'

            condition = or_(
                self.distinct_entities.distinct_entity.like(search_condition),
                self.entity_types.entity_type.like(search_condition),
                self.entity_types.gui_name.like(search_condition),
                self.entity_types.gui_tooltip.like(search_condition),
                self.entity_types.script_parser.like(search_condition),
                self.file_metadata.file_name.like(search_condition),
                self.file_metadata.file_path.like(search_condition),
                self.file_metadata.file_mimetype.like(search_condition),
                self.entities.line_number.cast(String).like(search_condition),
                self.context.context_large.like(search_condition)
                # Add other fields as needed
            )
            filters.append(condition)

        return lambda: or_(*filters)

    def parse_search_terms(self, query):
        """Return plain search terms, dropping tokens prefixed with '+' or '-'."""
        tokens = query.split()
        search_terms = [token.lstrip('+-') for token in tokens if not token.startswith('-') and not token.startswith('+')]
        return search_terms

    def prepare_query(self, query):
        """Return (base_query, search_terms): the fully joined SELECT plus parsed terms."""
        search_terms = self.parse_search_terms(query)

        # Construct the base query with proper joins
        base_query = self.db_session.query(
            self.distinct_entities.distinct_entity,
            self.entity_types.gui_name,
            self.file_metadata.file_name,
            self.entities.line_number,
            self.entities.entry_timestamp,
            self.context.context_large
        ).join(
            self.entities, self.distinct_entities.distinct_entities_id == self.entities.distinct_entities_id
        ).join(
            self.file_metadata, self.entities.file_id == self.file_metadata.file_id
        ).join(
            self.context, self.entities.entities_id == self.context.entities_id
        ).join(
            self.entity_types, self.entities.entity_types_id == self.entity_types.entity_type_id
        ).distinct()

        # Apply filters and return results
        return base_query, search_terms

    def display_results(self, results, search_terms):
        """Open a ResultsWindow for the given results.

        NOTE(review): ResultsWindow.__init__ takes (db_query_instance, parent);
        passing (results, search_terms) here looks inconsistent — verify.
        """
        self.results_window = ResultsWindow(results, search_terms)
        self.results_window.show()

    def calculate_match_score(self, result, query):
        """Heuristically score how well *result* matches *query*, normalised to ~0-100.

        Combines exact field hits (with '+'/'-' operand handling), exact term
        order, term count, term-to-entity proximity in the context, and fuzzy
        matching via fuzzywuzzy.
        """
        # Adjusted weights and thresholds
        distinct_entity_weight = 4
        file_name_weight = 4
        timestamp_weight = 1
        line_number_weight = 1
        context_weight = 5
        multiple_term_weight = 1
        order_weight = 8  # Increased weight for exact order of terms
        fuzzy_match_weight = 0.3  # More discerning fuzzy match
        threshold_for_fuzzy = 90  # Higher threshold for fuzzy matches
        proximity_weight = 2  # Increased weight for proximity

        positive_operand_weight = 10  # Weight for terms with '+'
        negative_operand_penalty = -5  # Penalty for terms with '-'
        exact_match_weight = 10  # Increased weight for exact sequence match

        score = 0

        # Extracting operands and terms
        tokens = re.findall(r'"[^"]+"|\S+', query)
        processed_terms = [(token.startswith('+'), token.startswith('-'), token.strip('+-"').lower()) for token in tokens]

        # Normalize result fields
        lower_distinct_entity = result.distinct_entity.lower()
        lower_file_name = result.file_name.lower()
        timestamp_str = str(result.entry_timestamp).lower()
        line_number_str = str(result.line_number).lower()
        words_in_context = result.context_large.lower().split()

        # Check matches in various fields with operand consideration
        for is_positive, is_negative, term in processed_terms:
            if term in lower_distinct_entity:
                score += positive_operand_weight if is_positive else (negative_operand_penalty if is_negative else distinct_entity_weight)
            if term in lower_file_name:
                score += positive_operand_weight if is_positive else (negative_operand_penalty if is_negative else file_name_weight)
            if term in timestamp_str:
                score += positive_operand_weight if is_positive else (negative_operand_penalty if is_negative else timestamp_weight)
            if term in line_number_str:
                score += positive_operand_weight if is_positive else (negative_operand_penalty if is_negative else line_number_weight)
            if term in words_in_context:
                score += positive_operand_weight if is_positive else (negative_operand_penalty if is_negative else context_weight)

        # Creating a cleaned substring of search terms in the exact order they appear in the query
        exact_terms_substring = ' '.join([token.strip('+-"').lower() for token in tokens])

        # Check for exact order of terms in the context
        if exact_terms_substring and exact_terms_substring in ' '.join(words_in_context):
            score += exact_match_weight

        # Check for exact order of terms
        if '"' in query:
            exact_query = ' '.join(term for _, _, term in processed_terms)
            if exact_query in ' '.join(words_in_context):
                score += order_weight

        # Additional weight for multiple different terms
        unique_terms = set(term for _, _, term in processed_terms)
        score += len(unique_terms) * multiple_term_weight

        # Proximity score calculation
        for _, _, term in processed_terms:
            if term in words_in_context:
                # Find the positions of the term and the entity in the context
                term_pos = words_in_context.index(term)
                entity_pos = words_in_context.index(lower_distinct_entity) if lower_distinct_entity in words_in_context else 0

                # Calculate the distance and adjust the score
                distance = abs(term_pos - entity_pos)
                proximity_score = max(0, proximity_weight - distance * 0.01)  # Reduce score based on distance
                score += proximity_score

        # Fuzzy matching
        all_text = f"{result.distinct_entity} {result.file_name} {result.entry_timestamp} {result.line_number} {result.context_large}".lower()
        for _, _, term in processed_terms:
            fuzzy_score = max(fuzz.partial_ratio(term, word) for word in all_text.split())
            if fuzzy_score > threshold_for_fuzzy:
                score += (fuzzy_score / 100) * fuzzy_match_weight

        # Normalize the score
        max_possible_positive_score = (
            distinct_entity_weight + file_name_weight +
            timestamp_weight + line_number_weight +
            context_weight * len(processed_terms) +  # Assuming each term can match in the context
            order_weight + exact_match_weight +
            len(processed_terms) * multiple_term_weight +  # Each term contributes to multiple_term_weight
            len(processed_terms) * positive_operand_weight  # Each term could have a positive operand
        )

        # Considering the negative operand penalty
        max_possible_negative_score = len(processed_terms) * negative_operand_penalty

        # The maximum score is the sum of the possible positive score and the absolute value of the possible negative score
        max_possible_score = max_possible_positive_score + abs(max_possible_negative_score)

        # Normalizing the score to a scale of 100
        score = (score / max_possible_score) * 100

        return score

    def get_entity_types(self):
        """Return GUI names of all entity types that have a regex or script parser."""
        with session_scope() as session:
            # Query to filter entity types that have either regex_pattern or script_parser
            return [entity_type.gui_name for entity_type in session.query(EntityTypesTable)
                    .filter(or_(EntityTypesTable.regex_pattern.isnot(None),
                                EntityTypesTable.script_parser.isnot(None)))
                    .all()]
# Table layout configuration for ResultsWindow (one entry per column).
COLUMN_WIDTHS = [200, 100, 250, 100, 120, 600, 80]  # Adjust these values as needed
COLUMN_NAMES = ['Distinct Entity', 'Entity Type', 'File Name', 'Line Number', 'Timestamp', 'Context', 'Match Score']
DEFAULT_ROW_HEIGHT = 120   # pixels per result row
FILTER_EDIT_WIDTH = 150    # fixed width of per-column filter inputs
class ResultsWindow(QMainWindow):
|
||||
    def __init__(self, db_query_instance, parent=None):
        """Build the search-results window: query bar, filter row, results table."""
        super(ResultsWindow, self).__init__(parent)
        self.db_query_instance = db_query_instance  # DatabaseGUIQuery used for searches
        self.loaded_data_count = 0   # rows already inserted into the table
        self.total_data = []         # full scored result set from the last query
        self.current_filters = {}    # column index -> active filter text
        self.setWindowTitle("Suchergebnis")
        self.setGeometry(800, 600, 1500, 600)  # Adjust size as needed

        # Create central widget and set layout
        centralWidget = QWidget(self)
        self.setCentralWidget(centralWidget)
        mainLayout = QVBoxLayout(centralWidget)

        queryFieldLayout = QHBoxLayout()

        # Search field; QueryLineEdit is presumably defined elsewhere in this module.
        self.databaseQueryLineEdit = QueryLineEdit(self)
        self.databaseQueryLineEdit.setPlaceholderText(" Suchbegriff eingeben...")
        self.databaseQueryLineEdit.returnPressed.connect(self.execute_query_from_results_window)
        self.databaseQueryLineEdit.setStyleSheet("""
            QLineEdit {
                background-color: #3C4043;
                color: white;
                min-height: 20px;
            }
        """)
        queryFieldLayout.addWidget(self.databaseQueryLineEdit)
        # Create a progress bar for query in progress
        self.queryProgressBar = QProgressBar(self)
        self.queryProgressBar.setRange(0, 1)  # Indeterminate mode
        self.queryProgressBar.setFixedWidth(100)  # Initially hidden
        queryFieldLayout.addWidget(self.queryProgressBar)
        executeQueryButton = QPushButton("Suche ausführen", self)
        executeQueryButton.clicked.connect(self.execute_query_from_results_window)
        queryFieldLayout.addWidget(executeQueryButton)

        mainLayout.addLayout(queryFieldLayout)

        # Create a horizontal layout for filter options
        filterLayout = QHBoxLayout()
        mainLayout.addLayout(filterLayout)

        # Add the table widget to the main layout
        self.tableWidget = QTableWidget()
        mainLayout.addWidget(self.tableWidget)

        # Updated stylesheet for the entire ResultsWindow
        stylesheet = """
            /* Styles for QTableWidget and headers */
            QTableWidget, QHeaderView::section {
                background-color: #2A2F35;
                color: white;
                border: 1px solid #4A4A4A;
            }

            /* Style for QLineEdit */
            QLineEdit {
                background-color: #3A3F44;
                color: white;
                border: 1px solid #4A4A4A;
            }

            /* Style for QPushButton */
            QPushButton {
                background-color: #4B5563;
                color: white;
                border-radius: 4px;
                padding: 5px;
                margin: 5px;
            }

            QPushButton:hover {
                background-color: #5C677D;
            }

            QPushButton:pressed {
                background-color: #2A2F35;
            }

            /* Style for empty rows and other areas */
            QWidget {
                background-color: #2A2F35;
                color: white;
            }
        """
        self.setStyleSheet(stylesheet)

        # Apply default row height after setting up the table
        self.tableWidget.verticalHeader().setDefaultSectionSize(DEFAULT_ROW_HEIGHT)

        self.clearAllButton = QPushButton("Alle Filteroptionen loeschen", self)
        self.clearAllButton.clicked.connect(self.clear_all_filters)
        filterLayout.addWidget(self.clearAllButton)
        # Adding filter options after table setup
        self.entityTypeComboBox = QComboBox()
        filterLayout.addWidget(self.entityTypeComboBox)

        # Initialize filterWidgets before calling setup_table
        self.filterWidgets = []

        # Create and add QLineEdit widgets to the filter layout
        for i, column_name in enumerate(COLUMN_NAMES):
            # Skipping the filter creation for certain columns
            if column_name in ['Entity Type', 'Context']:
                continue

            filter_edit = QLineEdit(self)
            filter_edit.setFixedWidth(FILTER_EDIT_WIDTH)
            filter_edit.setPlaceholderText(f"Filtern nach {column_name}")
            # Bind the column index as a default argument to avoid late binding.
            filter_edit.textChanged.connect(lambda text, col=i: self.apply_filter(text, col))

            self.filterWidgets.append(filter_edit)
            filterLayout.addWidget(filter_edit)
        # Timer driving incremental loading of result rows.
        self.dataLoadTimer = QTimer(self)
        self.dataLoadTimer.timeout.connect(self.load_more_data)

        # Create and add the Dismiss button
        self.dismissButton = QPushButton("Schließen", self)
        self.dismissButton.clicked.connect(self.dataLoadTimer.stop)
        self.dismissButton.clicked.connect(self.close)
        mainLayout.addWidget(self.dismissButton)

        self.populate_entity_type_combobox()

        # Adjust column widths and filter widgets' widths
        self.adjust_column_widths()

        #self.tableWidget.verticalScrollBar().valueChanged.connect(self.check_scroll)
def populate_entity_type_combobox(self):
    """Fill the entity-type combo box from the database and hook up filtering.

    The first entry ("Alle verfügbaren Typen", data=None) is the "no filter"
    sentinel; every real entity type carries its own name as item data so
    filter_by_entity_type() can read it back via currentData().
    """
    combo = self.entityTypeComboBox
    combo.addItem("Alle verfügbaren Typen", None)  # sentinel: no type filter
    for gui_name in DatabaseGUIQuery().get_entity_types():
        combo.addItem(gui_name, gui_name)
    combo.currentIndexChanged.connect(self.filter_by_entity_type)
|
||||
|
||||
def clear_table(self):
    """Remove every item and reset the table to zero columns and rows."""
    table = self.tableWidget
    table.clear()
    table.setColumnCount(0)
    table.setRowCount(0)
|
||||
|
||||
def adjust_column_widths(self):
    """Apply the fixed pixel widths from COLUMN_WIDTHS to the table columns."""
    set_width = self.tableWidget.setColumnWidth
    for index, pixels in enumerate(COLUMN_WIDTHS):
        set_width(index, pixels)
|
||||
|
||||
|
||||
def execute_query_from_results_window(self):
    """Run the query currently typed in the query line edit on a worker thread.

    Clears the table, puts the progress bar into its indeterminate ("busy")
    state and starts a QueryThread; on_query_completed() is invoked when the
    thread finishes.

    Fix: the incremental-load timer used to be started *before* the empty-query
    guard, so submitting a blank query left the timer firing with nothing to
    load.  The timer is now started only for a non-empty query.
    """
    query_text = self.databaseQueryLineEdit.text()
    if not query_text:
        return  # nothing to search for
    self.dataLoadTimer.start(2000)  # periodically pulls more rows into the table
    self.clear_table()
    self.queryProgressBar.setRange(0, 0)  # range (0, 0) = indeterminate/busy
    self.query_thread = QueryThread(self.db_query_instance, query_text)
    self.query_thread.queryCompleted.connect(self.on_query_completed)
    self.query_thread.start()
|
||||
|
||||
def set_query_and_execute(self, query_text):
    """Programmatically place *query_text* in the search box and run it."""
    self.databaseQueryLineEdit.setText(query_text)
    self.execute_query_from_results_window()
|
||||
|
||||
|
||||
def on_query_completed(self, results, search_terms):
    """Slot for QueryThread.queryCompleted: store results and rebuild the table.

    Parameters
    ----------
    results : sequence of (result_row, score) pairs used as self.total_data
        by the incremental loader (load_more_data indexes row_data[1] as the
        score).
    search_terms : list of parsed query tokens, forwarded to the highlight
        delegates.

    Fix: the progress-bar reset had been accidentally fused into the end of
    the debug-log comment ("...Debug statementself.queryProgressBar...") and
    was therefore never executed, leaving the bar stuck in its busy state.
    """
    logging.debug(f"Query completed with {len(results)} results")
    self.queryProgressBar.setRange(0, 1)  # leave the indeterminate/busy state
    self.total_data = results
    self.search_terms = search_terms
    self.loaded_data_count = 0
    self.setup_table(search_terms)
    self.apply_all_filters()
|
||||
|
||||
|
||||
def setup_table(self, search_terms=None):
    """(Re)build the table's columns and headers and load the first data chunk.

    Fix: the previous signature used a mutable default (``search_terms=[]``);
    replaced with ``None`` per standard Python practice.  Backward compatible —
    the list was only read, never mutated, and callers passing a list see no
    change.
    """
    search_terms = [] if search_terms is None else search_terms
    # Seven fixed columns; index 6 ("Match Score") drives the default sort.
    self.tableWidget.setColumnCount(7)
    self.tableWidget.setHorizontalHeaderLabels(['Distinct Entity', 'Entity Type', 'File Name', 'Line Number', 'Timestamp', 'Context', 'Match Score'])
    highlight_delegate = HighlightDelegate(self, search_terms)
    # Columns 2 (File Name), 4 (Timestamp) and 5 (Context) are rendered via
    # cell widgets, so only 0, 1 and 3 get the painting delegate.
    self.tableWidget.setItemDelegateForColumn(0, highlight_delegate)
    self.tableWidget.setItemDelegateForColumn(1, highlight_delegate)
    self.tableWidget.setItemDelegateForColumn(3, highlight_delegate)
    # Apply column widths
    self.adjust_column_widths()
    # Sorting is disabled while inserting so rows do not reshuffle mid-load.
    self.tableWidget.setSortingEnabled(False)
    # Load initial subset of data
    self.load_more_data()
    # Enable sorting by 'Match Score' after data is populated
    self.tableWidget.setSortingEnabled(True)
    self.tableWidget.sortItems(6, Qt.DescendingOrder)
|
||||
|
||||
def add_table_row(self, row_index, result, score):
    """Insert one result row at *row_index*.

    ``result`` is an indexable row: [0] distinct entity, [1] entity type,
    [2] file name, [3] line number, [4] timestamp, [5] context text.
    File Name and Timestamp are rendered with CellWidget (label + copy-to-filter
    button) while a hidden QTableWidgetItem carries the raw value in
    Qt.UserRole; Context uses a ScrollableTextWidget.
    """
    self.tableWidget.insertRow(row_index)
    # Column 0: distinct entity (highlighted via the row delegate below)
    distinct_entity_item = QTableWidgetItem(str(result[0]))
    self.tableWidget.setItem(row_index, 0, distinct_entity_item)
    # Column 1: entity type
    entity_type_item = QTableWidgetItem(str(result[1]))
    self.tableWidget.setItem(row_index, 1, entity_type_item)
    # Column 2: file name - CellWidget with a button that copies the value
    # into the second filter edit (filterWidgets[1])
    file_name_widget = CellWidget(str(result[2]), self.filterWidgets[1], self.search_terms)
    self.tableWidget.setCellWidget(row_index, 2, file_name_widget)
    # Hidden item carrying the raw file name for sorting/filtering access
    file_name_item = QTableWidgetItem()
    file_name_item.setData(Qt.UserRole, str(result[2]))
    self.tableWidget.setItem(row_index, 2, file_name_item)
    # Column 3: line number
    line_number_item = QTableWidgetItem(str(result[3]))
    self.tableWidget.setItem(row_index, 3, line_number_item)
    # Column 4: timestamp - CellWidget tied to filterWidgets[3]
    timestamp_widget = CellWidget(str(result[4]), self.filterWidgets[3], self.search_terms)
    self.tableWidget.setCellWidget(row_index, 4, timestamp_widget)
    timestamp_item = QTableWidgetItem()
    timestamp_item.setData(Qt.UserRole, str(result[4]))
    self.tableWidget.setItem(row_index, 4, timestamp_item)
    # Column 5: context, scrollable and highlighted around the entity
    scrollable_widget = ScrollableTextWidget(result[5], self.search_terms, str(result[0]))
    self.tableWidget.setCellWidget(row_index, 5, scrollable_widget)
    # Column 6: match score; NumericTableWidgetItem sorts numerically
    match_score_item = NumericTableWidgetItem("{:.4f}".format(float(score)))
    self.tableWidget.setItem(row_index, 6, match_score_item)
    # NOTE(review): a fresh HighlightDelegate is allocated per row; consider
    # reusing one delegate instance — verify lifetime semantics first.
    highlight_delegate = HighlightDelegate(self, self.search_terms)
    self.tableWidget.setItemDelegateForRow(row_index, highlight_delegate)
    # Re-enable sorting (it may have been disabled during bulk population)
    self.tableWidget.setSortingEnabled(True)
    # Cap the table: above 500 rows, drop the lowest-scoring 10 percent
    # (the numbers here, not the old "100"/"20%" comment, are authoritative)
    if self.tableWidget.rowCount() > 500:
        self.remove_lowest_scoring_rows(10)  # 10% to be removed
|
||||
|
||||
def load_more_data(self):
    """Incrementally move one chunk of query results into the table.

    Called by self.dataLoadTimer and from setup_table().  Takes the next
    ``chunk_size`` entries of self.total_data (each a (result_row, score)
    pair), keeps only those scoring above the table's current average, and
    inserts them in score order via insert_row_in_sorted_order().
    """
    if not self.is_new_data_available():
        return  # No new data available, just return

    start_index = self.loaded_data_count
    chunk_size = 50  # Adjust this number based on performance
    end_index = min(start_index + chunk_size, len(self.total_data))

    # Average score of rows already in the table; acts as an admission bar
    average_score = self.calculate_average_score()

    # Sort the chunk by match score (element [1] of each pair), descending
    sorted_chunk = sorted(self.total_data[start_index:end_index], key=lambda x: x[1], reverse=True)

    for row_data in sorted_chunk:
        score = row_data[1]
        if score > average_score:
            # NOTE(review): row_index is constant for every iteration
            # (start_index + chunk length) and may exceed the table's current
            # row count, so matches_current_filters() below inspects a row
            # that does not exist yet — looks like a bug; confirm intent.
            row_index = start_index + len(sorted_chunk)  # Adjust index based on the sorted chunk
            if self.matches_current_filters(row_index, row_data):
                self.insert_row_in_sorted_order(row_data)

    # Reapply filters after loading new data
    self.apply_all_filters()
    # Mark this chunk as consumed, even for rows skipped by the score bar
    self.update_data_tracking(end_index)

    self.tableWidget.update()  # Refresh the table
|
||||
|
||||
def remove_lowest_scoring_rows(self, percentage):
    """Delete the lowest-scoring *percentage* percent of the table's rows.

    Scores come from column 6 ("Match Score").  Victim rows are removed
    from the highest index downward so earlier removals do not shift the
    indices of rows still pending removal.
    """
    total_rows = self.tableWidget.rowCount()
    victim_count = total_rows * percentage // 100

    # Pair each row's score with its index, skipping rows lacking a score item.
    scored = []
    for index in range(total_rows):
        item = self.tableWidget.item(index, 6)  # column 6 = Match Score
        if item:
            scored.append((float(item.text()), index))

    # Lowest scores first; then delete those rows bottom-up.
    victims = sorted(scored)[:victim_count]
    for _, index in sorted(victims, key=lambda pair: pair[1], reverse=True):
        self.tableWidget.removeRow(index)
|
||||
|
||||
|
||||
def is_new_data_available(self):
    """True while some query results have not yet been fed into the table."""
    return len(self.total_data) > self.loaded_data_count
|
||||
|
||||
def calculate_average_score(self):
    """Mean of the Match Score column over all rows (0 for an empty table)."""
    row_count = self.tableWidget.rowCount()
    if row_count == 0:
        return 0
    scores = []
    for index in range(row_count):
        item = self.tableWidget.item(index, 6)  # column 6 = Match Score
        scores.append(float(item.text()) if item else 0)
    return sum(scores) / row_count
|
||||
|
||||
|
||||
def update_data_tracking(self, end_index):
    """Record how far into self.total_data the incremental loader has read."""
    # is_new_data_available() compares this cursor against len(self.total_data)
    self.loaded_data_count = end_index
|
||||
|
||||
def insert_row_in_sorted_order(self, row_data):
    """Insert *row_data* keeping the table sorted by match score, descending.

    ``row_data`` is a (result_row, score) pair; the scan stops at the first
    existing row whose score is lower, falling back to appending at the end.
    """
    score = row_data[1]
    target = self.tableWidget.rowCount()  # default: append at the bottom
    for candidate in range(self.tableWidget.rowCount()):
        item = self.tableWidget.item(candidate, 6)  # column 6 = Match Score
        existing = float(item.text()) if item else 0
        if score > existing:
            target = candidate
            break
    self.add_table_row(target, row_data[0], score)
|
||||
|
||||
|
||||
def matches_current_filters(self, row_index, row_data):
    """True when the row passes every active per-column filter."""
    return all(
        self.is_match(row_index, column, needle, row_data)
        for column, needle in self.current_filters.items()
    )
|
||||
|
||||
def is_match(self, row_index, column, filter_text, row_data):
    """Case-insensitive substring test of *filter_text* against one cell.

    The cell's plain text is extracted according to how the column is
    rendered: CellWidget (HTML label), ScrollableTextWidget (HTML text
    edit) or a plain QTableWidgetItem.  ``row_data`` is accepted but not
    used here.
    """
    # Extract text from the cell or widget
    widget = self.tableWidget.cellWidget(row_index, column)
    if isinstance(widget, CellWidget):
        # CellWidget contains a QLabel with HTML-formatted text; QTextDocument
        # is used to strip the markup down to plain text
        document = QTextDocument()
        document.setHtml(widget.label.text())
        text = document.toPlainText()
    elif isinstance(widget, ScrollableTextWidget):
        # ScrollableTextWidget contains a QTextEdit with HTML-formatted text
        text = widget.text_edit.toPlainText()
    else:
        # Standard QTableWidgetItem (may be absent for sparse rows)
        item = self.tableWidget.item(row_index, column)
        text = item.text() if item else ""

    # Compare the extracted plain text with the filter text
    return filter_text.lower() in text.lower()
|
||||
|
||||
|
||||
def apply_filter(self, text, column):
    """Slot for a filter QLineEdit: store the lower-cased filter for *column*
    and re-evaluate row visibility."""
    self.current_filters[column] = text.lower()
    self.apply_all_filters()
|
||||
|
||||
|
||||
def extract_row_data(self, row_index):
    """Collect the display text of every cell in *row_index* as a list."""
    return [
        self.get_cell_data(row_index, column)
        for column in range(self.tableWidget.columnCount())
    ]
|
||||
|
||||
def get_cell_data(self, row_index, column):
    """Return one cell's plain text, handling each widget rendering kind.

    NOTE(review): this duplicates the extraction logic in is_match(); the
    two should probably share a helper.
    """
    widget = self.tableWidget.cellWidget(row_index, column)
    if isinstance(widget, CellWidget):
        # Strip the HTML markup of the label down to plain text
        document = QTextDocument()
        document.setHtml(widget.label.text())
        return document.toPlainText()
    elif isinstance(widget, ScrollableTextWidget):
        return widget.text_edit.toPlainText()
    else:
        # Plain QTableWidgetItem; may be missing entirely
        item = self.tableWidget.item(row_index, column)
        return item.text() if item else ""
|
||||
|
||||
def apply_all_filters(self):
    """Re-check every row against the active filters, hiding mismatches."""
    table = self.tableWidget
    for row in range(table.rowCount()):
        visible = self.matches_current_filters(row, self.extract_row_data(row))
        (table.showRow if visible else table.hideRow)(row)
|
||||
|
||||
|
||||
def filter_by_entity_type(self):
    """Translate the combo-box selection into a filter on the Entity Type column.

    The combo box stores None as item data for the "all types" sentinel;
    any other selection becomes a lower-cased substring filter on the
    'Entity Type' column.
    """
    selected_type = self.entityTypeComboBox.currentData()
    column = COLUMN_NAMES.index('Entity Type')
    if selected_type is None:
        # Sentinel chosen: drop any existing entity-type filter
        self.current_filters.pop(column, None)
    else:
        self.current_filters[column] = selected_type.lower()
    # Reapply all filters including the entity type filter
    self.apply_all_filters()
|
||||
|
||||
|
||||
def on_filter_change(self):
    """Generic slot: re-evaluate row visibility after any filter change."""
    self.apply_all_filters()
|
||||
|
||||
def clear_all_filters(self):
    """Reset every filter edit, drop all stored filters and unhide all rows."""
    # NOTE(review): clearing each QLineEdit fires its textChanged signal,
    # which re-enters apply_filter() with "" before current_filters.clear()
    # below runs — harmless but redundant; confirm ordering is intended.
    for filter_widget in self.filterWidgets:
        filter_widget.clear()

    self.current_filters.clear()  # Clear all filters
    #logging.debug("All filters cleared")

    for row in range(self.tableWidget.rowCount()):
        self.tableWidget.showRow(row)  # Show all rows

    # Reapply the entity-type combo selection, which is independent of the
    # per-column line edits
    self.filter_by_entity_type()
|
||||
|
||||
@staticmethod
def strip_html_tags(text):
    """Return *text* with HTML/XML tags removed (pattern '<[^<]+?>')."""
    tag_pattern = re.compile('<[^<]+?>')
    return tag_pattern.sub('', text)
|
||||
|
||||
|
||||
|
||||
|
||||
class QueryLineEdit(QLineEdit):
    """QLineEdit that emits returnPressed for both Return and numpad Enter.

    Fix: the original checked only Qt.Key_Return, so the numeric-keypad
    Enter key (Qt.Key_Enter) did not trigger the query.
    """

    # NOTE(review): this shadows QLineEdit's own built-in returnPressed
    # signal; kept for backward compatibility with existing connections.
    returnPressed = pyqtSignal()

    def keyPressEvent(self, event):
        """Emit returnPressed on Return/Enter; defer to QLineEdit otherwise."""
        if event.key() in (Qt.Key_Return, Qt.Key_Enter):
            self.returnPressed.emit()
        else:
            super().keyPressEvent(event)
|
||||
|
||||
|
||||
class HighlightDelegate(QStyledItemDelegate):
    """Item delegate that paints cell text as HTML with search terms highlighted.

    Only terms prefixed with '+' in the query are highlighted (yellow on
    black text); everything else renders as plain white text.
    """

    def __init__(self, parent=None, search_terms=None):
        super().__init__(parent)
        # Raw query tokens; '+'-prefixed ones are the highlight candidates
        self.search_terms = search_terms or []

    def paint(self, painter, option, index):
        """Render the cell by drawing a QTextDocument built from highlighted HTML."""
        painter.save()

        # Word-wrap inside the cell rectangle
        options = QTextOption()
        options.setWrapMode(QTextOption.WrapAtWordBoundaryOrAnywhere)
        document = QTextDocument()
        document.setDefaultTextOption(options)
        document.setDefaultFont(option.font)

        # Prepare highlighted text from the model's display value
        text = index.model().data(index)
        highlighted_text = self.get_highlighted_text(text)
        document.setHtml(highlighted_text)

        # Constrain the document to the cell width so wrapping matches
        document.setTextWidth(option.rect.width())

        # Draw the contents at the cell's origin
        painter.translate(option.rect.topLeft())
        document.drawContents(painter)
        painter.restore()

    def get_highlighted_text(self, text):
        """Wrap *text* in white-colored HTML and mark '+'-prefixed terms.

        NOTE(review): the substitution runs over the full HTML string, so a
        term such as "white" would also match the inline style attribute
        text — confirm whether this edge case matters in practice.
        """
        if text is None:
            text = ""

        text_with_color = f"<span style='color: white;'>{text}</span>"
        for term in self.search_terms:
            # Retain the '+' at the beginning and strip other special characters
            is_positive = term.startswith('+')
            clean_term = re.sub(r'[^\w\s]', '', term.lstrip('+-')).lower()

            if is_positive and clean_term.lower() in text.lower():
                # Use regex for case-insensitive search and replace
                regex = re.compile(re.escape(clean_term), re.IGNORECASE)
                highlighted_term = f"<span style='background-color: yellow; color: black;'>{clean_term}</span>"
                text_with_color = regex.sub(highlighted_term, text_with_color)

        return text_with_color.replace("\n", "<br>")
|
||||
|
||||
|
||||
|
||||
class ScrollableTextWidget(QWidget):
    """Cell widget showing context text in a scrollable, styled QTextEdit.

    The distinct entity is highlighted blue, matching search terms yellow,
    and the view is scrolled so the entity is visible.
    """

    def __init__(self, text, search_terms, distinct_entity, parent=None):
        super().__init__(parent)
        layout = QVBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)
        self.text_edit = CustomTextEdit(self)
        self.text_edit.setReadOnly(True)

        # Apply styles including scrollbar styles
        self.text_edit.setStyleSheet("""
            QTextEdit {
                background-color: #2A2F35; /* Dark blue-ish background */
                color: white; /* White text */
            }
            QTextEdit QScrollBar:vertical {
                border: none;
                background-color: #3A3F44; /* Dark scrollbar background */
                width: 8px; /* Width of the scrollbar */
            }
            QTextEdit QScrollBar::handle:vertical {
                background-color: #6E6E6E; /* Scroll handle color */
                border-radius: 4px; /* Rounded corners for the handle */
            }
            QTextEdit QScrollBar::add-line:vertical, QTextEdit QScrollBar::sub-line:vertical {
                background: none;
            }
        """)

        # Set the text with highlighting
        self.setHighlightedText(text, search_terms, distinct_entity)
        layout.addWidget(self.text_edit)

        # Scroll to the distinct entity
        self.scroll_to_text(distinct_entity)

    def setHighlightedText(self, text, search_terms, distinct_entity):
        """Build the HTML body: white base text, blue entity, yellow terms."""
        if text is None:
            text = ""

        # Wrap the original text in a span to maintain color
        text_with_color = f"<span style='color: white;'>{text}</span>"

        # Highlight distinct entity in a different color (blue)
        if distinct_entity:
            # NOTE(review): the escaped entity is matched against the raw
            # (unescaped) text — entities containing HTML-special characters
            # would fail to match; confirm inputs are plain text.
            distinct_entity_escaped = html.escape(distinct_entity)
            text_with_color = re.sub(
                re.escape(distinct_entity_escaped),
                lambda match: f"<span style='background-color: blue; color: white;'>{match.group()}</span>",
                text_with_color,
                flags=re.IGNORECASE
            )

        for term in search_terms:
            # Check if the term starts with '+'
            is_positive = term.startswith('+')
            clean_term = re.sub(r'[^\w\s]', '', term.lstrip('+-'))

            # '+'-terms are always highlighted; others only when present in text
            if is_positive or clean_term.lower() in text.lower():
                regex = re.compile(re.escape(clean_term), re.IGNORECASE)
                highlighted_term = f"<span style='background-color: yellow; color: black;'>{clean_term}</span>"
                text_with_color = regex.sub(highlighted_term, text_with_color)

        self.text_edit.setHtml(text_with_color.replace("\n", "<br>"))

    def scroll_to_text(self, text):
        """Move the cursor to the first occurrence of *text*, if any."""
        if text:
            cursor = self.text_edit.document().find(text)
            self.text_edit.setTextCursor(cursor)
|
||||
|
||||
class CustomTextEdit(QTextEdit):
    """QTextEdit that keeps wheel events to itself while its scrollbar is shown.

    Prevents the surrounding table from scrolling when the user scrolls
    inside a context cell.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)  # Enable vertical scrollbar as needed

    def wheelEvent(self, event):
        """Scroll this widget; only propagate to the parent when no scrollbar."""
        # Always handle the wheel event within QTextEdit
        super().wheelEvent(event)

        # Stop propagation of the event to parent while our scrollbar is visible
        if self.verticalScrollBar().isVisible():
            event.accept()
        else:
            event.ignore()
|
||||
|
||||
|
||||
class CellWidget(QWidget):
    """Cell widget: highlighted text label plus a button that copies the raw
    text into the associated filter QLineEdit."""

    def __init__(self, text, filter_edit, search_terms, parent=None):
        super(CellWidget, self).__init__(parent)
        self.layout = QHBoxLayout(self)
        self.label = QLabel(text)
        self.setHighlightedText(text, search_terms)
        self.button = QPushButton()
        icon = self.button.style().standardIcon(QStyle.SP_CommandLink)  # standard "link" style icon
        self.button.setIcon(icon)
        self.button.setFixedSize(20, 20)  # Adjust size as needed
        # Clicking the button drops the (unhighlighted) cell text into the filter
        self.button.clicked.connect(lambda: filter_edit.setText(text))
        self.layout.addWidget(self.label)
        self.layout.addWidget(self.button)
        self.layout.setContentsMargins(0, 0, 0, 0)
        self.setLayout(self.layout)

    def setHighlightedText(self, text, search_terms):
        """Render *text* as white HTML with every search term marked yellow.

        NOTE(review): unlike HighlightDelegate, this highlights every term
        regardless of the '+'/'-' operand — confirm the asymmetry is wanted.
        """
        if text is None:
            text = ""

        # Wrap the original text in a span to maintain color
        text_with_color = f"<span style='color: white;'>{text}</span>"

        for term in search_terms:
            # Strip leading operands (+ or -) and special characters
            clean_term = re.sub(r'[^\w\s]', '', term.lstrip('+-'))

            # Use regex for case-insensitive search and replace
            regex = re.compile(re.escape(clean_term), re.IGNORECASE)
            highlighted_term = f"<span style='background-color: yellow; color: black;'>{clean_term}</span>"
            text_with_color = regex.sub(highlighted_term, text_with_color)

        self.label.setText(text_with_color)
|
||||
|
||||
|
||||
class NumericTableWidgetItem(QTableWidgetItem):
    """Table item that sorts by the numeric value of its text, not lexically."""

    def __lt__(self, other):
        own_value = float(self.text())
        other_value = float(other.text())
        return own_value < other_value
|
228
logline_leviathan/database/query.py
Normal file
228
logline_leviathan/database/query.py
Normal file
@ -0,0 +1,228 @@
|
||||
from sqlalchemy import or_, String
|
||||
from logline_leviathan.database.database_manager import get_db_session, EntitiesTable, DistinctEntitiesTable, EntityTypesTable, ContextTable, FileMetadata, session_scope
|
||||
from PyQt5.QtCore import pyqtSignal, QThread, pyqtSignal
|
||||
from fuzzywuzzy import fuzz
|
||||
import re
|
||||
|
||||
class QueryThread(QThread):
    """Worker thread running a database query off the GUI thread.

    Fix: the signal was declared ``pyqtSignal(dict)`` and emitted a single
    ``{entities_id: score}`` dict, but the connected slot
    (``on_query_completed(results, search_terms)``) takes two arguments and
    treats each result as a ``(result_row, score)`` pair (it sorts by
    ``x[1]`` and passes ``x[0]`` to ``add_table_row``).  The signal now
    carries exactly that shape plus the parsed search terms.
    """

    # (scored_results, search_terms): scored_results is a list of
    # (result_row, score) pairs, search_terms the parsed query tokens.
    queryCompleted = pyqtSignal(list, list)

    def __init__(self, db_query_instance, query_text):
        super(QueryThread, self).__init__()
        self.db_query_instance = db_query_instance
        self.query_text = query_text

    def run(self):
        """Execute the query, score every row, and emit the results."""
        base_query, search_terms = self.db_query_instance.prepare_query(self.query_text)
        query_lambda = self.db_query_instance.parse_query(self.query_text)

        # NOTE(review): the lambda object itself is handed to filter() here,
        # exactly as before — confirm parse_query's return value is what
        # SQLAlchemy's filter() expects (it usually takes SQL expressions).
        results = base_query.filter(query_lambda).all()

        # Pair every result row with its relevance score, preserving order
        scored_results = [
            (result, self.db_query_instance.calculate_match_score(result, self.query_text))
            for result in results
        ]
        self.queryCompleted.emit(scored_results, search_terms)
|
||||
|
||||
|
||||
class DatabaseGUIQuery:
    """Builds and scores free-text searches over the entities database.

    Wraps the SQLAlchemy session and table classes, translating a GUI query
    string (supporting quoted phrases, '*' wildcards, and '+'/'-' operands)
    into filters and per-row relevance scores.
    """

    def __init__(self):
        # One shared session plus shorthand references to the mapped tables
        self.db_session = get_db_session()
        self.entity_types = EntityTypesTable
        self.entities = EntitiesTable
        self.distinct_entities = DistinctEntitiesTable
        self.context = ContextTable
        self.file_metadata = FileMetadata

    def parse_query(self, query):
        """Turn the raw query string into a callable producing an OR-filter.

        Unquoted tokens become case-insensitive ``ilike`` matches ('*' maps
        to SQL '%'); quoted phrases become case-sensitive ``like`` matches.

        NOTE(review): the empty-query branch returns a one-argument lambda
        while the normal branch returns a zero-argument lambda, and the
        caller (QueryThread.run) passes the lambda object itself to
        ``filter()`` without calling it — this inconsistency looks like a
        bug; confirm how filter() is meant to receive the expression.
        """
        if not query.strip():
            return lambda _: False

        # Extract quoted and unquoted parts
        quoted_parts = re.findall(r'"([^"]+)"', query)
        unquoted_parts = re.split(r'"[^"]+"', query)

        # Process unquoted parts (case-insensitive)
        unquoted_tokens = []
        for part in unquoted_parts:
            unquoted_tokens.extend(re.findall(r'\S+', part))

        filters = []
        # Handling unquoted parts with 'ilike' for case-insensitive search
        for token in unquoted_tokens:
            search_condition = f'%{token.replace("*", "%")}%'
            filters.append(
                or_(
                    self.distinct_entities.distinct_entity.ilike(search_condition),
                    self.entity_types.entity_type.ilike(search_condition),
                    self.entity_types.gui_name.ilike(search_condition),
                    self.entity_types.gui_tooltip.ilike(search_condition),
                    self.file_metadata.file_name.ilike(search_condition),
                    self.file_metadata.file_path.ilike(search_condition),
                    self.file_metadata.file_mimetype.ilike(search_condition),
                    self.entities.line_number.cast(String).ilike(search_condition),
                    self.context.context_large.ilike(search_condition)
                    # ... [add other fields for ilike search]
                )
            )

        # Handling quoted parts with 'like' for case-sensitive exact match
        for token in quoted_parts:
            exact_condition = f'%{token}%'
            filters.append(
                or_(
                    self.distinct_entities.distinct_entity.like(exact_condition),
                    self.entity_types.entity_type.like(exact_condition),
                    self.entity_types.gui_name.like(exact_condition),
                    self.entity_types.gui_tooltip.like(exact_condition),
                    self.file_metadata.file_name.like(exact_condition),
                    self.file_metadata.file_path.like(exact_condition),
                    self.file_metadata.file_mimetype.like(exact_condition),
                    self.entities.line_number.cast(String).like(exact_condition),
                    self.context.context_large.like(exact_condition)
                    # ... [add other fields for exact match search]
                )
            )

        return lambda: or_(*filters)

    def parse_search_terms(self, query):
        """Return plain (operand-free) tokens for highlighting purposes.

        NOTE(review): tokens starting with '+' or '-' are dropped entirely,
        yet the GUI highlighters specifically look for '+'-prefixed terms —
        confirm whether '+' tokens should be kept here.
        """
        tokens = query.split()
        search_terms = [token.lstrip('+-') for token in tokens if not token.startswith('-') and not token.startswith('+')]
        return search_terms

    def prepare_query(self, query):
        """Build the joined base query and parse highlight terms.

        Returns
        -------
        (base_query, search_terms) : the un-filtered SELECT over entities
            joined with their type, file and context rows, plus the parsed
            highlight terms.
        """
        search_terms = self.parse_search_terms(query)

        # Construct the base query with proper joins across the five tables
        base_query = self.db_session.query(
            self.distinct_entities.distinct_entity,
            self.entity_types.gui_name,
            self.file_metadata.file_name,
            self.entities.line_number,
            self.entities.entry_timestamp,
            self.context.context_large,
            self.entities.flag,
            self.entities.entities_id
        ).join(
            self.entities, self.distinct_entities.distinct_entities_id == self.entities.distinct_entities_id
        ).join(
            self.file_metadata, self.entities.file_id == self.file_metadata.file_id
        ).join(
            self.context, self.entities.entities_id == self.context.entities_id
        ).join(
            self.entity_types, self.entities.entity_types_id == self.entity_types.entity_type_id
        ).distinct()

        # Filters are applied later by the caller
        return base_query, search_terms

    def calculate_match_score(self, result, query):
        """Score how well one result row matches the query, normalized to 100.

        Weights exact field hits, '+'/'-' operands, term order, proximity of
        terms to the entity within the context, and fuzzy similarity.
        """
        # Adjusted weights and thresholds
        distinct_entity_weight = 4
        file_name_weight = 4
        timestamp_weight = 1
        line_number_weight = 1
        context_weight = 5
        multiple_term_weight = 1
        order_weight = 8  # Increased weight for exact order of terms
        fuzzy_match_weight = 0.3  # More discerning fuzzy match
        threshold_for_fuzzy = 90  # Higher threshold for fuzzy matches
        proximity_weight = 2  # Increased weight for proximity

        positive_operand_weight = 10  # Weight for terms with '+'
        negative_operand_penalty = -5  # Penalty for terms with '-'
        exact_match_weight = 10  # Increased weight for exact sequence match

        score = 0

        # Extracting operands and terms (quoted phrases kept as one token)
        tokens = re.findall(r'"[^"]+"|\S+', query)
        processed_terms = [(token.startswith('+'), token.startswith('-'), token.strip('+-"').lower()) for token in tokens]

        # Normalize result fields for case-insensitive comparison
        lower_distinct_entity = result.distinct_entity.lower()
        lower_file_name = result.file_name.lower()
        timestamp_str = str(result.entry_timestamp).lower()
        line_number_str = str(result.line_number).lower()
        words_in_context = result.context_large.lower().split()

        # Check matches in various fields with operand consideration
        for is_positive, is_negative, term in processed_terms:
            if term in lower_distinct_entity:
                score += positive_operand_weight if is_positive else (negative_operand_penalty if is_negative else distinct_entity_weight)
            if term in lower_file_name:
                score += positive_operand_weight if is_positive else (negative_operand_penalty if is_negative else file_name_weight)
            if term in timestamp_str:
                score += positive_operand_weight if is_positive else (negative_operand_penalty if is_negative else timestamp_weight)
            if term in line_number_str:
                score += positive_operand_weight if is_positive else (negative_operand_penalty if is_negative else line_number_weight)
            if term in words_in_context:
                score += positive_operand_weight if is_positive else (negative_operand_penalty if is_negative else context_weight)

        # Creating a cleaned substring of search terms in the exact order they appear in the query
        exact_terms_substring = ' '.join([token.strip('+-"').lower() for token in tokens])

        # Check for exact order of terms in the context
        if exact_terms_substring and exact_terms_substring in ' '.join(words_in_context):
            score += exact_match_weight

        # Extra order bonus only when the query contained a quoted phrase
        if '"' in query:
            exact_query = ' '.join(term for _, _, term in processed_terms)
            if exact_query in ' '.join(words_in_context):
                score += order_weight

        # Additional weight for multiple different terms
        unique_terms = set(term for _, _, term in processed_terms)
        score += len(unique_terms) * multiple_term_weight

        # Proximity score calculation: closer term/entity positions score higher
        for _, _, term in processed_terms:
            if term in words_in_context:
                # Find the positions of the term and the entity in the context
                term_pos = words_in_context.index(term)
                entity_pos = words_in_context.index(lower_distinct_entity) if lower_distinct_entity in words_in_context else 0

                # Calculate the distance and adjust the score
                distance = abs(term_pos - entity_pos)
                proximity_score = max(0, proximity_weight - distance * 0.01)  # Reduce score based on distance
                score += proximity_score

        # Fuzzy matching over the concatenation of all displayable fields
        # NOTE(review): max() over an empty sequence raises ValueError if
        # all_text splits to nothing — confirm context is never empty.
        all_text = f"{result.distinct_entity} {result.file_name} {result.entry_timestamp} {result.line_number} {result.context_large}".lower()
        for _, _, term in processed_terms:
            fuzzy_score = max(fuzz.partial_ratio(term, word) for word in all_text.split())
            if fuzzy_score > threshold_for_fuzzy:
                score += (fuzzy_score / 100) * fuzzy_match_weight

        # Normalize the score against the highest theoretically reachable value
        max_possible_positive_score = (
            distinct_entity_weight + file_name_weight +
            timestamp_weight + line_number_weight +
            context_weight * len(processed_terms) +  # Assuming each term can match in the context
            order_weight + exact_match_weight +
            len(processed_terms) * multiple_term_weight +  # Each term contributes to multiple_term_weight
            len(processed_terms) * positive_operand_weight  # Each term could have a positive operand
        )

        # Considering the negative operand penalty
        max_possible_negative_score = len(processed_terms) * negative_operand_penalty

        # The maximum score is the sum of the possible positive score and the absolute value of the possible negative score
        max_possible_score = max_possible_positive_score + abs(max_possible_negative_score)

        # Normalizing the score to a scale of 100
        score = (score / max_possible_score) * 100

        return score

    def get_entity_types(self):
        """Return the GUI names of all entity types that have an extractor.

        A type counts as extractable when it defines either a regex pattern
        or a script parser.
        """
        with session_scope() as session:
            # Query to filter entity types that have either regex_pattern or script_parser
            return [entity_type.gui_name for entity_type in session.query(EntityTypesTable)
                    .filter(or_(EntityTypesTable.regex_pattern.isnot(None),
                                EntityTypesTable.script_parser.isnot(None)))
                    .all()]
|
0
logline_leviathan/exporter/__init__.py
Normal file
0
logline_leviathan/exporter/__init__.py
Normal file
115
logline_leviathan/exporter/export_constructor.py
Normal file
115
logline_leviathan/exporter/export_constructor.py
Normal file
@ -0,0 +1,115 @@
|
||||
import logging
|
||||
import pandas as pd
|
||||
from datetime import datetime as dt
|
||||
from PyQt5.QtCore import Qt
|
||||
from sqlalchemy import func, cast, String, distinct
|
||||
from logline_leviathan.database.database_manager import ContextTable, EntityTypesTable, DistinctEntitiesTable, EntitiesTable, FileMetadata
|
||||
|
||||
def generate_dataframe(db_session, tree_items, file_items, context_selection, only_crossmatches=False, start_date=None, end_date=None, include_flagged=False, only_flagged=False, only_unflagged=False):
    """Build a pandas DataFrame of entity matches for the selected entity types.

    Two shapes are produced depending on *context_selection*:
    the compact summary (one row per distinct entity, with aggregated
    sources/timestamps) or a detailed view (one row per occurrence, with
    the chosen context snippet column).

    NOTE(review): only_flagged/only_unflagged are ignored unless
    include_flagged is True — confirm that gating is intentional.
    """
    if not db_session:
        raise ValueError("Database session is None")

    all_data = []  # Accumulates result rows across all selected entity types

    # Extract entity types from checked tree items; column 0 holds the checkbox.
    selected_entity_types = [item.entity_type for item in tree_items if item.checkState(0) == Qt.Checked]
    checked_files = [item for item in file_items.getCheckedFiles()]
    logging.debug(f"Generating dataframe, selected entity types: {selected_entity_types}, passed timestamp range: {start_date} - {end_date}")

    # Map the (German) GUI selection string to the matching context column;
    # None means "compact summary" (no context column queried).
    context_field = {
        'Kompakte Zusammenfassung ohne Kontext': None,
        'Kontext - gleiche Zeile': ContextTable.context_small,
        'Kontext - mittelgroß': ContextTable.context_medium,
        'Kontext - umfangreich': ContextTable.context_large
    }.get(context_selection)

    # Widen the date range to cover the whole start/end days.
    if start_date and end_date:
        start_datetime = dt.combine(start_date, dt.min.time())
        end_datetime = dt.combine(end_date, dt.max.time())

    # Subquery counting how many distinct files each distinct entity occurs in;
    # used both for joining and for the cross-match filter.
    file_count_subquery = db_session.query(
        EntitiesTable.distinct_entities_id,
        func.count(distinct(EntitiesTable.file_id)).label('file_count')
    ).group_by(EntitiesTable.distinct_entities_id)

    if only_crossmatches:
        # Keep only entities that appear in more than one file.
        file_count_subquery = file_count_subquery.having(func.count(distinct(EntitiesTable.file_id)) > 1)

    file_count_subquery = file_count_subquery.subquery()

    for entity_type in selected_entity_types:
        if context_selection == 'Kompakte Zusammenfassung ohne Kontext':
            # Compact view: one row per distinct entity with aggregated
            # occurrence count, source locations and timestamps.
            query = db_session.query(
                EntityTypesTable.entity_type,
                DistinctEntitiesTable.distinct_entity,
                func.count(EntitiesTable.entities_id).label('occurrences'),
                func.group_concat(
                    FileMetadata.file_name + ':line' + cast(EntitiesTable.line_number, String)
                ).label('sources'),
                func.group_concat(
                    cast(EntitiesTable.entry_timestamp, String)
                ).label('timestamps')
            ).join(EntityTypesTable, DistinctEntitiesTable.entity_types_id == EntityTypesTable.entity_type_id
            ).join(EntitiesTable, DistinctEntitiesTable.distinct_entities_id == EntitiesTable.distinct_entities_id
            ).join(FileMetadata, EntitiesTable.file_id == FileMetadata.file_id
            ).join(file_count_subquery, DistinctEntitiesTable.distinct_entities_id == file_count_subquery.c.distinct_entities_id
            ).filter(EntityTypesTable.entity_type == entity_type
            ).group_by(DistinctEntitiesTable.distinct_entity)
            # Apply timestamp filter if start_date and end_date are provided
            if start_date and end_date:
                query = query.filter(EntitiesTable.entry_timestamp.between(start_datetime, end_datetime))
            if checked_files:
                query = query.filter(FileMetadata.file_name.in_(checked_files))
            if include_flagged:
                if only_flagged:
                    query = query.filter(EntitiesTable.flag == True)
                elif only_unflagged:
                    query = query.filter(EntitiesTable.flag == False)

            for row in query.all():
                # group_concat joins with ','; render as ' // ' for display.
                sources = row[3].replace(',', ' // ') if row[3] is not None else ''
                timestamps = row[4].replace(',', ' // ') if row[4] is not None else ''
                # Trailing '' fills the (empty) Context column of the compact view.
                all_data.append([row[0], row[1], row[2], timestamps, sources, ''])

        else:
            # Detailed view: one row per occurrence; the occurrence count is a
            # window function partitioned by the distinct entity.
            query = db_session.query(
                EntityTypesTable.entity_type,
                DistinctEntitiesTable.distinct_entity,
                func.count(EntitiesTable.entities_id).over(partition_by=DistinctEntitiesTable.distinct_entity).label('occurrences'),
                FileMetadata.file_name,
                EntitiesTable.line_number,
                context_field,
                EntitiesTable.entry_timestamp
            ).select_from(EntitiesTable
            ).join(DistinctEntitiesTable, EntitiesTable.distinct_entities_id == DistinctEntitiesTable.distinct_entities_id
            ).join(EntityTypesTable, DistinctEntitiesTable.entity_types_id == EntityTypesTable.entity_type_id
            ).join(FileMetadata, EntitiesTable.file_id == FileMetadata.file_id
            ).outerjoin(ContextTable, EntitiesTable.entities_id == ContextTable.entities_id
            ).join(file_count_subquery, DistinctEntitiesTable.distinct_entities_id == file_count_subquery.c.distinct_entities_id
            ).filter(EntityTypesTable.entity_type == entity_type)
            # Apply timestamp filter if start_date and end_date are provided
            if start_date and end_date:
                query = query.filter(EntitiesTable.entry_timestamp.between(start_datetime, end_datetime))
            if checked_files:
                query = query.filter(FileMetadata.file_name.in_(checked_files))
            if include_flagged:
                if only_flagged:
                    query = query.filter(EntitiesTable.flag == True)
                elif only_unflagged:
                    query = query.filter(EntitiesTable.flag == False)

            for row in query.all():
                file_name = row[3]
                line_number = row[4]
                entry_timestamp = row[6].strftime('%Y-%m-%d %H:%M:%S') if row[6] is not None else ''
                context_info = row[5] if row[5] is not None else ''
                all_data.append([row[0], row[1], row[2], entry_timestamp, file_name, line_number, context_info])

    # Define the columns for the DataFrame based on context_selection
    columns = ["Entity Type", "Entity", "Occurrences", "Timestamp", "Sources", "Context"] if context_selection == 'Kompakte Zusammenfassung ohne Kontext' else ["Entity Type", "Entity", "Occurrences", "Timestamp", "Source File", "Line Number", "Context"]

    # Construct and return the DataFrame from all accumulated data
    return pd.DataFrame(all_data, columns=columns)
|
71
logline_leviathan/exporter/html_export.py
Normal file
71
logline_leviathan/exporter/html_export.py
Normal file
@ -0,0 +1,71 @@
|
||||
|
||||
from logline_leviathan.exporter.export_constructor import generate_dataframe
|
||||
import re
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def create_regex_pattern_from_entity(entity):
    """Compile a case-insensitive pattern matching any whitespace-separated
    word of *entity* (each word regex-escaped).

    BUG FIX: for an empty/whitespace-only entity the old code compiled an
    empty alternation, which matches at every position and causes the
    highlighter to wrap the whole context in empty <mark> tags. Return a
    never-matching pattern instead.
    """
    words = entity.split()
    if not words:
        # (?!) is a lookahead that always fails, i.e. matches nothing.
        return re.compile(r"(?!)")
    regex_pattern = "|".join(re.escape(word) for word in words)
    return re.compile(regex_pattern, re.IGNORECASE)
|
||||
|
||||
def highlight_entities_in_context(context, entity_regex):
    """Return *context* with every *entity_regex* match wrapped in <mark> tags."""
    return entity_regex.sub(lambda m: f"<mark>{m.group()}</mark>", context)
|
||||
|
||||
def generate_html_file(output_file_path, db_session, checkboxes, files, context_selection, only_crossmatches, start_date=None, end_date=None, include_flagged=False, only_flagged=False, only_unflagged=False):
    """Render the query results as a static, styled HTML table report.

    Delegates data collection to generate_dataframe, highlights entity
    occurrences inside the context column, and writes a self-contained
    HTML file to *output_file_path*.
    """
    # Fetch data using the new DataFrame constructor
    df = generate_dataframe(db_session, checkboxes, files, context_selection, only_crossmatches, start_date, end_date, include_flagged, only_flagged, only_unflagged)

    # In the compact view, sources/timestamps are ' // '-joined lists;
    # add <br> so each item renders on its own line.
    if context_selection == 'Kompakte Zusammenfassung ohne Kontext':
        df['Sources'] = df['Sources'].apply(lambda x: x.replace(' // ', ' // <br>'))
        df['Timestamp'] = df['Timestamp'].apply(lambda x: x.replace(' // ', ' // <br>'))

    # Wrap each entity occurrence in the context snippet with <mark> tags.
    for index, row in df.iterrows():
        entity_regex = create_regex_pattern_from_entity(row['Entity'])
        df.at[index, 'Context'] = highlight_entities_in_context(row['Context'], entity_regex)

    # Replace newline characters with HTML line breaks in the 'Context' column
    df['Context'] = df['Context'].apply(lambda x: x.replace('\n', '<br>') if x else x)

    # escape=False keeps the injected <mark>/<br> markup intact.
    html_table = df.to_html(classes="table table-bordered", escape=False, index=False)

    # f-string template: CSS braces are doubled to escape them.
    html_template = f"""
<!DOCTYPE html>
<html>
<head>
<title>Logline Leviathan Report</title>
<style>
.table {{
width: 100%;
max-width: 100%;
margin-bottom: 1rem;
background-color: transparent;
}}
.table th, .table td {{
padding: 0.75rem;
vertical-align: top;
border-top: 1px solid #dee2e6;
max-width: 300px; /* Max width */
word-wrap: break-word; /* Enable word wrapping */
}}
.table-bordered {{
border: 1px solid #dee2e6;
}}
.table-bordered th, .table-bordered td {{
border: 1px solid #dee2e6;
}}
</style>
</head>
<body>
{html_table}
</body>
</html>"""

    # Write the HTML template to the file
    with open(output_file_path, 'w', encoding='utf-8') as file:
        file.write(html_template)
|
94
logline_leviathan/exporter/nice_export.py
Normal file
94
logline_leviathan/exporter/nice_export.py
Normal file
@ -0,0 +1,94 @@
|
||||
import re
|
||||
from logline_leviathan.exporter.export_constructor import generate_dataframe
|
||||
|
||||
def create_regex_pattern_from_entity(entity):
    """Compile a case-insensitive pattern matching any whitespace-separated
    word of *entity* (each word regex-escaped).

    BUG FIX: for an empty/whitespace-only entity the old code compiled an
    empty alternation, which matches at every position and causes the
    highlighter to wrap the whole context in empty <mark> tags. Return a
    never-matching pattern instead.
    """
    words = entity.split()
    if not words:
        # (?!) is a lookahead that always fails, i.e. matches nothing.
        return re.compile(r"(?!)")
    regex_pattern = "|".join(re.escape(word) for word in words)
    return re.compile(regex_pattern, re.IGNORECASE)
|
||||
|
||||
def highlight_entities_in_context(context, entity_regex):
    """Return *context* with every *entity_regex* match wrapped in <mark> tags."""
    return entity_regex.sub(lambda m: f"<mark>{m.group()}</mark>", context)
|
||||
|
||||
def generate_niceoutput_file(output_file_path, db_session, checkboxes, files, context_selection, only_crossmatches, start_date=None, end_date=None, include_flagged=False, only_flagged=False, only_unflagged=False):
    """Render the query results as an interactive DataTables HTML report.

    Same data pipeline as generate_html_file, but the table is wired up
    with jQuery DataTables (CDN-hosted assets): export buttons, search,
    and per-column dropdown filters. Requires internet access to render.
    """
    # Fetch data using the new DataFrame constructor
    df = generate_dataframe(db_session, checkboxes, files, context_selection, only_crossmatches, start_date, end_date, include_flagged, only_flagged, only_unflagged)

    # In the compact view, sources/timestamps are ' // '-joined lists;
    # add <br> so each item renders on its own line.
    if context_selection == 'Kompakte Zusammenfassung ohne Kontext':
        df['Sources'] = df['Sources'].apply(lambda x: x.replace(' // ', ' // <br>'))
        df['Timestamp'] = df['Timestamp'].apply(lambda x: x.replace(' // ', ' // <br>'))

    # Wrap each entity occurrence in the context snippet with <mark> tags.
    for index, row in df.iterrows():
        entity_regex = create_regex_pattern_from_entity(row['Entity'])
        df.at[index, 'Context'] = highlight_entities_in_context(row['Context'], entity_regex)

    # Replace newline characters with HTML line breaks in the 'Context' column
    df['Context'] = df['Context'].apply(lambda x: x.replace('\n', '<br>') if x else x)

    # table_id="example" is what the DataTables init script below targets;
    # escape=False keeps the injected markup intact.
    html_table = df.to_html(classes="display responsive nowrap", table_id="example", escape=False, index=False)

    # HTML template with doubled curly braces in JavaScript part (str.format)
    # and additional configurations
    html_template = """
<!DOCTYPE html>
<html>
<head>
<title>Logline Leviathan Report</title>
<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.11.5/css/jquery.dataTables.min.css"/>
<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/buttons/2.2.2/css/buttons.dataTables.min.css"/>
<script type="text/javascript" src="https://code.jquery.com/jquery-3.5.1.js"></script>
<script type="text/javascript" src="https://cdn.datatables.net/1.11.5/js/jquery.dataTables.min.js"></script>
<script type="text/javascript" src="https://cdn.datatables.net/buttons/2.2.2/js/dataTables.buttons.min.js"></script>
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.1.3/jszip.min.js"></script>
<script type="text/javascript" src="https://cdn.datatables.net/buttons/2.2.2/js/buttons.html5.min.js"></script>
<script type="text/javascript" src="https://cdn.datatables.net/buttons/2.2.2/js/buttons.print.min.js"></script>
</head>
<body>
{0}
<script type="text/javascript">
$(document).ready(function () {{
// DataTables initialization
var table = $('#example').DataTable({{
"dom": 'Blfrtip',
"buttons": ['copy', 'csv', 'excel', 'pdf', 'print'],
"searching": true,
"fixedHeader": true,
"autoWidth": false,
"lengthChange": true,
"pageLength": 10,
"orderCellsTop": true,
}});

// Create dropdown filtering menus
$('#example thead tr').clone(true).appendTo('#example thead');
$('#example thead tr:eq(1) th').each(function (i) {{
var title = $(this).text();
if (title === 'Entity Type' || title === 'Entity' || title === 'Occurrences' || title === 'Timestamp' || title === 'Sources' || title === 'Source File' || title === 'Line Number') {{
var select = $('<select><option value=""></option></select>')
.appendTo($(this).empty())
.on('change', function () {{
var val = $(this).val();
table.column(i)
.search(val ? '^' + $(this).val() + '$' : val, true, false)
.draw();
}});

table.column(i).data().unique().sort().each(function (d, j) {{
select.append('<option value="'+d+'">'+d+'</option>')
}});
}} else {{
$(this).html('');
}}
}});
}});
</script>
</body>
</html>""".format(html_table)


    # Write the HTML template to the file
    with open(output_file_path, 'w', encoding='utf-8') as file:
        file.write(html_template)
|
46
logline_leviathan/exporter/wordlist_export.py
Normal file
46
logline_leviathan/exporter/wordlist_export.py
Normal file
@ -0,0 +1,46 @@
|
||||
from logline_leviathan.database.database_manager import ContextTable, EntityTypesTable, DistinctEntitiesTable, EntitiesTable, FileMetadata
|
||||
from sqlalchemy import func, distinct
|
||||
from PyQt5.QtCore import Qt
|
||||
|
||||
|
||||
|
||||
def generate_wordlist(output_file_path, db_session, checkboxes, only_crossmatches, start_date=None, end_date=None, include_flagged=False, only_flagged=False, only_unflagged=False):
    """Write a plain-text wordlist of matched entities, one per line.

    NOTE(review): without only_crossmatches there is no GROUP BY or
    DISTINCT, so the join against EntitiesTable can emit the same word
    once per occurrence — confirm whether duplicates are acceptable.
    """
    # Check if there are any checkboxes selected
    if not checkboxes:
        raise ValueError("No entities selected")

    # Get selected entity types from checkboxes (column 0 holds the checkbox)
    selected_entity_types = [item.entity_type for item in checkboxes if item.checkState(0) == Qt.Checked]

    # Prepare the initial query with proper joins
    query = db_session.query(
        DistinctEntitiesTable.distinct_entity
    ).join(
        EntitiesTable, DistinctEntitiesTable.distinct_entities_id == EntitiesTable.distinct_entities_id
    ).join(
        EntityTypesTable, EntitiesTable.entity_types_id == EntityTypesTable.entity_type_id
    ).filter(
        EntityTypesTable.entity_type.in_(selected_entity_types)
    )

    # Add timestamp filtering if necessary
    # NOTE(review): unlike generate_dataframe, the raw dates are used here
    # (not widened to day bounds) — confirm this asymmetry is intended.
    if start_date and end_date:
        query = query.filter(EntitiesTable.entry_timestamp.between(start_date, end_date))

    # Handle crossmatches, flagged, and unflagged conditions
    if only_crossmatches:
        # Keep only entities that occur in more than one distinct file.
        query = query.group_by(DistinctEntitiesTable.distinct_entity).having(func.count(distinct(EntitiesTable.file_id)) > 1)
    if include_flagged:
        if only_flagged:
            query = query.filter(EntitiesTable.flag == True)
        elif only_unflagged:
            query = query.filter(EntitiesTable.flag == False)

    # Execute the query and fetch all results
    results = query.all()

    # Write the results to the file
    with open(output_file_path, 'w', encoding='utf-8') as file:
        for result in results:
            file.write(result.distinct_entity + '\n')
|
||||
|
47
logline_leviathan/exporter/xlsx_export.py
Normal file
47
logline_leviathan/exporter/xlsx_export.py
Normal file
@ -0,0 +1,47 @@
|
||||
import pandas as pd
|
||||
from logline_leviathan.exporter.export_constructor import generate_dataframe
|
||||
|
||||
def ensure_utf8(s):
    """Return *s* with characters that cannot be UTF-8 encoded replaced.

    Non-string values pass through unchanged.
    """
    if not isinstance(s, str):
        return s
    return s.encode('utf-8', errors='replace').decode('utf-8')
|
||||
|
||||
def generate_xlsx_file(output_file_path, db_session, checkboxes, files, context_selection, only_crossmatches, start_date=None, end_date=None, include_flagged=False, only_flagged=False, only_unflagged=False):
    """Export query results to an XLSX workbook, one sheet per entity type.

    Delegates data collection to generate_dataframe, normalizes the
    Context column for Excel compatibility, and writes each entity type
    to its own (sanitized, unique) worksheet.
    """
    # Fetch data using the new DataFrame constructor
    df = generate_dataframe(db_session, checkboxes, files, context_selection, only_crossmatches, start_date, end_date, include_flagged, only_flagged, only_unflagged)

    # Normalize the context field for Excel compatibility.
    if 'Context' in df.columns:
        df['Context'] = df['Context'].str.strip()  # Trim whitespaces
        df['Context'] = df['Context'].str.replace(r'[^\x00-\x7F]+', '', regex=True)  # Remove non-ASCII characters
        df['Context'] = df['Context'].apply(lambda x: x[:32767] if isinstance(x, str) else x)  # Excel's hard cell-size limit

    # Reorder columns based on whether the compact ('Sources') or detailed
    # ('Source File'/'Line Number') layout was produced.
    if 'Sources' in df.columns:
        df = df[["Entity Type", "Entity", "Occurrences", "Timestamp", "Sources", "Context"]]
    elif 'Source File' in df.columns and 'Line Number' in df.columns:
        df = df[["Entity Type", "Entity", "Occurrences", "Timestamp", "Source File", "Line Number", "Context"]]

    # Apply ensure_utf8 to all string columns in df
    for col in df.select_dtypes(include=[object]):
        df[col] = df[col].apply(ensure_utf8)

    # BUG FIX: Excel sheet titles are capped at 31 characters and must not
    # contain [ ] : * ? / \ — using entity_type verbatim crashed openpyxl
    # for such names. Sanitize, truncate, and de-duplicate.
    invalid_chars = str.maketrans({c: '_' for c in '[]:*?/\\'})
    used_sheet_names = set()

    with pd.ExcelWriter(output_file_path, engine='openpyxl') as writer:
        for entity_type in df['Entity Type'].unique():
            df_filtered = df[df['Entity Type'] == entity_type]

            sheet_name = str(entity_type).translate(invalid_chars)[:31] or 'Sheet'
            base_name, counter = sheet_name, 1
            while sheet_name in used_sheet_names:  # truncation may collide
                suffix = f"_{counter}"
                sheet_name = base_name[:31 - len(suffix)] + suffix
                counter += 1
            used_sheet_names.add(sheet_name)

            df_filtered.to_excel(writer, sheet_name=sheet_name, index=False)
            worksheet = writer.sheets[sheet_name]

            # Fixed column width; chr(65 + idx) yields 'A'..'G' which is safe
            # because the exported frames never exceed 7 columns.
            for idx, col in enumerate(df_filtered.columns):
                worksheet.column_dimensions[chr(65 + idx)].width = 20

        # The file is saved automatically by the with-context on exit.
|
0
logline_leviathan/file_processor/__init__.py
Normal file
0
logline_leviathan/file_processor/__init__.py
Normal file
88
logline_leviathan/file_processor/docx_processor.py
Normal file
88
logline_leviathan/file_processor/docx_processor.py
Normal file
@ -0,0 +1,88 @@
|
||||
import logging
|
||||
from docx import Document
|
||||
from datetime import datetime
|
||||
from logline_leviathan.file_processor.parser_thread import parse_content
|
||||
from logline_leviathan.file_processor.file_database_ops import handle_file_metadata, handle_individual_entity, handle_distinct_entity, handle_context_snippet
|
||||
import re
|
||||
|
||||
def read_docx_content(file_path):
    """Read a DOCX file and return its text as a list of lines.

    Paragraphs are joined with newlines and split back with end-of-line
    characters preserved, so downstream offset math stays consistent.
    Returns None when the document cannot be read.
    """
    try:
        document = Document(file_path)
        text = '\n'.join(paragraph.text for paragraph in document.paragraphs)
        return text.splitlines(True)  # Keep end-of-line characters
    except Exception as e:
        logging.error(f"Error reading DOCX file {file_path}: {e}")
        return None
|
||||
|
||||
def get_line_numbers_from_pos(content, start_pos, end_pos):
    """Map character offsets to (start_line, end_line) indices in *content*.

    *content* is a list of lines (with their end-of-line characters kept);
    *start_pos*/*end_pos* are offsets into the concatenation of those lines.

    BUG FIX: the original implementation ran a second loop from start_line
    while continuing to accumulate the position counter, so the start line's
    length was counted twice and end_line came out wrong whenever the match
    ended on a later line than it started. A single pass with one running
    total fixes this.
    """
    start_line = end_line = 0
    cumulative = 0
    found_start = False
    for i, line in enumerate(content):
        cumulative += len(line)
        if not found_start and start_pos < cumulative:
            start_line = i
            found_start = True
        if found_start and end_pos <= cumulative:
            end_line = i
            break
    return start_line, end_line
|
||||
|
||||
def process_docx_file(file_path, file_mimetype, thread_instance, db_session, abort_flag):
    """Ingest one DOCX file: parse entities and persist them with context.

    Returns the number of newly stored entity occurrences, or 0 on error
    (the session is rolled back in that case).

    NOTE(review): if read_docx_content returns None (unreadable file),
    ''.join(content) raises TypeError and is swallowed by the generic
    except below — confirm that is the intended failure path.
    """
    try:
        file_metadata = handle_file_metadata(db_session, file_path, file_mimetype)
        content = read_docx_content(file_path)
        full_content = ''.join(content)  # Join all lines into a single string
        thread_instance.update_status.emit(f" Verarbeite DOCX-Datei: {file_path}")

        # Call the parser and get matches along with entity types
        parsed_entities = parse_content(full_content, abort_flag, db_session)

        entity_count = 0
        for entity_type_id, match_text, start_pos, end_pos in parsed_entities:
            # Skip whitespace-only matches
            if not match_text.strip():
                continue

            # Closest recognizable timestamp preceding the match, if any
            timestamp = find_timestamp_before_match(full_content, start_pos)
            match_start_line, match_end_line = get_line_numbers_from_pos(content, start_pos, end_pos)

            entity = handle_distinct_entity(db_session, match_text, entity_type_id)
            individual_entity = handle_individual_entity(db_session, entity, file_metadata, match_start_line, timestamp, entity_type_id, abort_flag, thread_instance)

            # Only count and attach context when a new occurrence was stored
            if individual_entity:
                entity_count += 1
                handle_context_snippet(db_session, individual_entity, content, match_start_line, match_end_line)

        return entity_count
    except Exception as e:
        db_session.rollback()
        logging.error(f"Error processing DOCX file {file_path}: {e}")
        return 0
|
||||
|
||||
def find_timestamp_before_match(content, match_start_pos):
    """Find the latest recognizable timestamp occurring before *match_start_pos*.

    Patterns are tried in listed order; for each, occurrences are scanned
    from last to first. The first parseable hit is returned normalized to
    '%Y-%m-%d %H:%M:%S'. Returns None when nothing matches.
    """
    preceding_text = content[:match_start_pos]
    timestamp_patterns = [
        (r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', '%Y-%m-%d %H:%M:%S'), # ISO 8601 Extended
        (r'\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}', '%Y/%m/%d %H:%M:%S'), # ISO 8601 with slashes
        (r'\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}', '%d/%m/%Y %H:%M:%S'), # European Date Format
        (r'\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}', '%m-%d-%Y %H:%M:%S'), # US Date Format
        (r'\d{8}_\d{6}', '%Y%m%d_%H%M%S'),                             # Compact Format
        (r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}', '%Y-%m-%dT%H:%M:%S'), # ISO 8601 Basic
        (r'\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2}', '%d.%m.%Y %H:%M:%S'),# German Date Format
        (r'\d{4}\d{2}\d{2} \d{2}:\d{2}:\d{2}', '%Y%m%d %H:%M:%S'),     # Basic Format without Separators
        (r'\d{1,2}-[A-Za-z]{3}-\d{4} \d{2}:\d{2}:\d{2}', '%d-%b-%Y %H:%M:%S'), # English Date Format with Month Name
        (r'(?:19|20)\d{10}', '%Y%m%d%H%M'),                            # Compact Numeric Format
        # Add more patterns as needed
    ]
    for pattern, date_format in timestamp_patterns:
        candidates = list(re.finditer(pattern, preceding_text))
        for candidate in reversed(candidates):
            try:
                parsed = datetime.strptime(candidate.group(), date_format)
            except ValueError:
                # Lexical match but not a valid date; keep scanning.
                continue
            return parsed.strftime('%Y-%m-%d %H:%M:%S')
    return None
|
136
logline_leviathan/file_processor/file_database_ops.py
Normal file
136
logline_leviathan/file_processor/file_database_ops.py
Normal file
@ -0,0 +1,136 @@
|
||||
import logging
|
||||
import os
|
||||
from logline_leviathan.database.database_manager import FileMetadata, DistinctEntitiesTable, EntitiesTable, ContextTable, session_scope
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def handle_file_metadata(db_session, file_path, file_mimetype, sheet_name=None):
    """Get or create the FileMetadata row for *file_path* (commits on success).

    *sheet_name* is appended to the stored file name so each sheet of a
    spreadsheet gets its own metadata record. Returns the row, or None on
    error (the error is logged, not raised).
    """
    #with session_scope() as db_session:
    try:
        # Construct file name with or without sheet name
        base_file_name = os.path.basename(file_path)
        modified_file_name = f"{base_file_name}_{sheet_name}" if sheet_name else base_file_name

        # Search for existing metadata using the modified file name
        file_metadata = db_session.query(FileMetadata).filter_by(file_path=file_path, file_name=modified_file_name).first()

        if not file_metadata:
            logging.debug(f"File metadata {file_metadata} does not exist.")
            file_metadata = FileMetadata(file_name=modified_file_name, file_path=file_path, file_mimetype=file_mimetype)
            db_session.add(file_metadata)
        else:
            logging.debug(f"File metadata {file_metadata} already exists.")
            # Update the MIME type if the record already exists
            file_metadata.file_mimetype = file_mimetype
            logging.debug(f"Updated file mimetype: {file_metadata.file_mimetype}")
        logging.debug(f"committing file metadata {file_metadata}")
        # Commit immediately so file_id is available to callers.
        db_session.commit()
        return file_metadata
    except Exception as e:
        logging.error(f"Error handling file metadata for {file_path}: {e}")
        return None
|
||||
|
||||
|
||||
|
||||
def handle_distinct_entity(db_session, match_text, entity_type_id):
    """Get or create the DistinctEntitiesTable row for a matched value.

    Commits immediately when a new row is created. Returns the row, or
    None on error (the error is logged, not raised).
    """
    #with session_scope() as db_session:
    try:
        existing = db_session.query(DistinctEntitiesTable).filter_by(
            distinct_entity=match_text, entity_types_id=entity_type_id
        ).first()
        if existing:
            logging.debug(f"Distinct entity {existing} already exists.")
            return existing

        logging.debug(f"Distinct entity {match_text} does not exist.")
        created = DistinctEntitiesTable(distinct_entity=match_text, entity_types_id=entity_type_id)
        db_session.add(created)
        logging.debug(f"committing distinct entity {created}")
        db_session.commit()
        return created
    except Exception as e:
        logging.error(f"Error handling distinct entity {match_text}: {e}")
        return None
|
||||
|
||||
|
||||
|
||||
def handle_individual_entity(db_session, entity, file_metadata, line_number, timestamp, entity_types_id, abort_flag, thread_instance):
    """Get or create the per-occurrence EntitiesTable row and update counters.

    Deduplicates on (distinct entity, file, line number). On creation the
    thread-shared total_entities_count is incremented under its mutex and
    the processing rate signal is emitted. Returns the row, None when the
    abort flag is set, or None on error (logged, not raised).
    """
    #with session_scope() as db_session:
    try:
        # Bail out early if the user requested an abort.
        if abort_flag():
            return None
        # Normalize string timestamps to datetime; drop unparseable values.
        if timestamp and isinstance(timestamp, str):
            try:
                timestamp = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
            except ValueError:
                logging.warning(f"Invalid timestamp format: {timestamp}")
                timestamp = None

        individual_entity = db_session.query(EntitiesTable).filter_by(
            distinct_entities_id=entity.distinct_entities_id,
            file_id=file_metadata.file_id,
            line_number=line_number
        ).first()

        if not individual_entity:
            logging.debug(f"Individual entity {individual_entity} does not exist.")
            individual_entity = EntitiesTable(
                distinct_entities_id=entity.distinct_entities_id,
                file_id=file_metadata.file_id,
                line_number=line_number,
                entry_timestamp=timestamp,
                entity_types_id=entity_types_id
            )
            db_session.add(individual_entity)
            logging.debug(f"committing individual entity {individual_entity}")
            # Per-row commit: simple but slow for large ingests (review note).
            db_session.commit()

            thread_instance.total_entities_count_lock.lock()  # Lock the mutex
            try:
                thread_instance.total_entities_count += 1
            finally:
                thread_instance.total_entities_count_lock.unlock()  # Unlock the mutex

            thread_instance.calculate_and_emit_rate()
        else:
            logging.debug(f"Individual entity {individual_entity} already exists.")

        return individual_entity
    except Exception as e:
        logging.error(f"Error handling individual entity in {file_metadata.file_path}, line {line_number}: {e}")
        return None
|
||||
|
||||
|
||||
#def count_newlines(content, start, end):
|
||||
# return content[start:end].count('\n')
|
||||
|
||||
def handle_context_snippet(db_session, individual_entity, content, start_line, end_line):
    """Store small/medium/large context snippets for one entity occurrence.

    *content* is the file's list of lines; snippets cover the match lines
    plus a symmetric window of surrounding lines. Existing context rows
    are left untouched. Errors are logged, not raised.

    NOTE(review): lines produced by splitlines(True) keep their newlines,
    so joining with "\n" here may double-space snippets — confirm.
    """
    #with session_scope() as db_session:
    try:
        # Window size (extra lines on each side) per snippet tier.
        context_sizes = {
            'Kontext - gleiche Zeile': 0,
            'Kontext - mittelgroß': 8,
            'Kontext - umfangreich': 15
            #'Index Context': 30
        }

        context_snippets = {}
        for size, lines in context_sizes.items():
            # Clamp the window to the file bounds.
            context_start = max(0, start_line - lines)
            context_end = min(len(content), end_line + lines + 1)
            context_snippets[size] = "\n".join(content[context_start:context_end])

        # Check if a similar context already exists
        existing_context = db_session.query(ContextTable).filter_by(entities_id=individual_entity.entities_id).first()
        if not existing_context:
            context = ContextTable(entities_id=individual_entity.entities_id,
                                   context_small=context_snippets['Kontext - gleiche Zeile'],
                                   context_medium=context_snippets['Kontext - mittelgroß'],
                                   context_large=context_snippets['Kontext - umfangreich']
                                   )
            db_session.add(context)
            logging.debug(f"committing context {context}")
            db_session.commit()
        else:
            logging.debug(f"Existing context {existing_context} already exists.")
    except Exception as e:
        logging.error(f"Error handling context snippet: {e}")
|
218
logline_leviathan/file_processor/file_processor_thread copy.py
Normal file
218
logline_leviathan/file_processor/file_processor_thread copy.py
Normal file
@ -0,0 +1,218 @@
|
||||
from multiprocessing.spawn import import_main_path
|
||||
import sys
|
||||
import time
|
||||
import os
|
||||
from PyQt5.QtCore import QThread, pyqtSignal, QMutex
|
||||
from logline_leviathan.database.database_manager import session_scope
|
||||
from logline_leviathan.gui.checkbox_panel import CheckboxPanel
|
||||
from .text_processor import process_text_file
|
||||
from .xlsx_processor import process_xlsx_file
|
||||
from .pdf_processor import process_pdf_file
|
||||
from .docx_processor import process_docx_file
|
||||
import magic
|
||||
import logging
|
||||
import pathlib
|
||||
|
||||
|
||||
class FileProcessorThread(QThread):
|
||||
update_progress = pyqtSignal(int)
|
||||
update_status = pyqtSignal(str)
|
||||
update_tree_signal = pyqtSignal()
|
||||
update_checkboxes_signal = pyqtSignal()
|
||||
update_rate = pyqtSignal(float, int, float, int, float, float)
|
||||
last_update_time = 0
|
||||
|
||||
|
||||
    def __init__(self, file_paths):
        """Set up counters, mutexes and state for processing *file_paths*.

        NOTE(review): os.path.getsize raises for missing paths, so an
        unreadable entry aborts construction — confirm callers pre-filter.
        """
        super().__init__()
        self.start_time = time.time()          # ingest start, for rate calculation
        self.total_entities_count = 0          # shared across workers; guard with lock below
        self.total_entities_count_lock = QMutex()
        self.abort_mutex = QMutex()            # protects _abort_flag (see property)

        # Goes through the abort_flag property setter, taking abort_mutex.
        self.abort_flag = False
        self.file_paths = file_paths
        self.unsupported_files_count = 0
        self.processed_files_count = 0
        self.total_data_processed_kb = 0
        self.total_files_size_kb = sum(os.path.getsize(f) / 1024 for f in file_paths)

        self.unsupported_files_list = []
        self.all_unsupported_files = []

        self.checkbox_panel = CheckboxPanel()
|
||||
|
||||
    @property
    def abort_flag(self):
        """Thread-safe read of the abort flag (guarded by abort_mutex)."""
        # This is the getter method for the property
        self.abort_mutex.lock()
        flag = self._abort_flag
        self.abort_mutex.unlock()
        return flag
||||
|
||||
    @abort_flag.setter
    def abort_flag(self, value):
        """Thread-safe write of the abort flag (guarded by abort_mutex)."""
        # This is the setter method for the property
        self.abort_mutex.lock()
        self._abort_flag = value
        self.abort_mutex.unlock()
|
||||
|
||||
def classify_file_type(self, file_path):
|
||||
# Mapping of file extensions to MIME types
|
||||
mime_types = {
|
||||
'.txt': 'text/plain',
|
||||
'.pdf': 'application/pdf',
|
||||
'.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'.csv': 'text/csv',
|
||||
'.html': 'text/html',
|
||||
'.htm': 'text/html',
|
||||
'.xml': 'text/xml',
|
||||
'.json': 'application/json',
|
||||
'.yaml': 'text/yaml',
|
||||
'.yml': 'text/yaml',
|
||||
'.md': 'text/markdown',
|
||||
'.rtf': 'application/rtf',
|
||||
'.odt': 'application/vnd.oasis.opendocument.text',
|
||||
'.ods': 'application/vnd.oasis.opendocument.spreadsheet',
|
||||
'.odp': 'application/vnd.oasis.opendocument.presentation',
|
||||
'.log': 'text/plain',
|
||||
'.ini': 'text/plain',
|
||||
'.conf': 'text/plain',
|
||||
'.cfg': 'text/plain',
|
||||
'.js': 'application/javascript',
|
||||
'.css': 'text/css',
|
||||
'.php': 'text/php',
|
||||
'.py': 'text/x-python',
|
||||
'.rb': 'text/x-ruby',
|
||||
'.java': 'text/x-java-source',
|
||||
'.c': 'text/x-c',
|
||||
'.cpp': 'text/x-c++',
|
||||
'.h': 'text/x-c-header',
|
||||
'.hpp': 'text/x-c++-header',
|
||||
'.sh': 'application/x-sh',
|
||||
'.bat': 'application/x-bat',
|
||||
'.ps1': 'application/x-powershell',
|
||||
'.sql': 'text/x-sql',
|
||||
# Add more mappings as needed
|
||||
}
|
||||
try:
|
||||
mime = magic.Magic(mime=True)
|
||||
file_type = mime.from_file(file_path)
|
||||
return file_type
|
||||
except FileNotFoundError as e:
|
||||
logging.error(f"File not found: {file_path}. Encoding: {sys.getfilesystemencoding()}", exc_info=True)
|
||||
except Exception as e:
|
||||
try:
|
||||
clean_file_path = pathlib.Path(file_path)
|
||||
mime = magic.Magic(mime=True)
|
||||
file_type = mime.from_file(clean_file_path)
|
||||
return file_type
|
||||
except Exception as e:
|
||||
logging.error(f"The magic library failed classifying the file type: {e} // falling back to file extension")
|
||||
_, file_extension = os.path.splitext(file_path)
|
||||
return mime_types.get(file_extension.lower(), 'application/octet-stream') # Default to binary type if unknown
|
||||
|
||||
|
||||
def run(self):
|
||||
logging.debug("Thread run method started.")
|
||||
try:
|
||||
for index, file_path in enumerate(self.file_paths):
|
||||
#if not self.debugFileProcessor(file_path):
|
||||
# continue
|
||||
file_size_kb = os.path.getsize(file_path) / 1024 # Get file size in KiB
|
||||
self.total_data_processed_kb += file_size_kb
|
||||
if self.abort_flag:
|
||||
self.update_status.emit("Analyse abgebrochen")
|
||||
return
|
||||
logging.debug(f"Attempting to process file: {file_path}")
|
||||
file_type = self.classify_file_type(file_path)
|
||||
logging.info(f"ANALYZING {file_path} TYPE {file_type}")
|
||||
|
||||
with session_scope() as session:
|
||||
if 'text/' in file_type:
|
||||
process_text_file(file_path, file_type, self, session, lambda: self.abort_flag)
|
||||
elif 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' in file_type:
|
||||
process_xlsx_file(file_path, file_type, self, session, lambda: self.abort_flag)
|
||||
elif 'application/pdf' in file_type or file_type == ".pdf":
|
||||
process_pdf_file(file_path, file_type, self, session, lambda: self.abort_flag)
|
||||
elif 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' in file_type:
|
||||
# Handling DOCX file
|
||||
process_docx_file(file_path, file_type, self, session, lambda: self.abort_flag)
|
||||
else:
|
||||
logging.info(f"Skipping unsupported file type: {file_type}")
|
||||
self.all_unsupported_files.append(file_path)
|
||||
self.unsupported_files_count += 1
|
||||
if len(self.unsupported_files_list) < 20:
|
||||
self.unsupported_files_list.append(f"{file_path} (Type: {file_type})")
|
||||
continue
|
||||
self.update_tree_signal.emit()
|
||||
self.update_checkboxes_signal.emit()
|
||||
self.processed_files_count = index + 1
|
||||
self.update_progress.emit(index + 1)
|
||||
self.update_status.emit(f" Verarbeitung abgeschlossen. {index + 1 - self.unsupported_files_count} von {len(self.file_paths)} Dateien verarbeitet.")
|
||||
except Exception as e:
|
||||
logging.error(f"Error processing files: {e}")
|
||||
self.update_status.emit(f"Fehler beim Verarbeiten von Dateien {e}", exc_info=True)
|
||||
|
||||
def calculate_and_emit_rate(self):
|
||||
current_time = time.time()
|
||||
if current_time - self.last_update_time >= 1: # Check if 1 second has passed
|
||||
entity_rate = self.calculate_rate()
|
||||
file_rate = self.calculate_file_rate()
|
||||
data_rate_kibs = self.calculate_data_rate()
|
||||
estimated_time = self.calculate_estimated_time_to_completion(data_rate_kibs)
|
||||
self.update_rate.emit(entity_rate, self.total_entities_count, file_rate, self.processed_files_count, estimated_time, data_rate_kibs)
|
||||
self.last_update_time = current_time
|
||||
|
||||
def calculate_data_rate(self):
|
||||
elapsed_time = time.time() - self.start_time
|
||||
return self.total_data_processed_kb / elapsed_time if elapsed_time > 0 else 0
|
||||
|
||||
def calculate_estimated_time_to_completion(self, data_rate_kibs):
|
||||
remaining_data_kb = self.total_files_size_kb - self.total_data_processed_kb
|
||||
if data_rate_kibs > 0:
|
||||
estimated_time = remaining_data_kb / data_rate_kibs
|
||||
else:
|
||||
estimated_time = float('inf') # Indefinite time if rate is zero
|
||||
return estimated_time
|
||||
|
||||
def calculate_file_rate(self):
|
||||
elapsed_time = time.time() - self.start_time
|
||||
return self.processed_files_count / elapsed_time if elapsed_time > 0 else 0
|
||||
|
||||
|
||||
def calculate_rate(self):
|
||||
elapsed_time = time.time() - self.start_time
|
||||
rate = self.total_entities_count / elapsed_time if elapsed_time > 0 else 0
|
||||
return rate
|
||||
|
||||
def abort(self):
|
||||
self.abort_flag = True
|
||||
|
||||
def getUnsupportedFilesCount(self):
|
||||
return self.unsupported_files_count
|
||||
|
||||
def getUnsupportedFilesList(self):
|
||||
return self.unsupported_files_list
|
||||
|
||||
def debugFileProcessor(self, file_path):
|
||||
logging.debug(f"Attempting to process file: {file_path}")
|
||||
|
||||
if not os.path.exists(file_path):
|
||||
logging.warning(f"File does not exist: {file_path}")
|
||||
return False
|
||||
elif not os.access(file_path, os.R_OK):
|
||||
logging.warning(f"File is not accessible: {file_path}")
|
||||
return False
|
||||
|
||||
try:
|
||||
detected_encoding = magic.from_file(file_path, mime=True)
|
||||
logging.debug(f"Detected encoding for {file_path}: {detected_encoding}")
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to detect encoding for {file_path}: {e}", exc_info=True)
|
||||
|
||||
file_type = self.classify_file_type(file_path)
|
||||
logging.debug(f"Classified file type for {file_path}: {file_type}")
|
||||
|
||||
return True
|
240
logline_leviathan/file_processor/file_processor_thread.py
Normal file
240
logline_leviathan/file_processor/file_processor_thread.py
Normal file
@ -0,0 +1,240 @@
|
||||
from multiprocessing.spawn import import_main_path
|
||||
import sys
|
||||
import time
|
||||
import os
|
||||
from PyQt5.QtCore import QThread, pyqtSignal, QMutex
|
||||
from logline_leviathan.database.database_manager import session_scope
|
||||
from logline_leviathan.gui.checkbox_panel import CheckboxPanel
|
||||
from .text_processor import process_text_file
|
||||
from .xlsx_processor import process_xlsx_file
|
||||
from .pdf_processor import process_pdf_file
|
||||
from .docx_processor import process_docx_file
|
||||
import magic
|
||||
import logging
|
||||
import pathlib
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.exc import OperationalError
|
||||
|
||||
class FileProcessorThread(QThread):
    """Worker thread that classifies each selected file by MIME type and
    dispatches it to the matching processor (text/xlsx/pdf/docx). Verifies
    the database connection before each file and emits Qt signals for
    progress, status and throughput statistics."""

    update_progress = pyqtSignal(int)
    update_status = pyqtSignal(str)
    update_tree_signal = pyqtSignal()
    update_checkboxes_signal = pyqtSignal()
    # entity_rate, total_entities, file_rate, processed_files, eta_seconds, data_rate_kib_s
    update_rate = pyqtSignal(float, int, float, int, float, float)
    last_update_time = 0  # class-level default; shadowed per-instance on first rate emit

    def __init__(self, file_paths):
        """file_paths: list of absolute paths queued for ingestion."""
        super().__init__()
        self.start_time = time.time()
        self.total_entities_count = 0
        self.total_entities_count_lock = QMutex()
        # Must exist before the abort_flag property setter below is first used.
        self.abort_mutex = QMutex()

        self.abort_flag = False
        self.file_paths = file_paths
        self.unsupported_files_count = 0
        self.processed_files_count = 0
        self.total_data_processed_kb = 0
        self.total_files_size_kb = sum(os.path.getsize(f) / 1024 for f in file_paths)

        self.unsupported_files_list = []  # capped preview (max 20) for the UI
        self.all_unsupported_files = []   # complete list

        self.checkbox_panel = CheckboxPanel()

    @property
    def abort_flag(self):
        """Thread-safe read of the abort flag."""
        self.abort_mutex.lock()
        flag = self._abort_flag
        self.abort_mutex.unlock()
        return flag

    @abort_flag.setter
    def abort_flag(self, value):
        """Thread-safe write of the abort flag."""
        self.abort_mutex.lock()
        self._abort_flag = value
        self.abort_mutex.unlock()

    def classify_file_type(self, file_path):
        """Return the file's MIME type as a string.

        Tries libmagic first (raw path, then a pathlib-normalized path) and
        finally falls back to an extension lookup. Always returns a string:
        the original implicitly returned None on FileNotFoundError, which
        made the ``'text/' in file_type`` checks in run() raise TypeError.
        """
        # Mapping of file extensions to MIME types (fallback when magic fails)
        mime_types = {
            '.txt': 'text/plain',
            '.pdf': 'application/pdf',
            '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            '.csv': 'text/csv',
            '.html': 'text/html',
            '.htm': 'text/html',
            '.xml': 'text/xml',
            '.json': 'application/json',
            '.yaml': 'text/yaml',
            '.yml': 'text/yaml',
            '.md': 'text/markdown',
            '.rtf': 'application/rtf',
            '.odt': 'application/vnd.oasis.opendocument.text',
            '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
            '.odp': 'application/vnd.oasis.opendocument.presentation',
            '.log': 'text/plain',
            '.ini': 'text/plain',
            '.conf': 'text/plain',
            '.cfg': 'text/plain',
            '.js': 'application/javascript',
            '.css': 'text/css',
            '.php': 'text/php',
            '.py': 'text/x-python',
            '.rb': 'text/x-ruby',
            '.java': 'text/x-java-source',
            '.c': 'text/x-c',
            '.cpp': 'text/x-c++',
            '.h': 'text/x-c-header',
            '.hpp': 'text/x-c++-header',
            '.sh': 'application/x-sh',
            '.bat': 'application/x-bat',
            '.ps1': 'application/x-powershell',
            '.sql': 'text/x-sql',
            # Add more mappings as needed
        }
        try:
            mime = magic.Magic(mime=True)
            return mime.from_file(file_path)
        except FileNotFoundError:
            logging.error(f"File not found: {file_path}. Encoding: {sys.getfilesystemencoding()}", exc_info=True)
        except Exception:
            # Retry once with a pathlib-normalized path before giving up on magic.
            try:
                clean_file_path = pathlib.Path(file_path)
                mime = magic.Magic(mime=True)
                return mime.from_file(clean_file_path)
            except Exception as e:
                logging.error(f"The magic library failed classifying the file type: {e} // falling back to file extension")
        _, file_extension = os.path.splitext(file_path)
        return mime_types.get(file_extension.lower(), 'application/octet-stream')  # Default to binary type if unknown

    def run(self):
        """Process every queued file in order, honoring the abort flag and
        skipping files when the database is unreachable."""
        logging.debug("Thread run method started.")
        try:
            for index, file_path in enumerate(self.file_paths):
                file_size_kb = os.path.getsize(file_path) / 1024  # Get file size in KiB
                self.total_data_processed_kb += file_size_kb
                if self.abort_flag:
                    self.update_status.emit("Analyse abgebrochen")
                    return
                logging.debug(f"Attempting to process file: {file_path}")
                file_type = self.classify_file_type(file_path)
                logging.info(f"ANALYZING {file_path} TYPE {file_type}")

                # Check and potentially re-establish the database connection
                if not self.check_and_restore_db_connection():
                    logging.error(f"Database connection could not be established for {file_path}. Skipping file.")
                    continue
                with session_scope() as session:
                    if 'text/' in file_type:
                        process_text_file(file_path, file_type, self, session, lambda: self.abort_flag)
                    elif 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' in file_type:
                        process_xlsx_file(file_path, file_type, self, session, lambda: self.abort_flag)
                    elif 'application/pdf' in file_type or file_type == ".pdf":
                        process_pdf_file(file_path, file_type, self, session, lambda: self.abort_flag)
                    elif 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' in file_type:
                        # Handling DOCX file
                        process_docx_file(file_path, file_type, self, session, lambda: self.abort_flag)
                    else:
                        logging.info(f"Skipping unsupported file type: {file_type}")
                        self.all_unsupported_files.append(file_path)
                        self.unsupported_files_count += 1
                        if len(self.unsupported_files_list) < 20:
                            self.unsupported_files_list.append(f"{file_path} (Type: {file_type})")
                        continue

                self.update_tree_signal.emit()
                self.update_checkboxes_signal.emit()
                self.processed_files_count = index + 1
                self.update_progress.emit(index + 1)
                self.update_status.emit(f" Verarbeitung abgeschlossen. {index + 1 - self.unsupported_files_count} von {len(self.file_paths)} Dateien verarbeitet.")
        except Exception as e:
            # exc_info belongs on the logging call; the original passed it to
            # pyqtSignal.emit(), which raised TypeError exactly when an error
            # needed reporting.
            logging.error(f"Error processing files: {e}", exc_info=True)
            self.update_status.emit(f"Fehler beim Verarbeiten von Dateien {e}")

    def check_and_restore_db_connection(self):
        """Probe the database with SELECT 1, retrying up to 5 times with
        exponential backoff (2, 4, 8, 16, 32 seconds). Returns True once a
        probe succeeds, False after all attempts fail."""
        max_attempts = 5
        for attempt in range(1, max_attempts + 1):
            try:
                with session_scope() as session:
                    session.execute(text('SELECT 1'))
                return True
            except OperationalError:
                time.sleep(2 ** attempt)  # Exponential backoff
        logging.error("Failed to re-establish database connection after several attempts.")
        return False

    def calculate_and_emit_rate(self):
        """Emit throughput statistics, throttled to at most once per second."""
        current_time = time.time()
        if current_time - self.last_update_time >= 1:  # Check if 1 second has passed
            entity_rate = self.calculate_rate()
            file_rate = self.calculate_file_rate()
            data_rate_kibs = self.calculate_data_rate()
            estimated_time = self.calculate_estimated_time_to_completion(data_rate_kibs)
            self.update_rate.emit(entity_rate, self.total_entities_count, file_rate,
                                  self.processed_files_count, estimated_time, data_rate_kibs)
            self.last_update_time = current_time

    def calculate_data_rate(self):
        """KiB processed per second since thread start (0 if no time elapsed)."""
        elapsed_time = time.time() - self.start_time
        return self.total_data_processed_kb / elapsed_time if elapsed_time > 0 else 0

    def calculate_estimated_time_to_completion(self, data_rate_kibs):
        """Seconds remaining at the given data rate; inf when the rate is zero."""
        remaining_data_kb = self.total_files_size_kb - self.total_data_processed_kb
        if data_rate_kibs > 0:
            return remaining_data_kb / data_rate_kibs
        return float('inf')  # Indefinite time if rate is zero

    def calculate_file_rate(self):
        """Files processed per second since thread start."""
        elapsed_time = time.time() - self.start_time
        return self.processed_files_count / elapsed_time if elapsed_time > 0 else 0

    def calculate_rate(self):
        """Entities extracted per second since thread start."""
        elapsed_time = time.time() - self.start_time
        return self.total_entities_count / elapsed_time if elapsed_time > 0 else 0

    def abort(self):
        """Request cooperative cancellation; checked once per file in run()."""
        self.abort_flag = True

    def getUnsupportedFilesCount(self):
        return self.unsupported_files_count

    def getUnsupportedFilesList(self):
        return self.unsupported_files_list

    def debugFileProcessor(self, file_path):
        """Diagnostic helper: verify the file exists and is readable, and log
        what magic and classify_file_type report for it. Returns False only
        for missing/unreadable files."""
        logging.debug(f"Attempting to process file: {file_path}")

        if not os.path.exists(file_path):
            logging.warning(f"File does not exist: {file_path}")
            return False
        elif not os.access(file_path, os.R_OK):
            logging.warning(f"File is not accessible: {file_path}")
            return False

        try:
            detected_encoding = magic.from_file(file_path, mime=True)
            logging.debug(f"Detected encoding for {file_path}: {detected_encoding}")
        except Exception as e:
            logging.error(f"Failed to detect encoding for {file_path}: {e}", exc_info=True)

        file_type = self.classify_file_type(file_path)
        logging.debug(f"Classified file type for {file_path}: {file_type}")

        return True
|
||||
|
101
logline_leviathan/file_processor/parser_thread.py
Normal file
101
logline_leviathan/file_processor/parser_thread.py
Normal file
@ -0,0 +1,101 @@
|
||||
# the parse_content receives the full_content string from the methods process_text_file, process_xlsx_file, process_pdf_file or similar along the abort_flag
|
||||
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import logging
|
||||
import importlib.util
|
||||
import multiprocessing
|
||||
from logline_leviathan.database.database_manager import EntityTypesTable
|
||||
|
||||
#logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
#multiprocessing.set_start_method('spawn')
|
||||
|
||||
def parse_with_script(parser_module_name, full_content):
    """Dynamically load ``data/parser/<name>.py`` and run its parse() on *full_content*.

    Returns whatever the parser's parse() returns, or an empty list when the
    module is missing or its parse() raises.
    """
    module_name = parser_module_name.replace('.py', '')  # Remove .py extension

    if getattr(sys, 'frozen', False):
        # Frozen build: parser scripts live next to the executable.
        parser_dir = os.path.join(os.path.dirname(sys.executable), 'data', 'parser')
    else:
        # Source checkout: three levels up from this file is the project root.
        project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
        parser_dir = os.path.join(project_root, 'data', 'parser')

    parser_module_path = os.path.join(parser_dir, module_name + '.py')
    if not os.path.exists(parser_module_path):
        logging.error(f"Parser module not found: {parser_module_path}")
        return []

    # Import the module directly from its file path.
    spec = importlib.util.spec_from_file_location(module_name, parser_module_path)
    parser_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(parser_module)

    try:
        return parser_module.parse(full_content)
    except Exception as e:
        logging.error(f"Error using parser module {module_name}: {e}")
        return []
|
||||
|
||||
|
||||
|
||||
def parse_with_regex(regex_pattern, full_content):
    """Scan *full_content* with *regex_pattern*.

    Returns a list of (matched_text, start_offset, end_offset) tuples, or an
    empty list (after logging) when the pattern does not compile.
    """
    try:
        hits = []
        for hit in re.finditer(regex_pattern, full_content):
            hits.append((hit.group(), hit.start(), hit.end()))
        return hits
    except re.error as e:
        logging.error(f"Invalid regex pattern: {regex_pattern}. Error: {e}")
        return []
|
||||
|
||||
|
||||
def parse_entity_type(entity_type, full_content):
    """Parse *full_content* for a single entity type.

    Prefers the entity's script parser (when the script file exists under
    data/parser) over its regex pattern. Returns a list of
    (entity_type_id, matched_text, start, end) tuples; [] on any failure.
    """
    try:
        script_name = entity_type.script_parser
        if script_name and os.path.exists(os.path.join('data', 'parser', script_name)):
            # Use the script_parser name directly (without its .py suffix).
            hits = parse_with_script(script_name.replace('.py', ''), full_content)
        elif entity_type.regex_pattern:
            hits = parse_with_regex(entity_type.regex_pattern, full_content)
        else:
            hits = []
        return [(entity_type.entity_type_id, *hit) for hit in hits]
    except Exception as e:
        logging.error(f"Error in parse_entity_type for {entity_type}: {e}")
        return []
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def parse_content(full_content, abort_flag, db_session):
    """Run every enabled entity-type parser over *full_content* in a process pool.

    abort_flag is a zero-argument callable polled between results; remaining
    results are dropped once it returns True. Returns a list of
    (entity_type_id, matched_text, start, end) tuples.
    """
    entity_types = db_session.query(EntityTypesTable).filter(EntityTypesTable.parser_enabled == True).all()
    matches = []

    with multiprocessing.Pool() as pool:
        # Fan out one async task per entity type; collect in submission order.
        pending = [pool.apply_async(parse_entity_type, (et, full_content)) for et in entity_types]

        for result in pending:
            if abort_flag():
                logging.debug("Aborting parsing due to flag")
                break
            try:
                matches.extend(result.get())
            except Exception as e:
                logging.error(f"Error parsing entity type: {e}")

    # Sanity check: downstream unpacking expects exactly 4 fields per match.
    for match in matches:
        if len(match) != 4:
            logging.error(f"Unexpected format for parsed entity: {match}")
    return matches
|
||||
|
||||
|
||||
|
153
logline_leviathan/file_processor/pdf_processor.py
Normal file
153
logline_leviathan/file_processor/pdf_processor.py
Normal file
@ -0,0 +1,153 @@
|
||||
import logging
|
||||
import re
|
||||
import os
|
||||
from datetime import datetime
|
||||
from logline_leviathan.file_processor.parser_thread import parse_content
|
||||
from logline_leviathan.file_processor.file_database_ops import handle_file_metadata, handle_individual_entity, handle_distinct_entity, handle_context_snippet
|
||||
import fitz
|
||||
logging.getLogger('pdfminer').setLevel(logging.INFO)
|
||||
|
||||
|
||||
def read_pdf_content(file_path):
    """Return a list with each page's plain text, or None when the PDF
    cannot be opened/read."""
    try:
        with fitz.open(file_path) as pdf:
            return [page.get_text("text") for page in pdf]
    except Exception as e:
        logging.error(f"Error reading PDF file {file_path}: {e}")
        return None
|
||||
|
||||
def process_pdf_file(file_path, file_mimetype, thread_instance, db_session, abort_flag):
    """Extract entities from a PDF page by page and store them via the
    file_database_ops helpers.

    Returns the number of entities stored; 0 on failure (after rollback).
    abort_flag is a zero-argument callable polled before each page.

    Note: the whole loop stays inside ``with fitz.open(...)`` because
    get_line_numbers_from_pos() re-reads pages from the open document.
    The original's ``if pages is None`` guard was dead code (a list
    comprehension never yields None) and is replaced by skipping empty pages.
    """
    try:
        logging.info(f"Starting processing of PDF file: {file_path}")
        entity_count = 0
        with fitz.open(file_path) as pdf:  # Open the PDF with fitz
            pages = [page.get_text("text") for page in pdf]
            file_metadata = handle_file_metadata(db_session, file_path, file_mimetype)

            for page_number, content in enumerate(pages):
                if not content:
                    continue  # Skip empty pages

                if abort_flag():
                    logging.info("Processing aborted.")
                    return entity_count
                thread_instance.update_status.emit(f" Verarbeite PDF-Datei: {file_path}, Seite {page_number + 1}")

                parsed_entities = parse_content(content, abort_flag, db_session)

                for entity_type_id, match_text, start_pos, end_pos in parsed_entities:
                    if not match_text.strip():
                        continue

                    timestamp = find_timestamp_before_match(content, start_pos, file_path)
                    match_start_line, match_end_line = get_line_numbers_from_pos(pdf, page_number, start_pos, end_pos)

                    entity = handle_distinct_entity(db_session, match_text, entity_type_id)
                    individual_entity = handle_individual_entity(db_session, entity, file_metadata, match_start_line, timestamp, entity_type_id, abort_flag, thread_instance)

                    if individual_entity:
                        handle_context_snippet(db_session, individual_entity, [content], match_start_line, match_end_line)
                        entity_count += 1

        logging.info(f"Finished processing PDF file: {file_path}")
        return entity_count
    except Exception as e:
        db_session.rollback()
        logging.error(f"Error processing PDF file {file_path}: {e}")
        return 0
|
||||
|
||||
def alternative_get_line_numbers_from_pos(pdf, page_number, start_pos, end_pos):
    """Alternative line-number lookup that counts lines cumulatively across
    all pages up to *page_number*.

    NOTE(review): positions are compared against the length of a single line
    (current_pos is reset per line, not accumulated) — presumably only the
    very first lines of a page can match; confirm intent before relying on it.
    Returns (start_line_number, end_line_number); either may stay None.
    """
    cumulative_line_number = 0
    start_line_number = end_line_number = None

    for current_page in range(page_number + 1):
        page = pdf[current_page]
        text_blocks = page.get_text("dict")["blocks"]

        for block in text_blocks:
            if 'lines' not in block:
                continue
            for line_number, line in enumerate(block['lines']):
                if current_page == page_number:
                    line_text = "".join(span['text'] for span in line['spans'])
                    current_pos = len(line_text)

                    if start_pos < current_pos and start_line_number is None:
                        start_line_number = cumulative_line_number + line_number
                    if end_pos <= current_pos:
                        end_line_number = cumulative_line_number + line_number
                        return start_line_number, end_line_number

                cumulative_line_number += 1

    return start_line_number, end_line_number
|
||||
|
||||
|
||||
def get_line_numbers_from_pos(pdf, page_number, start_pos, end_pos):
    """Map character offsets within one page's text to line numbers on that page.

    Lines are numbered consecutively across ALL text blocks of the page; the
    original restarted ``enumerate`` in every block, so offsets falling in a
    second or later block were mapped to wrong (block-local) line numbers.
    Returns (start_line, end_line); both stay 0 when the offsets lie beyond
    the page's text.
    """
    page = pdf[page_number]
    text_blocks = page.get_text("dict")["blocks"]
    start_line_number = end_line_number = 0
    current_pos = 0
    absolute_line = 0  # page-wide running line index across blocks

    for block in text_blocks:
        for line in block.get('lines', []):
            line_text = "".join(span['text'] for span in line['spans'])
            current_pos += len(line_text)
            if start_line_number == 0 and start_pos < current_pos:
                start_line_number = absolute_line
            if end_pos <= current_pos:
                # End found; start_line_number is 0 either because the start
                # is on line 0 or was never reached — both map to 0 as before.
                return start_line_number, absolute_line
            absolute_line += 1
    return start_line_number, end_line_number
|
||||
|
||||
|
||||
|
||||
def find_timestamp_before_match(content, match_start_pos, file_path):
    """Return the timestamp closest before *match_start_pos* in *content*,
    normalized to '%Y-%m-%d %H:%M:%S'.

    Falls back to searching the file's basename when the content holds no
    timestamp; returns None when neither does. Patterns are tried in order,
    each scanned back-to-front so the latest occurrence wins.
    """
    # (regex, strptime format) pairs; regexes tolerate line breaks via DOTALL
    timestamp_patterns = [
        (r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', '%Y-%m-%d %H:%M:%S'),          # ISO 8601 Extended
        (r'\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}', '%Y/%m/%d %H:%M:%S'),          # ISO 8601 with slashes
        (r'\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}', '%d/%m/%Y %H:%M:%S'),          # European Date Format
        (r'\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}', '%m-%d-%Y %H:%M:%S'),          # US Date Format
        (r'\d{8}_\d{6}', '%Y%m%d_%H%M%S'),                                       # Compact Format
        (r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}', '%Y-%m-%dT%H:%M:%S'),          # ISO 8601 Basic
        (r'\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2}', '%d.%m.%Y %H:%M:%S'),        # German Date Format
        (r'\d{4}\d{2}\d{2} \d{2}:\d{2}:\d{2}', '%Y%m%d %H:%M:%S'),              # Basic Format without Separators
        (r'\d{1,2}-[A-Za-z]{3}-\d{4} \d{2}:\d{2}:\d{2}', '%d-%b-%Y %H:%M:%S'),  # English Date Format with Month Name
        (r'(?:19|20)\d{10}', '%Y%m%d%H%M'),                                      # Compact Numeric Format
        # Add more patterns as needed
    ]

    def last_parseable_timestamp(haystack):
        for pattern, date_format in timestamp_patterns:
            for hit in reversed(list(re.finditer(pattern, haystack, re.DOTALL))):
                try:
                    # Normalize the matched timestamp to the standard format.
                    parsed = datetime.strptime(hit.group().replace('\n', ''), date_format)
                    return parsed.strftime('%Y-%m-%d %H:%M:%S')
                except ValueError:
                    continue
        return None

    # Prefer a timestamp found in the content before the match position.
    found = last_parseable_timestamp(content[:match_start_pos])
    if found:
        return found

    # Otherwise fall back to the file name.
    return last_parseable_timestamp(os.path.basename(file_path))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
104
logline_leviathan/file_processor/text_processor.py
Normal file
104
logline_leviathan/file_processor/text_processor.py
Normal file
@ -0,0 +1,104 @@
|
||||
import re
|
||||
import os
|
||||
import logging
|
||||
from logline_leviathan.file_processor.parser_thread import parse_content
|
||||
from datetime import datetime
|
||||
from logline_leviathan.file_processor.file_database_ops import handle_file_metadata, handle_individual_entity, handle_context_snippet, handle_distinct_entity
|
||||
from logline_leviathan.database.database_manager import session_scope
|
||||
|
||||
def read_file_content(file_path):
    """Return the file's lines (newline characters kept) decoded as UTF-8,
    or None when the file cannot be read."""
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            return handle.readlines()
    except Exception as e:
        logging.error(f"Error reading file {file_path}: {e}")
        return None
|
||||
|
||||
|
||||
def process_text_file(file_path, file_mimetype, thread_instance, db_session, abort_flag):
    """Extract entities from a text-based file and store them via the
    file_database_ops helpers.

    Returns the number of entities stored; 0 on failure (after rollback).
    abort_flag is a zero-argument callable forwarded to parse_content.
    """
    try:
        file_metadata = handle_file_metadata(db_session, file_path, file_mimetype)
        content = read_file_content(file_path)
        if content is None:
            # read_file_content already logged the failure. The original fell
            # through to ''.join(None) and raised TypeError here.
            return 0
        full_content = ''.join(content)  # Join all lines into a single string
        thread_instance.update_status.emit(f" Verarbeite textbasierte Datei: {file_path}")

        # Run all enabled parsers; yields (type_id, text, start, end) tuples.
        parsed_entities = parse_content(full_content, abort_flag, db_session)

        entity_count = 0
        for entity_type_id, match_text, start_pos, end_pos in parsed_entities:
            if not match_text.strip():
                continue

            timestamp = find_timestamp_before_match(full_content, start_pos, file_path)
            match_start_line, match_end_line = get_line_numbers_from_pos(content, start_pos, end_pos)

            entity = handle_distinct_entity(db_session, match_text, entity_type_id)
            individual_entity = handle_individual_entity(db_session, entity, file_metadata, match_start_line, timestamp, entity_type_id, abort_flag, thread_instance)

            if individual_entity:
                entity_count += 1
                handle_context_snippet(db_session, individual_entity, content, match_start_line, match_end_line)

        return entity_count
    except Exception as e:
        db_session.rollback()
        logging.error(f"Error processing text file {file_path}: {e}")
        return 0
|
||||
|
||||
|
||||
def get_line_numbers_from_pos(content, start_pos, end_pos):
    """Translate character offsets into (start_line, end_line) indices.

    *content* is the file as a list of lines (newlines included). Both
    indices default to 0 when an offset lies beyond the text.

    Fix: the original scanned for end_line in a second loop starting at
    content[start_line:] WITHOUT resetting current_pos, double-counting the
    lines up to start_line — e.g. for 3 lines of 3 chars and end_pos=8 it
    returned end_line=1 instead of 2. A single cumulative pass is correct.
    """
    start_line = end_line = 0
    found_start = False
    current_pos = 0
    for i, line in enumerate(content):
        current_pos += len(line)
        if not found_start and start_pos < current_pos:
            start_line = i
            found_start = True
        if end_pos <= current_pos:
            end_line = i
            break
    return start_line, end_line
|
||||
|
||||
|
||||
def find_timestamp_before_match(content, match_start_pos, file_path):
    """Find the timestamp nearest before *match_start_pos* in *content* and
    return it normalized to '%Y-%m-%d %H:%M:%S'.

    When the content yields nothing, the file's basename is searched instead;
    None is returned if that fails too.
    """

    def search_timestamps(search_content):
        # (regex, strptime format) pairs; DOTALL + newline stripping lets a
        # timestamp survive a line break in the middle.
        timestamp_patterns = [
            (r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', '%Y-%m-%d %H:%M:%S'),          # ISO 8601 Extended
            (r'\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}', '%Y/%m/%d %H:%M:%S'),          # ISO 8601 with slashes
            (r'\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}', '%d/%m/%Y %H:%M:%S'),          # European Date Format
            (r'\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}', '%m-%d-%Y %H:%M:%S'),          # US Date Format
            (r'\d{8}_\d{6}', '%Y%m%d_%H%M%S'),                                       # Compact Format
            (r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}', '%Y-%m-%dT%H:%M:%S'),          # ISO 8601 Basic
            (r'\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2}', '%d.%m.%Y %H:%M:%S'),        # German Date Format
            (r'\d{4}\d{2}\d{2} \d{2}:\d{2}:\d{2}', '%Y%m%d %H:%M:%S'),              # Basic Format without Separators
            (r'\d{1,2}-[A-Za-z]{3}-\d{4} \d{2}:\d{2}:\d{2}', '%d-%b-%Y %H:%M:%S'),  # English Date Format with Month Name
            (r'(?:19|20)\d{10}', '%Y%m%d%H%M'),                                      # Compact Numeric Format
            # Add more patterns as needed
        ]

        for pattern, date_format in timestamp_patterns:
            # Iterate matches back-to-front: the last (closest) one wins.
            for candidate in reversed(list(re.finditer(pattern, search_content, re.DOTALL))):
                try:
                    stamp = datetime.strptime(candidate.group().replace('\n', ''), date_format)
                    return stamp.strftime('%Y-%m-%d %H:%M:%S')
                except ValueError:
                    continue
        return None

    # Content before the match position takes priority...
    result = search_timestamps(content[:match_start_pos])
    if result:
        return result
    # ...then the file name as a last resort.
    return search_timestamps(os.path.basename(file_path))
|
||||
|
120
logline_leviathan/file_processor/xlsx_processor.py
Normal file
120
logline_leviathan/file_processor/xlsx_processor.py
Normal file
@ -0,0 +1,120 @@
|
||||
import logging
|
||||
import datetime
|
||||
import re
|
||||
import os
|
||||
from openpyxl import load_workbook
|
||||
from logline_leviathan.file_processor.parser_thread import parse_content
|
||||
from logline_leviathan.file_processor.file_database_ops import handle_file_metadata, handle_individual_entity, handle_context_snippet, handle_distinct_entity
|
||||
|
||||
def read_xlsx_content(file_path):
    """Open *file_path* as an XLSX workbook.

    Returns the openpyxl ``Workbook`` on success, or ``None`` if the file
    could not be loaded — the failure is logged instead of raised so that
    callers can simply skip the file.
    """
    try:
        return load_workbook(filename=file_path)
    except Exception as e:
        logging.error(f"Error reading XLSX file {file_path}: {e}")
        return None
|
||||
|
||||
def get_line_numbers_from_pos(content, start_pos, end_pos):
    """Map offsets in ``'\\n'.join(content)`` back to row indices of a sheet.

    ``process_xlsx_file`` in this module joins the per-row strings with
    ``'\\n'`` before handing the result to the parser, so in the string the
    offsets refer to, every row occupies ``len(row) + 1`` characters (the
    ``+1`` being the separator).

    Args:
        content: List of per-row strings (one entry per spreadsheet row).
        start_pos: Offset of the match start in the joined string.
        end_pos: Offset of the match end in the joined string.

    Returns:
        Tuple ``(start_line, end_line)`` of 0-based row indices.
    """
    start_line = end_line = 0
    current_pos = 0  # offset of the beginning of row i in the joined string
    for i, line in enumerate(content):
        # BUG FIX: +1 accounts for the '\n' separator inserted by the
        # caller's join; the original ignored it and drifted one character
        # per row.
        next_pos = current_pos + len(line) + 1
        if start_pos < next_pos:
            start_line = i
            break
        current_pos = next_pos
    # BUG FIX: the original second loop accumulated on top of a current_pos
    # that already included the start row, double-counting it; resume from
    # the offset at the beginning of start_line instead.
    end_line = start_line
    for i, line in enumerate(content[start_line:], start=start_line):
        current_pos += len(line) + 1
        if end_pos <= current_pos:
            end_line = i
            break
    return start_line, end_line
|
||||
|
||||
|
||||
def find_timestamp_before_match(content, match_start_pos, file_path):
    """Return the last parseable timestamp before the match, normalized to
    ``%Y-%m-%d %H:%M:%S``; falls back to scanning the file's basename.

    Returns ``None`` if neither the preceding content nor the basename
    contains a timestamp matching any known pattern.
    """
    # Function to search for timestamps across line breaks
    def search_timestamps(search_content):
        # (regex, strptime format) pairs, tried in priority order.
        timestamp_patterns = [
            (r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', '%Y-%m-%d %H:%M:%S'),  # ISO 8601 Extended
            (r'\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}', '%Y/%m/%d %H:%M:%S'),  # ISO 8601 with slashes
            (r'\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}', '%d/%m/%Y %H:%M:%S'),  # European Date Format
            (r'\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}', '%m-%d-%Y %H:%M:%S'),  # US Date Format
            (r'\d{8}_\d{6}', '%Y%m%d_%H%M%S'),                              # Compact Format
            (r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}', '%Y-%m-%dT%H:%M:%S'),  # ISO 8601 Basic
            (r'\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2}', '%d.%m.%Y %H:%M:%S'),# German Date Format
            (r'\d{4}\d{2}\d{2} \d{2}:\d{2}:\d{2}', '%Y%m%d %H:%M:%S'),      # Basic Format without Separators
            (r'\d{1,2}-[A-Za-z]{3}-\d{4} \d{2}:\d{2}:\d{2}', '%d-%b-%Y %H:%M:%S'),  # English Date Format with Month Name
            (r'(?:19|20)\d{10}', '%Y%m%d%H%M'),                             # Compact Numeric Format
            # Add more patterns as needed
        ]

        for pattern, date_format in timestamp_patterns:
            for timestamp_match in reversed(list(re.finditer(pattern, search_content, re.DOTALL))):
                try:
                    # BUG FIX: this module does `import datetime`, so the
                    # class is datetime.datetime — the original call
                    # `datetime.strptime(...)` raised AttributeError (not
                    # caught by the ValueError handler below), which the
                    # caller's broad except then swallowed, silently
                    # aborting the whole file.
                    matched_timestamp = datetime.datetime.strptime(
                        timestamp_match.group().replace('\n', ''), date_format)
                    return matched_timestamp.strftime('%Y-%m-%d %H:%M:%S')
                except ValueError:
                    # Pattern matched but the value is not a valid date;
                    # try the next candidate.
                    continue
        return None

    # First, try to find a timestamp in the content
    timestamp = search_timestamps(content[:match_start_pos])
    if timestamp:
        return timestamp

    # If not found in content, try to find a timestamp in the file path
    basename = os.path.basename(file_path)
    return search_timestamps(basename)
|
||||
|
||||
|
||||
def process_xlsx_file(file_path, file_mimetype, thread_instance, db_session, abort_flag):
    """Parse every sheet of an XLSX workbook and persist the entities found.

    Each sheet gets its own FileMetadata record (keyed by sheet name); rows
    are flattened to space-joined strings, joined with '\\n' and fed to
    parse_content.  Returns the number of entities stored, or 0 on error
    (the session is rolled back and the error logged).

    Args:
        file_path: Path of the workbook to process.
        file_mimetype: MIME type recorded with the file metadata.
        thread_instance: Worker object whose ``update_status`` signal
            receives progress messages.
        db_session: Database session used for all writes.
        abort_flag: Zero-argument callable; a truthy return aborts
            processing after the current sheet's metadata was written.
    """
    try:
        logging.info(f"Starting processing of XLSX file: {file_path}")
        workbook = read_xlsx_content(file_path)

        if workbook is None:
            # Unreadable workbook; read_xlsx_content already logged why.
            return 0

        entity_count = 0

        for sheet in workbook:
            sheet_name = sheet.title
            file_metadata = handle_file_metadata(db_session, file_path, file_mimetype, sheet_name=sheet_name)

            if abort_flag():
                logging.info("Processing aborted.")
                # Return what was counted so far rather than discarding it.
                return entity_count

            # Combining all cells into a single string for parsing
            content = [' '.join([str(cell.value) if cell.value is not None else '' for cell in row]) for row in sheet.iter_rows()]
            full_content = '\n'.join(content)
            thread_instance.update_status.emit(f"Processing Excel file: {file_path} Sheet {sheet_name}")

            # Offsets produced by parse_content refer to full_content,
            # which includes the '\n' separators between rows.
            parsed_entities = parse_content(full_content, abort_flag, db_session)

            for entity_type_id, match_text, start_pos, end_pos in parsed_entities:
                if not match_text.strip():
                    # Skip whitespace-only matches.
                    continue

                match_start_line, match_end_line = get_line_numbers_from_pos(content, start_pos, end_pos)

                # Find timestamp before match
                timestamp = find_timestamp_before_match(full_content, start_pos, file_path)

                entity = handle_distinct_entity(db_session, match_text, entity_type_id)
                individual_entity = handle_individual_entity(db_session, entity, file_metadata, match_start_line, timestamp, entity_type_id, abort_flag, thread_instance)
                if individual_entity:
                    # Only count entities that were actually persisted.
                    handle_context_snippet(db_session, individual_entity, content, match_start_line, match_end_line)
                    entity_count += 1

        logging.info(f"Finished processing XLSX file: {file_path}")
        return entity_count
    except Exception as e:
        # Roll back any partial writes for this file, log, and signal
        # failure to the caller with a zero count.
        db_session.rollback()
        logging.error(f"Error processing XLSX file {file_path}: {e}")
        return 0
|
||||
|
||||
|
||||
|
0
logline_leviathan/gui/__init__.py
Normal file
0
logline_leviathan/gui/__init__.py
Normal file
398
logline_leviathan/gui/checkbox_panel.py
Normal file
398
logline_leviathan/gui/checkbox_panel.py
Normal file
@ -0,0 +1,398 @@
|
||||
from PyQt5.QtWidgets import QWidget, QVBoxLayout, QCheckBox, QToolTip, QTreeWidget, QTreeWidgetItem, QHBoxLayout, QLabel, QScrollArea
|
||||
from PyQt5.QtCore import Qt
|
||||
from PyQt5.QtGui import QColor
|
||||
import logging
|
||||
from logline_leviathan.database.database_manager import EntitiesTable, DistinctEntitiesTable, EntityTypesTable, FileMetadata, session_scope
|
||||
|
||||
class CustomCheckBox(QCheckBox):
    """Checkbox that re-anchors its tooltip to the cursor while hovering."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Track the mouse so mouseMoveEvent fires even with no button held.
        self.setMouseTracking(True)
        self.setStyleSheet("QCheckBox { color: white; }")

    def mouseMoveEvent(self, event):
        # Show the tooltip at the current global cursor position.
        QToolTip.showText(event.globalPos(), self.toolTip())
        super().mouseMoveEvent(event)
|
||||
|
||||
class FileCheckboxItem(QWidget):
    """Row widget: a checkbox (checked by default) next to a text label."""

    def __init__(self, text, parent=None):
        super().__init__(parent)

        self.checkBox = QCheckBox()
        self.checkBox.setChecked(True)
        self.label = QLabel(text)
        self.label.setStyleSheet("QLabel { color: white; }")

        row = QHBoxLayout(self)
        row.addWidget(self.checkBox)
        row.addWidget(self.label)
        # Stretch keeps the checkbox and label packed to the left edge.
        row.addStretch(1)
        self.setLayout(row)
|
||||
|
||||
class CheckboxPanel(QWidget):
    """Tree of entity-type checkboxes used to select types for export.

    Rows are colored green (type occurs in the DB and is parser-enabled),
    red (checkable leaf type whose parser is disabled) or white (all
    others).  Rows whose entity_type starts with "category_" act as
    non-checkable group headers.
    """

    def __init__(self):
        super().__init__()

        layout = QVBoxLayout(self)

        self.treeWidget = QTreeWidget()
        self.treeWidget.setHeaderHidden(True)
        self.treeWidget.setStyleSheet("""
            QTreeWidget::branch {color: white; /* White color for branches */
            }
        """)
        layout.addWidget(self.treeWidget)

    def _addChildren(self, parentItem, parent_entity_type, db_session, used_ids, depth=0):
        """Recursively append a child item for every entity type whose
        parent_type equals *parent_entity_type*."""
        try:
            # Log the depth of recursion
            #logging.debug(f"Adding children at depth: {depth}, parent entity type: {parent_entity_type}")

            child_entity_types = db_session.query(EntityTypesTable).filter(EntityTypesTable.parent_type == parent_entity_type).all()
            for child_entity_type in child_entity_types:
                count = db_session.query(EntitiesTable).filter(EntitiesTable.entity_types_id == child_entity_type.entity_type_id).count()
                text = f"{child_entity_type.gui_name}"
                childItem = QTreeWidgetItem(parentItem)

                # "category_*" rows are group headers and stay non-checkable.
                isCheckable = not child_entity_type.entity_type.startswith("category_")
                # NOTE: conditional *expression statements* — the Qt calls run
                # only when isCheckable is truthy; the None branch is a no-op.
                childItem.setFlags(childItem.flags() | Qt.ItemIsUserCheckable) if isCheckable else None
                childItem.setCheckState(0, Qt.Unchecked) if isCheckable else None
                text += f" ({count} Erwähnungen)" if isCheckable else ""
                childItem.setText(0, text)
                childItem.setToolTip(0, child_entity_type.gui_tooltip)
                childItem.entity_type_id = child_entity_type.entity_type_id
                childItem.entity_type = child_entity_type.entity_type
                # Color coding: green = present in DB and not explicitly
                # disabled; red = checkable leaf not explicitly enabled;
                # white = everything else.  Note `not x == False` is NOT the
                # same as plain truthiness when parser_enabled is None.
                if child_entity_type.entity_type_id in used_ids and not child_entity_type.parser_enabled == False:
                    color = QColor('green')
                elif not child_entity_type.entity_type.startswith("category_") and not child_entity_type.parser_enabled == True:
                    color = QColor('red')
                else:
                    color = QColor('white')
                childItem.setForeground(0, color)

                # Recursive call with increased depth
                depth = depth + 1
                self._addChildren(childItem, child_entity_type.entity_type, db_session, used_ids, depth)
        except Exception as e:
            logging.error(f"Error adding children: {e}")

    def updateCheckboxes(self, db_session):
        """Rebuild the whole tree from the database.

        NOTE(review): the db_session parameter is immediately shadowed by a
        fresh session_scope() session — the argument is effectively unused;
        confirm callers don't rely on their session being used.
        """
        #logging.info("Updating checkboxes with database content")
        with session_scope() as db_session:
            try:
                # Query database for entity types
                entity_types = db_session.query(EntityTypesTable).all()
                used_ids = {d.entity_types_id for d in db_session.query(DistinctEntitiesTable.entity_types_id).distinct()}
                #logging.debug(f"Used IDs: {used_ids}")
                # Clear existing items
                self.treeWidget.clear()
                rootItems = {}

                # Construct hierarchical tree structure
                for entity_type in entity_types:
                    if entity_type.parent_type != 'root': # Skip non-root items
                        continue

                    count = db_session.query(EntitiesTable).filter(EntitiesTable.entity_types_id == entity_type.entity_type_id).count()
                    text = f"{entity_type.gui_name}"
                    treeItem = QTreeWidgetItem()
                    treeItem.setToolTip(0, entity_type.gui_tooltip)
                    treeItem.entity_type_id = entity_type.entity_type_id
                    treeItem.entity_type = entity_type.entity_type
                    if not entity_type.entity_type.startswith("category_"):
                        treeItem.setFlags(treeItem.flags() | Qt.ItemIsUserCheckable)
                        treeItem.setCheckState(0, Qt.Unchecked)
                        text = f"{entity_type.gui_name} ({count} Erwähnungen)"
                    treeItem.setText(0, text)
                    # Add item to tree widget
                    self.treeWidget.addTopLevelItem(treeItem)
                    rootItems[entity_type.entity_type_id] = treeItem

                    # Call recursive function to add children
                    self._addChildren(treeItem, entity_type.entity_type, db_session, used_ids)
                # Optionally expand all tree items
                self.treeWidget.expandAll()

            except Exception as e:
                logging.error("Error updating checkboxes", exc_info=True)

    def filterCheckboxes(self, filter_text):
        """Hide tree rows not matching *filter_text* (label or tooltip,
        case-insensitive); a match keeps the row and all ancestors visible."""
        def filterTreeItem(treeItem):
            # Check if the current item or any of its properties match the filter text
            try:
                match = filter_text.lower() in treeItem.text(0).lower() or filter_text.lower() in treeItem.toolTip(0).lower()
            except Exception as e:
                logging.error(f"Error checking filter match for tree item: {e}")
                match = False

            # Recursively check child items and set 'childMatch' if any child matches
            childMatch = False
            for j in range(treeItem.childCount()):
                if filterTreeItem(treeItem.child(j)):
                    childMatch = True

            # Unhide the item and its parents if there's a match in the item or its children
            if match or childMatch:
                treeItem.setHidden(False)
                parent = treeItem.parent()
                while parent:
                    parent.setHidden(False)
                    parent = parent.parent()
                return True
            else:
                treeItem.setHidden(True)
                return False

        # Filter all top-level items
        for i in range(self.treeWidget.topLevelItemCount()):
            filterTreeItem(self.treeWidget.topLevelItem(i))

    def checkAllVisible(self):
        """Check every visible, checkable item that has DB entries."""
        with session_scope() as db_session:
            used_ids = self.getUsedIds(db_session)
            self._setCheckStateForVisibleItems(Qt.Checked, used_ids)

    def uncheckAllVisible(self):
        """Uncheck every visible, checkable item that has DB entries."""
        with session_scope() as db_session:
            used_ids = self.getUsedIds(db_session)
            self._setCheckStateForVisibleItems(Qt.Unchecked, used_ids)

    def _setCheckStateForVisibleItems(self, state, used_ids):
        """Apply *state* to all visible checkable items whose
        entity_type_id appears in *used_ids* (depth-first)."""
        def setCheckState(item):
            try:
                if (item.flags() & Qt.ItemIsUserCheckable) and not item.isHidden(): # and item.parent():
                    # Check if entity_type_id is in used_ids
                    if hasattr(item, 'entity_type_id') and item.entity_type_id in used_ids:
                        item.setCheckState(0, state)
                        #logging.debug(f"Set check state for item with entity_type_id: {item.entity_type_id}")
                    #else:
                        #logging.debug(f"Item with entity_type_id: {getattr(item, 'entity_type_id', 'N/A')} skipped")

                # Recurse into children regardless of this item's state.
                for i in range(item.childCount()):
                    childItem = item.child(i)
                    setCheckState(childItem)
            except Exception as e:
                logging.error(f"Error in setCheckState: {e}")

        try:
            for i in range(self.treeWidget.topLevelItemCount()):
                topItem = self.treeWidget.topLevelItem(i)
                setCheckState(topItem)
        except Exception as e:
            logging.error(f"Error in _setCheckStateForVisibleItems: {e}")

    def getUsedIds(self, db_session):
        """Return the set of entity_types_id values present in the DB, or
        an empty set if the query fails."""
        # Assuming db_session is your database session object
        try:
            used_ids = {d.entity_types_id for d in db_session.query(DistinctEntitiesTable.entity_types_id).distinct()}
            return used_ids
        except Exception as e:
            logging.error(f"Error in getUsedIds: {e}")
            return set()

    def expandAllTreeItems(self):
        # Expand the whole tree, recursively.
        for i in range(self.treeWidget.topLevelItemCount()):
            self._expandCollapseRecursive(self.treeWidget.topLevelItem(i), True)

    def collapseAllTreeItems(self):
        # Collapse the whole tree, recursively.
        for i in range(self.treeWidget.topLevelItemCount()):
            self._expandCollapseRecursive(self.treeWidget.topLevelItem(i), False)

    def _expandCollapseRecursive(self, treeItem, expand=True):
        # Depth-first expand/collapse of the subtree rooted at treeItem.
        if treeItem is not None:
            treeItem.setExpanded(expand)
            for j in range(treeItem.childCount()):
                self._expandCollapseRecursive(treeItem.child(j), expand)
|
||||
|
||||
class DatabasePanel(QWidget):
    """Read-only tree showing per-entity-type occurrence counts.

    Same green/red/white color coding as CheckboxPanel, but rows carry
    counts instead of checkboxes; category rows show the recursive total
    of all their descendants.
    """

    def __init__(self):
        super().__init__()
        layout = QVBoxLayout(self)
        self.treeWidget = QTreeWidget()
        self.treeWidget.setHeaderHidden(True) # Hide the header
        self.treeWidget.setStyleSheet("""QTreeWidget::branch {color: white; /* White color for branches */}""")
        layout.addWidget(self.treeWidget)

    def _getTotalCountForChildren(self, entity_type, db_session):
        # Recursive function to get total count
        # (own count plus the counts of all descendant types).
        total_count = db_session.query(EntitiesTable).filter(EntitiesTable.entity_types_id == entity_type.entity_type_id).count()
        child_entity_types = db_session.query(EntityTypesTable).filter(EntityTypesTable.parent_type == entity_type.entity_type).all()
        for child_entity_type in child_entity_types:
            total_count += self._getTotalCountForChildren(child_entity_type, db_session)
        return total_count

    def _addChildren(self, parentItem, parent_entity_type, db_session, used_ids, depth=0):
        """Recursively append count rows for children of *parent_entity_type*."""
        try:
            # Log the depth of recursion
            #logging.debug(f"Adding children at depth: {depth}, parent entity type: {parent_entity_type}")

            child_entity_types = db_session.query(EntityTypesTable).filter(EntityTypesTable.parent_type == parent_entity_type).all()
            for child_entity_type in child_entity_types:
                if not child_entity_type.entity_type.startswith("category_"):
                    count = db_session.query(EntitiesTable).filter(EntitiesTable.entity_types_id == child_entity_type.entity_type_id).count()
                    text = f"    {count} - {child_entity_type.gui_name} ({child_entity_type.entity_type})"
                else:
                    # Use the new method to get the total count for this category
                    total_count = self._getTotalCountForChildren(child_entity_type, db_session)
                    text = f"    {total_count} - {child_entity_type.gui_name} (Total)"

                childItem = QTreeWidgetItem(parentItem)
                childItem.setText(0, text)
                childItem.setToolTip(0, child_entity_type.gui_tooltip)
                childItem.entity_type_id = child_entity_type.entity_type_id
                childItem.entity_type = child_entity_type.entity_type
                # Green: present in DB and parser enabled; red: leaf with
                # parser disabled; white: everything else.
                if child_entity_type.entity_type_id in used_ids and child_entity_type.parser_enabled:
                    color = QColor('green')
                elif not child_entity_type.entity_type.startswith("category_") and not child_entity_type.parser_enabled:
                    color = QColor('red')
                else:
                    color = QColor('white')
                childItem.setForeground(0, color)

                # Recursive call with increased depth
                depth = depth + 1
                self._addChildren(childItem, child_entity_type.entity_type, db_session, used_ids, depth)
        except Exception as e:
            logging.error(f"Error in _addChildren: {e}")

    def updateTree(self, db_session):
        """Rebuild the count tree from the database.

        NOTE(review): the db_session parameter is shadowed by a fresh
        session_scope() session, so the argument is effectively unused.
        """
        #logging.info("Updating checkboxes with database content")
        with session_scope() as db_session:
            try:
                # Query database for entity types
                entity_types = db_session.query(EntityTypesTable).all()
                used_ids = {d.entity_types_id for d in db_session.query(DistinctEntitiesTable.entity_types_id).distinct()}
                #logging.debug(f"Used IDs: {used_ids}")
                # Clear existing items
                self.treeWidget.clear()
                rootItems = {}

                # Construct hierarchical tree structure
                for entity_type in entity_types:
                    if entity_type.parent_type != 'root': # Skip non-root items
                        continue

                    if not entity_type.entity_type.startswith("category_"):
                        count = db_session.query(EntitiesTable).filter(EntitiesTable.entity_types_id == entity_type.entity_type_id).count()
                        text = f"{count} - {entity_type.gui_name} {entity_type.entity_type}"
                    else:
                        # Use the new method to get the total count for this category
                        total_count = self._getTotalCountForChildren(entity_type, db_session)
                        text = f"{total_count} - {entity_type.gui_name} (Total)"

                    treeItem = QTreeWidgetItem()
                    treeItem.setText(0, text)
                    treeItem.setToolTip(0, entity_type.gui_tooltip)
                    treeItem.entity_type_id = entity_type.entity_type_id
                    treeItem.entity_type = entity_type.entity_type
                    if entity_type.entity_type_id in used_ids and entity_type.parser_enabled:
                        color = QColor('green')
                    elif not entity_type.entity_type.startswith("category_") and not entity_type.parser_enabled:
                        color = QColor('red')
                    else:
                        color = QColor('white')
                    treeItem.setForeground(0, color)
                    self.treeWidget.addTopLevelItem(treeItem)

                    # Call recursive function to add children
                    self._addChildren(treeItem, entity_type.entity_type, db_session, used_ids)
                # Optionally expand all tree items
                self.treeWidget.expandAll()

            except Exception as e:
                logging.error("Error updating database tree", exc_info=True)

    def expandAllTreeItems(self):
        # Expand the whole tree, recursively.
        for i in range(self.treeWidget.topLevelItemCount()):
            self._expandCollapseRecursive(self.treeWidget.topLevelItem(i), True)

    def collapseAllTreeItems(self):
        # Collapse the whole tree, recursively.
        for i in range(self.treeWidget.topLevelItemCount()):
            self._expandCollapseRecursive(self.treeWidget.topLevelItem(i), False)

    def _expandCollapseRecursive(self, treeItem, expand=True):
        # Depth-first expand/collapse of the subtree rooted at treeItem.
        if treeItem is not None:
            treeItem.setExpanded(expand)
            for j in range(treeItem.childCount()):
                self._expandCollapseRecursive(treeItem.child(j), expand)
|
||||
|
||||
class FileCheckboxPanel(QWidget):
    """Scrollable list of per-file checkboxes (one FileCheckboxItem each)."""

    def __init__(self):
        super().__init__()

        self.mainLayout = QVBoxLayout(self)
        self.scrollArea = QScrollArea(self)
        self.scrollArea.setWidgetResizable(True)
        self.scrollAreaContents = QWidget()
        self.scrollLayout = QVBoxLayout(self.scrollAreaContents)
        self.scrollArea.setWidget(self.scrollAreaContents)

        self.mainLayout.addWidget(self.scrollArea)
        self.items = [] # Keep track of the custom widgets

    def updateCheckboxes(self, db_session):
        """Rebuild the file list from the FileMetadata table.

        NOTE(review): the db_session parameter is shadowed by a fresh
        session_scope() session, so the argument is unused; the outer
        try/except duplicates the inner one and only the log message is
        shared — confirm whether one of the two can go.
        """
        try:
            # Clear existing items
            for item in self.items:
                item.deleteLater()
            self.items.clear()
            with session_scope() as db_session:
                try:
                    file_metadata = db_session.query(FileMetadata).all()
                    for file in file_metadata:
                        entity_count = db_session.query(EntitiesTable).filter(EntitiesTable.file_id == file.file_id).count()

                        item_text = f"{file.file_name} ({entity_count})"

                        custom_widget = FileCheckboxItem(item_text)
                        self.scrollLayout.addWidget(custom_widget)
                        self.items.append(custom_widget)

                    # Add a stretch to push everything up
                    self.scrollLayout.addStretch(1)
                except Exception as e:
                    logging.error("Error updating file checkboxes", exc_info=True)
        except Exception as e:
            logging.error("Error updating file checkboxes", exc_info=True)

    def filterCheckboxes(self, filter_text):
        # Show only rows whose label contains the filter text
        # (case-insensitive substring match).
        for item in self.items:
            if filter_text.lower() in item.label.text().lower():
                item.show()
            else:
                item.hide()

    def checkAllVisible(self):
        # Check every row that is currently visible.
        for item in self.items:
            if not item.isHidden():
                item.checkBox.setChecked(True)

    def uncheckAllVisible(self):
        # Uncheck every row that is currently visible.
        for item in self.items:
            if not item.isHidden():
                item.checkBox.setChecked(False)

    def getCheckedFiles(self):
        """Return the file names of all checked rows.

        NOTE(review): the name is recovered by splitting the label text on
        " (", which breaks for file names that themselves contain " (" —
        confirm that is acceptable for expected inputs.
        """
        checked_files = []
        for custom_widget in self.items:
            if custom_widget.checkBox.isChecked():
                # Extract the file name from the item text
                file_name = custom_widget.label.text().split(" (")[0]
                checked_files.append(file_name)
        return checked_files

    def _setCheckStateForVisibleItems(self, state):
        # Apply a single checked/unchecked state to all visible rows.
        for custom_widget in self.items:
            if not custom_widget.isHidden():
                custom_widget.checkBox.setChecked(state)
|
77
logline_leviathan/gui/customize_results.py
Normal file
77
logline_leviathan/gui/customize_results.py
Normal file
@ -0,0 +1,77 @@
|
||||
from PyQt5.QtWidgets import QTableWidget, QTableWidgetItem, QDialog, QVBoxLayout, QHBoxLayout, QComboBox, QPushButton
|
||||
from logline_leviathan.database.database_manager import EntitiesTable, DistinctEntitiesTable, EntityTypesTable, ContextTable, FileMetadata, session_scope
|
||||
from sqlalchemy import func, label
|
||||
from sqlalchemy.orm import aliased
|
||||
|
||||
class CustomizeResultsDialog(QDialog):
    """Dialog that lets the user pick report columns via combo boxes.

    After the dialog is accepted, ``selectedColumns`` holds the chosen
    column names in left-to-right order.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        # NOTE(review): session_scope() is presumably a context manager; it
        # is stored here without being entered and never used in this class
        # — confirm whether this attribute can be removed.
        self.db_session = session_scope()
        self.setWindowTitle("Report anpassen")
        stylesheet = """
            /* Style for the main window */
            QWidget {
                background-color: #282C34; /* Dark grey background */
                color: white; /* White text */
            }

            /* Style for buttons */
            QPushButton {
                background-color: #4B5563; /* Dark grey background */
                color: white; /* White text */
                border-style: outset;
                border-width: 2px;
                border-radius: 1px; /* Rounded corners */
                border-color: #4A4A4A;
                padding: 6px;
                min-width: 60px;
                min-height: 20px;
            }

            QPushButton:hover {
                background-color: #6E6E6E; /* Slightly lighter grey on hover */
            }

            QPushButton:pressed {
                background-color: #484848; /* Even darker grey when pressed */
            }
        """
        self.layout = QVBoxLayout(self)
        self.setStyleSheet(stylesheet)

        self.comboBoxLayout = QHBoxLayout()
        self.layout.addLayout(self.comboBoxLayout)

        # Initially add one combo box
        self.addComboBox()

        # Button to add more combo boxes
        self.addButton = QPushButton("Spalte hinzufügen", self)
        self.addButton.clicked.connect(self.addComboBox)
        self.layout.addWidget(self.addButton)

        # OK and Cancel buttons
        self.okButton = QPushButton("OK", self)
        # BUG FIX: the OK button previously connected straight to
        # self.accept, so on_accept never ran and selectedColumns stayed
        # empty.  Route it through on_accept, which records the selection
        # and then accepts the dialog.
        self.okButton.clicked.connect(self.on_accept)
        self.cancelButton = QPushButton("Abbruch", self)
        self.cancelButton.clicked.connect(self.reject)
        self.buttonLayout = QHBoxLayout()
        self.buttonLayout.addWidget(self.okButton)
        self.buttonLayout.addWidget(self.cancelButton)
        self.layout.addLayout(self.buttonLayout)

        self.selectedColumns = []

    def addComboBox(self):
        """Append one column-selection combo box to the dialog."""
        comboBox = QComboBox(self)
        comboBox.addItems(['Entitätentyp', 'Entität', 'Anzahl Erwähnungen', 'Dateiname', 'Zeilennummer', 'Zeitstempel', 'Kontext - gleiche Zeile', 'Kontext - mittelgroß', 'Kontext - umfangreich'])
        self.comboBoxLayout.addWidget(comboBox)

    def comboBoxes(self):
        # Utility method to get all combo boxes
        return [self.comboBoxLayout.itemAt(i).widget() for i in range(self.comboBoxLayout.count())]

    def on_accept(self):
        """Record the chosen column names, then close with Accepted."""
        self.selectedColumns = [comboBox.currentText() for comboBox in self.comboBoxes()]
        self.accept()
|
||||
|
328
logline_leviathan/gui/db_browser.py
Normal file
328
logline_leviathan/gui/db_browser.py
Normal file
@ -0,0 +1,328 @@
|
||||
import os
|
||||
from PyQt5.QtWidgets import (QMessageBox, QWidget, QApplication,
|
||||
QFileDialog, QLabel, QPushButton, QGridLayout, QGroupBox, QHBoxLayout, QVBoxLayout, QLineEdit)
|
||||
from PyQt5.QtCore import Qt
|
||||
from logline_leviathan.gui.checkbox_panel import *
|
||||
from logline_leviathan.gui.ui_helper import UIHelper
|
||||
from logline_leviathan.database.database_manager import session_scope
|
||||
from logline_leviathan.exporter.wordlist_export import generate_wordlist
|
||||
from logline_leviathan.gui.checkbox_panel import *
|
||||
import shutil
|
||||
import glob
|
||||
|
||||
|
||||
|
||||
|
||||
class DBBrowserWindow(QWidget):
|
||||
    def __init__(self, app):
        """Create the DB browser window and build its UI."""
        super().__init__()
        # Keep a reference to the application object for later use.
        self.app = app

        # NOTE(review): this calls `initialize_dbbrowser_window`, but the
        # initializer defined below is spelled
        # `initialize_dbbdrowser_window` (extra "d") — one of the two names
        # looks like a typo; confirm which one actually resolves at runtime.
        self.initialize_dbbrowser_window(app)
|
||||
|
||||
|
||||
|
||||
def initialize_dbbdrowser_window(dbbrowser_window, app):
    """Build the Database-Browser window UI on *dbbrowser_window*.

    Sets up the dark stylesheet, the status label, the entity-type
    selection tree with its filter/check/expand controls, and the bottom
    grid of wordlist-export buttons.

    NOTE(review): DBBrowserWindow.__init__ calls
    `self.initialize_dbbrowser_window(app)` — this function's name carries
    an extra "d"; confirm the intended spelling and resolution.
    """
    dbbrowser_window.setWindowTitle('Logline Leviathan - Database-Browser')
    dbbrowser_window.mainLayout = QVBoxLayout(dbbrowser_window)
    #dbbrowser_window.extendedLayout = QHBoxLayout(dbbrowser_window)
    dbbrowser_window.db_session = None
    stylesheet = """
        /* Style for the main window */
        QWidget {
            background-color: #282C34; /* Dark grey background */
            color: white; /* White text */
        }

        /* Style for buttons */
        QPushButton {
            background-color: #4B5563; /* Dark grey background */
            color: white; /* White text */
            border-style: outset;
            border-width: 2px;
            border-radius: 1px; /* Rounded corners */
            border-color: #4A4A4A;
            padding: 6px;
            min-width: 50px;
            min-height: 15px;
        }

        QPushButton:hover {
            background-color: #6E6E6E; /* Slightly lighter grey on hover */
        }

        QPushButton:pressed {
            background-color: #484848; /* Even darker grey when pressed */
        }
    """

    highlited_button_style = """
        QPushButton {
            background-color: #3C8CCE; /* Lighter blue background */
            color: white; /* White text */
            border-style: outset;
            border-width: 2px;
            border-radius: 1px; /* Rounded corners */
            border-color: #4A4A4A;
            padding: 6px;
            min-width: 50px;
            min-height: 15px;
        }

        QPushButton:hover {
            background-color: #7EC0EE; /* Even lighter blue on hover */
        }

        QPushButton:pressed {
            background-color: #4A86E8; /* Slightly darker blue when pressed */
        }
    """

    dbbrowser_window.setStyleSheet(stylesheet)
    dbbrowser_window.statusLabel = QLabel(' Erwarte Selektion der Entitätentypen', dbbrowser_window)
    dbbrowser_window.statusLabel.setWordWrap(True)
    dbbrowser_window.statusLabel.setMinimumHeight(40)
    dbbrowser_window.statusLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
    dbbrowser_window.mainLayout.addWidget(dbbrowser_window.statusLabel)
    # Create a GroupBox for the CheckboxPanel
    exportOptionsGroupBox = QGroupBox("SELEKTION", dbbrowser_window)
    exportOptionsLayout = QVBoxLayout(exportOptionsGroupBox)
    dbbrowser_window.checkboxPanel = CheckboxPanel()
    # Create a horizontal layout
    filterLayout = QHBoxLayout()

    # Create the "Check All" button
    checkAllButton = QPushButton("Alle markieren", dbbrowser_window)
    checkAllButton.clicked.connect(lambda: dbbrowser_window.checkboxPanel.checkAllVisible())

    # Create the "Uncheck All" button
    uncheckAllButton = QPushButton("Keine markieren", dbbrowser_window)
    uncheckAllButton.clicked.connect(lambda: dbbrowser_window.checkboxPanel.uncheckAllVisible())

    expandAllButton = QPushButton("Expandieren", dbbrowser_window)
    expandAllButton.clicked.connect(lambda: dbbrowser_window.checkboxPanel.expandAllTreeItems())

    collapseAllButton = QPushButton("Komprimieren", dbbrowser_window)
    collapseAllButton.clicked.connect(lambda: dbbrowser_window.checkboxPanel.collapseAllTreeItems())

    # Add buttons to the filter layout, to the left of the filter label
    filterLayout.addWidget(checkAllButton)
    filterLayout.addWidget(uncheckAllButton)
    filterLayout.addWidget(expandAllButton)
    filterLayout.addWidget(collapseAllButton)

    # Create the label for the filter
    filterLabel = QLabel("Filtern:")
    filterLayout.addWidget(filterLabel) # Add label to the horizontal layout

    # Add Text Input for Filtering
    filterLineEdit = QLineEdit(dbbrowser_window)
    filterLineEdit.setPlaceholderText(" nach Typ, Tooltip oder Kurzbezeichnung filtern...")
    filterLineEdit.setStyleSheet("""
        QLineEdit {
            background-color: #3C4043; /* Background color */
            color: white; /* Text color */
            min-height: 20px;
        }
    """)
    filterLayout.addWidget(filterLineEdit) # Add line edit to the horizontal layout

    exportOptionsLayout.addLayout(filterLayout) # Add the horizontal layout to the export options layout

    # Add CheckboxPanel to the GroupBox's Layout
    exportOptionsLayout.addWidget(dbbrowser_window.checkboxPanel)

    # Connect the textChanged signal of QLineEdit to a new method
    filterLineEdit.textChanged.connect(dbbrowser_window.checkboxPanel.filterCheckboxes)

    dbbrowser_window.mainLayout.addWidget(exportOptionsGroupBox)

    copyWordlistToParserDirButton = QPushButton('Soeben generierte Wordlist zur Analyse hinzufügen (kopiert erzeugte Datei)', dbbrowser_window)
    copyWordlistToParserDirButton.clicked.connect(dbbrowser_window.copyWordlistToParserDir)
    dbbrowser_window.mainLayout.addWidget(copyWordlistToParserDirButton)

    # Exit Button Layout
    bottomLayout = QGridLayout()

    dbbrowser_window.openWordlistPathButton = QPushButton('Ziel-Dateipfad...', dbbrowser_window)
    dbbrowser_window.openWordlistPathButton.clicked.connect(dbbrowser_window.openWordlistPath)
    bottomLayout.addWidget(dbbrowser_window.openWordlistPathButton, 1, 1)

    # Start Export Button
    dbbrowser_window.startExportButton = QPushButton('Wordlist erstellen', dbbrowser_window)
    dbbrowser_window.startExportButton.clicked.connect(dbbrowser_window.start_export_process)
    dbbrowser_window.startExportButton.setStyleSheet(highlited_button_style)
    bottomLayout.addWidget(dbbrowser_window.startExportButton, 1, 2)

    # Output File Directory
    dbbrowser_window.selectOutputFileButton = QPushButton('Zieldateipfad setzen...', dbbrowser_window)
    dbbrowser_window.selectOutputFileButton.clicked.connect(dbbrowser_window.selectOutputFile)
    bottomLayout.addWidget(dbbrowser_window.selectOutputFileButton, 2, 1)

    # Exit Button
    dbbrowser_window.exitButton = QPushButton('Schließen', dbbrowser_window)
    dbbrowser_window.exitButton.clicked.connect(dbbrowser_window.close)
    bottomLayout.addWidget(dbbrowser_window.exitButton, 2, 2)

    dbbrowser_window.crossmatchesCheckbox = QCheckBox('Nur Kreuztreffer (Entitäten, die in mehreren Dateien vorkommen)', dbbrowser_window)
    bottomLayout.addWidget(dbbrowser_window.crossmatchesCheckbox, 0, 1)

    # Output File Path Label
    dbbrowser_window.WordlistPathLabel = QLabel('', dbbrowser_window)
    # NOTE(review): updateWordlistPathLabel reads self.WordlistPath, which
    # is not visibly assigned before this call — confirm it is initialized
    # elsewhere before the window is constructed.
    dbbrowser_window.updateWordlistPathLabel() # Call this method to set the initial text
    bottomLayout.addWidget(dbbrowser_window.WordlistPathLabel, 0, 2)

    dbbrowser_window.mainLayout.addLayout(bottomLayout)
    dbbrowser_window.setLayout(dbbrowser_window.mainLayout)
|
||||
|
||||
|
||||
def updateCheckboxes(self):
    """Repopulate the entity-type selection tree from the current database."""
    with session_scope() as db_session:
        self.checkboxPanel.updateCheckboxes(db_session)
|
||||
|
||||
def getSelectedCheckboxes(self):
    """Return all checked items of the entity tree in pre-order (depth-first)."""
    checked_items = []
    tree = self.checkboxPanel.treeWidget
    # Explicit stack instead of recursion; children are pushed in reverse so
    # that pop() yields the same left-to-right pre-order as the recursive walk.
    stack = [tree.topLevelItem(i)
             for i in range(tree.topLevelItemCount() - 1, -1, -1)]
    while stack:
        item = stack.pop()
        if item.checkState(0) == Qt.Checked:
            checked_items.append(item)
        for child_index in range(item.childCount() - 1, -1, -1):
            stack.append(item.child(child_index))
    return checked_items
|
||||
|
||||
def updateWordlistPathLabel(self):
    """Show the directory that contains the wordlist target, with a trailing slash."""
    parent_dir = os.path.dirname(self.WordlistPath)
    self.WordlistPathLabel.setText(f'{parent_dir}/')
|
||||
|
||||
def openWordlistPath(self):
    """Open the 'wordlist' folder located next to the configured wordlist target."""
    parent_dir = os.path.dirname(self.WordlistPath)
    self.ui_helper.openFile(os.path.join(parent_dir, 'wordlist'))
|
||||
|
||||
|
||||
|
||||
def selectOutputFile(self):
    """Ask the user for a target file path and store it in self.WordlistPath.

    The file extension is derived from the currently selected output format;
    it is appended automatically when the chosen name lacks it.

    NOTE(review): self.outputFormatList is not created in the visible
    initializer of this window — confirm the attribute exists before this
    dialog is reachable.
    """
    options = QFileDialog.Options()
    # Fix: currentItem() returns None when nothing is selected; the sibling
    # report window guards against this, so mirror that instead of raising
    # AttributeError here. An empty format maps to no default extension.
    current_item = self.outputFormatList.currentItem()
    output_format = current_item.text().lower() if current_item is not None else ''
    extension_map = {'html': '.html', 'xlsx': '.xlsx'}
    default_extension = extension_map.get(output_format, '')

    selected_file, _ = QFileDialog.getSaveFileName(
        self,
        "Selektieren des Ziel-Dateipfads",
        self.WordlistPath,
        f"{output_format.upper()} Files (*{default_extension});;All Files (*)",
        options=options
    )

    if selected_file:
        # Append the expected extension when missing. An empty extension
        # matches every name via endswith(''), so unknown formats leave the
        # chosen name untouched.
        if not selected_file.endswith(default_extension):
            selected_file += default_extension
        self.WordlistPath = selected_file
        self.outputDir = os.path.dirname(selected_file)
        self.updateWordlistPathLabel()
|
||||
|
||||
|
||||
def get_unique_filename(self, base_path):
    """Return base_path unchanged, or a 'name_N.ext' variant if it already exists.

    Probes name_1, name_2, ... until an unused path is found.
    """
    if not os.path.exists(base_path):
        return base_path
    directory, filename = os.path.split(base_path)
    stem, ext = os.path.splitext(filename)
    counter = 1
    candidate = os.path.join(directory, f"{stem}_{counter}{ext}")
    while os.path.exists(candidate):
        counter += 1
        candidate = os.path.join(directory, f"{stem}_{counter}{ext}")
    return candidate
|
||||
|
||||
def copyWordlistToParserDir(self):
    """Copy the newest generated wordlist into data/parser for the next analysis.

    Picks the most recently created *.txt below self.WordlistPath and installs
    it as data/parser/generated_wordlist.txt, overwriting any previous copy.
    Failures are reported via self.message instead of being raised.
    """
    try:
        parser_dir = os.path.join(os.getcwd(), 'data', 'parser')
        os.makedirs(parser_dir, exist_ok=True)  # ensure the target exists

        candidates = glob.glob(os.path.join(self.WordlistPath, '*.txt'))
        if not candidates:
            raise FileNotFoundError("No .txt files found in the WordlistPath directory.")

        # Newest file by creation time wins.
        latest = max(candidates, key=os.path.getctime)
        destination_file = os.path.join(parser_dir, 'generated_wordlist.txt')
        shutil.copy2(latest, destination_file)

        self.statusLabel.setText(f" Wordlist erfolgreich kopiert nach {destination_file}")
    except Exception as e:
        self.message("Fehler beim kopieren", f"Fehler beim kopieren: {str(e)}")
|
||||
|
||||
|
||||
|
||||
def start_export_process(self):
    """Generate a wordlist file from the checked entity types.

    The target path is derived from self.WordlistPath and de-duplicated so an
    existing wordlist.txt is never overwritten. Errors are shown in the status
    label and logged rather than raised.
    """
    target_path = self.get_unique_filename(
        os.path.join(self.WordlistPath, "wordlist.txt"))
    try:
        with session_scope() as session:
            checked_items = self.getSelectedCheckboxes()
            if not checked_items:
                self.message("Generieren nicht möglich", "Keine Typen selektiert. Auswahl vornehmen.")
                return

            generate_wordlist(target_path, session, checked_items,
                              self.crossmatchesCheckbox.isChecked())
            self.statusLabel.setText(f" Generierte Liste gespeichert unter {target_path}")
    except Exception as e:
        self.statusLabel.setText(f" Fehler beim speichern: {str(e)}")
        logging.error(f"Export Error: {str(e)}")
|
||||
|
||||
|
||||
def message(self, title, text, extra_widget=None):
    """Display a modal warning dialog in the application's dark theme.

    extra_widget, if given, is inserted into the message box's grid layout.
    """
    box = QMessageBox()
    box.setIcon(QMessageBox.Warning)
    box.setWindowTitle(title)
    box.setText(text)
    box.setStyleSheet("""
        QMessageBox {
            background-color: #282C34; /* Dark grey background */
        }
        QLabel {
            color: white; /* White text */
        }
        QPushButton {
            color: white; /* White text for buttons */
            background-color: #4B5563; /* Dark grey background for buttons */
            border-style: solid;
            border-width: 2px;
            border-radius: 5px;
            border-color: #4A4A4A;
            padding: 6px;
            min-width: 80px;
            min-height: 30px;
        }
    """)
    if extra_widget:
        # Reserve the informative-text slot, then inject the caller's widget
        # into the underlying grid layout.
        box.setInformativeText('')
        box.layout().addWidget(extra_widget, 1, 1)
    box.exec_()
|
188
logline_leviathan/gui/generate_report.py
Normal file
188
logline_leviathan/gui/generate_report.py
Normal file
@ -0,0 +1,188 @@
|
||||
import os
|
||||
import logging
|
||||
from PyQt5.QtWidgets import (QMessageBox, QWidget, QApplication,
|
||||
QFileDialog)
|
||||
from PyQt5.QtCore import Qt
|
||||
from logline_leviathan.gui.initui_report_window import initialize_generate_report_window
|
||||
from logline_leviathan.gui.checkbox_panel import CheckboxPanel, FileCheckboxPanel
|
||||
from logline_leviathan.gui.ui_helper import UIHelper
|
||||
from logline_leviathan.gui.customize_results import CustomizeResultsDialog
|
||||
from logline_leviathan.database.database_manager import session_scope
|
||||
from logline_leviathan.exporter.html_export import generate_html_file
|
||||
from logline_leviathan.exporter.xlsx_export import generate_xlsx_file
|
||||
from logline_leviathan.exporter.nice_export import generate_niceoutput_file
|
||||
|
||||
|
||||
|
||||
|
||||
class GenerateReportWindow(QWidget):
    """Window for exporting selected entities/files as an HTML, interactive
    HTML, or XLSX report, with optional timestamp and flag filtering."""

    def __init__(self, app):
        super().__init__()
        self.app = app
        self.checkboxPanel = CheckboxPanel()          # entity-type selection tree
        self.fileCheckboxPanel = FileCheckboxPanel()  # ingested-file selection tree
        self.ui_helper = UIHelper(self)
        # Default export directory: ./output below the current working directory.
        self.outputFilePath = os.path.join(os.getcwd(), 'output')

        initialize_generate_report_window(self, app)

        self.updateCheckboxes()

    def updateCheckboxes(self):
        """Refresh both selection trees from the current database contents."""
        with session_scope() as session:
            self.checkboxPanel.updateCheckboxes(session)
            self.fileCheckboxPanel.updateCheckboxes(session)

    def getSelectedCheckboxes(self):
        """Return every checked item of the entity tree (recursive pre-order)."""
        selected_checkboxes = []
        def traverseTreeItems(treeItem):
            if treeItem.checkState(0) == Qt.Checked:
                selected_checkboxes.append(treeItem)
            for i in range(treeItem.childCount()):
                traverseTreeItems(treeItem.child(i))
        for i in range(self.checkboxPanel.treeWidget.topLevelItemCount()):
            traverseTreeItems(self.checkboxPanel.treeWidget.topLevelItem(i))
        return selected_checkboxes

    def updateOutputFilePathLabel(self):
        """Mirror the current output directory into its UI label."""
        self.outputFilePathLabel.setText(self.outputFilePath)

    def openOutputFilepath(self):
        """Open the output directory in the system file browser.

        If the stored path points to a file, fall back to its parent directory.
        """
        if not os.path.isdir(self.outputFilePath):
            self.outputFilePath = os.path.dirname(self.outputFilePath)
        self.ui_helper.openFile(self.outputFilePath)

    def openCustomizeResultsDialog(self):
        """Show the column-customization dialog.

        NOTE(review): the collected column names are never used after the
        dialog closes — looks like an unfinished feature; confirm intent.
        """
        dialog = CustomizeResultsDialog()
        if dialog.exec_():
            selected_columns = [dialog.comboBoxLayout.itemAt(i).widget().currentText() for i in range(dialog.comboBoxLayout.count())]

    def start_export_process(self):
        """Validate the UI selection and write the report to a unique path.

        Reads format, filename, timestamp range, and flag filters from the UI,
        then dispatches to the matching exporter. Errors are shown in the
        status label and logged instead of being raised.
        """
        current_item = self.outputFormatList.currentItem()
        if current_item is not None:
            output_format = current_item.text().lower()
            # 'interactive html' shares the .html extension; unknown entries
            # default to .html.
            extension_map = {'html': '.html', 'interactive html': '.html', 'xlsx': '.xlsx'}
            selected_extension = extension_map.get(output_format, '.html')
            only_crossmatches = self.crossmatchesCheckbox.isChecked()

            include_flagged = self.flaggedEntriesCheckbox.isChecked()
            only_flagged = self.flaggedRadioButton.isChecked()
            only_unflagged = self.notflaggedRadioButton.isChecked()

            # Get custom filename from QLineEdit or use default
            custom_filename = self.setOutputFileNameLineEdit.text().strip()
            if not custom_filename:
                custom_filename = "entities_export"  # Default filename if not specified
            initial_output_path = os.path.join(self.outputFilePath, f"{custom_filename}{selected_extension}")

            # Never overwrite an existing export.
            unique_output_path = self.get_unique_filename(initial_output_path)

            try:
                with session_scope() as session:
                    selected_checkboxes = self.getSelectedCheckboxes()  # Get selected checkboxes from the tree
                    selected_files = self.fileCheckboxPanel.getCheckedFiles()
                    if not selected_checkboxes:
                        self.message("Export nicht möglich", "Keine Entitäten ausgewählt. Auf dem Panel eine Selektion vornehmen.")
                        return
                    if not selected_files:
                        self.message("Export nicht möglich", "Keine Dateien ausgewählt. Auf dem Panel eine Selektion vornehmen.")
                        return
                    if self.timestampFilterCheckbox.isChecked():
                        start_date = self.startDateEdit.date().toPyDate()
                        end_date = self.endDateEdit.date().toPyDate()
                    else:
                        start_date = end_date = None  # no timestamp filtering
                    if output_format == 'html':
                        logging.debug(f"only_crossmatches: {only_crossmatches}")
                        generate_html_file(unique_output_path, session, selected_checkboxes, self.fileCheckboxPanel, self.exportContextList.currentItem().text(), only_crossmatches, start_date, end_date, include_flagged, only_flagged, only_unflagged)
                    elif output_format == 'interactive html':
                        logging.debug(f"only_crossmatches: {only_crossmatches}")
                        generate_niceoutput_file(unique_output_path, session, selected_checkboxes, self.fileCheckboxPanel, self.exportContextList.currentItem().text(), only_crossmatches, start_date, end_date, include_flagged, only_flagged, only_unflagged)
                    elif output_format == 'xlsx':
                        logging.debug(f"only_crossmatches: {only_crossmatches}")
                        generate_xlsx_file(unique_output_path, session, selected_checkboxes, self.fileCheckboxPanel, self.exportContextList.currentItem().text(), only_crossmatches, start_date, end_date, include_flagged, only_flagged, only_unflagged)
                    else:
                        raise ValueError(f"Unsupported format: {output_format}")

                    self.statusLabel.setText(f" Export gespeichert unter: {unique_output_path}")
                    # Check if 'Open After Export' is checked, and open the file if so
                    if self.openAfterExportCheckbox.isChecked():
                        if os.path.exists(unique_output_path):
                            self.ui_helper.openFile(unique_output_path)

            except Exception as e:
                self.statusLabel.setText(f" Export Error: {str(e)}")
                logging.error(f"Export Error: {str(e)}")
        else:
            self.message("Export nicht möglich", "Ausgabeformat und Umfang des Kontexts spezifizieren.")

    def selectOutputFile(self):
        """Let the user pick the export directory via a native dialog."""
        options = QFileDialog.Options()
        # Set the dialog for directory selection
        options |= QFileDialog.ShowDirsOnly

        # Open a dialog to select a directory
        selected_directory = QFileDialog.getExistingDirectory(
            self,
            "Select Output Directory",
            self.outputFilePath,  # Start at the current output file path
            options=options
        )

        # If a directory was selected, update the output file path
        if selected_directory:
            self.outputFilePath = selected_directory
            self.updateOutputFilePathLabel()  # Update any labels or UI components as necessary

    def get_unique_filename(self, base_path):
        """Return base_path, or a 'name_N.ext' variant if the path exists.

        Probes name_1, name_2, ... until a free path is found.
        """
        directory, filename = os.path.split(base_path)
        name, extension = os.path.splitext(filename)
        counter = 1

        new_path = base_path
        while os.path.exists(new_path):
            new_filename = f"{name}_{counter}{extension}"
            new_path = os.path.join(directory, new_filename)
            counter += 1

        return new_path

    def message(self, title, text, extra_widget=None):
        """Show a modal warning box in the application's dark theme.

        extra_widget, if given, is inserted into the message box's grid layout.
        """
        msgBox = QMessageBox()
        msgBox.setStyleSheet("""
            QMessageBox {
                background-color: #282C34; /* Dark grey background */
            }
            QLabel {
                color: white; /* White text */
            }
            QPushButton {
                color: white; /* White text for buttons */
                background-color: #4B5563; /* Dark grey background for buttons */
                border-style: solid;
                border-width: 2px;
                border-radius: 5px;
                border-color: #4A4A4A;
                padding: 6px;
                min-width: 80px;
                min-height: 30px;
            }
        """)
        msgBox.setIcon(QMessageBox.Warning)
        msgBox.setWindowTitle(title)
        msgBox.setText(text)
        if extra_widget:
            msgBox.setInformativeText('')
            msgBox.layout().addWidget(extra_widget, 1, 1)
        msgBox.exec_()
|
416
logline_leviathan/gui/generate_wordlist.py
Normal file
416
logline_leviathan/gui/generate_wordlist.py
Normal file
@ -0,0 +1,416 @@
|
||||
import glob
import logging
import os
import shutil

from PyQt5.QtWidgets import (QMessageBox, QWidget, QRadioButton,
                             QFileDialog, QLabel, QPushButton, QGridLayout, QGroupBox, QHBoxLayout, QVBoxLayout, QLineEdit, QDateTimeEdit)
from PyQt5.QtCore import Qt, QDate

from logline_leviathan.gui.checkbox_panel import *
from logline_leviathan.gui.ui_helper import UIHelper
from logline_leviathan.database.database_manager import session_scope
from logline_leviathan.database.database_operations import DatabaseOperations
from logline_leviathan.exporter.wordlist_export import generate_wordlist
from logline_leviathan.gui.checkbox_panel import *
|
||||
|
||||
|
||||
|
||||
|
||||
class GenerateWordlistWindow(QWidget):
|
||||
def __init__(self, app):
|
||||
super().__init__()
|
||||
self.app = app
|
||||
self.checkboxPanel = CheckboxPanel()
|
||||
self.database_operations = DatabaseOperations(self, app)
|
||||
self.ui_helper = UIHelper(self)
|
||||
self.WordlistPath = os.path.join(os.getcwd(), 'data', 'wordlist')
|
||||
os.makedirs(self.WordlistPath, exist_ok=True)
|
||||
|
||||
self.initialize_generate_wordlist_window(app)
|
||||
|
||||
|
||||
self.updateCheckboxes()
|
||||
|
||||
|
||||
|
||||
def initialize_generate_wordlist_window(generate_wordlist_window, app):
|
||||
generate_wordlist_window.setWindowTitle('Logline Leviathan - Wordlist-Generator')
|
||||
generate_wordlist_window.mainLayout = QVBoxLayout(generate_wordlist_window)
|
||||
#generate_wordlist_window.extendedLayout = QHBoxLayout(generate_wordlist_window)
|
||||
generate_wordlist_window.db_session = None
|
||||
stylesheet = """
|
||||
/* Style for the main window */
|
||||
QWidget {
|
||||
background-color: #282C34; /* Dark grey background */
|
||||
color: white; /* White text */
|
||||
}
|
||||
|
||||
/* Style for buttons */
|
||||
QPushButton {
|
||||
background-color: #4B5563; /* Dark grey background */
|
||||
color: white; /* White text */
|
||||
border-style: outset;
|
||||
border-width: 2px;
|
||||
border-radius: 1px; /* Rounded corners */
|
||||
border-color: #4A4A4A;
|
||||
padding: 6px;
|
||||
min-width: 50px;
|
||||
min-height: 15px;
|
||||
}
|
||||
|
||||
QPushButton:hover {
|
||||
background-color: #6E6E6E; /* Slightly lighter grey on hover */
|
||||
}
|
||||
|
||||
QPushButton:pressed {
|
||||
background-color: #484848; /* Even darker grey when pressed */
|
||||
}
|
||||
"""
|
||||
|
||||
highlited_button_style = """
|
||||
QPushButton {
|
||||
background-color: #3C8CCE; /* Lighter blue background */
|
||||
color: white; /* White text */
|
||||
border-style: outset;
|
||||
border-width: 2px;
|
||||
border-radius: 1px; /* Rounded corners */
|
||||
border-color: #4A4A4A;
|
||||
padding: 6px;
|
||||
min-width: 50px;
|
||||
min-height: 15px;
|
||||
}
|
||||
|
||||
QPushButton:hover {
|
||||
background-color: #7EC0EE; /* Even lighter blue on hover */
|
||||
}
|
||||
|
||||
QPushButton:pressed {
|
||||
background-color: #4A86E8; /* Slightly darker blue when pressed */
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
generate_wordlist_window.setStyleSheet(stylesheet)
|
||||
generate_wordlist_window.statusLabel = QLabel(' Erwarte Selektion der Entitätentypen', generate_wordlist_window)
|
||||
generate_wordlist_window.statusLabel.setWordWrap(True)
|
||||
generate_wordlist_window.statusLabel.setMinimumHeight(40)
|
||||
generate_wordlist_window.statusLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
|
||||
generate_wordlist_window.mainLayout.addWidget(generate_wordlist_window.statusLabel)
|
||||
# Create a GroupBox for the CheckboxPanel
|
||||
exportOptionsGroupBox = QGroupBox("SELEKTION", generate_wordlist_window)
|
||||
exportOptionsLayout = QVBoxLayout(exportOptionsGroupBox)
|
||||
generate_wordlist_window.checkboxPanel = CheckboxPanel()
|
||||
# Create a horizontal layout
|
||||
filterLayout = QHBoxLayout()
|
||||
|
||||
# Create the "Check All" button
|
||||
checkAllButton = QPushButton("Alle markieren", generate_wordlist_window)
|
||||
checkAllButton.clicked.connect(lambda: generate_wordlist_window.checkboxPanel.checkAllVisible())
|
||||
|
||||
# Create the "Uncheck All" button
|
||||
uncheckAllButton = QPushButton("Keine markieren", generate_wordlist_window)
|
||||
uncheckAllButton.clicked.connect(lambda: generate_wordlist_window.checkboxPanel.uncheckAllVisible())
|
||||
|
||||
expandAllButton = QPushButton("Expandieren", generate_wordlist_window)
|
||||
expandAllButton.clicked.connect(lambda: generate_wordlist_window.checkboxPanel.expandAllTreeItems())
|
||||
|
||||
collapseAllButton = QPushButton("Komprimieren", generate_wordlist_window)
|
||||
collapseAllButton.clicked.connect(lambda: generate_wordlist_window.checkboxPanel.collapseAllTreeItems())
|
||||
|
||||
# Add buttons to the filter layout, to the left of the filter label
|
||||
filterLayout.addWidget(checkAllButton)
|
||||
filterLayout.addWidget(uncheckAllButton)
|
||||
filterLayout.addWidget(expandAllButton)
|
||||
filterLayout.addWidget(collapseAllButton)
|
||||
|
||||
# Create the label for the filter
|
||||
filterLabel = QLabel("Filtern:")
|
||||
filterLayout.addWidget(filterLabel) # Add label to the horizontal layout
|
||||
|
||||
# Add Text Input for Filtering
|
||||
filterLineEdit = QLineEdit(generate_wordlist_window)
|
||||
filterLineEdit.setPlaceholderText(" nach Typ, Tooltip oder Kurzbezeichnung filtern...")
|
||||
filterLineEdit.setStyleSheet("""
|
||||
QLineEdit {
|
||||
background-color: #3C4043; /* Background color */
|
||||
color: white; /* Text color */
|
||||
min-height: 20px;
|
||||
}
|
||||
""")
|
||||
filterLayout.addWidget(filterLineEdit) # Add line edit to the horizontal layout
|
||||
|
||||
exportOptionsLayout.addLayout(filterLayout) # Add the horizontal layout to the export options layout
|
||||
|
||||
|
||||
# Add CheckboxPanel to the GroupBox's Layout
|
||||
exportOptionsLayout.addWidget(generate_wordlist_window.checkboxPanel)
|
||||
|
||||
|
||||
# Connect the textChanged signal of QLineEdit to a new method
|
||||
filterLineEdit.textChanged.connect(generate_wordlist_window.checkboxPanel.filterCheckboxes)
|
||||
|
||||
generate_wordlist_window.mainLayout.addWidget(exportOptionsGroupBox)
|
||||
|
||||
copyWordlistToParserDirButton = QPushButton('Soeben generierte Wordlist zur Analyse hinzufügen (kopiert erzeugte Datei)', generate_wordlist_window)
|
||||
copyWordlistToParserDirButton.clicked.connect(generate_wordlist_window.copyWordlistToParserDir)
|
||||
generate_wordlist_window.mainLayout.addWidget(copyWordlistToParserDirButton)
|
||||
|
||||
purgeWordlistEntriesButton = QPushButton('Alte Wordlist-Eintraege aus Datenbank entfernen (empfohlen, sofern neue Wordlist generiert)', generate_wordlist_window)
|
||||
purgeWordlistEntriesButton.clicked.connect(generate_wordlist_window.purgeWordlistEntries)
|
||||
generate_wordlist_window.mainLayout.addWidget(purgeWordlistEntriesButton)
|
||||
|
||||
twoWordlistButtonsLayout = QHBoxLayout()
|
||||
|
||||
openActiveWordlistButton = QPushButton('Bestehende Wordlist oeffnen', generate_wordlist_window)
|
||||
openActiveWordlistButton.clicked.connect(generate_wordlist_window.openActiveWordlist)
|
||||
twoWordlistButtonsLayout.addWidget(openActiveWordlistButton)
|
||||
|
||||
deleteActiveWordlistButton = QPushButton('Bestehende Wordlist entfernen', generate_wordlist_window)
|
||||
deleteActiveWordlistButton.clicked.connect(generate_wordlist_window.deleteActiveWordlist)
|
||||
twoWordlistButtonsLayout.addWidget(deleteActiveWordlistButton)
|
||||
|
||||
generate_wordlist_window.mainLayout.addLayout(twoWordlistButtonsLayout)
|
||||
|
||||
# Exit Button Layout
|
||||
bottomLayout = QGridLayout()
|
||||
|
||||
|
||||
generate_wordlist_window.openWordlistPathButton = QPushButton('Wordlist-Dateipfad...', generate_wordlist_window)
|
||||
generate_wordlist_window.openWordlistPathButton.clicked.connect(generate_wordlist_window.openWordlistPath)
|
||||
bottomLayout.addWidget(generate_wordlist_window.openWordlistPathButton, 3, 1)
|
||||
|
||||
# Start Export Button
|
||||
generate_wordlist_window.startExportButton = QPushButton('Wordlist erstellen', generate_wordlist_window)
|
||||
generate_wordlist_window.startExportButton.clicked.connect(generate_wordlist_window.start_export_process)
|
||||
generate_wordlist_window.startExportButton.setStyleSheet(highlited_button_style)
|
||||
bottomLayout.addWidget(generate_wordlist_window.startExportButton, 3, 2)
|
||||
|
||||
|
||||
# Output File Directory
|
||||
generate_wordlist_window.selectOutputFileButton = QPushButton('Wordlist-Ausgabepfad setzen...', generate_wordlist_window)
|
||||
generate_wordlist_window.selectOutputFileButton.clicked.connect(generate_wordlist_window.selectOutputFile)
|
||||
bottomLayout.addWidget(generate_wordlist_window.selectOutputFileButton, 4, 1)
|
||||
|
||||
# Exit Button
|
||||
generate_wordlist_window.exitButton = QPushButton('Schließen', generate_wordlist_window)
|
||||
generate_wordlist_window.exitButton.clicked.connect(generate_wordlist_window.close)
|
||||
bottomLayout.addWidget(generate_wordlist_window.exitButton, 4, 2)
|
||||
|
||||
|
||||
|
||||
generate_wordlist_window.crossmatchesCheckbox = QCheckBox('Nur Kreuztreffer (Entitäten, die in mehreren Dateien vorkommen)', generate_wordlist_window)
|
||||
bottomLayout.addWidget(generate_wordlist_window.crossmatchesCheckbox, 0, 1)
|
||||
|
||||
generate_wordlist_window.timestampFilterCheckbox = QCheckBox('Nach Zeitstempel filtern:', generate_wordlist_window)
|
||||
generate_wordlist_window.startDateEdit = QDateTimeEdit(generate_wordlist_window)
|
||||
generate_wordlist_window.startDateEdit.setCalendarPopup(True)
|
||||
generate_wordlist_window.startDateEdit.setDate(QDate.currentDate())
|
||||
generate_wordlist_window.endDateEdit = QDateTimeEdit(generate_wordlist_window)
|
||||
generate_wordlist_window.endDateEdit.setCalendarPopup(True)
|
||||
generate_wordlist_window.endDateEdit.setDate(QDate.currentDate())
|
||||
generate_wordlist_window.timestampFilterQHBoxLayout = QHBoxLayout()
|
||||
generate_wordlist_window.timestampFilterQHBoxLayout.addWidget(generate_wordlist_window.timestampFilterCheckbox)
|
||||
generate_wordlist_window.timestampFilterQHBoxLayout.addWidget(generate_wordlist_window.startDateEdit)
|
||||
generate_wordlist_window.timestampFilterQHBoxLayout.addWidget(generate_wordlist_window.endDateEdit)
|
||||
bottomLayout.addLayout(generate_wordlist_window.timestampFilterQHBoxLayout, 1, 1)
|
||||
|
||||
generate_wordlist_window.flaggedEntriesLayout = QHBoxLayout()
|
||||
generate_wordlist_window.flaggedEntriesCheckbox = QCheckBox('Markierte Einträge berücksichtigen', generate_wordlist_window)
|
||||
generate_wordlist_window.flaggedEntriesLayout.addWidget(generate_wordlist_window.flaggedEntriesCheckbox)
|
||||
generate_wordlist_window.flaggedRadioButtonLayout = QHBoxLayout()
|
||||
generate_wordlist_window.flaggedRadioButton = QRadioButton('Nur markierte Einträge')
|
||||
generate_wordlist_window.notflaggedRadioButton = QRadioButton('Nur nicht markierte Einträge')
|
||||
generate_wordlist_window.flaggedRadioButtonLayout.addWidget(generate_wordlist_window.flaggedRadioButton)
|
||||
generate_wordlist_window.flaggedRadioButtonLayout.addWidget(generate_wordlist_window.notflaggedRadioButton)
|
||||
generate_wordlist_window.flaggedRadioButton.setChecked(True)
|
||||
generate_wordlist_window.flaggedEntriesLayout.addLayout(generate_wordlist_window.flaggedRadioButtonLayout)
|
||||
bottomLayout.addLayout(generate_wordlist_window.flaggedEntriesLayout, 2, 1)
|
||||
|
||||
# Output File Path Label
|
||||
generate_wordlist_window.WordlistPathLabel = QLabel('', generate_wordlist_window)
|
||||
generate_wordlist_window.updateWordlistPathLabel() # Call this method to set the initial text
|
||||
bottomLayout.addWidget(generate_wordlist_window.WordlistPathLabel, 0, 2)
|
||||
|
||||
generate_wordlist_window.mainLayout.addLayout(bottomLayout)
|
||||
generate_wordlist_window.setLayout(generate_wordlist_window.mainLayout)
|
||||
|
||||
|
||||
def updateCheckboxes(self):
|
||||
with session_scope() as session:
|
||||
self.checkboxPanel.updateCheckboxes(session)
|
||||
|
||||
def getSelectedCheckboxes(self):
|
||||
selected_checkboxes = []
|
||||
def traverseTreeItems(treeItem):
|
||||
if treeItem.checkState(0) == Qt.Checked:
|
||||
selected_checkboxes.append(treeItem)
|
||||
for i in range(treeItem.childCount()):
|
||||
traverseTreeItems(treeItem.child(i))
|
||||
for i in range(self.checkboxPanel.treeWidget.topLevelItemCount()):
|
||||
traverseTreeItems(self.checkboxPanel.treeWidget.topLevelItem(i))
|
||||
return selected_checkboxes
|
||||
|
||||
def updateWordlistPathLabel(self):
|
||||
outputDirPath = os.path.dirname(self.WordlistPath)
|
||||
display_text = f'{outputDirPath}/'
|
||||
self.WordlistPathLabel.setText(display_text)
|
||||
|
||||
def openWordlistPath(self):
|
||||
outputDirPath = os.path.dirname(self.WordlistPath)
|
||||
wordlistPath = os.path.join(outputDirPath, 'wordlist')
|
||||
self.ui_helper.openFile(wordlistPath)
|
||||
|
||||
|
||||
|
||||
def selectOutputFile(self):
|
||||
options = QFileDialog.Options()
|
||||
output_format = self.outputFormatList.currentItem().text().lower()
|
||||
extension_map = {'html': '.html', 'xlsx': '.xlsx'}
|
||||
default_extension = extension_map.get(output_format, '')
|
||||
|
||||
selected_file, _ = QFileDialog.getSaveFileName(
|
||||
self,
|
||||
"Selektieren des Ziel-Dateipfads",
|
||||
self.WordlistPath,
|
||||
f"{output_format.upper()} Files (*{default_extension});;All Files (*)",
|
||||
options=options
|
||||
)
|
||||
|
||||
if selected_file:
|
||||
if not selected_file.endswith(default_extension):
|
||||
selected_file += default_extension
|
||||
self.WordlistPath = selected_file
|
||||
self.outputDir = os.path.dirname(selected_file)
|
||||
self.updateWordlistPathLabel()
|
||||
|
||||
|
||||
def get_unique_filename(self, base_path):
|
||||
directory, filename = os.path.split(base_path)
|
||||
name, extension = os.path.splitext(filename)
|
||||
counter = 1
|
||||
|
||||
new_path = base_path
|
||||
while os.path.exists(new_path):
|
||||
new_filename = f"{name}_{counter}{extension}"
|
||||
new_path = os.path.join(directory, new_filename)
|
||||
counter += 1
|
||||
|
||||
return new_path
|
||||
|
||||
def copyWordlistToParserDir(self):
|
||||
try:
|
||||
# Path to the parser directory
|
||||
parser_dir = os.path.join(os.getcwd(), 'data', 'parser')
|
||||
|
||||
# Ensure the parser directory exists
|
||||
os.makedirs(parser_dir, exist_ok=True)
|
||||
|
||||
# Find the newest .txt file in the WordlistPath directory
|
||||
list_of_files = glob.glob(os.path.join(self.WordlistPath, '*.txt'))
|
||||
if not list_of_files:
|
||||
raise FileNotFoundError("No .txt files found in the WordlistPath directory.")
|
||||
|
||||
newest_file = max(list_of_files, key=os.path.getctime)
|
||||
|
||||
# Destination file path
|
||||
destination_file = os.path.join(parser_dir, 'generated_wordlist.txt')
|
||||
|
||||
# Copy and overwrite the newest file to the destination
|
||||
shutil.copy2(newest_file, destination_file)
|
||||
|
||||
self.statusLabel.setText(f" Wordlist erfolgreich kopiert nach {destination_file}")
|
||||
except Exception as e:
|
||||
self.message("Fehler beim kopieren", f"Fehler beim kopieren: {str(e)}")
|
||||
|
||||
def openActiveWordlist(self):
|
||||
try:
|
||||
parser_dir = os.path.join(os.getcwd(), 'data', 'parser')
|
||||
wordlist_file = os.path.join(parser_dir, 'generated_wordlist.txt')
|
||||
|
||||
if os.path.exists(wordlist_file):
|
||||
self.ui_helper.openFile(wordlist_file)
|
||||
else:
|
||||
raise FileNotFoundError("Wordlist file not found.")
|
||||
|
||||
except Exception as e:
|
||||
self.message("Fehler beim Öffnen", f"Fehler beim Öffnen: {str(e)}")
|
||||
|
||||
def deleteActiveWordlist(self):
|
||||
try:
|
||||
parser_dir = os.path.join(os.getcwd(), 'data', 'parser')
|
||||
wordlist_file = os.path.join(parser_dir, 'generated_wordlist.txt')
|
||||
|
||||
if os.path.exists(wordlist_file):
|
||||
os.remove(wordlist_file)
|
||||
self.statusLabel.setText(" Wordlist erfolgreich gelöscht.")
|
||||
else:
|
||||
raise FileNotFoundError("Wordlist file not found.")
|
||||
|
||||
except Exception as e:
|
||||
self.message("Fehler beim Löschen", f"Fehler beim Löschen: {str(e)}")
|
||||
|
||||
def start_export_process(self):
|
||||
# Base filename for the wordlist file
|
||||
base_filename = "wordlist.txt"
|
||||
|
||||
# Construct the full path with the base filename
|
||||
full_output_path = os.path.join(self.WordlistPath, base_filename)
|
||||
|
||||
# Generate a unique filename to avoid overwriting existing files
|
||||
unique_output_path = self.get_unique_filename(full_output_path)
|
||||
|
||||
# Retrieve dates from QDateEdit widgets
|
||||
start_date = self.startDateEdit.date().toPyDate() if self.timestampFilterCheckbox.isChecked() else None
|
||||
end_date = self.endDateEdit.date().toPyDate() if self.timestampFilterCheckbox.isChecked() else None
|
||||
|
||||
include_flagged = self.flaggedEntriesCheckbox.isChecked()
|
||||
only_flagged = self.flaggedRadioButton.isChecked()
|
||||
only_unflagged = self.notflaggedRadioButton.isChecked()
|
||||
|
||||
try:
|
||||
with session_scope() as session:
|
||||
selected_checkboxes = self.getSelectedCheckboxes()
|
||||
if not selected_checkboxes:
|
||||
self.message("Generieren nicht möglich", "Keine Typen selektiert. Auswahl vornehmen.")
|
||||
return
|
||||
|
||||
only_crossmatches = self.crossmatchesCheckbox.isChecked()
|
||||
|
||||
# Call the generate_wordlist function with timestamp parameters
|
||||
generate_wordlist(unique_output_path, session, selected_checkboxes, only_crossmatches, start_date, end_date, include_flagged, only_flagged, only_unflagged)
|
||||
self.statusLabel.setText(f" Generierte Liste gespeichert unter {unique_output_path}")
|
||||
except Exception as e:
|
||||
self.statusLabel.setText(f" Fehler beim speichern: {str(e)}")
|
||||
logging.error(f"Export Error: {str(e)}")
|
||||
|
||||
|
||||
def purgeWordlistEntries(self):
|
||||
self.database_operations.purgeWordlistEntries()
|
||||
self.updateCheckboxes()
|
||||
|
||||
def message(self, title, text, extra_widget=None):
    """Display a modal warning dialog styled to match the dark theme.

    Args:
        title: Text shown in the dialog's title bar.
        text: Main message body.
        extra_widget: Optional widget embedded into the dialog's
            internal grid layout (row 1, column 1). When given, the
            informative text is cleared to make room for it.
    """
    dark_theme = """
        QMessageBox {
            background-color: #282C34; /* Dark grey background */
        }
        QLabel {
            color: white; /* White text */
        }
        QPushButton {
            color: white; /* White text for buttons */
            background-color: #4B5563; /* Dark grey background for buttons */
            border-style: solid;
            border-width: 2px;
            border-radius: 5px;
            border-color: #4A4A4A;
            padding: 6px;
            min-width: 80px;
            min-height: 30px;
        }
    """
    dialog = QMessageBox()
    dialog.setStyleSheet(dark_theme)
    dialog.setIcon(QMessageBox.Warning)
    dialog.setWindowTitle(title)
    dialog.setText(text)
    if extra_widget:
        # QMessageBox uses a QGridLayout internally; slot the extra
        # widget into a free cell instead of the informative text.
        dialog.setInformativeText('')
        dialog.layout().addWidget(extra_widget, 1, 1)
    dialog.exec_()
|
306
logline_leviathan/gui/initui_mainwindow.py
Normal file
306
logline_leviathan/gui/initui_mainwindow.py
Normal file
@ -0,0 +1,306 @@
|
||||
import os
|
||||
from PyQt5.QtWidgets import (QGridLayout, QPushButton, QLabel, QHBoxLayout, QApplication,
|
||||
QVBoxLayout, QProgressBar, QGroupBox)
|
||||
from PyQt5.QtGui import QPixmap
|
||||
from PyQt5.QtCore import Qt
|
||||
import logline_leviathan.gui.versionvars as versionvars
|
||||
from logline_leviathan.gui.query_window import QueryLineEdit
|
||||
|
||||
QApplication.setAttribute(Qt.AA_EnableHighDpiScaling)
|
||||
|
||||
|
||||
|
||||
|
||||
def initialize_main_window(main_window, app):
    """Build the complete main-window UI of Logline Leviathan.

    Creates and wires all widgets (logo/version header, quick-start
    area, file-selection / database / analysis button groups, progress
    and status labels, database query and contents panels, generator
    buttons, footer links) and attaches them to ``main_window``'s
    vertical main layout. Widgets that other code needs to reach later
    are stored as attributes on ``main_window``; purely local widgets
    are not.

    Args:
        main_window: The application's main window; mutated in place.
        app: The QApplication instance (currently unused here, kept for
            interface symmetry with the other init functions).
    """
    main_window.setWindowTitle('Logline Leviathan')
    main_window.mainLayout = QVBoxLayout(main_window)
    #main_window.extendedLayout = QHBoxLayout(main_window)
    main_window.db_session = None

    # Logo
    pixmap = QPixmap(os.path.join('logline_leviathan', 'gui', 'logo.png'))
    scaled_pixmap = pixmap.scaled(400, 400, Qt.KeepAspectRatio, Qt.SmoothTransformation)
    logoLabel = QLabel(main_window)
    logoLabel.setPixmap(scaled_pixmap)
    # Version label; the text comes from versionvars so it is updated
    # in one central place.
    versionLabel = QLabel(versionvars.version_string, main_window)
    versionLabel.setAlignment(Qt.AlignLeft | Qt.AlignVCenter)

    # Horizontal layout
    hbox = QHBoxLayout()
    hbox.addWidget(versionLabel)  # Add version label to the left
    hbox.addStretch()  # Add stretchable space between the version label and logo
    hbox.addWidget(logoLabel, alignment=Qt.AlignRight)  # Add logo label to the right

    # Add horizontal layout to the main layout
    main_window.mainLayout.addLayout(hbox)

    # Application-wide dark stylesheet (applied to the whole window).
    stylesheet = """
        /* Style for the main window */
        QWidget {
            background-color: #282C34; /* Dark grey background */
            color: white; /* White text */
        }

        /* Style for buttons */
        QPushButton {
            background-color: #4B5563; /* Dark grey background */
            color: white; /* White text */
            border-style: outset;
            border-width: 2px;
            border-radius: 1px; /* Rounded corners */
            border-color: #4A4A4A;
            padding: 6px;
            min-width: 60px;
            min-height: 15px;
        }

        QPushButton:hover {
            background-color: #6E6E6E; /* Slightly lighter grey on hover */
        }

        QPushButton:pressed {
            background-color: #484848; /* Even darker grey when pressed */
        }
    """

    # Accent style applied only to the primary action buttons
    # (Quick Start and "Verarbeitung beginnen").
    highlited_button_style = """
        QPushButton {
            background-color: #3C8CCE; /* Lighter blue background */
            color: white; /* White text */
            border-style: outset;
            border-width: 2px;
            border-radius: 1px; /* Rounded corners */
            border-color: #4A4A4A;
            padding: 6px;
            min-width: 50px;
            min-height: 15px;
        }

        QPushButton:hover {
            background-color: #7EC0EE; /* Even lighter blue on hover */
        }

        QPushButton:pressed {
            background-color: #4A86E8; /* Slightly darker blue when pressed */
        }
    """

    main_window.setStyleSheet(stylesheet)

    # Data Ingestion Settings Label
    main_window.dataIngestionLabel = QLabel(' Willkommen beim LoglineLeviathan - Analyse/Export-Modul.\n Der Quick-Start-Button ermöglicht eine schnelle Selektion der zu analysierenden Daten.\n Nach Abschluss der Selektion über den Abbrechen-Button startet die Analyse sofort.')
    main_window.dataIngestionLabel.setWordWrap(True)
    main_window.dataIngestionLabel.setMinimumHeight(60)
    main_window.dataIngestionLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")

    # Quick Start Button
    quickStartButton = QPushButton('Quick Start', main_window)
    quickStartButton.setStyleSheet(highlited_button_style)
    quickStartButton.setFixedSize(270, 55)
    quickStartButton.clicked.connect(main_window.quickStartWorkflow)

    # Horizontal layout for label and button
    hBoxLayout = QHBoxLayout()
    hBoxLayout.addWidget(quickStartButton)
    hBoxLayout.addWidget(main_window.dataIngestionLabel)

    # Add horizontal layout to the main layout
    main_window.mainLayout.addLayout(hBoxLayout)

    # Grid Layout for Top Buttons
    topButtonGridLayout = QGridLayout()

    # Create Buttons (stored on main_window so slots/other windows can
    # enable/disable them later).
    main_window.openButton = QPushButton('Einzelne Dateien selektieren...', main_window)
    main_window.openButton.clicked.connect(main_window.openFileNameDialog)

    main_window.addDirButton = QPushButton('Pfad zur rekursiven Analyse selektieren...', main_window)
    main_window.addDirButton.clicked.connect(main_window.openDirNameDialog)

    main_window.openFileSettingsButton = QPushButton('Selektierte Dateien...', main_window)
    main_window.openFileSettingsButton.clicked.connect(lambda: main_window.openFileSettingsWindow())

    main_window.createDbButton = QPushButton('Lokale Datenbank neu erstellen', main_window)
    main_window.createDbButton.clicked.connect(main_window.purgeDatabase)

    main_window.importDbButton = QPushButton('Existierende Datenbank importieren...', main_window)
    main_window.importDbButton.clicked.connect(main_window.importDatabase)

    main_window.exportDBButton = QPushButton('Lokale Datenbank speichern/exportieren...', main_window)
    main_window.exportDBButton.clicked.connect(main_window.exportDatabase)

    main_window.openAnalysisSettingsButton = QPushButton('Analyse-Einstellungen...', main_window)
    main_window.openAnalysisSettingsButton.clicked.connect(lambda: main_window.openAnalysisSettingsWindow())

    main_window.processButton = QPushButton('Verarbeitung beginnen', main_window)
    main_window.processButton.setStyleSheet(highlited_button_style)
    main_window.processButton.clicked.connect(main_window.processFiles)

    main_window.abortAnalysisButton = QPushButton('Verarbeitung abbrechen', main_window)
    main_window.abortAnalysisButton.clicked.connect(main_window.abortAnalysis)

    # Create GroupBoxes
    fileSelectionGroup = QGroupBox("Datenselektion")
    databaseGroup = QGroupBox("Datenbank - Management")
    analysisGroup = QGroupBox("Analyse - Management")

    # Create Layouts for each GroupBox
    fileSelectionLayout = QVBoxLayout()
    databaseLayout = QVBoxLayout()
    analysisLayout = QVBoxLayout()

    # Add Buttons to their respective Layout
    fileSelectionLayout.addWidget(main_window.openButton)
    fileSelectionLayout.addWidget(main_window.addDirButton)
    fileSelectionLayout.addWidget(main_window.openFileSettingsButton)

    databaseLayout.addWidget(main_window.createDbButton)
    databaseLayout.addWidget(main_window.importDbButton)
    databaseLayout.addWidget(main_window.exportDBButton)

    analysisLayout.addWidget(main_window.openAnalysisSettingsButton)
    analysisLayout.addWidget(main_window.processButton)
    analysisLayout.addWidget(main_window.abortAnalysisButton)

    # Set Layouts to GroupBoxes
    fileSelectionGroup.setLayout(fileSelectionLayout)
    databaseGroup.setLayout(databaseLayout)
    analysisGroup.setLayout(analysisLayout)

    # Add GroupBoxes to Grid
    topButtonGridLayout.addWidget(fileSelectionGroup, 0, 0)
    topButtonGridLayout.addWidget(databaseGroup, 0, 1)
    topButtonGridLayout.addWidget(analysisGroup, 0, 2)

    # Set uniform spacing
    topButtonGridLayout.setHorizontalSpacing(20)
    topButtonGridLayout.setVerticalSpacing(10)

    # Add the Grid Layout to the Main Layout
    main_window.mainLayout.addLayout(topButtonGridLayout)

    # Progress Bar, Status Label, Entity Rate Label, File Count Label
    main_window.progressBar = QProgressBar(main_window)
    main_window.mainLayout.addWidget(main_window.progressBar)

    main_window.statusLabel = QLabel(' Bereit // Analyse starten oder Export generieren', main_window)
    main_window.statusLabel.setWordWrap(True)
    main_window.statusLabel.setMinimumHeight(40)
    main_window.statusLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
    main_window.mainLayout.addWidget(main_window.statusLabel)

    main_window.entityRateLabel = QLabel(' Bereit // Analyse starten oder Export generieren', main_window)
    main_window.mainLayout.addWidget(main_window.entityRateLabel)

    main_window.fileCountLabel = QLabel(' Keine Dateien selektiert', main_window)
    main_window.fileCountLabel.setMinimumHeight(40)
    main_window.fileCountLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
    main_window.mainLayout.addWidget(main_window.fileCountLabel)

    # Create the new QGroupBox for Database Query
    databaseQueryGroupBox = QGroupBox("Datensatz durchsuchen", main_window)
    databaseQueryLayout = QVBoxLayout(databaseQueryGroupBox)
    databaseQueryLayout.setAlignment(Qt.AlignTop)

    # Create QLineEdit for text input (QueryLineEdit adds the
    # project's query-specific behaviour on top of QLineEdit).
    databaseQueryLineEdit = QueryLineEdit(main_window)
    databaseQueryLineEdit.setPlaceholderText(" Suchbegriff...")
    databaseQueryLineEdit.setStyleSheet("""
        QLineEdit {
            background-color: #3C4043;
            color: white;
            min-height: 20px;
        }
    """)
    # Pressing Return triggers the same query as the button below.
    databaseQueryLineEdit.returnPressed.connect(lambda: main_window.execute_query_wrapper(databaseQueryLineEdit.text()))
    databaseQueryLabel = QLabel("\nIntelligentes Durchsuchen der Datenbank nach jeglichem Suchbegriff. Die Nutzung von Suchoperatoren +, - und '' ist möglich. Als Suchbegriffe können jegliche Entitäten, aber auch Dateinamen oder Sätze im Kontext, sowie Entitätentyp-Kurzbezeichnungen (s. rechts) verwendet werden.\n\n", main_window)
    databaseQueryLabel.setWordWrap(True)
    # Create QPushButton for executing the query
    executeQueryButton = QPushButton("Ausführen", main_window)
    executeQueryButton.clicked.connect(lambda: main_window.execute_query_wrapper(databaseQueryLineEdit.text()))
    main_window.databaseStatusLabel = QLabel(" Datenbank noch nicht initialisiert", main_window)
    # Add QLineEdit and QPushButton to the QVBoxLayout
    databaseQueryLayout.addWidget(databaseQueryLineEdit)
    databaseQueryLayout.addWidget(databaseQueryLabel)
    databaseQueryLayout.addWidget(executeQueryButton)
    databaseQueryLayout.addWidget(main_window.databaseStatusLabel)

    # Set the QVBoxLayout as the layout for the QGroupBox
    databaseQueryGroupBox.setLayout(databaseQueryLayout)

    # Database contents panel: tree view plus expand/collapse controls.
    # NOTE(review): assumes main_window.databaseTree was created before
    # this function runs — confirm against the caller.
    databaseContentsGroupBox = QGroupBox("Datensatz", main_window)
    databaseContentsLayout = QHBoxLayout(databaseContentsGroupBox)
    databaseContentSwitchLayout = QVBoxLayout()

    expandAllButton = QPushButton("Expandieren", main_window)
    expandAllButton.clicked.connect(lambda: main_window.databaseTree.expandAllTreeItems())

    collapseAllButton = QPushButton("Komprimieren", main_window)
    collapseAllButton.clicked.connect(lambda: main_window.databaseTree.collapseAllTreeItems())
    databaseContentSwitchLayout.addWidget(expandAllButton)
    databaseContentSwitchLayout.addWidget(collapseAllButton)
    databaseContentSwitchLayout.setAlignment(Qt.AlignTop)
    databaseContentsLayout.addWidget(main_window.databaseTree)
    databaseContentsLayout.addLayout(databaseContentSwitchLayout)

    # Generator panel: buttons that open the report / wordlist windows.
    generationOptionsGroupBox = QGroupBox("Generator - Selektion", main_window)
    generationOptionsLayout = QVBoxLayout(generationOptionsGroupBox)
    generationOptionsLayout.setAlignment(Qt.AlignTop)

    openGenerateReportWindowButton = QPushButton("Report-Datei generieren", main_window)
    openGenerateReportWindowButton.clicked.connect(main_window.openGenerateReportWindow)
    openGenerateReportWindowButtonDescriptor = QLabel("REPORT-DATEI GENERIEREN:\nGeneriert eine Report-Datei, die einfach extern geteilt oder inspiziert werden kann. Im Auswahlfenster sind detaillierte Einstellungen verfügbar.\n", main_window)
    openGenerateReportWindowButtonDescriptor.setWordWrap(True)

    openGenerateWordlistButton = QPushButton("Wortliste generieren", main_window)
    openGenerateWordlistButton.clicked.connect(main_window.openGenerateWordlistWindow)
    openGenerateWordlistButtonDescriptor = QLabel("WORTLISTE GENERIEREN:\nGeneriert eine Wortliste, die entweder für die Analyse (auch für weitere Datensätze) genutzt werden, oder extern weiterverwendet werden kann. Im Auswahlfenster sind detaillierte Einstellungen verfügbar.\n\n\n", main_window)
    openGenerateWordlistButtonDescriptor.setWordWrap(True)

    generationOptionsLayout.addWidget(openGenerateReportWindowButton)
    generationOptionsLayout.addWidget(openGenerateReportWindowButtonDescriptor)
    generationOptionsLayout.addWidget(openGenerateWordlistButton)
    generationOptionsLayout.addWidget(openGenerateWordlistButtonDescriptor)

    # Create a new QGridLayout for arranging QGroupBoxes
    groupBoxLayout = QGridLayout()

    databaseQueryGroupBox.setFixedWidth(300)
    databaseContentsGroupBox.setFixedWidth(500)
    generationOptionsGroupBox.setFixedWidth(300)
    # Row 0: query panel | contents panel | generator panel
    groupBoxLayout.addWidget(databaseQueryGroupBox, 0, 0)
    groupBoxLayout.addWidget(databaseContentsGroupBox, 0, 1)
    groupBoxLayout.addWidget(generationOptionsGroupBox, 0, 2)

    # Link to GitHub Repo
    main_window.githubLink = QLabel(f'<a href="{versionvars.repo_link}">{versionvars.repo_link_text}</a>', main_window)
    main_window.githubLink.setOpenExternalLinks(True)

    main_window.openLogDirButton = QPushButton('Log-Verzeichnis', main_window)
    main_window.openLogDirButton.clicked.connect(main_window.openLogDir)

    # Exit Button
    main_window.exitButton = QPushButton('Beenden', main_window)
    main_window.exitButton.clicked.connect(main_window.close)

    # Row 1: footer (log dir | repo link | exit)
    groupBoxLayout.addWidget(main_window.githubLink, 1, 1)
    groupBoxLayout.addWidget(main_window.openLogDirButton, 1, 0)
    groupBoxLayout.addWidget(main_window.exitButton, 1, 2)

    # Add this grid layout to the main layout of the main window
    main_window.mainLayout.addLayout(groupBoxLayout)

    main_window.update()
|
||||
|
||||
|
339
logline_leviathan/gui/initui_report_window.py
Normal file
339
logline_leviathan/gui/initui_report_window.py
Normal file
@ -0,0 +1,339 @@
|
||||
from PyQt5.QtWidgets import QRadioButton, QDateTimeEdit, QVBoxLayout, QCheckBox, QHBoxLayout, QGroupBox, QPushButton, QLineEdit, QGridLayout, QLabel, QListWidget, QGridLayout
|
||||
from PyQt5.QtCore import QDate
|
||||
import logline_leviathan.gui.versionvars as versionvars
|
||||
from logline_leviathan.gui.checkbox_panel import *
|
||||
|
||||
|
||||
|
||||
def initialize_generate_report_window(generate_report_window, app):
|
||||
generate_report_window.setWindowTitle('Logline Leviathan - Report - Generator')
|
||||
generate_report_window.mainLayout = QVBoxLayout(generate_report_window)
|
||||
#generate_report_window.extendedLayout = QHBoxLayout(generate_report_window)
|
||||
generate_report_window.db_session = None
|
||||
stylesheet = """
|
||||
/* Style for the main window */
|
||||
QWidget {
|
||||
background-color: #282C34; /* Dark grey background */
|
||||
color: white; /* White text */
|
||||
}
|
||||
|
||||
/* Style for buttons */
|
||||
QPushButton {
|
||||
background-color: #4B5563; /* Dark grey background */
|
||||
color: white; /* White text */
|
||||
border-style: outset;
|
||||
border-width: 2px;
|
||||
border-radius: 1px; /* Rounded corners */
|
||||
border-color: #4A4A4A;
|
||||
padding: 6px;
|
||||
min-width: 50px;
|
||||
min-height: 15px;
|
||||
}
|
||||
|
||||
QPushButton:hover {
|
||||
background-color: #6E6E6E; /* Slightly lighter grey on hover */
|
||||
}
|
||||
|
||||
QPushButton:pressed {
|
||||
background-color: #484848; /* Even darker grey when pressed */
|
||||
}
|
||||
"""
|
||||
|
||||
highlited_button_style = """
|
||||
QPushButton {
|
||||
background-color: #3C8CCE; /* Lighter blue background */
|
||||
color: white; /* White text */
|
||||
border-style: outset;
|
||||
border-width: 2px;
|
||||
border-radius: 1px; /* Rounded corners */
|
||||
border-color: #4A4A4A;
|
||||
padding: 6px;
|
||||
min-width: 50px;
|
||||
min-height: 15px;
|
||||
}
|
||||
|
||||
QPushButton:hover {
|
||||
background-color: #7EC0EE; /* Even lighter blue on hover */
|
||||
}
|
||||
|
||||
QPushButton:pressed {
|
||||
background-color: #4A86E8; /* Slightly darker blue when pressed */
|
||||
}
|
||||
"""
|
||||
|
||||
# Update function for output format selection label with custom text and line breaks
|
||||
def update_output_format_label(current):
|
||||
if current is not None:
|
||||
format_text = current.text()
|
||||
format_descriptions = {
|
||||
'HTML': " HTML\n Generiert eine einzelne HTML-Datei, die einfach\n geteilt und mit jedem Browser geöffnet werden kann.\n Geeignet für Übersichtsanalyse, sofern der Datensatz nicht zu\n umfangreich ist.",
|
||||
'Interactive HTML': " Interaktive HTML-Datei.\n Generiert eine einzelne HTML-Datei, die mit einem Webbrowser,\n der JavaScript unterstützt, angezeigt wird.\n Geeignet für umfangreichere Datensätze.",
|
||||
'XLSX': " XLSX\n Exportiert Daten in eine Excel-Datei.\n Schreibe sämtliche Entitätentypen in\n separate Sheets, unterstützt keine visuelle Hervorhebung.\n Geeignet für weitere Analyse über MS-Excel."
|
||||
}
|
||||
generate_report_window.outputFormatSelectionLabel.setText(format_descriptions.get(format_text, ""))
|
||||
|
||||
|
||||
def update_export_context_label(current):
|
||||
if current is not None:
|
||||
context_text = current.text() # Get the text of the current item
|
||||
context_descriptions = {
|
||||
"Kontext - gleiche Zeile": " Kontext - gleiche Zeile\n Eine Zeile pro Fundstelle einer Entität.\n Der Kontext innerhalb der gleichen Zeile\n wird dargestellt.\n",
|
||||
"Kontext - mittelgroß": " Kontext - mittelgroß\n Eine Zeile pro Fundstelle einer Entität.\n Der Kontext von +/- 8 Zeilen wird dargestellt.\n",
|
||||
"Kontext - umfangreich": " Kontext - umfangreich\n Eine Zeile pro Fundstelle einer Entität.\n Der Kontext von +/- 15 Zeilen wird dargestellt.\n",
|
||||
"Kompakte Zusammenfassung ohne Kontext": " Kompakte Zusammenfassung ohne Kontext\n Listet Entitäten untereinander auf.\n Die jeweiligen Fundstellen werden komprimiert\n dargestellt.\n Kontext wird nicht unterstützt."
|
||||
}
|
||||
generate_report_window.exportContextSelectionLabel.setText(context_descriptions.get(context_text, ""))
|
||||
|
||||
|
||||
generate_report_window.setStyleSheet(stylesheet)
|
||||
generate_report_window.statusLabel = QLabel(' Erwarte Selektion von Entitätentypen, die im Export dargestellt werden.', generate_report_window)
|
||||
generate_report_window.statusLabel.setWordWrap(True)
|
||||
generate_report_window.statusLabel.setMinimumHeight(40)
|
||||
generate_report_window.statusLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
|
||||
generate_report_window.mainLayout.addWidget(generate_report_window.statusLabel)
|
||||
# Create a GroupBox for the CheckboxPanel
|
||||
exportOptionsGroupBox = QGroupBox("Typen - Selektion", generate_report_window)
|
||||
exportOptionsLayout = QVBoxLayout(exportOptionsGroupBox)
|
||||
generate_report_window.checkboxPanel = CheckboxPanel()
|
||||
|
||||
# Checkbox Panel Filter Layout
|
||||
checkboxFilterLayout = QHBoxLayout()
|
||||
|
||||
# Create the "Check All" button
|
||||
checkAllButton = QPushButton("Alle markieren", generate_report_window)
|
||||
checkAllButton.clicked.connect(lambda: generate_report_window.checkboxPanel.checkAllVisible())
|
||||
|
||||
# Create the "Uncheck All" button
|
||||
uncheckAllButton = QPushButton("Keine markieren", generate_report_window)
|
||||
uncheckAllButton.clicked.connect(lambda: generate_report_window.checkboxPanel.uncheckAllVisible())
|
||||
|
||||
expandAllButton = QPushButton("Expandieren", generate_report_window)
|
||||
expandAllButton.clicked.connect(lambda: generate_report_window.checkboxPanel.expandAllTreeItems())
|
||||
|
||||
collapseAllButton = QPushButton("Komprimieren", generate_report_window)
|
||||
collapseAllButton.clicked.connect(lambda: generate_report_window.checkboxPanel.collapseAllTreeItems())
|
||||
|
||||
checkboxFilterLayout.addWidget(checkAllButton)
|
||||
checkboxFilterLayout.addWidget(uncheckAllButton)
|
||||
checkboxFilterLayout.addWidget(expandAllButton)
|
||||
checkboxFilterLayout.addWidget(collapseAllButton)
|
||||
|
||||
checkboxFilterLabel = QLabel("Filtern:")
|
||||
checkboxFilterLayout.addWidget(checkboxFilterLabel)
|
||||
|
||||
checkboxFilterLineEdit = QLineEdit(generate_report_window)
|
||||
checkboxFilterLineEdit.setPlaceholderText(" nach Typ, Tooltip oder Kurzbezeichnung filtern...")
|
||||
checkboxFilterLineEdit.setStyleSheet("""
|
||||
QLineEdit {
|
||||
background-color: #3C4043;
|
||||
color: white;
|
||||
min-height: 20px;
|
||||
}
|
||||
""")
|
||||
checkboxFilterLayout.addWidget(checkboxFilterLineEdit)
|
||||
|
||||
# Connect the textChanged signal of QLineEdit to the filter method
|
||||
checkboxFilterLineEdit.textChanged.connect(generate_report_window.checkboxPanel.filterCheckboxes)
|
||||
|
||||
exportOptionsLayout.addLayout(checkboxFilterLayout)
|
||||
exportOptionsLayout.addWidget(generate_report_window.checkboxPanel)
|
||||
|
||||
# Create a GroupBox for the FileCheckboxPanel
|
||||
fileSelectionGroupBox = QGroupBox("Dateien - Selektion", generate_report_window)
|
||||
fileExportOptionsLayout = QVBoxLayout(fileSelectionGroupBox)
|
||||
generate_report_window.fileCheckboxPanel = FileCheckboxPanel()
|
||||
|
||||
# File Checkbox Panel Filter Layout
|
||||
fileCheckboxFilterLayout = QHBoxLayout()
|
||||
|
||||
# Create the "Check All" button
|
||||
filCheckAllButton = QPushButton("Alle markieren", generate_report_window)
|
||||
filCheckAllButton.clicked.connect(lambda: generate_report_window.fileCheckboxPanel.checkAllVisible())
|
||||
|
||||
# Create the "Uncheck All" button
|
||||
fileUncheckAllButton = QPushButton("Keine markieren", generate_report_window)
|
||||
fileUncheckAllButton.clicked.connect(lambda: generate_report_window.fileCheckboxPanel.uncheckAllVisible())
|
||||
|
||||
|
||||
fileCheckboxFilterLayout.addWidget(filCheckAllButton)
|
||||
fileCheckboxFilterLayout.addWidget(fileUncheckAllButton)
|
||||
|
||||
fileFilterLabel = QLabel("Filtern:")
|
||||
fileCheckboxFilterLayout.addWidget(fileFilterLabel)
|
||||
|
||||
fileFilterLineEdit = QLineEdit(generate_report_window)
|
||||
fileFilterLineEdit.setPlaceholderText(" nach Dateiname filtern...")
|
||||
fileFilterLineEdit.setStyleSheet("""
|
||||
QLineEdit {
|
||||
background-color: #3C4043;
|
||||
color: white;
|
||||
min-height: 20px;
|
||||
}
|
||||
""")
|
||||
fileCheckboxFilterLayout.addWidget(fileFilterLineEdit)
|
||||
|
||||
# Connect the textChanged signal of QLineEdit to the filter method
|
||||
fileFilterLineEdit.textChanged.connect(generate_report_window.fileCheckboxPanel.filterCheckboxes)
|
||||
|
||||
fileExportOptionsLayout.addLayout(fileCheckboxFilterLayout)
|
||||
fileExportOptionsLayout.addWidget(generate_report_window.fileCheckboxPanel)
|
||||
|
||||
# First Horizontal Layout for Database Query and Export Options
|
||||
topHBoxLayout = QHBoxLayout()
|
||||
topHBoxLayout.addWidget(exportOptionsGroupBox)
|
||||
topHBoxLayout.addWidget(fileSelectionGroupBox)
|
||||
|
||||
generate_report_window.mainLayout.addLayout(topHBoxLayout)
|
||||
|
||||
# Export Settings as a Grid Layout
|
||||
exportCustomizationLayout = QGridLayout()
|
||||
item_height = 20
|
||||
visible_items = 3
|
||||
|
||||
# Set a fixed width for both QListWidgets (adjust the width as needed)
|
||||
outputFormatGroupBox = QGroupBox("Ausgabeformat - Selektion", generate_report_window)
|
||||
outputFormatGroupBox.setFixedHeight(200)
|
||||
outputFormatLayout = QVBoxLayout(outputFormatGroupBox)
|
||||
|
||||
generate_report_window.outputFormatList = QListWidget()
|
||||
generate_report_window.outputFormatList.addItems(['HTML', 'Interactive HTML', 'XLSX'])
|
||||
generate_report_window.outputFormatList.setCurrentRow(0)
|
||||
generate_report_window.outputFormatList.setFixedHeight(item_height * visible_items)
|
||||
outputFormatLayout.addWidget(generate_report_window.outputFormatList)
|
||||
|
||||
# Label to display current selection of output format
|
||||
generate_report_window.outputFormatSelectionLabel = QLabel('')
|
||||
generate_report_window.outputFormatSelectionLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
|
||||
generate_report_window.outputFormatSelectionLabel.setWordWrap(True)
|
||||
generate_report_window.outputFormatSelectionLabel.setFixedHeight(80)
|
||||
outputFormatLayout.addWidget(generate_report_window.outputFormatSelectionLabel)
|
||||
|
||||
exportCustomizationLayout.addWidget(outputFormatGroupBox, 0, 0)
|
||||
|
||||
# Export Context Group Box
|
||||
exportContextGroupBox = QGroupBox("Ausgabedatei - Aufbau", generate_report_window)
|
||||
exportContextGroupBox.setFixedHeight(200)
|
||||
exportContextLayout = QVBoxLayout(exportContextGroupBox)
|
||||
|
||||
generate_report_window.exportContextList = QListWidget()
|
||||
generate_report_window.exportContextList.addItems(['Kontext - gleiche Zeile', 'Kontext - mittelgroß', 'Kontext - umfangreich', 'Kompakte Zusammenfassung ohne Kontext'])
|
||||
generate_report_window.exportContextList.setCurrentRow(0)
|
||||
generate_report_window.exportContextList.setFixedHeight(item_height * visible_items)
|
||||
exportContextLayout.addWidget(generate_report_window.exportContextList)
|
||||
|
||||
# Label to display current selection of export context
|
||||
generate_report_window.exportContextSelectionLabel = QLabel('')
|
||||
generate_report_window.exportContextSelectionLabel.setStyleSheet("QLabel { background-color: #3C4043; color: white; }")
|
||||
generate_report_window.exportContextSelectionLabel.setWordWrap(True)
|
||||
generate_report_window.exportContextSelectionLabel.setFixedHeight(80)
|
||||
exportContextLayout.addWidget(generate_report_window.exportContextSelectionLabel)
|
||||
|
||||
exportCustomizationLayout.addWidget(exportContextGroupBox, 0, 1)
|
||||
|
||||
# Connect signals to the update functions
|
||||
generate_report_window.outputFormatList.currentItemChanged.connect(update_output_format_label)
|
||||
generate_report_window.exportContextList.currentItemChanged.connect(update_export_context_label)
|
||||
|
||||
# Initially update the labels
|
||||
update_output_format_label(generate_report_window.outputFormatList.currentItem())
|
||||
update_export_context_label(generate_report_window.exportContextList.currentItem())
|
||||
|
||||
|
||||
|
||||
# Initially update the label
|
||||
update_output_format_label(generate_report_window.outputFormatList.currentItem())
|
||||
|
||||
# Initially update the label
|
||||
update_export_context_label(generate_report_window.exportContextList.currentItem())
|
||||
|
||||
|
||||
|
||||
|
||||
#exportLayout.addLayout(exportCustomizationLayout)
|
||||
generate_report_window.mainLayout.addLayout(exportCustomizationLayout)
|
||||
|
||||
exportSettingsLayout = QGridLayout()
|
||||
|
||||
# Add a checkbox for Crossmatches
|
||||
generate_report_window.crossmatchesCheckbox = QCheckBox('Nur Kreuztreffer (Gibt Entitäten an, die in mehreren Dateien gefunden wurden)', generate_report_window)
|
||||
exportSettingsLayout.addWidget(generate_report_window.crossmatchesCheckbox, 0, 0)
|
||||
|
||||
generate_report_window.timestampFilterCheckbox = QCheckBox('Nach Zeitstempel filtern:', generate_report_window)
|
||||
generate_report_window.startDateEdit = QDateTimeEdit(generate_report_window)
|
||||
generate_report_window.startDateEdit.setCalendarPopup(True)
|
||||
generate_report_window.startDateEdit.setDate(QDate.currentDate())
|
||||
generate_report_window.endDateEdit = QDateTimeEdit(generate_report_window)
|
||||
generate_report_window.endDateEdit.setCalendarPopup(True)
|
||||
generate_report_window.endDateEdit.setDate(QDate.currentDate())
|
||||
generate_report_window.timestampFilterQHBoxLayout = QHBoxLayout()
|
||||
generate_report_window.timestampFilterQHBoxLayout.addWidget(generate_report_window.timestampFilterCheckbox)
|
||||
generate_report_window.timestampFilterQHBoxLayout.addWidget(generate_report_window.startDateEdit)
|
||||
generate_report_window.timestampFilterQHBoxLayout.addWidget(generate_report_window.endDateEdit)
|
||||
exportSettingsLayout.addLayout(generate_report_window.timestampFilterQHBoxLayout, 1, 0)
|
||||
|
||||
generate_report_window.flaggedEntriesLayout = QHBoxLayout()
|
||||
generate_report_window.flaggedEntriesCheckbox = QCheckBox('Markierte Einträge berücksichtigen', generate_report_window)
|
||||
generate_report_window.flaggedEntriesLayout.addWidget(generate_report_window.flaggedEntriesCheckbox)
|
||||
generate_report_window.flaggedRadioButtonLayout = QHBoxLayout()
|
||||
generate_report_window.flaggedRadioButton = QRadioButton('Nur markierte Einträge')
|
||||
generate_report_window.notflaggedRadioButton = QRadioButton('Nur nicht markierte Einträge')
|
||||
generate_report_window.flaggedRadioButtonLayout.addWidget(generate_report_window.flaggedRadioButton)
|
||||
generate_report_window.flaggedRadioButtonLayout.addWidget(generate_report_window.notflaggedRadioButton)
|
||||
generate_report_window.flaggedRadioButton.setChecked(True)
|
||||
generate_report_window.flaggedEntriesLayout.addLayout(generate_report_window.flaggedRadioButtonLayout)
|
||||
exportSettingsLayout.addLayout(generate_report_window.flaggedEntriesLayout, 2, 0)
|
||||
|
||||
generate_report_window.openAfterExportCheckbox = QCheckBox('Datei nach dem Export oeffnen', generate_report_window)
|
||||
exportSettingsLayout.addWidget(generate_report_window.openAfterExportCheckbox, 3, 0)
|
||||
|
||||
# Output File Path Label
|
||||
generate_report_window.outputFilePathLabel = QLabel('', generate_report_window)
|
||||
generate_report_window.updateOutputFilePathLabel() # Call this method to set the initial text
|
||||
exportSettingsLayout.addWidget(generate_report_window.outputFilePathLabel, 0, 1)
|
||||
|
||||
generate_report_window.setOutputFileNameLineEdit = QLineEdit(generate_report_window)
|
||||
generate_report_window.setOutputFileNameLineEdit.setPlaceholderText(' Eigenen Dateinamen spezifizieren...')
|
||||
exportSettingsLayout.addWidget(generate_report_window.setOutputFileNameLineEdit, 3, 1)
|
||||
|
||||
|
||||
generate_report_window.mainLayout.addLayout(exportSettingsLayout)
|
||||
|
||||
# Exit Button Layout
|
||||
bottomLayout = QGridLayout()
|
||||
|
||||
#generate_report_window.customizeResultsButton = QPushButton('Customize Results (WiP)', generate_report_window)
|
||||
#generate_report_window.customizeResultsButton.setDisabled(True)
|
||||
#generate_report_window.customizeResultsButton.clicked.connect(generate_report_window.openCustomizeResultsDialog)
|
||||
#bottomLayout.addWidget(generate_report_window.customizeResultsButton, 0, 0)
|
||||
|
||||
generate_report_window.openOutputFilepathButton = QPushButton('Ausgabeverzeichnis öffnen...', generate_report_window)
|
||||
generate_report_window.openOutputFilepathButton.clicked.connect(generate_report_window.openOutputFilepath)
|
||||
bottomLayout.addWidget(generate_report_window.openOutputFilepathButton, 0, 1)
|
||||
|
||||
# Start Export Button
|
||||
generate_report_window.startExportButton = QPushButton('Report generieren...', generate_report_window)
|
||||
generate_report_window.startExportButton.clicked.connect(generate_report_window.start_export_process)
|
||||
generate_report_window.startExportButton.setStyleSheet(highlited_button_style)
|
||||
bottomLayout.addWidget(generate_report_window.startExportButton, 0, 2)
|
||||
|
||||
|
||||
# Output File Directory
|
||||
generate_report_window.selectOutputFileButton = QPushButton('Ausgabeverzeichnis setzen...', generate_report_window)
|
||||
generate_report_window.selectOutputFileButton.clicked.connect(generate_report_window.selectOutputFile)
|
||||
bottomLayout.addWidget(generate_report_window.selectOutputFileButton, 1, 1)
|
||||
|
||||
# Exit Button
|
||||
generate_report_window.exitButton = QPushButton('Schließen', generate_report_window)
|
||||
generate_report_window.exitButton.clicked.connect(generate_report_window.close)
|
||||
bottomLayout.addWidget(generate_report_window.exitButton, 1, 2)
|
||||
|
||||
generate_report_window.mainLayout.addLayout(bottomLayout)
|
||||
|
||||
#Easteregg
|
||||
#generate_report_window.extendedLayout.addLayout(generate_report_window.mainLayout)
|
||||
#generate_report_window.terminalEasterEgg = TerminalEasterEgg(generate_report_window)
|
||||
#generate_report_window.terminalEasterEgg.hide()
|
||||
#logoLabel.clicked.connect(generate_report_window.terminalEasterEgg.show)
|
||||
|
||||
|
||||
generate_report_window.setLayout(generate_report_window.mainLayout)
|
BIN
logline_leviathan/gui/logo.png
Normal file
BIN
logline_leviathan/gui/logo.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 48 KiB |
372
logline_leviathan/gui/mainwindow.py
Normal file
372
logline_leviathan/gui/mainwindow.py
Normal file
@ -0,0 +1,372 @@
|
||||
import sys
|
||||
import os
|
||||
import logging
|
||||
import shutil
|
||||
import multiprocessing
|
||||
import logline_leviathan.gui.versionvars as versionvars
|
||||
from PyQt5.QtWidgets import QApplication, QWidget, QMessageBox, QLabel
|
||||
from PyQt5.QtCore import QTimer
|
||||
from logline_leviathan.file_processor.file_processor_thread import FileProcessorThread
|
||||
from logline_leviathan.database.database_manager import EntityTypesTable, EntitiesTable, session_scope
|
||||
from logline_leviathan.database.database_utility import DatabaseUtility
|
||||
from logline_leviathan.database.database_operations import DatabaseOperations
|
||||
from logline_leviathan.gui.checkbox_panel import *
|
||||
from logline_leviathan.gui.initui_mainwindow import initialize_main_window
|
||||
from logline_leviathan.gui.generate_report import GenerateReportWindow
|
||||
from logline_leviathan.gui.generate_wordlist import GenerateWordlistWindow
|
||||
from logline_leviathan.gui.ui_helper import UIHelper, format_time
|
||||
from logline_leviathan.gui.settings_gui import FileSettingsWindow, AnalysisSettingsWindow
|
||||
from logline_leviathan.gui.query_window import ResultsWindow
|
||||
from logline_leviathan.database.query import DatabaseGUIQuery
|
||||
from sqlalchemy import func
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
|
||||
class MainWindow(QWidget):
    """Top-level application window of LoglineLeviathan.

    Wires together the file-selection UI, the background file-processing
    thread, the SQLite entity database and the report/wordlist export
    windows.
    """

    def __init__(self, app, db_init_func, directory=""):
        """Build the main window and all child windows.

        Args:
            app: The running QApplication instance.
            db_init_func: Callable that (re)initialises the entity database;
                invoked once here and again before each processing run.
            directory: Optional directory whose files are pre-loaded into
                the selection on startup.
        """
        super().__init__()
        # Configure logging from the level *name* stored in versionvars
        # (e.g. "DEBUG"); getattr maps it to the numeric logging constant.
        logging_level = getattr(logging, versionvars.loglevel, None)
        if isinstance(logging_level, int):
            logging.basicConfig(level=logging_level)
        else:
            logging.warning(f"Invalid log level: {versionvars.loglevel}")

        self.app = app
        self.ui_helper = UIHelper(self)
        self.db_init_func = db_init_func
        db_init_func()
        self.database_operations = DatabaseOperations(self, db_init_func)
        self.current_db_path = 'entities.db' # Default database path
        self.directory = directory
        # Paths of all files currently selected for ingestion.
        self.filePaths = []
        self.log_dir = os.path.join(os.getcwd(), 'output', 'entities_export', 'log')
        os.makedirs(self.log_dir, exist_ok=True)

        # Optional location the database is copied to after processing.
        self.external_db_path = None
        self.processing_thread = None
        self.generate_report_window = None
        self.databaseTree = DatabasePanel()

        # Child windows share one query helper instance.
        self.db_query_instance = DatabaseGUIQuery()
        self.results_window = ResultsWindow(self.db_query_instance, parent=self)
        self.generate_wordlist_window = GenerateWordlistWindow(self.db_query_instance)
        self.generate_report_window = GenerateReportWindow(self.app)
        self.analysis_settings_window = AnalysisSettingsWindow(self)
        self.analysis_settings_window.parsersUpdated.connect(self.refreshApplicationState)
        self.file_selection_window = FileSettingsWindow(self.filePaths, self)

        self.database_operations.ensureDatabaseExists()

        self.initUI()

        # NOTE(review): UIHelper is instantiated a second time here; the
        # instance created above is discarded. Looks redundant — confirm.
        self.ui_helper = UIHelper(self)
        self.database_utility = DatabaseUtility(self)

        # Seed/refresh the entity-type tables from the YAML definitions.
        yaml_data = self.database_operations.loadRegexFromYAML()
        self.database_operations.populate_and_update_entities_from_yaml(yaml_data)
        # Load data and update checkboxes
        self.refreshApplicationState()

        self.database_operations.checkScriptPresence()

        # Load files from the directory if specified
        if self.directory and os.path.isdir(self.directory):
            self.loadFilesFromDirectory(self.directory)

        # Periodic UI refresh: worker-thread signals only set the needs_*
        # flags; this timer applies them every 500 ms so the GUI is not
        # flooded with per-item updates.
        self.ui_update_interval = 500
        self.needs_tree_update = False
        self.needs_checkbox_update = False
        self.update_timer = QTimer(self)
        self.update_timer.timeout.connect(self.performPeriodicUpdate)
        self.update_timer.start(self.ui_update_interval)


    def loadFilesFromDirectory(self, directory):
        """Recursively add every file below *directory* to the selection."""
        for root, dirs, files in os.walk(directory):
            for filename in files:
                file_path = os.path.join(root, filename)
                self.filePaths.append(file_path)
        self.updateFileCountLabel()


    def initUI(self):
        """Delegate widget construction to initui_mainwindow."""
        initialize_main_window(self, self.app)

    def openFileNameDialog(self):
        """Let the user pick individual files, then refresh the file views."""
        self.ui_helper.openFileNameDialog()
        self.file_selection_window.populateTable()
        self.updateFileCountLabel()

    def openDirNameDialog(self):
        """Let the user pick a directory (recursive), then refresh the views."""
        self.ui_helper.openDirNameDialog()
        self.file_selection_window.populateTable()
        self.updateFileCountLabel()

    def clearFileSelection(self):
        """Drop all selected files and refresh the file views."""
        self.ui_helper.clearFileSelection()
        self.file_selection_window.populateTable()
        self.updateFileCountLabel()

    def removeSingleFile(self, file):
        """Remove one file from the selection and refresh the file views."""
        self.ui_helper.removeSingleFile(file)
        self.file_selection_window.populateTable()
        self.updateFileCountLabel()

    def refreshApplicationState(self):
        """Recreate the worker thread's checkbox wiring and refresh all views."""
        #yaml_data = self.database_operations.loadRegexFromYAML()
        #self.database_operations.populate_and_update_entities_from_yaml(yaml_data)
        self.processing_thread = FileProcessorThread(self.filePaths)
        self.processing_thread.update_checkboxes_signal.connect(self.generate_report_window.updateCheckboxes)
        self.processing_thread.update_checkboxes_signal.connect(self.generate_wordlist_window.updateCheckboxes)
        self.generate_report_window.updateCheckboxes()
        self.generate_wordlist_window.updateCheckboxes()
        self.updateDatabaseStatusLabel()
        self.updateTree()
        self.updateFileCountLabel()

    def updateFileCountLabel(self):
        """Show the number and combined size of the selected files."""
        file_count = len(self.filePaths)
        file_count_label = f" {file_count} Dateien selektiert"
        readable_size = self.ui_helper.calculate_total_size(self.filePaths)
        self.fileCountLabel.setText(file_count_label + f' // {readable_size}')


    def updateTree(self):
        """Repopulate the database tree widget from a fresh DB session."""
        with session_scope() as session:
            self.databaseTree.updateTree(session)

    def updateDatabaseStatusLabel(self):
        """Show the entity count and the on-disk database size."""
        with session_scope() as session:
            entity_count = session.query(EntitiesTable).count()

        db_file_path = self.current_db_path # Replace with your actual database file path
        db_file_size = os.path.getsize(db_file_path)
        db_file_size_mb = db_file_size / (1024 * 1024) # Convert size to MB

        status_text = f"Anzahl Entitäten: {entity_count}\nDatenbank-Größe: {db_file_size_mb:.2f} MB"
        self.databaseStatusLabel.setText(status_text)

    def onTreeUpdateSignalReceived(self):
        # Deferred: the actual tree refresh happens in performPeriodicUpdate().
        self.needs_tree_update = True

    def onCheckboxUpdateSignalReceived(self):
        # Deferred: applied by performPeriodicUpdate().
        self.needs_checkbox_update = True

    def performPeriodicUpdate(self):
        """Timer slot: apply any pending tree/checkbox refreshes."""
        if self.needs_tree_update:
            self.updateTree()
            self.needs_tree_update = False

        if self.needs_checkbox_update:
            self.generate_report_window.updateCheckboxes()
            self.generate_wordlist_window.updateCheckboxes()
            self.updateTree()
            self.needs_checkbox_update = False

    def execute_query_wrapper(self, query_text):
        """Open the results window and run *query_text* against the database."""
        self.results_window.show()
        self.results_window.set_query_and_execute(query_text)


    def quickStartWorkflow(self):
        """One-click flow: reset selection and DB, pick a directory, process."""
        self.clearFileSelection()
        self.purgeDatabase()
        yaml_data = self.database_operations.loadRegexFromYAML()
        self.database_operations.populate_and_update_entities_from_yaml(yaml_data)
        self.openDirNameDialog()
        self.processFiles()

    def purgeDatabase(self):
        """Delegate a full database wipe to the database utility."""
        self.database_utility.purgeDatabase()

    def importDatabase(self):
        """Delegate database import to the database utility."""
        self.database_utility.importDatabase()

    def exportDatabase(self):
        """Delegate database export to the database utility."""
        self.database_utility.exportDatabase()


    def processFiles(self):
        """Start the background FileProcessorThread over the selected files."""
        try:
            fileCount = len(self.filePaths)
            if fileCount > 0:
                self.progressBar.setMaximum(fileCount)
                # Re-initialise the database before each run.
                self.db_init_func()
                self.processing_thread = FileProcessorThread(self.filePaths) # Assign the thread to processing_thread
                self.processing_thread.finished.connect(self.onProcessingFinished)
                self.processing_thread.update_progress.connect(self.progressBar.setValue)
                self.processing_thread.update_status.connect(self.statusLabel.setText)
                self.processing_thread.update_rate.connect(self.updateEntityRate)
                #self.processing_thread.update_tree_signal.connect(self.updateTree)
                #self.processing_thread.update_checkboxes_signal.connect(self.generate_report_window.updateCheckboxes)
                #self.processing_thread.update_checkboxes_signal.connect(self.generate_wordlist_window.updateCheckboxes)
                # Heavy UI updates are routed through the flag/timer mechanism
                # instead of being applied directly from the worker's signals.
                self.processing_thread.update_tree_signal.connect(self.onTreeUpdateSignalReceived)
                self.processing_thread.update_checkboxes_signal.connect(self.onCheckboxUpdateSignalReceived)
                self.processing_thread.start()
                logging.debug(f"Thread started, isRunning: {self.processing_thread.isRunning()}")
            else:
                self.message("Information", "Keine Dateien Selektiert. Selektion vornehmen.")
        except Exception as e:
            logging.error(f"Error processing files: {e}")

    def abortAnalysis(self):
        """Stop a running analysis and restore the idle UI state."""
        if self.processing_thread and self.isProcessing():
            logging.debug(f"Abort Analysis initiated.")
            self.processing_thread.abort()
            # Block until the worker has actually terminated.
            self.processing_thread.wait()
            #self.processing_thread = None
            self.statusLabel.setText(" Verarbeitung durch User unterbrochen.")
            logging.info(f"Analysis aborted manually.")
            self.refreshApplicationState()

    def isProcessing(self):
        """Return True while the worker thread exists and is running."""
        if self.processing_thread is not None:
            return self.processing_thread.isRunning()
        return False


    def onProcessingFinished(self):
        """Worker-finished slot: write logs, show a summary, export the DB."""
        if self.processing_thread:
            summary = self.getProcessingSummary()
            unsupported_files_count = self.processing_thread.getUnsupportedFilesCount()

            # Generate CSV files for unprocessed and processed files
            current_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            unprocessed_files_log = os.path.join(self.log_dir, f"{current_timestamp}_unprocessed_files_log.csv")
            processed_files_log = os.path.join(self.log_dir, f"{current_timestamp}_processed_files_log.csv")

            self.ui_helper.generate_files_log(unprocessed_files_log, self.processing_thread.all_unsupported_files)
            # Processed = everything selected minus the unsupported files.
            processed_files = set(self.processing_thread.file_paths) - set(self.processing_thread.all_unsupported_files)
            self.ui_helper.generate_files_log(processed_files_log, list(processed_files))

            if unsupported_files_count > 0:
                summary += f"\n{unsupported_files_count} nicht unterstützte Dateien übersprungen."
                link_label = QLabel(f'<a href="#">Open list of all unsupported files...</a>')
                link_label.linkActivated.connect(lambda: self.ui_helper.openFile(unprocessed_files_log))
                self.message("Analyse-Zusammenfassung", summary, link_label)
            else:
                self.message("Analyse-Zusammenfassung", summary)

            # Copy the working database to the user-chosen export location.
            if self.external_db_path:
                try:
                    shutil.copy('entities.db', self.external_db_path)
                    self.statusLabel.setText(f" Datenbank gespeichert unter: {self.external_db_path}")
                except Exception as e:
                    logging.error(f"Error exporting database: {e}")
                    self.statusLabel.setText(f" Fehler beim Exportieren der Datenbank: {e}")

            self.refreshApplicationState()
            self.processing_thread = None

    def openLogDir(self):
        """Open the export-log directory in the system file browser."""
        self.ui_helper.openFile(self.log_dir)

    def getProcessingSummary(self):
        """Return a per-entity-type hit-count summary as display text."""
        with session_scope() as session:
            entity_counts = session.query(EntityTypesTable.gui_name, func.count(EntitiesTable.entities_id)) \
                .join(EntityTypesTable, EntitiesTable.entity_types_id == EntityTypesTable.entity_type_id) \
                .group_by(EntityTypesTable.gui_name) \
                .all()

            summary = "Analyse-Zusammenfassung:\n\n"
            for gui_name, count in entity_counts:
                summary += f"{gui_name}: {count} gefunden\n"

            return summary

    def getUnsupportedFilesCount(self):
        """Return how many selected files the worker could not process."""
        if self.processing_thread:
            return self.processing_thread.getUnsupportedFilesCount()
        return 0



    def showProcessingWarning(self):
        """Tell the user the requested operation is blocked by a running analysis."""
        self.message("Operation unmöglich", "Diese Operation kann nicht durchgeführt werden, während Dateien analysiert werden. Warten oder abbrechen.")

    def updateEntityRate(self, entity_rate, total_entities, file_rate, total_files_processed, estimated_time, data_rate_kibs):
        """Slot for the worker's rate signal: render throughput statistics."""
        formatted_time = format_time(estimated_time)
        total_cpu_cores = multiprocessing.cpu_count()
        rate_text = (f"{entity_rate:.2f} entities/second, Total: {total_entities} // "
                     f"{file_rate:.2f} files/second, Total: {total_files_processed} // "
                     f"{data_rate_kibs:.2f} KiB/s // "
                     f"ETC: {formatted_time} // "
                     f"CPU Cores: {total_cpu_cores}")
        self.entityRateLabel.setText(rate_text)


    def openGenerateReportWindow(self):
        """Show the report-export window (blocked while processing)."""
        if self.isProcessing():
            self.showProcessingWarning()
            return
        if not self.generate_report_window:
            self.generate_report_window = GenerateReportWindow(self.app)
        self.generate_report_window.show()

    def openGenerateWordlistWindow(self):
        """Show the wordlist-export window (blocked while processing)."""
        if self.isProcessing():
            self.showProcessingWarning()
            return
        if not self.generate_wordlist_window:
            self.generate_wordlist_window = GenerateWordlistWindow(self.app)
        self.generate_wordlist_window.show()

    def openFileSettingsWindow(self):
        """Show the file-selection window (blocked while processing)."""
        if self.isProcessing():
            self.showProcessingWarning()
            return
        if not self.file_selection_window:
            self.file_selection_window = FileSettingsWindow(self.filePaths, self)
        self.file_selection_window.show()

    def openAnalysisSettingsWindow(self):
        """Show the analysis-settings window (blocked while processing)."""
        if self.isProcessing():
            self.showProcessingWarning()
            return
        if not self.analysis_settings_window: # Use self.analysis_settings_window
            self.analysis_settings_window = AnalysisSettingsWindow(self) # Use self.analysis_settings_window
        self.analysis_settings_window.show() # Use self.analysis_settings_window



    def message(self, title, text, extra_widget=None):
        """Show a dark-styled warning dialog.

        Args:
            title: Dialog window title.
            text: Main message text.
            extra_widget: Optional widget (e.g. a link label) added to the
                dialog's grid layout.
        """
        msgBox = QMessageBox()
        msgBox.setStyleSheet("""
            QMessageBox {
                background-color: #282C34; /* Dark grey background */
            }
            QLabel {
                color: white; /* White text */
            }
            QPushButton {
                color: white; /* White text for buttons */
                background-color: #4B5563; /* Dark grey background for buttons */
                border-style: solid;
                border-width: 2px;
                border-radius: 5px;
                border-color: #4A4A4A;
                padding: 6px;
                min-width: 80px;
                min-height: 30px;
            }
        """)
        msgBox.setIcon(QMessageBox.Warning)
        msgBox.setWindowTitle(title)
        msgBox.setText(text)
        if extra_widget:
            msgBox.setInformativeText('')
            msgBox.layout().addWidget(extra_widget, 1, 1)
        msgBox.exec_()
||||
def main():
    """Standalone entry point for the main window.

    Bug fix: the original called ``MainWindow()`` with no arguments, but
    ``MainWindow.__init__`` requires ``app`` and ``db_init_func``, so this
    function always raised ``TypeError``. The application instance and a
    no-op database initialiser are now passed; the real launcher (run.py)
    supplies the project's actual init function instead.
    """
    app = QApplication(sys.argv)
    # NOTE(review): lambda: None skips DB setup — confirm against run.py
    # which callable is intended here.
    ex = MainWindow(app, lambda: None)
    ex.show()
    sys.exit(app.exec_())


if __name__ == '__main__':
    main()
|
32
logline_leviathan/gui/presentation_mode.py
Normal file
32
logline_leviathan/gui/presentation_mode.py
Normal file
@ -0,0 +1,32 @@
|
||||
from PyQt5.QtWidgets import QWidget, QVBoxLayout, QTextEdit
|
||||
from PyQt5.QtCore import QTimer
|
||||
import random
|
||||
|
||||
class TerminalEasterEgg(QWidget):
    """Easter-egg widget: a read-only fake terminal that appends one random
    pseudo-status line per second."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.initUI()

    def initUI(self):
        """Build the terminal-styled text area and start the update timer."""
        container = QVBoxLayout(self)

        terminal = QTextEdit(self)
        terminal.setStyleSheet("background-color: black; color: green;")
        terminal.setReadOnly(True)
        container.addWidget(terminal)
        self.terminal_widget = terminal

        # Fire once per second to append another fake prompt.
        timer = QTimer(self)
        timer.timeout.connect(self.update_terminal)
        timer.start(1000)
        self.terminal_timer = timer

    def update_terminal(self):
        """Append one randomly chosen fake status line to the terminal."""
        fake_prompts = (
            "Decrypting data...",
            "Accessing secure server...",
            "Running diagnostics...",
            "Analyzing patterns...",
            "Compiling code...",
            "Scanning network...",
        )
        self.terminal_widget.append(random.choice(fake_prompts))
|
666
logline_leviathan/gui/query_window.py
Normal file
666
logline_leviathan/gui/query_window.py
Normal file
@ -0,0 +1,666 @@
|
||||
from PyQt5.QtWidgets import QMessageBox, QCheckBox, QGroupBox, QDateTimeEdit,QProgressBar, QMainWindow, QTableWidget, QTableWidgetItem, QLineEdit, QStyledItemDelegate, QTextEdit, QWidget, QVBoxLayout, QHBoxLayout, QPushButton, QComboBox, QLabel
|
||||
from PyQt5.QtCore import pyqtSignal, Qt, pyqtSignal, QDateTime, QThread
|
||||
from PyQt5.QtGui import QTextDocument, QTextOption
|
||||
import re
|
||||
import logging
|
||||
import html
|
||||
import datetime
|
||||
|
||||
from logline_leviathan.database.query import DatabaseGUIQuery, QueryThread
|
||||
from logline_leviathan.database.database_manager import EntitiesTable, session_scope
|
||||
|
||||
# Pixel widths of the results-table columns, same order as COLUMN_NAMES.
COLUMN_WIDTHS = [200, 100, 250, 100, 120, 600, 80, 100, 40] # Adjust these values as needed
# Header labels of the results table.
COLUMN_NAMES = ['Distinct Entity', 'Entity Type', 'File Name', 'Line Number', 'Timestamp', 'Context', 'Match Score', 'Flag', 'Identifier']
# Default height (px) of one result row.
DEFAULT_ROW_HEIGHT = 120
# Width (px) of the filter line-edit fields.
FILTER_EDIT_WIDTH = 150
|
||||
|
||||
|
||||
class DataProcessor(QThread):
    """Background thread that streams result rows to the GUI in sorted chunks.

    Each emitted chunk is sorted descending by score (element index 1);
    chunking keeps the UI responsive while large result sets are loaded.
    """

    # Emits one sorted chunk (a list of row tuples) per signal.
    dataProcessed = pyqtSignal(list)

    def __init__(self, total_data, search_terms, chunk_size=50):
        """
        Args:
            total_data: Sequence of result tuples; item[1] is the sort score.
            search_terms: Active search terms (stored for consumers; not used
                by this thread itself).
            chunk_size: Number of rows emitted per dataProcessed signal.

        Raises:
            ValueError: If chunk_size is not an int.
        """
        super().__init__()
        self.total_data = total_data
        self.search_terms = search_terms

        # Ensure chunk_size is an integer
        if not isinstance(chunk_size, int):
            raise ValueError(f"chunk_size must be an integer, got {type(chunk_size)}")
        self.chunk_size = chunk_size

    def run(self):
        """Emit every row of total_data exactly once, in sorted chunks.

        Bug fix: the original first loop already iterated over the whole of
        total_data but was then followed by a second loop over
        total_data[chunk_size:], so every row beyond the first chunk was
        emitted twice. A single pass now covers all data without duplicates.
        """
        for start in range(0, len(self.total_data), self.chunk_size):
            chunk = sorted(self.total_data[start:start + self.chunk_size],
                           key=lambda row: row[1], reverse=True)
            self.dataProcessed.emit(chunk)
||||
|
||||
|
||||
class ResultsWindow(QMainWindow):
|
||||
def __init__(self, db_query_instance, parent=None):
|
||||
super(ResultsWindow, self).__init__(parent)
|
||||
self.db_query_instance = db_query_instance
|
||||
self.total_data = []
|
||||
self.current_filters = {}
|
||||
self.query_text = None
|
||||
self.database_query = DatabaseGUIQuery()
|
||||
self.query_thread = QueryThread(self.db_query_instance, self.query_text)
|
||||
self.sorted_results = []
|
||||
self.setWindowTitle("Suchergebnis")
|
||||
self.setGeometry(800, 600, 1600, 700) # Adjust size as needed
|
||||
|
||||
# Create central widget and set layout
|
||||
centralWidget = QWidget(self)
|
||||
self.setCentralWidget(centralWidget)
|
||||
mainLayout = QVBoxLayout(centralWidget)
|
||||
|
||||
queryFieldLayout = QHBoxLayout()
|
||||
|
||||
self.databaseQueryLineEdit = QueryLineEdit(self)
|
||||
self.databaseQueryLineEdit.setPlaceholderText(" Suchbegriff eingeben...")
|
||||
self.databaseQueryLineEdit.returnPressed.connect(self.execute_query_from_results_window)
|
||||
self.databaseQueryLineEdit.setStyleSheet("""
|
||||
QLineEdit {
|
||||
background-color: #3C4043;
|
||||
color: white;
|
||||
min-height: 20px;
|
||||
}
|
||||
""")
|
||||
queryFieldLayout.addWidget(self.databaseQueryLineEdit)
|
||||
# Create a progress bar for query in progress
|
||||
self.query_progress_bar = QProgressBar(self)
|
||||
self.query_progress_bar.setRange(0, 1) # Indeterminate mode
|
||||
self.query_progress_bar.setFixedWidth(100) # Initially hidden
|
||||
queryFieldLayout.addWidget(self.query_progress_bar)
|
||||
executeQueryButton = QPushButton("Suche ausführen", self)
|
||||
executeQueryButton.clicked.connect(self.execute_query_from_results_window)
|
||||
queryFieldLayout.addWidget(executeQueryButton)
|
||||
|
||||
mainLayout.addLayout(queryFieldLayout)
|
||||
|
||||
mainLayout.addWidget(QLabel(' Die Suche nach mehreren bestimmten Begriffen, Entitätentypen (Kurzform z.B. <ipv4>), Dateinamen und Timestamps ist möglich.\n Nach erfolgreicher Abfrage der Datenbank werden die Ergebnisse tabellarisch dargestellt. Sollte die Anzahl der Suchergebnisse sehr hoch sein, dauert der Prozess einige Sekunden. Die Anzahl der Ergebnisse ist aus Performancegründen auf die besten 512 beschränkt.\n Groß- und Kleinschreibung wird bei Zitatsuchen berücksichtigt; Werden mehrere Suchbegriffe eingegeben, fließt deren Abstand und Reihenfolge ins Ergebnis mit ein.', self))
|
||||
|
||||
# Create a horizontal layout for filter options
|
||||
filterLayout = QHBoxLayout()
|
||||
mainLayout.addLayout(filterLayout)
|
||||
|
||||
|
||||
# Updated stylesheet for the entire ResultsWindow
|
||||
stylesheet = """
|
||||
/* Styles for QTableWidget and headers */
|
||||
QTableWidget, QHeaderView::section {
|
||||
background-color: #2A2F35;
|
||||
color: white;
|
||||
border: 1px solid #4A4A4A;
|
||||
}
|
||||
|
||||
/* Style for QLineEdit */
|
||||
QLineEdit {
|
||||
background-color: #3A3F44;
|
||||
color: white;
|
||||
border: 1px solid #4A4A4A;
|
||||
}
|
||||
|
||||
/* Style for QPushButton */
|
||||
QPushButton {
|
||||
background-color: #4B5563;
|
||||
color: white;
|
||||
border-radius: 4px;
|
||||
padding: 5px;
|
||||
margin: 5px;
|
||||
}
|
||||
|
||||
QPushButton:hover {
|
||||
background-color: #5C677D;
|
||||
}
|
||||
|
||||
QPushButton:pressed {
|
||||
background-color: #2A2F35;
|
||||
}
|
||||
|
||||
/* Style for empty rows and other areas */
|
||||
QWidget {
|
||||
background-color: #2A2F35;
|
||||
color: white;
|
||||
}
|
||||
"""
|
||||
self.setStyleSheet(stylesheet)
|
||||
|
||||
|
||||
self.resultsTable = QTableWidget(self)
|
||||
|
||||
self.clearAllButton = QPushButton("Alle Filteroptionen loeschen", self)
|
||||
self.clearAllButton.clicked.connect(self.clearAllFilters)
|
||||
filterLayout.addWidget(self.clearAllButton)
|
||||
|
||||
# GroupBox for Entitätenfilter
|
||||
entitaten_filter_groupbox = QGroupBox("Entitätenfilter", self)
|
||||
entitaten_filter_layout = QVBoxLayout()
|
||||
entitaten_filter_groupbox.setLayout(entitaten_filter_layout)
|
||||
|
||||
self.distinct_entity_edit = QLineEdit(self)
|
||||
self.distinct_entity_edit.setPlaceholderText(" Enter distinct entity...")
|
||||
self.distinct_entity_edit.textChanged.connect(self.applyDistinctEntityTextFilter)
|
||||
entitaten_filter_layout.addWidget(self.distinct_entity_edit)
|
||||
|
||||
self.entityTypeComboBox = QComboBox()
|
||||
entitaten_filter_layout.addWidget(self.entityTypeComboBox)
|
||||
filterLayout.addWidget(entitaten_filter_groupbox)
|
||||
|
||||
# GroupBox for Fundstelle
|
||||
fundstelle_groupbox = QGroupBox("Fundstelle", self)
|
||||
fundstelle_layout = QVBoxLayout()
|
||||
fundstelle_groupbox.setLayout(fundstelle_layout)
|
||||
|
||||
self.file_name_edit = QLineEdit(self)
|
||||
self.file_name_edit.setPlaceholderText(" Enter file name...")
|
||||
self.file_name_edit.textChanged.connect(self.applyFileNameTextFilter)
|
||||
fundstelle_layout.addWidget(self.file_name_edit)
|
||||
|
||||
self.line_number_edit = QLineEdit(self)
|
||||
self.line_number_edit.setPlaceholderText(" Enter line number...")
|
||||
self.line_number_edit.textChanged.connect(self.applyLineNumberTextFilter)
|
||||
fundstelle_layout.addWidget(self.line_number_edit)
|
||||
filterLayout.addWidget(fundstelle_groupbox)
|
||||
|
||||
# GroupBox for timestamp filtering
|
||||
self.timestampFilterGroupbox = QGroupBox("Zeitrahmen", self)
|
||||
timestampFilterLayout = QVBoxLayout()
|
||||
self.timestampFilterGroupbox.setLayout(timestampFilterLayout)
|
||||
filterLayout.addWidget(self.timestampFilterGroupbox)
|
||||
|
||||
dateedit_layout = QHBoxLayout()
|
||||
self.startDateEdit = QDateTimeEdit(self)
|
||||
self.startDateEdit.setDisplayFormat("yyyy-MM-dd HH:mm:ss")
|
||||
self.startDateEdit.setCalendarPopup(True)
|
||||
self.startDateEdit.setDateTime(QDateTime.currentDateTime().addDays(-10000))
|
||||
dateedit_layout.addWidget(self.startDateEdit)
|
||||
self.startDateEdit.dateTimeChanged.connect(self.applyTimestampFilter)
|
||||
|
||||
self.endDateEdit = QDateTimeEdit(self)
|
||||
self.endDateEdit.setDisplayFormat("yyyy-MM-dd HH:mm:ss")
|
||||
self.endDateEdit.setCalendarPopup(True)
|
||||
self.endDateEdit.setDateTime(QDateTime.currentDateTime().addDays(1))
|
||||
dateedit_layout.addWidget(self.endDateEdit)
|
||||
self.endDateEdit.dateTimeChanged.connect(self.applyTimestampFilter)
|
||||
|
||||
self.timestamp_edit = QLineEdit(self)
|
||||
self.timestamp_edit.setPlaceholderText(" Enter timestamp...")
|
||||
self.timestamp_edit.textChanged.connect(self.applyTimestampTextFilter)
|
||||
timestampFilterLayout.addWidget(self.timestamp_edit)
|
||||
timestampFilterLayout.addLayout(dateedit_layout)
|
||||
|
||||
# GroupBox for Match Score, Flags, and Identifier
|
||||
more_filters_groupbox = QGroupBox("Weitere Filter", self)
|
||||
more_filters_layout = QHBoxLayout()
|
||||
more_filters_groupbox.setLayout(more_filters_layout)
|
||||
filterLayout.addWidget(more_filters_groupbox)
|
||||
|
||||
|
||||
|
||||
flag_layout = QVBoxLayout()
|
||||
flag_true_checkbox = QCheckBox("Flagged", self)
|
||||
flag_layout.addWidget(flag_true_checkbox)
|
||||
flag_false_checkbox = QCheckBox("Not Flagged", self)
|
||||
flag_layout.addWidget(flag_false_checkbox)
|
||||
more_filters_layout.addLayout(flag_layout)
|
||||
|
||||
meta_layout = QVBoxLayout()
|
||||
self.match_score_edit = QLineEdit(self)
|
||||
self.match_score_edit.setPlaceholderText(" Enter match score...")
|
||||
self.match_score_edit.textChanged.connect(self.applyMatchScoreTextFilter)
|
||||
meta_layout.addWidget(self.match_score_edit)
|
||||
|
||||
self.identifier_edit = QLineEdit(self)
|
||||
self.identifier_edit.setPlaceholderText(" Enter identifier...")
|
||||
self.identifier_edit.textChanged.connect(self.applyIdentifierTextFilter)
|
||||
meta_layout.addWidget(self.identifier_edit)
|
||||
more_filters_layout.addLayout(meta_layout)
|
||||
|
||||
self.resultsTable.setColumnCount(len(COLUMN_NAMES))
|
||||
self.resultsTable.setHorizontalHeaderLabels(COLUMN_NAMES)
|
||||
self.resultsTable.setSortingEnabled(True)
|
||||
mainLayout.addWidget(self.resultsTable)
|
||||
|
||||
bottomLayout = QHBoxLayout()
|
||||
self.query_status_label = QLabel(" Hintergrundsuchprozess läuft...")
|
||||
bottomLayout.addWidget(self.query_status_label)
|
||||
self.bottomButtonLayout = QHBoxLayout()
|
||||
flag_visible_items_button = QPushButton("Sichtbare Objekte markieren", self)
|
||||
flag_visible_items_button.clicked.connect(self.flagVisibleItems)
|
||||
self.bottomButtonLayout.addWidget(flag_visible_items_button)
|
||||
unflag_visible_items_button = QPushButton("Sichtbare Objekte demarkieren", self)
|
||||
unflag_visible_items_button.clicked.connect(self.unflagVisibleItems)
|
||||
self.bottomButtonLayout.addWidget(unflag_visible_items_button)
|
||||
clear_all_flags_button = QPushButton("Sämtliche Markierungen entfernen", self)
|
||||
clear_all_flags_button.clicked.connect(self.clearAllFlags)
|
||||
self.bottomButtonLayout.addWidget(clear_all_flags_button)
|
||||
bottomLayout.addLayout(self.bottomButtonLayout)
|
||||
mainLayout.addLayout(bottomLayout)
|
||||
# Create and add the Dismiss button
|
||||
self.dismissButton = QPushButton("Schließen", self)
|
||||
self.dismissButton.clicked.connect(self.close)
|
||||
mainLayout.addWidget(self.dismissButton)
|
||||
|
||||
self.populate_entity_type_combobox()
|
||||
|
||||
|
||||
|
||||
def populate_entity_type_combobox(self):
|
||||
entity_types = self.database_query.get_entity_types()
|
||||
self.entityTypeComboBox.addItem("Alle verfügbaren Typen", None) # Default option
|
||||
for entity_type in entity_types:
|
||||
self.entityTypeComboBox.addItem(entity_type, entity_type)
|
||||
self.entityTypeComboBox.currentIndexChanged.connect(self.applyEntityTypeFilter)
|
||||
|
||||
|
||||
def execute_query_from_results_window(self):
|
||||
self.resultsTable.clear()
|
||||
self.resultsTable.setRowCount(0)
|
||||
self.query_text = self.databaseQueryLineEdit.text().strip()
|
||||
if not self.query_text:
|
||||
# Handle empty query case
|
||||
return
|
||||
self.query_status_label.setText(" Suche wird mit Suchbegriffen " + self.query_text + " durchgeführt...")
|
||||
self.query_thread = QueryThread(self.database_query, self.query_text)
|
||||
self.query_thread.queryCompleted.connect(self.on_query_completed)
|
||||
self.query_thread.start()
|
||||
self.query_progress_bar.setRange(0, 0)
|
||||
|
||||
def set_query_and_execute(self, query_text):
|
||||
self.databaseQueryLineEdit.setText(query_text)
|
||||
self.execute_query_from_results_window()
|
||||
|
||||
|
||||
def on_query_completed(self, results_dict):
    """Populate the results table from *results_dict* (entities_id -> score).

    Results are sorted by descending score, truncated to the best 512, and
    inserted row by row. Updates and sorting are suspended while filling to
    keep the population fast; afterwards the table is sorted by the match
    score column and all active filters are re-applied.
    """
    table = self.resultsTable
    table.setUpdatesEnabled(False)
    table.setSortingEnabled(False)  # sorting while inserting would reshuffle rows
    table.setRowCount(0)
    table.setColumnCount(len(COLUMN_NAMES))

    self.sorted_results = sorted(results_dict.items(), key=lambda pair: pair[1], reverse=True)
    self.top_results = self.sorted_results[:512]

    self.query_status_label.setText(" Suche abgeschlossen // Anzahl Suchergebnisse: " + str(len(results_dict)) + " // Tabelle wird befüllt (das kann einige Zeit dauern)")
    for entities_id, score in self.top_results:
        self.insert_row(entities_id, score)

    # Leave indeterminate mode and show the final status.
    self.query_progress_bar.setRange(0, 1)
    self.query_status_label.setText(" Suche abgeschlossen // Anzahl Suchergebnisse: " + str(len(results_dict)) + " (begrenzt auf die besten 512)")

    table.setHorizontalHeaderLabels(COLUMN_NAMES)
    self.adjust_column_widths()
    table.setSortingEnabled(True)
    table.sortByColumn(6, Qt.DescendingOrder)  # column 6 = match score
    table.setUpdatesEnabled(True)
    table.update()
    self.applyAllFilters()
|
||||
|
||||
|
||||
def insert_row(self, entities_id, score):
    """Append one result row for the entity with primary key *entities_id*.

    Loads the entity, derives display values from its related records
    (falling back to "" when a relation is absent), and fills columns 0-8
    of the results table. *score* is shown in the match-score column (6).
    Any exception is logged and swallowed so a single bad row does not
    abort populating the table.
    """
    try:
        with session_scope() as session:
            # Fetch the entity from the database
            entity = session.query(EntitiesTable).filter(EntitiesTable.entities_id == entities_id).first()
            if not entity:
                return  # Skip if the entity is not found

            # Fetch related data; every relationship may be missing, in
            # which case the corresponding cell stays empty.
            distinct_entity = entity.entity.distinct_entity if entity.entity else ""
            entity_type = entity.regex_library.gui_name if entity.regex_library else ""
            file_name = entity.file.file_name if entity.file else ""
            line_number = str(entity.line_number) if entity.line_number is not None else ""
            entry_timestamp = entity.entry_timestamp.strftime("%Y-%m-%d %H:%M:%S") if entity.entry_timestamp else ""
            context_large = entity.context.context_large if entity.context else ""

            # Insert a new row at the end of the table
            row_position = self.resultsTable.rowCount()
            self.resultsTable.insertRow(row_position)

            search_terms = self.query_text.split()
            # NOTE(review): a fresh HighlightDelegate is created for every
            # inserted row and re-assigned below; only the last instance is
            # effective -- consider hoisting this out of insert_row.
            self.highlight_delegate = HighlightDelegate(self.resultsTable, search_terms)

            # Set the values for each column
            self.resultsTable.setItem(row_position, 0, QTableWidgetItem(distinct_entity))
            self.resultsTable.setItem(row_position, 1, QTableWidgetItem(entity_type))
            self.resultsTable.setItem(row_position, 2, QTableWidgetItem(file_name))
            self.resultsTable.setItem(row_position, 3, QTableWidgetItem(line_number))
            self.resultsTable.setItem(row_position, 4, QTableWidgetItem(entry_timestamp))
            context_widget = ScrollableTextWidget(context_large, search_terms, distinct_entity)
            self.resultsTable.setCellWidget(row_position, 5, context_widget)
            match_score_item = NumericTableWidgetItem(str(score))
            self.resultsTable.setItem(row_position, 6, match_score_item)
            flag_button_widget = FlagButton(entities_id, entity.flag)
            self.resultsTable.setCellWidget(row_position, COLUMN_NAMES.index('Flag'), flag_button_widget)
            self.resultsTable.setItem(row_position, 8, QTableWidgetItem(str(entities_id)))

            self.resultsTable.setRowHeight(row_position, DEFAULT_ROW_HEIGHT)

            # Attach the highlight delegate to the plain-text columns only.
            for column_index in range(self.resultsTable.columnCount()):
                if column_index in [0, 1, 2, 3, 4]:
                    self.resultsTable.setItemDelegateForColumn(column_index, self.highlight_delegate)
            self.resultsTable.update()
    except Exception as e:
        logging.error(f"Error inserting row: {e}")
|
||||
|
||||
|
||||
def adjust_column_widths(self):
    """Apply the fixed per-column widths from COLUMN_WIDTHS to the table."""
    for column, width in enumerate(COLUMN_WIDTHS):
        self.resultsTable.setColumnWidth(column, width)
|
||||
|
||||
def flagVisibleItems(self):
    """Flag every currently visible (unfiltered) row that is not yet flagged."""
    flag_column = COLUMN_NAMES.index('Flag')
    for row in range(self.resultsTable.rowCount()):
        if self.resultsTable.isRowHidden(row):
            continue
        widget = self.resultsTable.cellWidget(row, flag_column)
        if widget and not widget.flag:
            widget.toggle_flag()  # also persists the flag to the database
|
||||
|
||||
def unflagVisibleItems(self):
    """Remove the flag from every currently visible (unfiltered) row."""
    flag_column = COLUMN_NAMES.index('Flag')
    for row in range(self.resultsTable.rowCount()):
        if self.resultsTable.isRowHidden(row):
            continue
        widget = self.resultsTable.cellWidget(row, flag_column)
        if widget and widget.flag:
            widget.toggle_flag()  # also persists the change to the database
|
||||
|
||||
def clearAllFlags(self):
    """Ask for confirmation and, if confirmed, clear all flags.

    Clears the flag column for every row in the database and resets every
    flag-button widget in the results table to the unflagged state.
    """
    reply = QMessageBox.question(self, 'Confirm Action',
                                 "Sollen tatsächlich alle Markierungen entfernt werden?\nDas wirkt sich auf alle Einträge in der Datenbank aus!",
                                 QMessageBox.Yes | QMessageBox.No, QMessageBox.No)

    if reply != QMessageBox.Yes:
        return

    # Clear all flags in the database.
    FlagButton.clearAllFlagsInDatabase()

    # Reset every flag-button widget in the table to the unflagged state.
    # (This loop was previously duplicated verbatim; the second copy could
    # run even when the user declined, desynchronizing GUI and database.)
    flag_column = COLUMN_NAMES.index('Flag')
    for row in range(self.resultsTable.rowCount()):
        flag_button_widget = self.resultsTable.cellWidget(row, flag_column)
        if flag_button_widget and flag_button_widget.flag:
            flag_button_widget.flag = False
            flag_button_widget.button.setText("_")
            flag_button_widget.update_button_style()
|
||||
|
||||
def applyTextFilter(self, column_index, filter_text):
    """Hide rows whose cell in *column_index* does not contain *filter_text*.

    Matching is case-insensitive substring search. An empty filter is a
    no-op (visibility is left untouched so other active filters are not
    reset). Fixed: the original repeated the `not filter_text` test inside
    the loop although the surrounding guard already ensured it was truthy;
    the lowercasing of the needle is also hoisted out of the loop now.
    """
    if not filter_text:
        return
    needle = filter_text.lower()
    for row in range(self.resultsTable.rowCount()):
        item = self.resultsTable.item(row, column_index)
        show_row = bool(item) and needle in item.text().lower()
        self.resultsTable.setRowHidden(row, not show_row)
|
||||
|
||||
def applyDistinctEntityTextFilter(self):
    """Filter rows by the 'Distinct Entity' column using its line edit."""
    self.applyTextFilter(COLUMN_NAMES.index('Distinct Entity'),
                         self.distinct_entity_edit.text())
|
||||
def applyFileNameTextFilter(self):
    """Filter rows by the 'File Name' column using its line edit."""
    self.applyTextFilter(COLUMN_NAMES.index('File Name'),
                         self.file_name_edit.text())
|
||||
|
||||
def applyLineNumberTextFilter(self):
    """Filter rows by the 'Line Number' column using its line edit."""
    self.applyTextFilter(COLUMN_NAMES.index('Line Number'),
                         self.line_number_edit.text())
|
||||
|
||||
def applyMatchScoreTextFilter(self):
    """Filter rows by the 'Match Score' column using its line edit."""
    self.applyTextFilter(COLUMN_NAMES.index('Match Score'),
                         self.match_score_edit.text())
|
||||
def applyTimestampTextFilter(self):
    """Filter rows by the 'Timestamp' column using its line edit."""
    self.applyTextFilter(COLUMN_NAMES.index('Timestamp'),
                         self.timestamp_edit.text())
|
||||
def applyIdentifierTextFilter(self):
    """Filter rows by the 'Identifier' column using its line edit."""
    self.applyTextFilter(COLUMN_NAMES.index('Identifier'),
                         self.identifier_edit.text())
|
||||
|
||||
|
||||
|
||||
|
||||
def applyEntityTypeFilter(self):
    """Hide rows whose 'Entity Type' cell differs from the combo selection.

    The default combo entry carries data None and shows every row.
    """
    wanted = self.entityTypeComboBox.currentData()
    column = COLUMN_NAMES.index('Entity Type')
    for row in range(self.resultsTable.rowCount()):
        cell = self.resultsTable.item(row, column)
        visible = wanted is None or (cell is not None and cell.text() == wanted)
        self.resultsTable.setRowHidden(row, not visible)
|
||||
|
||||
def applyTimestampFilter(self):
    """Hide rows whose timestamp falls outside the chosen date range.

    Rows with an empty or missing timestamp cell are always shown.
    Timestamps are parsed with the same "%Y-%m-%d %H:%M:%S" format used
    when the table was populated.
    """
    lower = self.startDateEdit.dateTime().toPyDateTime()
    upper = self.endDateEdit.dateTime().toPyDateTime()
    column = COLUMN_NAMES.index('Timestamp')

    for row in range(self.resultsTable.rowCount()):
        cell = self.resultsTable.item(row, column)
        if not cell or cell.text() == "":
            # No timestamp on this row -> never filtered out by date.
            self.resultsTable.setRowHidden(row, False)
            continue
        stamp = datetime.datetime.strptime(cell.text(), "%Y-%m-%d %H:%M:%S")
        self.resultsTable.setRowHidden(row, not (lower <= stamp <= upper))
|
||||
|
||||
|
||||
def applyAllFilters(self):
    """Re-apply every filter to the results table in sequence.

    The text filters skip their pass entirely when their line edit is
    empty; the entity-type and timestamp filters always run and may
    overwrite row visibility set by an earlier filter in this chain.
    NOTE(review): applyIdentifierTextFilter() is not invoked here although
    clearAllFilters() resets identifier_edit -- possibly an oversight;
    confirm intended behavior.
    """
    self.applyDistinctEntityTextFilter()
    self.applyFileNameTextFilter()
    self.applyLineNumberTextFilter()
    self.applyMatchScoreTextFilter()
    self.applyTimestampTextFilter()
    self.applyEntityTypeFilter()
    self.applyTimestampFilter()
|
||||
|
||||
|
||||
|
||||
def clearAllFilters(self):
    """Reset every filter control to its neutral state and re-apply filters.

    Resets the entity-type combo to the "all types" entry, widens the date
    range back to its defaults, clears all text filters, and re-runs the
    filter chain so all rows become visible again.
    """
    self.entityTypeComboBox.setCurrentIndex(0)
    # Fixed: the original wrapped the value in a redundant QDateTime copy
    # constructor (QDateTime(QDateTime(...))); construct it directly.
    self.startDateEdit.setDateTime(QDateTime(2009, 1, 1, 0, 0, 0))
    self.endDateEdit.setDateTime(QDateTime.currentDateTime())
    self.distinct_entity_edit.clear()
    self.file_name_edit.clear()
    self.line_number_edit.clear()
    self.match_score_edit.clear()
    self.timestamp_edit.clear()
    self.identifier_edit.clear()
    self.applyAllFilters()
|
||||
|
||||
|
||||
class QueryLineEdit(QLineEdit):
    """Line edit that emits returnPressed when the user confirms the query.

    NOTE(review): this redeclares returnPressed, shadowing QLineEdit's
    built-in signal of the same name -- presumably intentional so the
    signal fires exactly once per key press; confirm before removing.
    """

    returnPressed = pyqtSignal()

    def keyPressEvent(self, event):
        # Qt distinguishes the main Return key from the numeric-keypad
        # Enter key; treat both as "run the query" (the original handled
        # only Qt.Key_Return, ignoring keypad Enter).
        if event.key() in (Qt.Key_Return, Qt.Key_Enter):
            self.returnPressed.emit()
        else:
            super().keyPressEvent(event)
|
||||
|
||||
|
||||
class HighlightDelegate(QStyledItemDelegate):
    """Delegate that renders cell text as HTML with search-term highlighting.

    Terms prefixed with '+' ("positive" terms) that occur in the cell text
    are wrapped in a yellow highlight span; all text is drawn white on the
    table's dark background.
    """

    def __init__(self, parent=None, search_terms=None):
        super().__init__(parent)
        self.search_terms = search_terms or []

    def paint(self, painter, option, index):
        """Draw the cell by laying the highlighted HTML out in a QTextDocument."""
        painter.save()

        wrap_options = QTextOption()
        wrap_options.setWrapMode(QTextOption.WrapAtWordBoundaryOrAnywhere)

        document = QTextDocument()
        document.setDefaultTextOption(wrap_options)
        document.setDefaultFont(option.font)
        document.setHtml(self.get_highlighted_text(index.model().data(index)))
        # Constrain layout to the cell width so wrapping matches the column.
        document.setTextWidth(option.rect.width())

        painter.translate(option.rect.topLeft())
        document.drawContents(painter)
        painter.restore()

    def get_highlighted_text(self, text):
        """Return *text* as HTML with every positive search term highlighted."""
        if text is None:
            text = ""

        marked_up = f"<span style='color: white;'>{text}</span>"
        for term in self.search_terms:
            # '+' marks a mandatory term; strip the '+'/'-' prefix and any
            # remaining punctuation before matching.
            is_positive = term.startswith('+')
            clean_term = re.sub(r'[^\w\s]', '', term.lstrip('+-')).lower()

            # Highlight only positive terms that actually occur in the text
            # (case-insensitive).
            if is_positive and clean_term in text.lower():
                pattern = re.compile(re.escape(clean_term), re.IGNORECASE)
                replacement = f"<span style='background-color: yellow; color: black;'>{clean_term}</span>"
                marked_up = pattern.sub(replacement, marked_up)

        return marked_up.replace("\n", "<br>")
|
||||
|
||||
|
||||
|
||||
class ScrollableTextWidget(QWidget):
    """Context-cell widget: a styled, read-only text box with highlighting.

    Shows the (potentially long) context text of one result row, highlights
    the distinct entity in blue and the search terms in yellow, and scrolls
    so the distinct entity is visible.
    """

    def __init__(self, text, search_terms, distinct_entity, parent=None):
        super().__init__(parent)
        layout = QVBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)
        self.text_edit = CustomTextEdit(self)
        self.text_edit.setReadOnly(True)

        # Apply styles including scrollbar styles (dark theme to match the
        # results table).
        self.text_edit.setStyleSheet("""
            QTextEdit {
                background-color: #2A2F35; /* Dark blue-ish background */
                color: white; /* White text */
            }
            QTextEdit QScrollBar:vertical {
                border: none;
                background-color: #3A3F44; /* Dark scrollbar background */
                width: 8px; /* Width of the scrollbar */
            }
            QTextEdit QScrollBar::handle:vertical {
                background-color: #6E6E6E; /* Scroll handle color */
                border-radius: 4px; /* Rounded corners for the handle */
            }
            QTextEdit QScrollBar::add-line:vertical, QTextEdit QScrollBar::sub-line:vertical {
                background: none;
            }
        """)

        # Set the text with highlighting applied.
        self.setHighlightedText(text, search_terms, distinct_entity)
        layout.addWidget(self.text_edit)

        # Scroll so the distinct entity is in view.
        self.scroll_to_text(distinct_entity)

    def setHighlightedText(self, text, search_terms, distinct_entity):
        """Render *text* as HTML with entity and search-term highlighting.

        The distinct entity is highlighted blue; search terms yellow.
        NOTE(review): the entity pattern is built from html.escape(...) of
        the entity but applied to unescaped text -- entities containing
        '&', '<' or '>' will not match; confirm whether that case occurs.
        NOTE(review): the regex substitutions operate on the HTML string,
        so a term that also matches inside previously inserted markup
        (e.g. "span" or "color") could corrupt the HTML.
        """
        if text is None:
            text = ""

        # Wrap the original text in a span to maintain the white color.
        text_with_color = f"<span style='color: white;'>{text}</span>"

        # Highlight the distinct entity in a different color (blue).
        if distinct_entity:
            distinct_entity_escaped = html.escape(distinct_entity)
            text_with_color = re.sub(
                re.escape(distinct_entity_escaped),
                lambda match: f"<span style='background-color: blue; color: white;'>{match.group()}</span>",
                text_with_color,
                flags=re.IGNORECASE
            )

        for term in search_terms:
            # '+' marks a mandatory ("positive") term; strip the prefix and
            # remaining punctuation before matching.
            is_positive = term.startswith('+')
            clean_term = re.sub(r'[^\w\s]', '', term.lstrip('+-'))

            # Positive terms are always highlighted; other terms only when
            # they occur in the text (case-insensitive).
            if is_positive or clean_term.lower() in text.lower():
                regex = re.compile(re.escape(clean_term), re.IGNORECASE)
                highlighted_term = f"<span style='background-color: yellow; color: black;'>{clean_term}</span>"
                text_with_color = regex.sub(highlighted_term, text_with_color)

        self.text_edit.setHtml(text_with_color.replace("\n", "<br>"))

    def scroll_to_text(self, text):
        """Move the cursor (and thus the viewport) to the first hit of *text*."""
        if text:
            cursor = self.text_edit.document().find(text)
            self.text_edit.setTextCursor(cursor)
|
||||
|
||||
class CustomTextEdit(QTextEdit):
    """QTextEdit that keeps mouse-wheel scrolling local while it can scroll.

    Prevents wheel events from also scrolling the enclosing table view as
    long as this widget's own vertical scrollbar is visible.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        # Show the vertical scrollbar only when the content overflows.
        self.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)

    def wheelEvent(self, event):
        # Let the text edit scroll itself first ...
        super().wheelEvent(event)
        # ... then decide whether the parent may also react: swallow the
        # event while this widget has a visible scrollbar of its own.
        if self.verticalScrollBar().isVisible():
            event.accept()
        else:
            event.ignore()
|
||||
|
||||
|
||||
class FlagButton(QWidget):
    """Table-cell widget with a toggle button mirroring one entity's flag.

    Shows "FLAG" (yellow) when the entity is flagged and "_" otherwise;
    toggling immediately persists the new state to the database.
    """

    def __init__(self, entities_id, flag, parent=None):
        super().__init__(parent)
        self.entities_id = entities_id  # primary key of the backing row
        self.flag = flag                # cached flag state shown by the button

        self.layout = QHBoxLayout(self)
        self.button = QPushButton("FLAG" if flag else "_", self)
        self.update_button_style()
        self.button.clicked.connect(self.toggle_flag)
        self.layout.addWidget(self.button)
        self.layout.setContentsMargins(0, 0, 0, 0)
        self.setLayout(self.layout)

    def toggle_flag(self):
        """Invert the flag, refresh the button, and persist the change."""
        self.flag = not self.flag
        self.button.setText("FLAG" if self.flag else "_")
        self.update_button_style()

        # Persist the new state for this entity.
        with session_scope() as session:
            entity = session.query(EntitiesTable).filter(EntitiesTable.entities_id == self.entities_id).first()
            if entity:
                entity.flag = self.flag
                session.commit()

    @staticmethod
    def clearAllFlagsInDatabase():
        """Set flag=False on every row of EntitiesTable.

        Fixed: uses a single bulk UPDATE instead of loading every entity
        object into the ORM and mutating it individually, which was slow
        and memory-hungry for large databases.
        """
        with session_scope() as session:
            session.query(EntitiesTable).update(
                {EntitiesTable.flag: False}, synchronize_session=False)
            session.commit()

    def update_button_style(self):
        """Color the button yellow while flagged, default style otherwise."""
        if self.flag:
            self.button.setStyleSheet("QPushButton { background-color: yellow; color: black; }")
        else:
            self.button.setStyleSheet("")
|
||||
|
||||
|
||||
|
||||
class NumericTableWidgetItem(QTableWidgetItem):
    """Table item that sorts by numeric value instead of lexicographically."""

    def __lt__(self, other):
        # Fixed: float() raised ValueError for empty or non-numeric cell
        # text, which could break Qt's sort; fall back to string order.
        try:
            return float(self.text()) < float(other.text())
        except (TypeError, ValueError):
            return self.text() < other.text()
|
||||
|
255
logline_leviathan/gui/settings_gui.py
Normal file
255
logline_leviathan/gui/settings_gui.py
Normal file
@ -0,0 +1,255 @@
|
||||
from PyQt5.QtWidgets import QGroupBox, QCheckBox, QLineEdit, QDialog, QTableWidget, QVBoxLayout, QTableWidgetItem, QPushButton, QHBoxLayout, QFileDialog
|
||||
from PyQt5.QtCore import Qt, pyqtSignal
|
||||
from PyQt5.QtGui import QPalette, QColor
|
||||
from logline_leviathan.gui.ui_helper import UIHelper
|
||||
from logline_leviathan.file_processor.file_processor_thread import FileProcessorThread
|
||||
from logline_leviathan.database.database_manager import EntityTypesTable, session_scope
|
||||
import os
|
||||
|
||||
|
||||
class FileSettingsWindow(QDialog):
    """Dialog for managing the set of files selected for ingestion.

    Operates on the *file_paths* list shared with the main window; every
    mutation is mirrored back through main_window methods so the selection
    stays consistent across windows. All mutating actions are refused while
    a processing thread is running.
    """

    def __init__(self, file_paths, main_window=None):
        super().__init__(main_window)
        self.file_paths = file_paths  # shared list of selected file paths
        self.main_window = main_window # Reference to MainWindow
        self.ui_helper = UIHelper(self)
        # NOTE(review): processing_thread is initialized to None and not
        # visibly assigned elsewhere in this class -- confirm who sets it,
        # otherwise isProcessing() always returns a falsy value here.
        self.processing_thread = None
        self.initUI()

    def initUI(self):
        """Build the dialog: action buttons, filter box, file table, close button."""
        self.layout = QVBoxLayout(self)

        # Button row for add/remove actions.
        self.buttonLayout = QHBoxLayout()
        self.removeSelectedButton = QPushButton("Remove Selected Files")
        self.removeAllButton = QPushButton("Remove All")
        self.addFilesButton = QPushButton("Add Files to Selection")
        self.addDirButton = QPushButton("Add Directory to Selection")
        self.buttonLayout.addWidget(self.removeSelectedButton)
        self.buttonLayout.addWidget(self.removeAllButton)
        self.buttonLayout.addWidget(self.addFilesButton)
        self.buttonLayout.addWidget(self.addDirButton)

        # Connect buttons to their handlers.
        self.removeSelectedButton.clicked.connect(self.removeSelected)
        self.removeAllButton.clicked.connect(self.removeAll)
        self.addFilesButton.clicked.connect(self.openFileNameDialog)
        self.addDirButton.clicked.connect(self.openDirNameDialog)

        self.layout.addLayout(self.buttonLayout)

        self.filterLineEdit = QLineEdit()
        self.filterLineEdit.setPlaceholderText("Filter files...")
        self.filterLineEdit.textChanged.connect(self.filterTableItems)
        self.layout.insertWidget(1, self.filterLineEdit) # Inserting QLineEdit above the table

        # Single-column table listing the selected file paths.
        self.tableWidget = QTableWidget()
        self.tableWidget.setColumnCount(1)
        self.tableWidget.setHorizontalHeaderLabels(["File Path"])
        self.layout.addWidget(self.tableWidget)
        self.populateTable()

        # Close button.
        self.closeButton = QPushButton("Close")
        self.layout.addWidget(self.closeButton)
        self.closeButton.clicked.connect(self.close)

    def populateTable(self):
        """Rebuild the table from the current file_paths list."""
        self.tableWidget.clearContents()
        self.tableWidget.setRowCount(len(self.file_paths))
        for row, file_path in enumerate(self.file_paths):
            self.tableWidget.setItem(row, 0, QTableWidgetItem(file_path))
        self.tableWidget.resizeColumnsToContents()

    def filterTableItems(self, text):
        """Hide rows whose path does not contain *text* (case-insensitive)."""
        for row in range(self.tableWidget.rowCount()):
            item = self.tableWidget.item(row, 0)
            self.tableWidget.setRowHidden(row, text.lower() not in item.text().lower())

    def removeSelected(self):
        """Remove the selected rows' files from the selection (both locally
        and in the main window), unless processing is in progress."""
        if self.isProcessing():
            self.main_window.showProcessingWarning()
            return
        # Get the selected rows from the table.
        selected_rows = self.tableWidget.selectionModel().selectedRows()

        # Extract file paths from the selected rows.
        selected_files = [self.tableWidget.item(row.row(), 0).text() for row in selected_rows]

        # Remove each selected file.
        for file_path in selected_files:
            if file_path in self.file_paths:
                self.file_paths.remove(file_path) # Remove from local file_paths list
                self.main_window.removeSingleFile(file_path) # Call method in main_window

        # Refresh the table and update the file count.
        self.populateTable()
        self.main_window.updateFileCountLabel()

    def removeAll(self):
        """Clear the whole file selection via the main window."""
        if self.isProcessing():
            self.main_window.showProcessingWarning()
            return
        self.main_window.clearFileSelection()
        self.main_window.updateFileCountLabel()

    def openFileNameDialog(self):
        """Delegate 'add files' to the main window's file dialog."""
        if self.isProcessing():
            self.main_window.showProcessingWarning()
            return
        self.main_window.openFileNameDialog()
        self.main_window.updateFileCountLabel()

    def openDirNameDialog(self):
        """Delegate 'add directory' to the main window's directory dialog."""
        if self.isProcessing():
            self.main_window.showProcessingWarning()
            return
        self.main_window.openDirNameDialog()
        self.main_window.updateFileCountLabel()

    def isProcessing(self):
        """Return True while a file-processing thread is running."""
        return self.processing_thread and self.processing_thread.isRunning()
|
||||
|
||||
class AnalysisSettingsWindow(QDialog):
    """Dialog with per-entity-type parser toggles and config shortcuts.

    The left group box lists one checkbox per entity type (state persisted
    in EntityTypesTable.parser_enabled); the right group box offers buttons
    to open the YAML config, wordlist and scripts directories.
    """

    # Emitted after the checkbox list has been rebuilt following a change.
    parsersUpdated = pyqtSignal()
    def __init__(self, main_window=None):
        super().__init__(main_window)
        self.main_window = main_window
        self.ui_helper = UIHelper(self)

        self.initUI()

    def initUI(self):
        """Build the two group boxes and the close button."""
        self.layout = QHBoxLayout(self)

        # Create the parser-settings QGroupBox with enable/disable-all row.
        self.parserSettingsGroupBox = QGroupBox("Parser Settings")
        self.parserSettingsGroupBoxLayout = QVBoxLayout()
        self.toggleAllButtonsLayout = QHBoxLayout()
        self.parserSettingsGroupBoxLayout.addLayout(self.toggleAllButtonsLayout)
        self.enableAllButton = QPushButton("Enable All")
        self.disableAllButton = QPushButton("Disable All")
        self.toggleAllButtonsLayout.addWidget(self.enableAllButton)
        self.enableAllButton.clicked.connect(self.enableAllCheckboxes)
        self.toggleAllButtonsLayout.addWidget(self.disableAllButton)
        self.disableAllButton.clicked.connect(self.disableAllCheckboxes)
        self.parserSettingsGroupBox.setLayout(self.parserSettingsGroupBoxLayout)
        self.layout.addWidget(self.parserSettingsGroupBox)

        # Populate the QGroupBox with one checkbox per entity type.
        self.populateGroupBox()

        self.configDirectoriesGroupBox = QGroupBox("Config Directories")
        self.configDirectoriesGroupBoxLayout = QVBoxLayout()
        self.configDirectoriesGroupBox.setLayout(self.configDirectoriesGroupBoxLayout)
        self.configDirectoriesGroupBoxLayout.setAlignment(Qt.AlignTop)
        self.layout.addWidget(self.configDirectoriesGroupBox)
        # Button: open the entities.yaml regex configuration.
        self.inspectRegexButton = QPushButton('YAML-Konfigurationsdatei inspizieren...', self)
        self.inspectRegexButton.clicked.connect(self.openRegexLibrary)
        self.configDirectoriesGroupBoxLayout.addWidget(self.inspectRegexButton)

        # Button: open the wordlist directory.
        self.openWordlistPathButton = QPushButton('Wordlist-Verzeichnis oeffnen...', self)
        self.openWordlistPathButton.clicked.connect(self.openWordlistPath)
        self.configDirectoriesGroupBoxLayout.addWidget(self.openWordlistPathButton)

        # Button: open the scripts (parser) directory.
        self.openScriptsPathButton = QPushButton('Scripts-Verzeichnis oeffnen...', self)
        self.openScriptsPathButton.clicked.connect(self.openScriptsPath)
        self.configDirectoriesGroupBoxLayout.addWidget(self.openScriptsPathButton)

        # Close Button
        self.closeButton = QPushButton("Close")
        self.layout.addWidget(self.closeButton)
        self.closeButton.clicked.connect(self.close)

    def populateGroupBox(self):
        """Create one colored checkbox per entity type from the database.

        Entity types whose name starts with "category_" are grouping
        entries and get no checkbox. Checkbox color reflects the current
        parser_enabled state (green = enabled, red = disabled).
        """
        self.parserSettingsGroupBoxLayout.setAlignment(Qt.AlignTop)
        with session_scope() as db_session:
            entity_types = db_session.query(EntityTypesTable).all()

            self.checkboxes = []

            for et in entity_types:
                # Omit entries that start with "category_"
                if et.entity_type.startswith("category_"):
                    continue

                parser_info = []
                if et.regex_pattern:
                    parser_info.append("regex")
                if et.script_parser:
                    parser_info.append("script")

                checkBoxText = f"{et.gui_name} ({', '.join(parser_info)})" if parser_info else et.gui_name
                checkBox = QCheckBox(checkBoxText)

                # Fetch the current state of parser_enabled from the database
                # for each entity type.
                # NOTE(review): this re-queries the row that `et` already is;
                # et.parser_enabled would presumably suffice -- confirm.
                current_parser_enabled = db_session.query(EntityTypesTable).filter(EntityTypesTable.entity_type_id == et.entity_type_id).first().parser_enabled

                checkBox.setChecked(current_parser_enabled)
                color = 'green' if current_parser_enabled else 'red'
                checkBox.setStyleSheet(f"QCheckBox {{ color: {color}; }}")

                self.parserSettingsGroupBoxLayout.addWidget(checkBox)
                self.checkboxes.append((checkBox, et.entity_type_id))
                # Default argument binds the id per iteration (late-binding fix).
                checkBox.toggled.connect(lambda checked, et_id=et.entity_type_id: self.updateParserEnabled(checked, et_id))

    def enableAllCheckboxes(self):
        """Check every parser checkbox (each toggle persists individually)."""
        for checkBox, _ in self.checkboxes:
            checkBox.setChecked(True)

    def disableAllCheckboxes(self):
        """Uncheck every parser checkbox (each toggle persists individually)."""
        for checkBox, _ in self.checkboxes:
            checkBox.setChecked(False)

    def updateParserEnabled(self, checked, entity_type_id):
        """Persist a checkbox change and refresh the UI to reflect it."""
        with session_scope() as db_session: # Using a session context manager
            et = db_session.query(EntityTypesTable).filter_by(entity_type_id=entity_type_id).first()
            if et:
                et.parser_enabled = checked
                db_session.commit() # Commit changes
        if self.main_window:
            self.main_window.refreshApplicationState() # Refresh the UI state
        self.refreshGroupBox() # Refresh the group box to reflect changes

    def refreshGroupBox(self):
        """Rebuild the checkbox list and notify listeners via parsersUpdated."""
        # Clear and repopulate the group box.
        for checkBox, _ in self.checkboxes:
            checkBox.deleteLater() # Properly delete the checkbox
        self.populateGroupBox()
        self.parsersUpdated.emit()

    def openWordlistPath(self):
        """Open ./data/wordlist in the platform file manager, if it exists."""
        wordlist_path = os.path.join(os.getcwd(), 'data', 'wordlist')
        if os.path.exists(wordlist_path):
            self.ui_helper.openFile(wordlist_path) # Call openFile method on the UIHelper instance
        else:
            # Silently ignore a missing directory (status message disabled).
            #self.statusLabel.setText(" wordlist nicht unter ./daten/ gefunden.")
            pass

    def openRegexLibrary(self):
        """Open ./data/entities.yaml in the platform default app, if present."""
        path_to_yaml = os.path.join(os.getcwd(), 'data', 'entities.yaml')
        if os.path.exists(path_to_yaml):
            self.ui_helper.openFile(path_to_yaml) # Call openFile method on the UIHelper instance
        else:
            # Silently ignore a missing file (status message disabled).
            #self.statusLabel.setText(" entities.yaml nicht unter ./daten/ gefunden.")
            pass

    def openScriptsPath(self):
        """Open ./data/parser in the platform file manager, if it exists."""
        scripts_path = os.path.join(os.getcwd(), 'data', 'parser')
        if os.path.exists(scripts_path):
            self.ui_helper.openFile(scripts_path) # Call openFile method on the UIHelper instance
        else:
            # Silently ignore a missing directory (status message disabled).
            #self.statusLabel.setText(" scripts nicht unter ./daten/ gefunden.")
            pass
|
||||
|
||||
|
119
logline_leviathan/gui/ui_helper.py
Normal file
119
logline_leviathan/gui/ui_helper.py
Normal file
@ -0,0 +1,119 @@
|
||||
from PyQt5.QtWidgets import QFileDialog
|
||||
from PyQt5.QtGui import QDesktopServices
|
||||
from PyQt5.QtCore import QUrl
|
||||
import os
|
||||
import math
|
||||
import logging
|
||||
import csv
|
||||
import sys
|
||||
import subprocess
|
||||
|
||||
class UIHelper():
    """Helper bundling file-selection dialogs and small utility routines.

    Works against *main_window*, which is expected to expose filePaths (a
    list), isProcessing(), showProcessingWarning(), updateFileCountLabel()
    and fileCountLabel.
    """

    def __init__(self, main_window):
        self.main_window = main_window

    def openFileNameDialog(self):
        """Let the user pick files and append new ones to filePaths."""
        if self.main_window.isProcessing():
            self.main_window.showProcessingWarning()
            return
        options = QFileDialog.Options()
        files, _ = QFileDialog.getOpenFileNames(self.main_window, "Dateien selektieren", "", "All Files (*)", options=options)
        if files:
            for file in files:
                if file not in self.main_window.filePaths:  # avoid duplicates
                    self.main_window.filePaths.append(file)
            self.main_window.updateFileCountLabel()

    def openDirNameDialog(self):
        """Let the user pick directories (repeatedly, until cancel) and add
        every file below each chosen directory to the selection."""
        if self.main_window.isProcessing():
            self.main_window.showProcessingWarning()
            return

        options = QFileDialog.Options()
        options |= QFileDialog.DontUseNativeDialog
        fileDialog = QFileDialog(self.main_window, "Ordner selektieren", "", options=options)
        fileDialog.setFileMode(QFileDialog.Directory)
        fileDialog.setOption(QFileDialog.ShowDirsOnly, True)
        fileDialog.setOption(QFileDialog.DontResolveSymlinks, True)

        # Store previously selected directories so re-picking one is a no-op.
        selected_directories = []

        while True:
            if fileDialog.exec_() == QFileDialog.Accepted:
                directory = fileDialog.selectedFiles()[0]
                if directory and directory not in selected_directories:
                    selected_directories.append(directory)
                    self.addAllFilesFromDirectory(directory)
            else:
                break # Exit loop if user cancels
        self.main_window.updateFileCountLabel()

    def calculate_total_size(self, file_paths):
        """Return the combined size of all existing files as a display string."""
        total_size = sum(os.path.getsize(f) for f in file_paths if os.path.exists(f))
        return self.format_size(total_size)

    def format_size(self, size_bytes):
        """Format a byte count with a binary-magnitude unit, e.g. '1.5 MB'."""
        if size_bytes == 0:
            return "0B"
        size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
        i = int(math.floor(math.log(size_bytes, 1024)))
        p = math.pow(1024, i)
        s = round(size_bytes / p, 2)
        return f"{s} {size_name[i]}"

    def addAllFilesFromDirectory(self, directory):
        """Recursively add every file under *directory* to filePaths (no duplicates)."""
        for root, dirs, files in os.walk(directory):
            for filename in files:
                file_path = os.path.join(root, filename)
                if file_path not in self.main_window.filePaths:
                    self.main_window.filePaths.append(file_path)

    def clearFileSelection(self):
        """Empty the file selection and reset the count label."""
        if self.main_window.isProcessing():
            self.main_window.showProcessingWarning()
            return
        self.main_window.filePaths.clear()
        self.main_window.updateFileCountLabel()
        self.main_window.fileCountLabel.setText(' Keine Dateien selektiert')

    def removeSingleFile(self, file):
        """Remove one path from the selection, if present."""
        if self.main_window.isProcessing():
            self.main_window.showProcessingWarning()
            return
        if self.main_window.filePaths:
            # Remove the file by value.
            if file in self.main_window.filePaths:
                self.main_window.filePaths.remove(file)
            self.main_window.updateFileCountLabel()

    def generate_files_log(self, file_path, files_list):
        """Write *files_list* to *file_path* as a one-column CSV.

        Fixed: the loop variable used to be named `file`, shadowing the
        open file handle of the same name inside the `with` block.
        """
        try:
            with open(file_path, mode='w', newline='', encoding='utf-8') as log_file:
                writer = csv.writer(log_file)
                for entry in files_list:
                    writer.writerow([entry])
        except Exception as e:
            logging.error(f"Error generating log file {file_path}: {e}")

    def openFile(self, file_path):
        """Open *file_path* with the platform's default application."""
        if sys.platform == 'win32':
            os.startfile(file_path)
        elif sys.platform == 'darwin': # macOS
            subprocess.Popen(['open', file_path])
        else: # Linux and other Unix-like systems
            subprocess.Popen(['xdg-open', file_path])
|
||||
|
||||
def format_time(seconds):
    """Render a duration in seconds as '<m> min <s> sec'.

    Returns "N/A" for NaN or infinite input (the original's self-inequality
    NaN test and `== float('inf')` check missed negative infinity; this
    uses math.isnan/math.isinf, which cover both).
    """
    if math.isnan(seconds) or math.isinf(seconds):
        return "N/A"

    minutes = int(seconds // 60)
    seconds = int(seconds % 60)
    return f"{minutes} min {seconds} sec"
|
||||
|
4
logline_leviathan/gui/versionvars.py
Normal file
4
logline_leviathan/gui/versionvars.py
Normal file
@ -0,0 +1,4 @@
|
||||
# External feedback / support link shown in the GUI.
repo_link = "https://cloud.mikoshi.de/call/qhtkcnmn#/"
# Label text for the link (German: "Feedback // Support (opens external link)").
repo_link_text = "Feedback // Support (öffnet externen Link)"
# Version banner string displayed in the application.
version_string = "2024-02-08 - Version: 0.4.4 // TESTING // UPDATE REGULARLY"
# Log level name; presumably consumed by the logging setup -- confirm usage.
loglevel = "INFO"
|
0
logline_leviathan/plugins/flag_setter.py
Normal file
0
logline_leviathan/plugins/flag_setter.py
Normal file
13
requirements.txt
Normal file
13
requirements.txt
Normal file
@ -0,0 +1,13 @@
|
||||
sqlalchemy
|
||||
pyYAML
|
||||
PyQt5
|
||||
odfpy
|
||||
pandas
|
||||
python-magic
|
||||
openpyxl
|
||||
PyMuPDF
|
||||
tldextract
|
||||
fuzzywuzzy
|
||||
python-Levenshtein
|
||||
phonenumbers
|
||||
python-docx
|
Loading…
x
Reference in New Issue
Block a user