2025-09-03 13:20:23 +02:00

137 lines
6.4 KiB
Python

import logging
import os
from logline_leviathan.database.database_manager import FileMetadata, DistinctEntitiesTable, EntitiesTable, ContextTable, session_scope
from datetime import datetime
def handle_file_metadata(db_session, file_path, file_mimetype, sheet_name=None):
    """Fetch or create the FileMetadata row for a file and commit it.

    The stored file name is the base name of ``file_path``; when ``sheet_name``
    is given it is appended as ``<base name>_<sheet name>``. An existing row
    (matched on ``file_path`` and that file name) has its MIME type overwritten
    with ``file_mimetype``; otherwise a new row is added.

    Args:
        db_session: Session object supplied by the caller; it is used for the
            query, add, commit and rollback calls.
        file_path: Path of the file being processed.
        file_mimetype: MIME type to store on the row.
        sheet_name: Optional sheet name appended to the stored file name.

    Returns:
        The FileMetadata instance, or None if any step raised.
    """
    try:
        # Construct file name with or without sheet name
        base_file_name = os.path.basename(file_path)
        modified_file_name = f"{base_file_name}_{sheet_name}" if sheet_name else base_file_name

        # Search for existing metadata using the modified file name
        file_metadata = (
            db_session.query(FileMetadata)
            .filter_by(file_path=file_path, file_name=modified_file_name)
            .first()
        )
        if file_metadata is None:
            # Log the name that was looked up; the lookup result itself is None here.
            logging.debug(f"File metadata for {modified_file_name} does not exist.")
            file_metadata = FileMetadata(
                file_name=modified_file_name,
                file_path=file_path,
                file_mimetype=file_mimetype,
            )
            db_session.add(file_metadata)
        else:
            logging.debug(f"File metadata {file_metadata} already exists.")
            # Update the MIME type if the record already exists
            file_metadata.file_mimetype = file_mimetype
            logging.debug(f"Updated file mimetype: {file_metadata.file_mimetype}")

        logging.debug(f"committing file metadata {file_metadata}")
        db_session.commit()
        return file_metadata
    except Exception as e:
        logging.error(f"Error handling file metadata for {file_path}: {e}")
        # The error is swallowed here, so the caller never gets a chance to
        # roll back. Assuming a SQLAlchemy session, a failed flush/commit
        # leaves it unusable until rollback() is called.
        try:
            db_session.rollback()
        except Exception as rollback_error:
            logging.error(f"Rollback failed after file metadata error for {file_path}: {rollback_error}")
        return None
def handle_distinct_entity(db_session, match_text, entity_type_id):
    """Fetch or create the DistinctEntitiesTable row for a matched text.

    Rows are matched on ``distinct_entity == match_text`` and
    ``entity_types_id == entity_type_id``. A new row is added and committed
    only when no match exists.

    Args:
        db_session: Session object supplied by the caller; it is used for the
            query, add, commit and rollback calls.
        match_text: The matched text stored as the distinct entity value.
        entity_type_id: Identifier of the entity type the match belongs to.

    Returns:
        The existing or newly created row, or None if any step raised.
    """
    try:
        distinct_entity = (
            db_session.query(DistinctEntitiesTable)
            .filter_by(distinct_entity=match_text, entity_types_id=entity_type_id)
            .first()
        )
        if distinct_entity is not None:
            logging.debug(f"Distinct entity {distinct_entity} already exists.")
            return distinct_entity

        logging.debug(f"Distinct entity {match_text} does not exist.")
        distinct_entity = DistinctEntitiesTable(
            distinct_entity=match_text,
            entity_types_id=entity_type_id,
        )
        db_session.add(distinct_entity)
        logging.debug(f"committing distinct entity {distinct_entity}")
        db_session.commit()
        return distinct_entity
    except Exception as e:
        logging.error(f"Error handling distinct entity {match_text}: {e}")
        # The error is swallowed here, so the caller never gets a chance to
        # roll back. Assuming a SQLAlchemy session, a failed flush/commit
        # leaves it unusable until rollback() is called.
        try:
            db_session.rollback()
        except Exception as rollback_error:
            logging.error(f"Rollback failed after distinct entity error for {match_text}: {rollback_error}")
        return None
def handle_individual_entity(
    db_session,
    entity,
    file_metadata,
    line_number,
    timestamp,
    entity_types_id,
    abort_flag,
    thread_instance,
):
    """Fetch or create the EntitiesTable row for one occurrence of an entity.

    An occurrence is identified by the distinct entity id, the file id and the
    line number. When a new row is committed, the running entity counter on
    ``thread_instance`` is incremented under its lock and the processing rate
    is re-emitted.

    Args:
        db_session: Session object supplied by the caller; it is used for the
            query, add, commit and rollback calls.
        entity: Object exposing ``distinct_entities_id``.
        file_metadata: Object exposing ``file_id`` and ``file_path``.
        line_number: Line number of the occurrence within the file.
        timestamp: Entry timestamp. A non-empty string is parsed with the
            format ``%Y-%m-%d %H:%M:%S``; an unparseable string is logged and
            stored as None. Other values are stored unchanged.
        entity_types_id: Identifier of the entity type.
        abort_flag: Zero-argument callable; a truthy result skips all work.
        thread_instance: Object exposing ``total_entities_count``,
            ``total_entities_count_lock`` (with ``lock()``/``unlock()``) and
            ``calculate_and_emit_rate()``.

    Returns:
        The existing or newly created row, or None when aborted or when any
        step raised.
    """
    try:
        if abort_flag():
            return None

        if timestamp and isinstance(timestamp, str):
            try:
                timestamp = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
            except ValueError:
                logging.warning(f"Invalid timestamp format: {timestamp}")
                timestamp = None

        individual_entity = (
            db_session.query(EntitiesTable)
            .filter_by(
                distinct_entities_id=entity.distinct_entities_id,
                file_id=file_metadata.file_id,
                line_number=line_number,
            )
            .first()
        )
        if individual_entity is not None:
            logging.debug(f"Individual entity {individual_entity} already exists.")
            return individual_entity

        # Log the lookup key; the lookup result itself is None here.
        logging.debug(
            f"Individual entity for distinct entity {entity.distinct_entities_id} "
            f"at line {line_number} does not exist."
        )
        individual_entity = EntitiesTable(
            distinct_entities_id=entity.distinct_entities_id,
            file_id=file_metadata.file_id,
            line_number=line_number,
            entry_timestamp=timestamp,
            entity_types_id=entity_types_id,
        )
        db_session.add(individual_entity)
        logging.debug(f"committing individual entity {individual_entity}")
        db_session.commit()

        # Update the shared counter under its lock; always release the lock.
        thread_instance.total_entities_count_lock.lock()
        try:
            thread_instance.total_entities_count += 1
        finally:
            thread_instance.total_entities_count_lock.unlock()
        thread_instance.calculate_and_emit_rate()
        return individual_entity
    except Exception as e:
        # file_metadata may itself be None (the failure that brought us here),
        # so the handler must not dereference it unconditionally.
        source_path = getattr(file_metadata, "file_path", None)
        logging.error(f"Error handling individual entity in {source_path}, line {line_number}: {e}")
        # The error is swallowed here, so the caller never gets a chance to
        # roll back. Assuming a SQLAlchemy session, a failed flush/commit
        # leaves it unusable until rollback() is called.
        try:
            db_session.rollback()
        except Exception as rollback_error:
            logging.error(f"Rollback failed after individual entity error at line {line_number}: {rollback_error}")
        return None
#def count_newlines(content, start, end):
# return content[start:end].count('\n')
def handle_context_snippet(db_session, individual_entity, content, start_line, end_line):
    """Store small, medium and large context snippets for an entity occurrence.

    Nothing is written when a ContextTable row already exists for
    ``individual_entity.entities_id``. Otherwise three snippets are cut from
    ``content``, each spanning ``start_line``..``end_line`` inclusive plus 0, 8
    or 15 lines of padding on either side (clamped to the bounds of
    ``content``), and committed as one new row.

    Args:
        db_session: Session object supplied by the caller; it is used for the
            query, add, commit and rollback calls.
        individual_entity: Object exposing ``entities_id``.
        content: Sliceable sequence of line strings; snippets are joined with
            newline characters.
        start_line: Index into ``content`` of the first matched line.
        end_line: Index into ``content`` of the last matched line.

    Returns:
        None. Errors are logged rather than raised.
    """
    try:
        # Check for an existing context first so the snippets are only built
        # when they will actually be stored.
        existing_context = (
            db_session.query(ContextTable)
            .filter_by(entities_id=individual_entity.entities_id)
            .first()
        )
        if existing_context is not None:
            logging.debug(f"Existing context {existing_context} already exists.")
            return

        # Lines of padding on each side of the match, per snippet size.
        context_sizes = {
            'Kontext - gleiche Zeile': 0,
            'Kontext - mittelgroß': 8,
            'Kontext - umfangreich': 15
            #'Index Context': 30
        }
        total_lines = len(content)
        context_snippets = {
            size: "\n".join(
                content[max(0, start_line - lines):min(total_lines, end_line + lines + 1)]
            )
            for size, lines in context_sizes.items()
        }

        context = ContextTable(
            entities_id=individual_entity.entities_id,
            context_small=context_snippets['Kontext - gleiche Zeile'],
            context_medium=context_snippets['Kontext - mittelgroß'],
            context_large=context_snippets['Kontext - umfangreich'],
        )
        db_session.add(context)
        logging.debug(f"committing context {context}")
        db_session.commit()
    except Exception as e:
        logging.error(f"Error handling context snippet: {e}")
        # The error is swallowed here, so the caller never gets a chance to
        # roll back. Assuming a SQLAlchemy session, a failed flush/commit
        # leaves it unusable until rollback() is called.
        try:
            db_session.rollback()
        except Exception as rollback_error:
            logging.error(f"Rollback failed after context snippet error: {rollback_error}")