137 lines
6.4 KiB
Python
137 lines
6.4 KiB
Python
import logging
|
|
import os
|
|
from logline_leviathan.database.database_manager import FileMetadata, DistinctEntitiesTable, EntitiesTable, ContextTable, session_scope
|
|
from datetime import datetime
|
|
|
|
|
|
def handle_file_metadata(db_session, file_path, file_mimetype, sheet_name=None):
    """Fetch or create the ``FileMetadata`` row for a file and commit it.

    The stored file name is the base name of ``file_path``. When ``sheet_name``
    is truthy it is appended as ``<basename>_<sheet_name>``, so the same path
    can have one row per sheet name.

    Args:
        db_session: Database session supplied by the caller. It is used for
            the lookup and is committed by this function.
        file_path: Path of the file being processed.
        file_mimetype: MIME type to store. An existing row is updated to it.
        sheet_name: Optional suffix for the stored file name.

    Returns:
        The new or existing ``FileMetadata`` instance, or ``None`` if any step
        failed. On failure the error is logged and the session is rolled back.
    """
    try:
        # Construct file name with or without sheet name
        base_file_name = os.path.basename(file_path)
        modified_file_name = f"{base_file_name}_{sheet_name}" if sheet_name else base_file_name

        # Search for existing metadata using the modified file name
        file_metadata = db_session.query(FileMetadata).filter_by(
            file_path=file_path, file_name=modified_file_name
        ).first()

        if file_metadata is None:
            # Log the name we searched for; the lookup result itself is None here.
            logging.debug(f"File metadata for {modified_file_name} does not exist.")
            file_metadata = FileMetadata(
                file_name=modified_file_name,
                file_path=file_path,
                file_mimetype=file_mimetype,
            )
            db_session.add(file_metadata)
        else:
            logging.debug(f"File metadata {file_metadata} already exists.")
            # Update the MIME type if the record already exists
            file_metadata.file_mimetype = file_mimetype
            logging.debug(f"Updated file mimetype: {file_metadata.file_mimetype}")

        logging.debug(f"committing file metadata {file_metadata}")
        db_session.commit()
        return file_metadata
    except Exception as e:
        logging.error(f"Error handling file metadata for {file_path}: {e}")
        # The exception is swallowed here, so the caller never gets a chance to
        # roll back. Without this the shared session stays in a failed state
        # and every later operation on it would fail as well.
        try:
            db_session.rollback()
        except Exception as rollback_error:
            logging.error(f"Rollback failed after file metadata error for {file_path}: {rollback_error}")
        return None
|
|
|
|
|
|
|
|
def handle_distinct_entity(db_session, match_text, entity_type_id):
    """Fetch or create the ``DistinctEntitiesTable`` row for a matched text.

    Rows are looked up by the pair (``match_text``, ``entity_type_id``). A
    missing row is added and committed immediately.

    Args:
        db_session: Database session supplied by the caller. It is committed
            by this function when a new row is inserted.
        match_text: The matched entity text, stored as ``distinct_entity``.
        entity_type_id: Entity type identifier, stored as ``entity_types_id``.

    Returns:
        The new or existing ``DistinctEntitiesTable`` instance, or ``None`` if
        any step failed. On failure the error is logged and the session is
        rolled back.
    """
    try:
        distinct_entity = db_session.query(DistinctEntitiesTable).filter_by(
            distinct_entity=match_text, entity_types_id=entity_type_id
        ).first()

        if distinct_entity is not None:
            logging.debug(f"Distinct entity {distinct_entity} already exists.")
            return distinct_entity

        logging.debug(f"Distinct entity {match_text} does not exist.")
        distinct_entity = DistinctEntitiesTable(distinct_entity=match_text, entity_types_id=entity_type_id)
        db_session.add(distinct_entity)
        logging.debug(f"committing distinct entity {distinct_entity}")
        db_session.commit()
        return distinct_entity
    except Exception as e:
        logging.error(f"Error handling distinct entity {match_text}: {e}")
        # The exception is swallowed here, so roll back explicitly; otherwise
        # the shared session stays in a failed state for later handlers.
        try:
            db_session.rollback()
        except Exception as rollback_error:
            logging.error(f"Rollback failed after distinct entity error for {match_text}: {rollback_error}")
        return None
|
|
|
|
|
|
|
|
def handle_individual_entity(db_session, entity, file_metadata, line_number, timestamp, entity_types_id, abort_flag, thread_instance):
    """Fetch or create the ``EntitiesTable`` row for one entity occurrence.

    An occurrence is identified by ``entity.distinct_entities_id``,
    ``file_metadata.file_id`` and ``line_number``. A missing row is added and
    committed; afterwards ``thread_instance.total_entities_count`` is
    incremented under ``thread_instance.total_entities_count_lock`` and
    ``thread_instance.calculate_and_emit_rate()`` is called. An existing row
    is returned unchanged and the counter is not touched.

    Args:
        db_session: Database session supplied by the caller. It is committed
            by this function when a new row is inserted.
        entity: Object providing ``distinct_entities_id``.
        file_metadata: Object providing ``file_id`` (and ``file_path`` for
            error reporting).
        line_number: Line number of the occurrence.
        timestamp: Entry timestamp. A non-empty string is parsed with the
            format ``'%Y-%m-%d %H:%M:%S'``; a string that does not match is
            logged and replaced by ``None``. Other values are stored as given.
        entity_types_id: Entity type identifier stored on the new row.
        abort_flag: Zero-argument callable; a truthy result stops processing
            before any database access.
        thread_instance: Object exposing ``total_entities_count``,
            ``total_entities_count_lock`` (with ``lock()``/``unlock()``) and
            ``calculate_and_emit_rate()``.

    Returns:
        The new or existing ``EntitiesTable`` instance, or ``None`` when
        aborted or when any step failed. On failure the error is logged and
        the session is rolled back.
    """
    try:
        if abort_flag():
            return None

        if timestamp and isinstance(timestamp, str):
            try:
                timestamp = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
            except ValueError:
                logging.warning(f"Invalid timestamp format: {timestamp}")
                timestamp = None

        individual_entity = db_session.query(EntitiesTable).filter_by(
            distinct_entities_id=entity.distinct_entities_id,
            file_id=file_metadata.file_id,
            line_number=line_number
        ).first()

        if individual_entity is not None:
            logging.debug(f"Individual entity {individual_entity} already exists.")
            return individual_entity

        # Log the line we searched for; the lookup result itself is None here.
        logging.debug(f"Individual entity for line {line_number} does not exist.")
        individual_entity = EntitiesTable(
            distinct_entities_id=entity.distinct_entities_id,
            file_id=file_metadata.file_id,
            line_number=line_number,
            entry_timestamp=timestamp,
            entity_types_id=entity_types_id
        )
        db_session.add(individual_entity)
        logging.debug(f"committing individual entity {individual_entity}")
        db_session.commit()

        # Count the entity only after the commit succeeded.
        thread_instance.total_entities_count_lock.lock()  # Lock the mutex
        try:
            thread_instance.total_entities_count += 1
        finally:
            thread_instance.total_entities_count_lock.unlock()  # Unlock the mutex

        thread_instance.calculate_and_emit_rate()
        return individual_entity
    except Exception as e:
        # file_metadata may be None (e.g. when the metadata step failed), so
        # read the path defensively: the error handler must never raise.
        failed_path = getattr(file_metadata, "file_path", None)
        logging.error(f"Error handling individual entity in {failed_path}, line {line_number}: {e}")
        # The exception is swallowed here, so roll back explicitly; otherwise
        # the shared session stays in a failed state for later handlers.
        try:
            db_session.rollback()
        except Exception as rollback_error:
            logging.error(f"Rollback failed after individual entity error in {failed_path}, line {line_number}: {rollback_error}")
        return None
|
|
|
|
|
|
#def count_newlines(content, start, end):
|
|
# return content[start:end].count('\n')
|
|
|
|
def handle_context_snippet(db_session, individual_entity, content, start_line, end_line):
    """Store small/medium/large context snippets for an entity, if not yet stored.

    For each context size, the snippet is the slice of ``content`` from
    ``start_line - radius`` to ``end_line + radius`` (inclusive, clamped to the
    bounds of ``content``), joined with newlines. Radii are 0, 8 and 15 lines
    for ``context_small``, ``context_medium`` and ``context_large``.

    Args:
        db_session: Database session supplied by the caller. It is committed
            by this function when a new row is inserted.
        individual_entity: Object providing ``entities_id``.
        content: Indexable sequence of strings that is sliced and joined with
            newlines. NOTE(review): presumably the file's lines; confirm
            against the caller.
        start_line: Index into ``content`` where the match starts.
        end_line: Index into ``content`` where the match ends.

    Returns:
        None. Errors are logged and the session is rolled back.
    """
    try:
        # Check for an existing row first so snippets are only built when
        # they will actually be stored.
        existing_context = db_session.query(ContextTable).filter_by(
            entities_id=individual_entity.entities_id
        ).first()
        if existing_context is not None:
            logging.debug(f"Existing context {existing_context} already exists.")
            return

        # Extra lines captured on each side of the match, keyed by the
        # ContextTable column that receives the snippet.
        context_radii = {
            'context_small': 0,
            'context_medium': 8,
            'context_large': 15,
        }
        total_lines = len(content)
        context_snippets = {
            column: "\n".join(
                content[max(0, start_line - radius):min(total_lines, end_line + radius + 1)]
            )
            for column, radius in context_radii.items()
        }

        context = ContextTable(entities_id=individual_entity.entities_id, **context_snippets)
        db_session.add(context)
        logging.debug(f"committing context {context}")
        db_session.commit()
    except Exception as e:
        logging.error(f"Error handling context snippet: {e}")
        # The exception is swallowed here, so roll back explicitly; otherwise
        # the shared session stays in a failed state for later handlers.
        try:
            db_session.rollback()
        except Exception as rollback_error:
            logging.error(f"Rollback failed after context snippet error: {rollback_error}")
|
|
|