import logging
import os
from datetime import datetime

from logline_leviathan.database.database_manager import (
    FileMetadata,
    DistinctEntitiesTable,
    EntitiesTable,
    ContextTable,
    session_scope,
)


def _rollback_quietly(db_session):
    """Roll back *db_session* after a failure, never raising itself.

    The public helpers in this module are best-effort (they log and return
    None on error). After a failed flush/commit the session has to be rolled
    back before it can be used again, otherwise every later call that shares
    the session fails as well. A failing rollback is only logged so that the
    best-effort contract of the callers is preserved.
    """
    try:
        db_session.rollback()
    except Exception as rollback_error:
        logging.error(f"Error rolling back database session: {rollback_error}")


def handle_file_metadata(db_session, file_path, file_mimetype, sheet_name=None):
    """Fetch or create the FileMetadata row for a file and commit it.

    The stored file name is the base name of ``file_path``; when
    ``sheet_name`` is truthy it is appended as ``"<basename>_<sheet_name>"``.
    An existing row (matched on ``file_path`` and that file name) gets its
    ``file_mimetype`` overwritten; otherwise a new row is added.

    Args:
        db_session: Caller-owned session exposing ``query``/``add``/``commit``
            (presumably a SQLAlchemy ``Session`` - confirm against caller).
        file_path: Path of the file; stored as given.
        file_mimetype: MIME type to store on the row.
        sheet_name: Optional sheet name used to disambiguate the file name.

    Returns:
        The FileMetadata instance, or None if any error occurred (the error
        is logged and the session is rolled back).
    """
    #with session_scope() as db_session:
    try:
        # Construct file name with or without sheet name
        base_file_name = os.path.basename(file_path)
        modified_file_name = f"{base_file_name}_{sheet_name}" if sheet_name else base_file_name

        # Search for existing metadata using the modified file name
        file_metadata = (
            db_session.query(FileMetadata)
            .filter_by(file_path=file_path, file_name=modified_file_name)
            .first()
        )

        if not file_metadata:
            logging.debug(f"File metadata {file_metadata} does not exist.")
            file_metadata = FileMetadata(
                file_name=modified_file_name,
                file_path=file_path,
                file_mimetype=file_mimetype,
            )
            db_session.add(file_metadata)
        else:
            logging.debug(f"File metadata {file_metadata} already exists.")
            # Update the MIME type if the record already exists
            file_metadata.file_mimetype = file_mimetype
            logging.debug(f"Updated file mimetype: {file_metadata.file_mimetype}")

        # Both branches are persisted by the same commit.
        logging.debug(f"committing file metadata {file_metadata}")
        db_session.commit()
        return file_metadata
    except Exception as e:
        logging.error(f"Error handling file metadata for {file_path}: {e}")
        # Leave the shared session usable for subsequent calls.
        _rollback_quietly(db_session)
        return None


def handle_distinct_entity(db_session, match_text, entity_type_id):
    """Fetch or create the DistinctEntitiesTable row for a matched text.

    Rows are matched on ``distinct_entity == match_text`` and
    ``entity_types_id == entity_type_id``. A commit is issued only when a
    new row had to be created.

    Args:
        db_session: Caller-owned session exposing ``query``/``add``/``commit``
            (presumably a SQLAlchemy ``Session`` - confirm against caller).
        match_text: The entity text to look up or store.
        entity_type_id: Identifier of the entity type the text belongs to.

    Returns:
        The DistinctEntitiesTable instance, or None if any error occurred
        (the error is logged and the session is rolled back).
    """
    #with session_scope() as db_session:
    try:
        distinct_entity = (
            db_session.query(DistinctEntitiesTable)
            .filter_by(distinct_entity=match_text, entity_types_id=entity_type_id)
            .first()
        )
        if not distinct_entity:
            logging.debug(f"Distinct entity {match_text} does not exist.")
            distinct_entity = DistinctEntitiesTable(
                distinct_entity=match_text,
                entity_types_id=entity_type_id,
            )
            db_session.add(distinct_entity)
            logging.debug(f"committing distinct entity {distinct_entity}")
            db_session.commit()
        else:
            logging.debug(f"Distinct entity {distinct_entity} already exists.")

        return distinct_entity
    except Exception as e:
        logging.error(f"Error handling distinct entity {match_text}: {e}")
        # Leave the shared session usable for subsequent calls.
        _rollback_quietly(db_session)
        return None


def handle_individual_entity(db_session, entity, file_metadata, line_number, timestamp, entity_types_id, abort_flag, thread_instance):
    """Fetch or create the EntitiesTable row for one occurrence of an entity.

    An occurrence is identified by ``entity.distinct_entities_id``,
    ``file_metadata.file_id`` and ``line_number``. When a new row is created
    it is committed, ``thread_instance.total_entities_count`` is incremented
    under ``thread_instance.total_entities_count_lock`` and
    ``thread_instance.calculate_and_emit_rate()`` is called. An existing row
    is returned unchanged (its timestamp and type are not updated).

    Args:
        db_session: Caller-owned session exposing ``query``/``add``/``commit``
            (presumably a SQLAlchemy ``Session`` - confirm against caller).
        entity: Object providing ``distinct_entities_id``.
        file_metadata: Object providing ``file_id`` and ``file_path``.
        line_number: Line number of the occurrence within the file.
        timestamp: Entry timestamp. A non-empty string is parsed with the
            format ``'%Y-%m-%d %H:%M:%S'``; an unparsable string is logged as
            a warning and replaced by None. Other values are stored as given.
        entity_types_id: Identifier of the entity type, stored on new rows.
        abort_flag: Zero-argument callable; a truthy result makes the function
            return None before touching the database.
        thread_instance: Object providing ``total_entities_count``,
            ``total_entities_count_lock`` (with ``lock()``/``unlock()``) and
            ``calculate_and_emit_rate()``.

    Returns:
        The EntitiesTable instance, or None when aborted or when any error
        occurred (the error is logged and the session is rolled back).
    """
    #with session_scope() as db_session:
    try:
        if abort_flag():
            return None

        if timestamp and isinstance(timestamp, str):
            try:
                timestamp = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
            except ValueError:
                logging.warning(f"Invalid timestamp format: {timestamp}")
                timestamp = None

        individual_entity = db_session.query(EntitiesTable).filter_by(
            distinct_entities_id=entity.distinct_entities_id,
            file_id=file_metadata.file_id,
            line_number=line_number
        ).first()

        if not individual_entity:
            logging.debug(f"Individual entity {individual_entity} does not exist.")
            individual_entity = EntitiesTable(
                distinct_entities_id=entity.distinct_entities_id,
                file_id=file_metadata.file_id,
                line_number=line_number,
                entry_timestamp=timestamp,
                entity_types_id=entity_types_id
            )
            db_session.add(individual_entity)
            logging.debug(f"committing individual entity {individual_entity}")
            db_session.commit()

            thread_instance.total_entities_count_lock.lock()  # Lock the mutex
            try:
                thread_instance.total_entities_count += 1
            finally:
                thread_instance.total_entities_count_lock.unlock()  # Unlock the mutex

            thread_instance.calculate_and_emit_rate()
        else:
            logging.debug(f"Individual entity {individual_entity} already exists.")

        return individual_entity
    except Exception as e:
        # file_metadata may be None (handle_file_metadata returns None on
        # failure); reading .file_path directly here would raise from inside
        # the handler and escape this function instead of returning None.
        failed_path = getattr(file_metadata, 'file_path', '<unknown>')
        logging.error(f"Error handling individual entity in {failed_path}, line {line_number}: {e}")
        # Leave the shared session usable for subsequent calls.
        _rollback_quietly(db_session)
        return None

#def count_newlines(content, start, end):
#    return content[start:end].count('\n')


def handle_context_snippet(db_session, individual_entity, content, start_line, end_line):
    """Store small/medium/large context snippets for an entity occurrence.

    Three snippets are cut from ``content`` around ``start_line``..``end_line``
    with 0, 8 and 15 extra items on each side (clamped to the bounds of
    ``content``) and joined with newlines. They are written to a new
    ContextTable row unless a row for ``individual_entity.entities_id``
    already exists, in which case nothing is changed.

    Args:
        db_session: Caller-owned session exposing ``query``/``add``/``commit``
            (presumably a SQLAlchemy ``Session`` - confirm against caller).
        individual_entity: Object providing ``entities_id``.
        content: Sliceable sequence of strings (presumably the lines of the
            file - confirm against caller).
        start_line: Index into ``content`` of the first line of the match.
        end_line: Index into ``content`` of the last line of the match
            (inclusive).

    Returns:
        None. Errors are logged and the session is rolled back.
    """
    #with session_scope() as db_session:
    try:
        # Snippet label -> number of surrounding lines on each side.
        context_sizes = {
            'Kontext - gleiche Zeile': 0,
            'Kontext - mittelgroß': 8,
            'Kontext - umfangreich': 15
            #'Index Context': 30
        }

        context_snippets = {}
        for size, lines in context_sizes.items():
            context_start = max(0, start_line - lines)
            # +1 because end_line is inclusive while slicing is exclusive.
            context_end = min(len(content), end_line + lines + 1)
            context_snippets[size] = "\n".join(content[context_start:context_end])

        # Check if a similar context already exists
        existing_context = (
            db_session.query(ContextTable)
            .filter_by(entities_id=individual_entity.entities_id)
            .first()
        )
        if not existing_context:
            context = ContextTable(
                entities_id=individual_entity.entities_id,
                context_small=context_snippets['Kontext - gleiche Zeile'],
                context_medium=context_snippets['Kontext - mittelgroß'],
                context_large=context_snippets['Kontext - umfangreich']
            )
            db_session.add(context)
            logging.debug(f"committing context {context}")
            db_session.commit()
        else:
            logging.debug(f"Existing context {existing_context} already exists.")
    except Exception as e:
        logging.error(f"Error handling context snippet: {e}")
        # Leave the shared session usable for subsequent calls.
        _rollback_quietly(db_session)