"""Export extracted entities and their log context as an HTML report."""

import html
import re

import pandas as pd

from logline_leviathan.exporter.export_constructor import generate_dataframe

# Context selection ("compact summary without context") under which the
# ' // ' separators in 'Sources' and 'Timestamp' cells get an HTML line break.
_COMPACT_SUMMARY_SELECTION = 'Kompakte Zusammenfassung ohne Kontext'


def create_regex_pattern_from_entity(entity):
    """Compile a case-insensitive pattern matching any word of *entity*.

    The entity is split on whitespace and every word is matched literally
    (regex metacharacters are escaped).

    Args:
        entity: Entity text. Anything that is not a string, as well as an
            empty or whitespace-only string, yields a pattern that never
            matches.

    Returns:
        A compiled ``re.Pattern`` with ``re.IGNORECASE`` set.
    """
    words = entity.split() if isinstance(entity, str) else []
    if not words:
        # An empty alternation would match the empty string at every
        # position; "(?!)" is a lookahead that can never succeed.
        return re.compile(r"(?!)", re.IGNORECASE)

    # Alternation picks the first alternative that matches, so a short word
    # listed before a longer word starting with it would shadow the longer
    # one. Put longer words first; dict.fromkeys drops duplicates while
    # keeping a deterministic order.
    ordered = sorted(dict.fromkeys(words), key=len, reverse=True)
    return re.compile("|".join(re.escape(word) for word in ordered), re.IGNORECASE)


def highlight_entities_in_context(context, entity_regex, *, escape=False):
    """Wrap every match of *entity_regex* in *context* in a ``<mark>`` tag.

    Args:
        context: Text to search. Non-string values (``None``, NaN, ...) are
            returned unchanged.
        entity_regex: Compiled pattern or pattern string.
        escape: When true, the matched and unmatched text is HTML-escaped so
            that only the inserted ``<mark>`` tags are interpreted as markup.
            Defaults to ``False``, which leaves the text itself untouched.

    Returns:
        The context with matches wrapped in ``<mark>...</mark>``.
    """
    if not isinstance(context, str):
        return context

    render = html.escape if escape else str
    pieces = []
    cursor = 0
    for match in re.finditer(entity_regex, context):
        if match.start() == match.end():
            # Zero-width matches would only insert empty tags.
            continue
        pieces.append(render(context[cursor:match.start()]))
        pieces.append(f"<mark>{render(match.group())}</mark>")
        cursor = match.end()
    pieces.append(render(context[cursor:]))
    return "".join(pieces)


def _escape_cell(value):
    """Return *value* HTML-escaped if it is a string, otherwise unchanged."""
    return html.escape(value) if isinstance(value, str) else value


def _replace_in_cell(value, old, new):
    """Replace *old* with *new* in string cells; pass other values through."""
    return value.replace(old, new) if isinstance(value, str) else value


def generate_html_file(output_file_path, db_session, checkboxes, files, context_selection, only_crossmatches, start_date=None, end_date=None, include_flagged=False, only_flagged=False, only_unflagged=False):
    """Write the export DataFrame to *output_file_path* as an HTML table.

    The data comes from ``generate_dataframe``; every argument except
    *output_file_path* is forwarded to it unchanged. Cell text is
    HTML-escaped, entity words are highlighted inside the 'Context' column,
    and line breaks are rendered as ``<br>``.

    Args:
        output_file_path: Destination path; the file is written as UTF-8 and
            overwritten if it exists.
        db_session, checkboxes, files, only_crossmatches, start_date,
        end_date, include_flagged, only_flagged, only_unflagged: Forwarded to
            ``generate_dataframe``.
        context_selection: Forwarded to ``generate_dataframe``; additionally,
            for the compact summary selection the ' // ' separators in
            'Sources' and 'Timestamp' are followed by a line break.

    Returns:
        None.
    """
    # Fetch data using the DataFrame constructor
    df = generate_dataframe(db_session, checkboxes, files, context_selection, only_crossmatches, start_date, end_date, include_flagged, only_flagged, only_unflagged)

    if 'Context' in df.columns:
        if 'Entity' in df.columns:
            # Highlight before escaping the other columns: the patterns must
            # be built from the raw entity text. Pairing values positionally
            # avoids label-based writes, which are ambiguous when the index
            # contains duplicates.
            df['Context'] = [
                highlight_entities_in_context(
                    context, create_regex_pattern_from_entity(entity), escape=True
                )
                for entity, context in zip(df['Entity'], df['Context'])
            ]
        else:
            df['Context'] = df['Context'].map(_escape_cell)

        # Replace newline characters with HTML line breaks in the 'Context' column
        df['Context'] = df['Context'].map(lambda cell: _replace_in_cell(cell, '\n', '<br>'))

    # The table is rendered with escape=False so the markup added here
    # survives; all remaining text therefore has to be escaped by hand.
    for column in df.columns:
        if column != 'Context':
            df[column] = df[column].map(_escape_cell)

    # Add line breaks for HTML formatting where needed
    if context_selection == _COMPACT_SUMMARY_SELECTION:
        for column in ('Sources', 'Timestamp'):
            if column in df.columns:
                df[column] = df[column].map(lambda cell: _replace_in_cell(cell, ' // ', ' // <br>'))

    # Convert DataFrame to HTML table
    html_table = df.to_html(classes="table table-bordered", escape=False, index=False)

    # Declare the charset so browsers decode the UTF-8 file correctly.
    html_template = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Logline Leviathan Report</title>
</head>
<body>
{html_table}
</body>
</html>
"""

    # Write the HTML template to the file
    with open(output_file_path, 'w', encoding='utf-8') as file:
        file.write(html_template)