nginx_to_jsonl.py hinzugefügt
This commit is contained in:
		
							parent
							
								
									f8083beb9d
								
							
						
					
					
						commit
						6f008a18af
					
				
							
								
								
									
										101
									
								
								nginx_to_jsonl.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										101
									
								
								nginx_to_jsonl.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,101 @@
 | 
				
			|||||||
 | 
					import re, json, datetime, gzip, glob, os
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# One log line: ip ident user [time] "request" status size "referer" "agent",
					# optionally followed by one more quoted field.
					_NGINX_LINE_RE = re.compile(
					    r'(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]*)" (\d+) (\S+) "([^"]*)" "([^"]*)"(?:\s+"([^"]*)")?'
					)


					def _parse_log_line(line, log_file, log_type, desc):
					    """Turn one stripped log line into an event dict.

					    Args:
					        line: The log line with surrounding whitespace removed.
					        log_file: Path of the file the line came from (stored as ``source_file``).
					        log_type: Key of the log category (stored as ``log_type`` and in ``data_type``).
					        desc: Value stored as ``timestamp_desc``.

					    Returns:
					        The event dict with all ``None`` fields removed, or ``None`` when the
					        line does not match the expected layout.

					    Raises:
					        ValueError: If the bracketed timestamp is not in
					            ``%d/%b/%Y:%H:%M:%S %z`` form.
					    """
					    match = _NGINX_LINE_RE.match(line)
					    if match is None:
					        return None

					    (ip, remote_ident, remote_user, timestamp, request,
					     status, size, referer, user_agent, additional) = match.groups()

					    # "METHOD URI PROTOCOL"; missing trailing parts stay None and are dropped below.
					    request_parts = request.split(' ')
					    method = request_parts[0]
					    uri = request_parts[1] if len(request_parts) > 1 else None
					    protocol = request_parts[2] if len(request_parts) > 2 else None

					    dt = datetime.datetime.strptime(timestamp, '%d/%b/%Y:%H:%M:%S %z')

					    event = {
					        "message": line,
					        "datetime": dt.isoformat(),
					        "timestamp_desc": desc,
					        "source_file": log_file,
					        "log_type": log_type,
					        "source_ip": ip,
					        # A literal "-" is the placeholder for "no value".
					        "remote_ident": remote_ident if remote_ident != '-' else None,
					        "remote_user": remote_user if remote_user != '-' else None,
					        "http_method": method,
					        "http_uri": uri,
					        "http_protocol": protocol,
					        "http_request_full": request,
					        "status_code": int(status),
					        # Non-numeric sizes (e.g. "-") are recorded as 0.
					        "response_size": int(size) if size.isdigit() else 0,
					        "referer": referer if referer != '-' else None,
					        "user_agent": user_agent,
					        "additional_field": additional if additional else None,
					        "data_type": f"web:{log_type}:log",
					    }
					    return {key: value for key, value in event.items() if value is not None}


					def convert_logs_to_jsonl():
					    """Convert nginx log files in the current directory to JSONL files.

					    For each log category (access, error, redirect) every file matching the
					    category's glob pattern is read (``.gz`` files are decompressed on the
					    fly) and each line that parses is written as one JSON object per line to
					    the category's output file. Categories with no matching files produce no
					    output file.

					    Lines are handled independently: a line that does not match the expected
					    layout, or whose timestamp cannot be parsed, is skipped and counted
					    instead of aborting the rest of the file. Undecodable bytes are replaced
					    rather than raising. Progress and skip counts are printed to stdout.
					    """
					    # NOTE(review): all three categories are parsed with the same access-log
					    # regex; error.log entries presumably use a different layout and would then
					    # all be reported as skipped -- confirm against real data.
					    log_types = {
					        'access': {
					            'pattern': 'access.log*',
					            'output': 'timesketch_access.jsonl',
					            'desc': 'HTTP Request Time'
					        },
					        'error': {
					            'pattern': 'error.log*',
					            'output': 'timesketch_error.jsonl',
					            'desc': 'Error Event Time'
					        },
					        'redirect': {
					            'pattern': 'redirect-access.log*',
					            'output': 'timesketch_redirect.jsonl',
					            'desc': 'Redirect Request Time'
					        }
					    }

					    for log_type, config in log_types.items():
					        files = sorted(glob.glob(config['pattern']))
					        if not files:
					            continue

					        print(f"Processing {len(files)} {log_type} files...")

					        with open(config['output'], 'w', encoding='utf-8') as out:
					            for log_file in files:
					                print(f"  Processing: {log_file}")

					                # Rotated logs may be gzip-compressed; both openers accept text mode.
					                opener = gzip.open if log_file.endswith('.gz') else open
					                skipped = 0

					                try:
					                    with opener(log_file, 'rt', encoding='utf-8', errors='replace') as f:
					                        for line in f:
					                            line = line.strip()
					                            if not line:
					                                continue
					                            try:
					                                event = _parse_log_line(
					                                    line, log_file, log_type, config['desc']
					                                )
					                            except ValueError:
					                                # Bad timestamp: drop this line only.
					                                event = None
					                            if event is None:
					                                skipped += 1
					                                continue
					                            out.write(json.dumps(event) + '\n')
					                except Exception as e:
					                    # Best-effort per file: an unreadable or corrupt file must not
					                    # stop the remaining files from being converted.
					                    print(f"    Error processing {log_file}: {e}")

					                if skipped:
					                    print(f"    Skipped {skipped} unparsable line(s) in {log_file}")

					        print(f"  Output: {config['output']}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Run the conversion as soon as this module is executed. There is no
					# __main__ guard, so the call also fires when the module is imported.
					convert_logs_to_jsonl()
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user