fixes to semeionSearchArtifact
This commit is contained in:
@@ -6,7 +6,33 @@
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
#
|
||||
|
||||
# from .llm import LLMClient
|
||||
# from .qdrant import QdrantClient
|
||||
from .semeionArtifact import SemeionArtifact, Actors, ActorsRole, Content, ChunkInfo, ContextGroup, ContextGroupType, Location, Ingestion, ArtifactClass
|
||||
from .semeionSearchObject import SemeionSearchObject
|
||||
from .source_specific_models import AuthenticationEventMetadata, BrowserEventMetadata, DocumentMetadata, EmailMetadata, FileEventMetadata, MessageMetadata, NetworkEventMetadata, ProcessEventMetadata, RegistryEventMetadata, ScheduledTaskMetadata, SystemEventMetadata
|
||||
|
||||
__all__ = []
|
||||
__all__ = ["SemeionArtifact",
|
||||
"Actors",
|
||||
"ActorsRole",
|
||||
"Content",
|
||||
"ChunkInfo",
|
||||
"ContextGroup",
|
||||
"ContextGroupType",
|
||||
"Location",
|
||||
"Ingestion",
|
||||
"ArtifactClass",
|
||||
"ContextGroupType",
|
||||
"Location",
|
||||
"Ingestion",
|
||||
"ArtifactClass",
|
||||
"SemeionSearchObject",
|
||||
"AuthenticationEventMetadata",
|
||||
"BrowserEventMetadata",
|
||||
"DocumentMetadata",
|
||||
"EmailMetadata",
|
||||
"FileEventMetadata",
|
||||
"MessageMetadata",
|
||||
"NetworkEventMetadata",
|
||||
"ProcessEventMetadata",
|
||||
"RegistryEventMetadata",
|
||||
"ScheduledTaskMetadata",
|
||||
"SystemEventMetadata"]
|
||||
@@ -8,14 +8,13 @@
|
||||
|
||||
|
||||
from datetime import datetime
|
||||
import enum
|
||||
from enum import Enum
|
||||
from typing import Annotated, Union
|
||||
from uuid import UUID
|
||||
from interfaces import QdrantInterface
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
class ArtifactClass(str, enum):
|
||||
class ArtifactClass(str, Enum):
|
||||
"""
|
||||
the general class of the artifact which determines whether it can be searched for or not.
|
||||
this also affects how the artifact is processed during ingestion and search.
|
||||
@@ -38,15 +37,7 @@ class ArtifactClass(str, enum):
|
||||
AUTHENTICATION_EVENT = "authentication_event"
|
||||
SCHEDULED_TASK = "scheduled_task"
|
||||
|
||||
class Actors(BaseModel):
|
||||
"""
|
||||
the actor is any entity associated with some behavioral pattern.
|
||||
"""
|
||||
identifier: str # some unique identifier which is consistent across artifacts
|
||||
display_name: str # human readable, needs to be parsed properly by ingestion module
|
||||
role: ActorsRole # see class
|
||||
|
||||
class ActorsRole(str, enum):
|
||||
class ActorsRole(str, Enum):
|
||||
"""
|
||||
this will determine the role of the actor, which is mainly important for message filtering and later behavioral analysis.
|
||||
"""
|
||||
@@ -58,6 +49,14 @@ class ActorsRole(str, enum):
|
||||
INITIATOR = "initiator" # browser events, filesystem, network events
|
||||
TARGET = "target" # network events, authentication events
|
||||
|
||||
class Actors(BaseModel):
|
||||
"""
|
||||
the actor is any entity associated with some behavioral pattern.
|
||||
"""
|
||||
identifier: str # some unique identifier which is consistent across artifacts
|
||||
display_name: str # human readable, needs to be parsed properly by ingestion module
|
||||
role: ActorsRole # see class
|
||||
|
||||
class Content(BaseModel):
|
||||
"""
|
||||
the most important field, this is what gets embedded.
|
||||
@@ -73,14 +72,7 @@ class ChunkInfo(BaseModel):
|
||||
index: int # zero-based index of the chunk
|
||||
total: int # total number of chunks
|
||||
|
||||
class ContextGroup(BaseModel):
|
||||
"""
|
||||
some artifacts can be aggregated into context groups, if they are semantically linked
|
||||
"""
|
||||
type: ContextGroupType
|
||||
id: str
|
||||
|
||||
class ContextGroupType(str, enum):
|
||||
class ContextGroupType(str, Enum):
|
||||
"""
|
||||
some artifacts can be aggregated into context groups, if they are semantically linked
|
||||
"""
|
||||
@@ -90,6 +82,13 @@ class ContextGroupType(str, enum):
|
||||
# not file system directories: they are inherently linked by path and parent_id
|
||||
# no chunks: they are inherently linked by chunk_info
|
||||
|
||||
class ContextGroup(BaseModel):
|
||||
"""
|
||||
some artifacts can be aggregated into context groups, if they are semantically linked
|
||||
"""
|
||||
type: ContextGroupType
|
||||
id: str
|
||||
|
||||
class Location(BaseModel):
|
||||
"""
|
||||
any information about where the artifact was located.
|
||||
@@ -209,14 +208,13 @@ class SemeionArtifact(BaseModel):
|
||||
elif self.artifact_class in [ArtifactClass.MESSAGE, ArtifactClass.BROWSER_EVENT, ArtifactClass.EMAIL, ArtifactClass.DOCUMENT] and not self.searchable:
|
||||
exception = f"Artifact {self.id} of class {self.artifact_class} is marked as non-searchable, but this class should always be searchable."
|
||||
raise ValueError(exception)
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
|
||||
def submit_to_vector_db(self) -> bool:
|
||||
def get_vector_payload(self) -> dict:
|
||||
"""
|
||||
build the JSON payload (model_dump with aliases) that is submitted to qdrant
|
||||
"""
|
||||
payload = self.model_dump(mode="json", by_alias=True)
|
||||
QdrantInterface.submit(payload)
|
||||
return payload
|
||||
|
||||
|
||||
@@ -10,8 +10,6 @@
|
||||
# from .qdrant import QdrantClient
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Annotated, Union
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
Reference in New Issue
Block a user