fixes to semeionSearchArtifact
This commit is contained in:
@@ -6,7 +6,33 @@
|
|||||||
# LICENSE file in the root directory of this source tree.
|
# LICENSE file in the root directory of this source tree.
|
||||||
#
|
#
|
||||||
|
|
||||||
# from .llm import LLMClient
|
from .semeionArtifact import SemeionArtifact, Actors, ActorsRole, Content, ChunkInfo, ContextGroup, ContextGroupType, Location, Ingestion, ArtifactClass
|
||||||
# from .qdrant import QdrantClient
|
from .semeionSearchObject import SemeionSearchObject
|
||||||
|
from .source_specific_models import AuthenticationEventMetadata, BrowserEventMetadata, DocumentMetadata, EmailMetadata, FileEventMetadata, MessageMetadata, NetworkEventMetadata, ProcessEventMetadata, RegistryEventMetadata, ScheduledTaskMetadata, SystemEventMetadata
|
||||||
|
|
||||||
__all__ = []
|
__all__ = ["SemeionArtifact",
|
||||||
|
"Actors",
|
||||||
|
"ActorsRole",
|
||||||
|
"Content",
|
||||||
|
"ChunkInfo",
|
||||||
|
"ContextGroup",
|
||||||
|
"ContextGroupType",
|
||||||
|
"Location",
|
||||||
|
"Ingestion",
|
||||||
|
"ArtifactClass",
|
||||||
|
"ContextGroupType",
|
||||||
|
"Location",
|
||||||
|
"Ingestion",
|
||||||
|
"ArtifactClass",
|
||||||
|
"SemeionSearchObject",
|
||||||
|
"AuthenticationEventMetadata",
|
||||||
|
"BrowserEventMetadata",
|
||||||
|
"DocumentMetadata",
|
||||||
|
"EmailMetadata",
|
||||||
|
"FileEventMetadata",
|
||||||
|
"MessageMetadata",
|
||||||
|
"NetworkEventMetadata",
|
||||||
|
"ProcessEventMetadata",
|
||||||
|
"RegistryEventMetadata",
|
||||||
|
"ScheduledTaskMetadata",
|
||||||
|
"SystemEventMetadata"]
|
||||||
@@ -8,14 +8,13 @@
|
|||||||
|
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import enum
|
from enum import Enum
|
||||||
from typing import Annotated, Union
|
from typing import Annotated, Union
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
from interfaces import QdrantInterface
|
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
class ArtifactClass(str, enum):
|
class ArtifactClass(str, Enum):
|
||||||
"""
|
"""
|
||||||
the general class of the artifact which determines whether it can be searched for or not.
|
the general class of the artifact which determines whether it can be searched for or not.
|
||||||
this also affects how the artifact is processed during ingestion and search.
|
this also affects how the artifact is processed during ingestion and search.
|
||||||
@@ -37,16 +36,8 @@ class ArtifactClass(str, enum):
|
|||||||
NETWORK_EVENT = "network_event"
|
NETWORK_EVENT = "network_event"
|
||||||
AUTHENTICATION_EVENT = "authentication_event"
|
AUTHENTICATION_EVENT = "authentication_event"
|
||||||
SCHEDULED_TASK = "scheduled_task"
|
SCHEDULED_TASK = "scheduled_task"
|
||||||
|
|
||||||
class Actors(BaseModel):
|
class ActorsRole(str, Enum):
|
||||||
"""
|
|
||||||
the actor is any entity associated with some behavioral pattern.
|
|
||||||
"""
|
|
||||||
identifier: str # some unique identifier which is consistent across artifacts
|
|
||||||
display_name: str # human readeable, needs to be parsed properly by ingestion module
|
|
||||||
role: ActorsRole # see class
|
|
||||||
|
|
||||||
class ActorsRole(str, enum):
|
|
||||||
"""
|
"""
|
||||||
this will determine the role of the actor, which is mainly important for message filtering and later behavioral analysis.
|
this will determine the role of the actor, which is mainly important for message filtering and later behavioral analysis.
|
||||||
"""
|
"""
|
||||||
@@ -57,6 +48,14 @@ class ActorsRole(str, enum):
|
|||||||
OWNER = "owner" # filesystem objects
|
OWNER = "owner" # filesystem objects
|
||||||
INITIATOR = "initiator" # browser events, filesystem, network events
|
INITIATOR = "initiator" # browser events, filesystem, network events
|
||||||
TARGET = "target" # network events, authentication events
|
TARGET = "target" # network events, authentication events
|
||||||
|
|
||||||
|
class Actors(BaseModel):
|
||||||
|
"""
|
||||||
|
the actor is any entity associated with some behavioral pattern.
|
||||||
|
"""
|
||||||
|
identifier: str # some unique identifier which is consistent across artifacts
|
||||||
|
display_name: str # human readable, needs to be parsed properly by ingestion module
|
||||||
|
role: ActorsRole # see class
|
||||||
|
|
||||||
class Content(BaseModel):
|
class Content(BaseModel):
|
||||||
"""
|
"""
|
||||||
@@ -73,14 +72,7 @@ class ChunkInfo(BaseModel):
|
|||||||
index: int # zero-based index of the chunk
|
index: int # zero-based index of the chunk
|
||||||
total: int # total number of chunks
|
total: int # total number of chunks
|
||||||
|
|
||||||
class ContextGroup(BaseModel):
|
class ContextGroupType(str, Enum):
|
||||||
"""
|
|
||||||
some artifacts can be aggregated into context groups, if they are semantically liked
|
|
||||||
"""
|
|
||||||
type: ContextGroupType
|
|
||||||
id: str
|
|
||||||
|
|
||||||
class ContextGroupType(str, enum):
|
|
||||||
"""
|
"""
|
||||||
some artifacts can be aggregated into context groups, if they are semantically linked
|
some artifacts can be aggregated into context groups, if they are semantically linked
|
||||||
"""
|
"""
|
||||||
@@ -90,6 +82,13 @@ class ContextGroupType(str, enum):
|
|||||||
# not file system directories: they are inherently linked by path and parent_id
|
# not file system directories: they are inherently linked by path and parent_id
|
||||||
# no chunks: they are inherently linked by chunk_info
|
# no chunks: they are inherently linked by chunk_info
|
||||||
|
|
||||||
|
class ContextGroup(BaseModel):
|
||||||
|
"""
|
||||||
|
some artifacts can be aggregated into context groups, if they are semantically linked
|
||||||
|
"""
|
||||||
|
type: ContextGroupType
|
||||||
|
id: str
|
||||||
|
|
||||||
class Location(BaseModel):
|
class Location(BaseModel):
|
||||||
"""
|
"""
|
||||||
any information about where the artifact was located.
|
any information about where the artifact was located.
|
||||||
@@ -209,14 +208,13 @@ class SemeionArtifact(BaseModel):
|
|||||||
elif self.artifact_class in [ArtifactClass.MESSAGE, ArtifactClass.BROWSER_EVENT, ArtifactClass.EMAIL, ArtifactClass.DOCUMENT] and not self.searchable:
|
elif self.artifact_class in [ArtifactClass.MESSAGE, ArtifactClass.BROWSER_EVENT, ArtifactClass.EMAIL, ArtifactClass.DOCUMENT] and not self.searchable:
|
||||||
exception = f"Artifact {self.id} of class {self.artifact_class} is marked as non-searchable, but this class should always be searchable."
|
exception = f"Artifact {self.id} of class {self.artifact_class} is marked as non-searchable, but this class should always be searchable."
|
||||||
raise ValueError(exception)
|
raise ValueError(exception)
|
||||||
return False
|
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def submit_to_vector_db(self) -> bool:
|
def get_vector_payload(self) -> dict:
|
||||||
"""
|
"""
|
||||||
submit to qdrant
|
build and return the JSON payload to be submitted to qdrant
|
||||||
"""
|
"""
|
||||||
payload = self.model_dump(mode="json", by_alias=True)
|
payload = self.model_dump(mode="json", by_alias=True)
|
||||||
QdrantInterface.submit(payload)
|
return payload
|
||||||
|
|
||||||
|
|||||||
@@ -10,8 +10,6 @@
|
|||||||
# from .qdrant import QdrantClient
|
# from .qdrant import QdrantClient
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from enum import Enum
|
|
||||||
from typing import Annotated, Union
|
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|||||||
Reference in New Issue
Block a user