search object data model
This commit is contained in:
@@ -167,6 +167,8 @@ class SemeionArtifact(BaseModel):
|
|||||||
"title": "string | null"
|
"title": "string | null"
|
||||||
} | null,
|
} | null,
|
||||||
"source_specific": {} | null,
|
"source_specific": {} | null,
|
||||||
|
"marked_interesting": "bool",
|
||||||
|
"marked_not_interesting": "bool",
|
||||||
"ingestion": {
|
"ingestion": {
|
||||||
"ingested_at": "string (ISO8601)",
|
"ingested_at": "string (ISO8601)",
|
||||||
"source_file": "string",
|
"source_file": "string",
|
||||||
@@ -176,7 +178,7 @@ class SemeionArtifact(BaseModel):
|
|||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_schema_version: str = "1.1.0"
|
_schema_version: str = "1.1.1"
|
||||||
id: UUID = Field(description="deterministic UUID v5 based on case_id, source_file, and unique key")
|
id: UUID = Field(description="deterministic UUID v5 based on case_id, source_file, and unique key")
|
||||||
case_id: str = Field(description="case identifier this artifact belongs to")
|
case_id: str = Field(description="case identifier this artifact belongs to")
|
||||||
searchable: bool = Field(description="searchable or only for timeline context")
|
searchable: bool = Field(description="searchable or only for timeline context")
|
||||||
@@ -192,6 +194,8 @@ class SemeionArtifact(BaseModel):
|
|||||||
context_group: ContextGroup | None = Field(default=None, description="a parameter which provides information for semantically linked artifacts, e.g., email threads, browser sessions, etc.")
|
context_group: ContextGroup | None = Field(default=None, description="a parameter which provides information for semantically linked artifacts, e.g., email threads, browser sessions, etc.")
|
||||||
location: Location | None = Field(default=None, description="information about where the artifact was located, e.g., file path, URL, host, etc.")
|
location: Location | None = Field(default=None, description="information about where the artifact was located, e.g., file path, URL, host, etc.")
|
||||||
source_specific: source_specific_models | None = Field(default=None, description="source-specific metadata for the artifact")
|
source_specific: source_specific_models | None = Field(default=None, description="source-specific metadata for the artifact")
|
||||||
|
marked_interesting: bool = Field(description="user feedback flag marking this artifact as interesting")
|
||||||
|
marked_not_interesting: bool = Field(description="user feedback flag marking this artifact as not interesting")
|
||||||
ingestion: Ingestion = Field(description="metadata about the ingestion process")
|
ingestion: Ingestion = Field(description="metadata about the ingestion process")
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
|
|||||||
@@ -14,6 +14,35 @@ from uuid import UUID
|
|||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
class Filters(BaseModel):
    """
    optional constraints which narrow the search space of a query.

    every filter defaults to None (not set); the filters can be combined freely.
    """

    # --- identifier based filters ---
    case_ids: list[str] | None = Field(
        default=None,
        description="list of case IDs to limit the search to",
    )
    artifact_classes: list[str] | None = Field(
        default=None,
        description="list of artifact classes to limit the search to",
    )
    source_platforms: list[str] | None = Field(
        default=None,
        description="list of source platforms to limit the search to",
    )
    actor_identifiers: list[str] | None = Field(
        default=None,
        description="list of actor identifiers to limit the search to",
    )
    indexed_entities_any: list[str] | None = Field(
        default=None,
        description="list of indexed entities, any of which must be present in the artifact",
    )

    # --- time window; both bounds are documented as inclusive ---
    time_after: datetime | None = Field(
        default=None,
        description="only return artifacts after this timestamp (inclusive)",
    )
    time_before: datetime | None = Field(
        default=None,
        description="only return artifacts before this timestamp (inclusive)",
    )

    # --- context and location based filters ---
    context_group_ids: list[str] | None = Field(
        default=None,
        description="list of context group IDs to limit the search to",
    )
    hosts: list[str] | None = Field(
        default=None,
        description="list of hostnames or IPs to limit the search to",
    )
|
||||||
|
|
||||||
|
class Options(BaseModel):
    """
    options to modify the search behavior.

    all options carry a default, so an empty Options() is valid.
    """

    # upper bound on the number of results; defaults to 100
    limit: int = Field(
        default=100,
        description="maximum number of search results to return",
    )
    # None means no score threshold is set
    min_score: float | None = Field(
        default=None,
        description="minimum similarity score threshold for returned results",
    )
    # hybrid search is opt-in (default False); the description previously had
    # an unbalanced parenthesis which leaked into the generated schema text
    use_hybrid: bool = Field(
        default=False,
        description="whether to use hybrid search (semantic dense vectors + keyword (sparse vectors))",
    )
|
||||||
|
|
||||||
|
class Interpretation(BaseModel):
    """
    record of how the generative LLM interpreted the semantic query.
    """

    # required: the query exactly as the user entered it
    original_query: str = Field(
        description="the original user query before any modifications or interpretations",
    )
    # optional, empty by default
    # NOTE(review): default=[] presumably relies on pydantic copying mutable
    # defaults per instance - confirm, or consider default_factory=list
    notes: list[str] = Field(
        default=[],
        description="any notes or clarifications made by the LLM regarding the query interpretation",
    )
|
||||||
|
|
||||||
class SemeionSearchObject(BaseModel):
|
class SemeionSearchObject(BaseModel):
|
||||||
"""
|
"""
|
||||||
@@ -48,11 +77,16 @@ class SemeionSearchObject(BaseModel):
|
|||||||
|
|
||||||
"interpretation": {
|
"interpretation": {
|
||||||
"original_query": "string",
|
"original_query": "string",
|
||||||
"notes": ["string"],
|
"notes": ["string"]
|
||||||
"confidence": "float"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
pass
|
_schema_version: str = "1.1.0"
|
||||||
|
query_id: UUID = Field(description="UUID v4 identifying this search query")
|
||||||
|
created_at: datetime = Field(description="timestamp when the search query was created for audit")
|
||||||
|
semantic_query: str = Field(description="the search string which gets embedded and used for semantic search")
|
||||||
|
filters: Filters = Field(description="filters to limit the search space")
|
||||||
|
options: Options = Field(description="options to modify the search behavior")
|
||||||
|
interpretation: Interpretation = Field(description="information about how the semantic query was interpreted by the generative LLM")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user