search object data model

This commit is contained in:
2025-11-30 14:49:18 +01:00
parent c0e0325958
commit 0957027eed
2 changed files with 43 additions and 5 deletions

View File

@@ -167,6 +167,8 @@ class SemeionArtifact(BaseModel):
"title": "string | null"
} | null,
"source_specific": {} | null,
"marked_interesting": "bool",
"marked_not_interesting": "bool",
"ingestion": {
"ingested_at": "string (ISO8601)",
"source_file": "string",
@@ -176,7 +178,7 @@ class SemeionArtifact(BaseModel):
}
"""
_schema_version: str = "1.1.0"
_schema_version: str = "1.1.1"
id: UUID = Field(description="deterministic UUID v5 based on case_id, source_file, and unique key")
case_id: str = Field(description="case identifier this artifact belongs to")
searchable: bool = Field(description="searchable or only for timeline context")
@@ -192,6 +194,8 @@ class SemeionArtifact(BaseModel):
context_group: ContextGroup | None = Field(default=None, description="a parameter which provides information for semantically linked artifacts, e.g., email threads, browser sessions, etc.")
location: Location | None = Field(default=None, description="information about where the artifact was located, e.g., file path, URL, host, etc.")
source_specific: source_specific_models | None = Field(default=None, description="source-specific metadata for the artifact")
marked_interesting: bool = Field(description="user feedback flag marking this artifact as interesting")
marked_not_interesting: bool = Field(description="user feedback flag marking this artifact as not interesting")
ingestion: Ingestion = Field(description="metadata about the ingestion process")
class Config:

View File

@@ -14,6 +14,35 @@ from uuid import UUID
from pydantic import BaseModel, Field
class Filters(BaseModel):
    """
    optional constraints that narrow the search space of a query.

    every filter defaults to None (i.e. it is not set), and any subset
    of the filters may be supplied together.
    """

    # list-valued filters: each one restricts the search to the listed values
    case_ids: list[str] | None = Field(
        description="list of case IDs to limit the search to",
        default=None,
    )
    artifact_classes: list[str] | None = Field(
        description="list of artifact classes to limit the search to",
        default=None,
    )
    source_platforms: list[str] | None = Field(
        description="list of source platforms to limit the search to",
        default=None,
    )
    actor_identifiers: list[str] | None = Field(
        description="list of actor identifiers to limit the search to",
        default=None,
    )
    indexed_entities_any: list[str] | None = Field(
        description="list of indexed entities, any of which must be present in the artifact",
        default=None,
    )

    # time window; both bounds are described as inclusive
    time_after: datetime | None = Field(
        description="only return artifacts after this timestamp (inclusive)",
        default=None,
    )
    time_before: datetime | None = Field(
        description="only return artifacts before this timestamp (inclusive)",
        default=None,
    )

    context_group_ids: list[str] | None = Field(
        description="list of context group IDs to limit the search to",
        default=None,
    )
    hosts: list[str] | None = Field(
        description="list of hostnames or IPs to limit the search to",
        default=None,
    )
class Options(BaseModel):
    """
    settings that change how a search is executed.
    """

    # result cap; 100 unless the caller overrides it
    limit: int = Field(
        description="maximum number of search results to return",
        default=100,
    )
    # None means no score threshold is set
    min_score: float | None = Field(
        description="minimum similarity score threshold for returned results",
        default=None,
    )
    # NOTE(review): the description below has an unbalanced "(" — kept
    # verbatim because it is runtime text; fix it in a dedicated change.
    use_hybrid: bool = Field(
        description="whether to use hybrid search (semantic dense vectors + keyword (sparse vectors)",
        default=False,
    )
class Interpretation(BaseModel):
    """
    record of how the generative LLM interpreted the semantic query.
    """

    # required: there is no default for the original query text
    original_query: str = Field(
        description="the original user query before any modifications or interpretations",
    )
    # NOTE(review): mutable default; pydantic is presumably copying it per
    # instance — confirm. `default_factory=list` would be the explicit form,
    # but may change what the generated schema reports as the default.
    notes: list[str] = Field(
        description="any notes or clarifications made by the LLM regarding the query interpretation",
        default=[],
    )
class SemeionSearchObject(BaseModel):
"""
@@ -48,11 +77,16 @@ class SemeionSearchObject(BaseModel):
"interpretation": {
"original_query": "string",
"notes": ["string"],
"confidence": "float"
"notes": ["string"]
}
}
"""
pass
_schema_version: str = "1.1.0"
query_id: UUID = Field(description="UUID v4 identifying this search query")
created_at: datetime = Field(description="timestamp when the search query was created for audit")
semantic_query: str = Field(description="the search string which gets embedded and used for semantic search")
filters: Filters = Field(description="filters to limit the search space")
options: Options = Field(description="options to modify the search behavior")
interpretation: Interpretation = Field(description="information about how the semantic query was interpreted by the generative LLM")