search object data model
This commit is contained in:
@@ -167,6 +167,8 @@ class SemeionArtifact(BaseModel):
|
||||
"title": "string | null"
|
||||
} | null,
|
||||
"source_specific": {} | null,
|
||||
"marked_interesting": "bool",
|
||||
"marked_not_interesting": "bool",
|
||||
"ingestion": {
|
||||
"ingested_at": "string (ISO8601)",
|
||||
"source_file": "string",
|
||||
@@ -176,7 +178,7 @@ class SemeionArtifact(BaseModel):
|
||||
}
|
||||
"""
|
||||
|
||||
_schema_version: str = "1.1.0"
|
||||
_schema_version: str = "1.1.1"
|
||||
id: UUID = Field(description="deterministic UUID v5 based on case_id, source_file, and unique key")
|
||||
case_id: str = Field(description="case identifier this artifact belongs to")
|
||||
searchable: bool = Field(description="searchable or only for timeline context")
|
||||
@@ -192,6 +194,8 @@ class SemeionArtifact(BaseModel):
|
||||
context_group: ContextGroup | None = Field(default=None, description="a parameter which provides information for semantically linked artifacts, e.g., email threads, browser sessions, etc.")
|
||||
location: Location | None = Field(default=None, description="information about where the artifact was located, e.g., file path, URL, host, etc.")
|
||||
source_specific: source_specific_models | None = Field(default=None, description="source-specific metadata for the artifact")
|
||||
marked_interesting: bool = Field(description="user feedback flag marking this artifact as interesting")
|
||||
marked_not_interesting: bool = Field(description="user feedback flag marking this artifact as not interesting")
|
||||
ingestion: Ingestion = Field(description="metadata about the ingestion process")
|
||||
|
||||
class Config:
|
||||
|
||||
@@ -14,6 +14,35 @@ from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
class Filters(BaseModel):
    """
    filters which can be applied to the search query to limit the search space.
    all filters are optional and can be combined.
    """

    # Every field defaults to None, i.e. "this filter is not set".

    # --- restrict by where the artifact comes from ---
    case_ids: list[str] | None = Field(
        default=None,
        description="list of case IDs to limit the search to",
    )
    artifact_classes: list[str] | None = Field(
        default=None,
        description="list of artifact classes to limit the search to",
    )
    source_platforms: list[str] | None = Field(
        default=None,
        description="list of source platforms to limit the search to",
    )

    # --- restrict by who / what is referenced ---
    actor_identifiers: list[str] | None = Field(
        default=None,
        description="list of actor identifiers to limit the search to",
    )
    indexed_entities_any: list[str] | None = Field(
        default=None,
        description="list of indexed entities, any of which must be present in the artifact",
    )

    # --- restrict by time window (both bounds are described as inclusive) ---
    time_after: datetime | None = Field(
        default=None,
        description="only return artifacts after this timestamp (inclusive)",
    )
    time_before: datetime | None = Field(
        default=None,
        description="only return artifacts before this timestamp (inclusive)",
    )

    # --- restrict by context group / location ---
    context_group_ids: list[str] | None = Field(
        default=None,
        description="list of context group IDs to limit the search to",
    )
    hosts: list[str] | None = Field(
        default=None,
        description="list of hostnames or IPs to limit the search to",
    )
|
||||
|
||||
class Options(BaseModel):
    """
    options to modify the search behavior.
    """

    # Upper bound on the number of hits handed back to the caller (default 100).
    limit: int = Field(
        default=100,
        description="maximum number of search results to return",
    )

    # None means no score threshold is set.
    min_score: float | None = Field(
        default=None,
        description="minimum similarity score threshold for returned results",
    )

    # Hybrid search is off unless explicitly requested.
    # The description previously had an unbalanced "(" which leaked into the
    # generated schema text; the closing parenthesis is restored here.
    use_hybrid: bool = Field(
        default=False,
        description="whether to use hybrid search (semantic dense vectors + keyword (sparse vectors))",
    )
|
||||
|
||||
class Interpretation(BaseModel):
    """
    this data structure holds information about how the semantic query was interpreted by the generative LLM
    """

    # Required: the query text exactly as the user supplied it.
    original_query: str = Field(
        description="the original user query before any modifications or interpretations",
    )

    # Optional free-form remarks; defaults to an empty list.
    # NOTE(review): pydantic is documented to copy mutable field defaults per
    # instance, so this list literal should not be shared between models the
    # way a plain Python mutable default would be - confirm for the pinned
    # pydantic version before changing it to default_factory.
    notes: list[str] = Field(
        default=[],
        description="any notes or clarifications made by the LLM regarding the query interpretation",
    )
|
||||
|
||||
class SemeionSearchObject(BaseModel):
|
||||
"""
|
||||
@@ -48,11 +77,16 @@ class SemeionSearchObject(BaseModel):
|
||||
|
||||
"interpretation": {
|
||||
"original_query": "string",
|
||||
"notes": ["string"],
|
||||
"confidence": "float"
|
||||
}
|
||||
"notes": ["string"]
|
||||
}
|
||||
}
|
||||
"""
|
||||
pass
|
||||
_schema_version: str = "1.1.0"
|
||||
query_id: UUID = Field(description="UUID v4 identifying this search query")
|
||||
created_at: datetime = Field(description="timestamp when the search query was created for audit")
|
||||
semantic_query: str = Field(description="the search string which gets embedded and used for semantic search")
|
||||
filters: Filters = Field(description="filters to limit the search space")
|
||||
options: Options = Field(description="options to modify the search behavior")
|
||||
interpretation: Interpretation = Field(description="information about how the semantic query was interpreted by the generative LLM")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user