| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323 |
- from typing import Any, Optional, Union
- from pydantic import BaseModel, Field
- from qdrant_client.conversions.common_types import SparseVector
- from qdrant_client.http import models
- try:
- from fastembed import (
- TextEmbedding,
- SparseTextEmbedding,
- ImageEmbedding,
- LateInteractionTextEmbedding,
- LateInteractionMultimodalEmbedding,
- )
- from fastembed.common import OnnxProvider, ImageInput
- except ImportError:
- TextEmbedding = None
- SparseTextEmbedding = None
- ImageEmbedding = None
- LateInteractionTextEmbedding = None
- LateInteractionMultimodalEmbedding = None
- OnnxProvider = None
- ImageInput = None
class QueryResponse(BaseModel, extra="forbid"):  # type: ignore
    """A single query hit: the stored document with its vectors, metadata and score.

    Unknown fields are rejected (`extra="forbid"`).
    """

    # Point identifier, either a string or an integer.
    id: Union[str, int]
    # Dense vector of the hit; may be None.
    embedding: Optional[list[float]]
    # Sparse vector of the hit; defaults to None when not provided.
    sparse_embedding: Optional[SparseVector] = None
    # Arbitrary metadata attached to the hit.
    metadata: dict[str, Any]
    # Text of the document.
    document: str
    # Score assigned to the hit.
    score: float
class FastEmbedMisc:
    """Helpers for detecting fastembed and querying the models it supports.

    Everything is exposed through classmethods. The class keeps a cached
    installation flag and caches of lower-cased model names as class attributes.
    """

    # Becomes True once `is_installed` has successfully imported fastembed.
    IS_INSTALLED: bool = False
    # Lower-cased model names; refreshed lazily by the `is_supported_*` checks.
    _TEXT_MODELS: set[str] = set()
    _IMAGE_MODELS: set[str] = set()
    _LATE_INTERACTION_TEXT_MODELS: set[str] = set()
    _LATE_INTERACTION_MULTIMODAL_MODELS: set[str] = set()
    _SPARSE_MODELS: set[str] = set()

    @classmethod
    def is_installed(cls) -> bool:
        """Checks whether fastembed can be imported, caching a positive result.

        A negative result is not cached: the import is retried on the next call.

        Returns:
            bool: True if fastembed is importable, False otherwise.

        Raises:
            AssertionError: If fastembed imports but one of its embedding classes reports
                no supported models (only when assertions are enabled).
        """
        if cls.IS_INSTALLED:
            return cls.IS_INSTALLED

        try:
            from fastembed import (
                SparseTextEmbedding,
                TextEmbedding,
                ImageEmbedding,
                LateInteractionMultimodalEmbedding,
                LateInteractionTextEmbedding,
            )

            # Sanity checks: a usable installation exposes at least one model per family.
            assert len(SparseTextEmbedding.list_supported_models()) > 0
            assert len(TextEmbedding.list_supported_models()) > 0
            assert len(ImageEmbedding.list_supported_models()) > 0
            assert len(LateInteractionTextEmbedding.list_supported_models()) > 0
            assert len(LateInteractionMultimodalEmbedding.list_supported_models()) > 0

            cls.IS_INSTALLED = True
        except ImportError:
            cls.IS_INSTALLED = False

        return cls.IS_INSTALLED

    @classmethod
    def import_fastembed(cls) -> None:
        """Ensures fastembed is available.

        Raises:
            ImportError: If fastembed is not installed.
        """
        # Probe through is_installed() rather than reading the flag directly: the flag
        # starts as False and is only set by a probe, so reading it alone would report
        # an installed fastembed as missing whenever no probe has run yet.
        if cls.is_installed():
            return

        # If it's not, ask the user to install it
        raise ImportError(
            "fastembed is not installed."
            " Please install it to enable fast vector indexing with `pip install fastembed`."
        )

    @staticmethod
    def _dense_model_specs(embedding_cls: Any) -> dict[str, tuple[int, models.Distance]]:
        """Maps each model of `embedding_cls` to its dimension and the cosine distance.

        Args:
            embedding_cls (Any): A fastembed embedding class, or a falsy placeholder
                when fastembed is unavailable.

        Returns:
            dict[str, tuple[int, models.Distance]]: A dict of model names, their dimensions and
                distance metrics. Empty when `embedding_cls` is falsy.
        """
        if not embedding_cls:
            return {}
        return {
            spec["model"]: (spec["dim"], models.Distance.COSINE)
            for spec in embedding_cls.list_supported_models()
        }

    @classmethod
    def list_text_models(cls) -> dict[str, tuple[int, models.Distance]]:
        """Lists the supported dense text models.

        TextEmbedding.list_supported_models() is invoked on every call in order to pick up custom models.

        Returns:
            dict[str, tuple[int, models.Distance]]: A dict of model names, their dimensions and distance metrics.
        """
        return cls._dense_model_specs(TextEmbedding)

    @classmethod
    def list_image_models(cls) -> dict[str, tuple[int, models.Distance]]:
        """Lists the supported dense image models.

        Custom image models are not supported yet, but ImageEmbedding.list_supported_models() is called
        each time to keep the same style as with TextEmbedding.

        Returns:
            dict[str, tuple[int, models.Distance]]: A dict of model names, their dimensions and distance metrics.
        """
        return cls._dense_model_specs(ImageEmbedding)

    @classmethod
    def list_late_interaction_text_models(cls) -> dict[str, tuple[int, models.Distance]]:
        """Lists the supported late interaction text models.

        Custom late interaction models are not supported yet, but
        LateInteractionTextEmbedding.list_supported_models() is called each time to keep the same style
        as with TextEmbedding.

        Returns:
            dict[str, tuple[int, models.Distance]]: A dict of model names, their dimensions and distance metrics.
        """
        return cls._dense_model_specs(LateInteractionTextEmbedding)

    @classmethod
    def list_late_interaction_multimodal_models(cls) -> dict[str, tuple[int, models.Distance]]:
        """Lists the supported late interaction multimodal models.

        Custom late interaction multimodal models are not supported yet, but
        LateInteractionMultimodalEmbedding.list_supported_models() is called each time to keep the same
        style as with TextEmbedding.

        Returns:
            dict[str, tuple[int, models.Distance]]: A dict of model names, their dimensions and distance metrics.
        """
        return cls._dense_model_specs(LateInteractionMultimodalEmbedding)

    @classmethod
    def list_sparse_models(cls) -> dict[str, dict[str, Any]]:
        """Lists the supported sparse models.

        Custom sparse models are not supported yet, but SparseTextEmbedding.list_supported_models()
        is called each time to keep the same style as with TextEmbedding.

        Returns:
            dict[str, dict[str, Any]]: A dict of model names and their descriptions
                (each description without its "model" key).
        """
        descriptions: dict[str, dict[str, Any]] = {}
        if SparseTextEmbedding:
            for description in SparseTextEmbedding.list_supported_models():
                # Copy before dropping the key so the dicts handed out by fastembed are not mutated.
                details = dict(description)
                descriptions[details.pop("model")] = details
        return descriptions

    @classmethod
    def is_supported_text_model(cls, model_name: str) -> bool:
        """Checks if the dense text model is supported by fastembed (case-insensitive).

        Args:
            model_name (str): The name of the model to check.

        Returns:
            bool: True if the model is supported, False otherwise.
        """
        lowered = model_name.lower()
        if lowered in cls._TEXT_MODELS:
            return True

        # update cached list in case custom models were added
        cls._TEXT_MODELS = {name.lower() for name in cls.list_text_models()}
        return lowered in cls._TEXT_MODELS

    @classmethod
    def is_supported_image_model(cls, model_name: str) -> bool:
        """Checks if the dense image model is supported by fastembed (case-insensitive).

        Args:
            model_name (str): The name of the model to check.

        Returns:
            bool: True if the model is supported, False otherwise.
        """
        lowered = model_name.lower()
        if lowered in cls._IMAGE_MODELS:
            return True

        # update cached list in case custom models were added
        cls._IMAGE_MODELS = {name.lower() for name in cls.list_image_models()}
        return lowered in cls._IMAGE_MODELS

    @classmethod
    def is_supported_late_interaction_text_model(cls, model_name: str) -> bool:
        """Checks if the late interaction text model is supported by fastembed (case-insensitive).

        Args:
            model_name (str): The name of the model to check.

        Returns:
            bool: True if the model is supported, False otherwise.
        """
        lowered = model_name.lower()
        if lowered in cls._LATE_INTERACTION_TEXT_MODELS:
            return True

        # update cached list in case custom models were added
        cls._LATE_INTERACTION_TEXT_MODELS = {
            name.lower() for name in cls.list_late_interaction_text_models()
        }
        return lowered in cls._LATE_INTERACTION_TEXT_MODELS

    @classmethod
    def is_supported_late_interaction_multimodal_model(cls, model_name: str) -> bool:
        """Checks if the late interaction multimodal model is supported by fastembed (case-insensitive).

        Args:
            model_name (str): The name of the model to check.

        Returns:
            bool: True if the model is supported, False otherwise.
        """
        lowered = model_name.lower()
        if lowered in cls._LATE_INTERACTION_MULTIMODAL_MODELS:
            return True

        # update cached list in case custom models were added
        cls._LATE_INTERACTION_MULTIMODAL_MODELS = {
            name.lower() for name in cls.list_late_interaction_multimodal_models()
        }
        return lowered in cls._LATE_INTERACTION_MULTIMODAL_MODELS

    @classmethod
    def is_supported_sparse_model(cls, model_name: str) -> bool:
        """Checks if the sparse model is supported by fastembed (case-insensitive).

        Args:
            model_name (str): The name of the model to check.

        Returns:
            bool: True if the model is supported, False otherwise.
        """
        lowered = model_name.lower()
        if lowered in cls._SPARSE_MODELS:
            return True

        # update cached list in case custom models were added
        cls._SPARSE_MODELS = {name.lower() for name in cls.list_sparse_models()}
        return lowered in cls._SPARSE_MODELS
# region deprecated
# Prefer the methods built into QdrantClient, e.g. list_supported_text_models, list_supported_idf_models, etc.
# Each constant below is computed once at import time and stays empty when the
# corresponding fastembed class is falsy.

# Dense text models: model name -> (dimension, distance metric).
SUPPORTED_EMBEDDING_MODELS: dict[str, tuple[int, models.Distance]] = {}
if TextEmbedding:
    SUPPORTED_EMBEDDING_MODELS = {
        spec["model"]: (spec["dim"], models.Distance.COSINE)
        for spec in TextEmbedding.list_supported_models()
    }

# Sparse models: model name -> full description (the "model" key is kept inside).
SUPPORTED_SPARSE_EMBEDDING_MODELS: dict[str, dict[str, Any]] = {}
if SparseTextEmbedding:
    SUPPORTED_SPARSE_EMBEDDING_MODELS = {
        spec["model"]: spec for spec in SparseTextEmbedding.list_supported_models()
    }

# Names of the sparse models whose description has a truthy "requires_idf" entry.
IDF_EMBEDDING_MODELS: set[str] = set()
if SparseTextEmbedding:
    IDF_EMBEDDING_MODELS = {
        spec["model"]
        for spec in SparseTextEmbedding.list_supported_models()
        if spec.get("requires_idf")
    }

# Late interaction text models: model name -> (dimension, distance metric).
_LATE_INTERACTION_EMBEDDING_MODELS: dict[str, tuple[int, models.Distance]] = {}
if LateInteractionTextEmbedding:
    _LATE_INTERACTION_EMBEDDING_MODELS = {
        spec["model"]: (spec["dim"], models.Distance.COSINE)
        for spec in LateInteractionTextEmbedding.list_supported_models()
    }

# Dense image models: model name -> (dimension, distance metric).
_IMAGE_EMBEDDING_MODELS: dict[str, tuple[int, models.Distance]] = {}
if ImageEmbedding:
    _IMAGE_EMBEDDING_MODELS = {
        spec["model"]: (spec["dim"], models.Distance.COSINE)
        for spec in ImageEmbedding.list_supported_models()
    }

# Late interaction multimodal models: model name -> (dimension, distance metric).
_LATE_INTERACTION_MULTIMODAL_EMBEDDING_MODELS: dict[str, tuple[int, models.Distance]] = {}
if LateInteractionMultimodalEmbedding:
    _LATE_INTERACTION_MULTIMODAL_EMBEDDING_MODELS = {
        spec["model"]: (spec["dim"], models.Distance.COSINE)
        for spec in LateInteractionMultimodalEmbedding.list_supported_models()
    }
# endregion
|