
Mindspore models

labridge.models.local.mindspore_models

labridge.models.local.mindspore_models.MindsporeEmbedding

Bases: BaseEmbedding

The embedding model based on the Mindspore framework and MindNLP.

ATTRIBUTES

cache_folder (Optional[str]): Cache folder for Hugging Face files.
normalize (bool): Whether to normalize embeddings.
query_instruction (Optional[str]): Instruction to prepend to query text.
text_instruction (Optional[str]): Instruction to prepend to text.
_embed_model (SentenceTransformer): The loaded embedding model.
_device (str): The deployed device.

Source code in labridge\models\local\mindspore_models.py, lines 172-253
class MindsporeEmbedding(BaseEmbedding):
	r"""
	The embedding model based on the Mindspore framework and MindNLP.

	Attributes:
		cache_folder (Optional[str]): Cache folder for Hugging Face files.
		normalize (bool): Whether to normalize embeddings.
		query_instruction (Optional[str]): Instruction to prepend to query text.
		text_instruction (Optional[str]): Instruction to prepend to text.
		_embed_model (SentenceTransformer): The loaded embedding model.
		_device (str): The deployed device.
	"""
	cache_folder: Optional[str] = Field(
		description="Cache folder for Hugging Face files."
	)
	normalize: bool = Field(
		default=True,
		description="Normalize embeddings or not."
	)
	query_instruction: Optional[str] = Field(
		description="Instruction to prepend to query text."
	)
	text_instruction: Optional[str] = Field(
		description="Instruction to prepend to text."
	)
	_embed_model: Any = PrivateAttr()
	_device: str = PrivateAttr()

	def __init__(
		self,
		model_name: str = DEFAULT_MINDSPORE_EMBEDDING,
		device: str = "Ascend",
		query_instruction: Optional[str] = None,
		text_instruction: Optional[str] = None,
		normalize: bool = True,
		embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE,
		cache_folder: Optional[str] = None,
	):
		super().__init__(
			model_name=model_name,
			embed_batch_size=embed_batch_size,
			normalize=normalize,
		)
		cache_folder = cache_folder or get_cache_dir()
		self._device = device

		self._embed_model = SentenceTransformer(
			model_name_or_path=model_name,
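			# Note: the SentenceTransformer itself is loaded on "CPU"; the device
			# argument is only recorded in self._device and not used below.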
			device="CPU",
			cache_folder=cache_folder,
			prompts={
				"query": query_instruction
						 or get_query_instruct_for_model_name(model_name),
				"text": text_instruction
						or get_text_instruct_for_model_name(model_name),
			}
		)

	def _get_query_embedding(self, query: str) -> Embedding:
		r""" Get the embeddings of a query from the Mindspore embedding model. """
		return self._embed(query, prompt_name="query")

	def _embed(
		self,
		sentences: str,
		prompt_name: Optional[str] = None,
	) -> Embedding:
		r""" Mindspore embedding. """
		embedding = self._embed_model.encode(
			sentences,
			prompt_name=prompt_name,
			batch_size=self.embed_batch_size,
			normalize_embeddings=self.normalize,
		)
		return list(embedding.numpy())

	async def _aget_query_embedding(self, query: str) -> Embedding:
		return self._get_query_embedding(query=query)

	def _get_text_embedding(self, text: str) -> Embedding:
		r""" Get the embeddings of a text from the Mindspore embedding model. """
		return self._embed(text, prompt_name="text")
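
For orientation, a minimal usage sketch; the checkpoint name below is an assumption for illustration, and any SentenceTransformer-compatible checkpoint that MindNLP can load should work:

from labridge.models.local.mindspore_models import MindsporeEmbedding

# Hypothetical checkpoint; substitute your own model name or local path.
embed_model = MindsporeEmbedding(
	model_name="BAAI/bge-small-zh-v1.5",
	device="Ascend",
	embed_batch_size=8,
)

# Queries are encoded with the "query" prompt, plain texts with the "text" prompt.
query_vec = embed_model.get_query_embedding("What does MindNLP provide?")
text_vec = embed_model.get_text_embedding("MindNLP is an NLP library built on MindSpore.")
print(len(query_vec), len(text_vec))  # both equal the model's embedding dimension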

labridge.models.local.mindspore_models.MindsporeLLM

Bases: CustomLLM

The LLM based on the Mindspore framework and MindNLP.

ATTRIBUTES

model_name (str): The model name to use, from HuggingFace or a local model directory.
tokenizer_name (str): The name of the tokenizer to use from HuggingFace.
context_window (int): The maximum number of tokens available for input.
max_new_tokens (int): The maximum number of tokens to generate.
generate_kwargs (dict): The kwargs to pass to the model during generation.
is_chat_model (bool): Whether the model is a chat model.
_model (AutoModelForCausalLM): The loaded model.
_tokenizer (AutoTokenizer): The loaded tokenizer.

Source code in labridge\models\local\mindspore_models.py, lines 41-169
class MindsporeLLM(CustomLLM):
	r"""
	The LLM based on the Mindspore framework and MindNLP.

	Attributes:
		model_name (str): The model name to use, from HuggingFace or a local model directory.
		tokenizer_name (str): The name of the tokenizer to use from HuggingFace.
		context_window (int): The maximum number of tokens available for input.
		max_new_tokens (int): The maximum number of tokens to generate.
		generate_kwargs (dict): The kwargs to pass to the model during generation.
		is_chat_model (bool): Whether the model is a chat model.
		_model (AutoModelForCausalLM): The loaded model.
		_tokenizer (AutoTokenizer): The loaded tokenizer.
	"""
	num_output: int = 1024

	model_name: str = Field(
		default=DEFAULT_MINDSPORE_MODEL,
		description=(
			"The model name to use from HuggingFace. "
		),
	)
	tokenizer_name: str = Field(
		default=DEFAULT_MINDSPORE_MODEL,
		description=(
			"The name of the tokenizer to use from HuggingFace. "
			"Unused if `tokenizer` is passed in directly."
		),
	)
	context_window: int = Field(
		default=DEFAULT_CONTEXT_WINDOW,
		description="The maximum number of tokens available for input.",
		gt=0,
	)
	max_new_tokens: int = Field(
		default=DEFAULT_NUM_OUTPUTS,
		description="The maximum number of tokens to generate.",
		gt=0,
	)
	generate_kwargs: dict = Field(
		default=DEFAULT_MINDSPORE_GENERATE_KWARGS,
		# default_factory=dict,
		description="The kwargs to pass to the model during generation.",
	)
	is_chat_model: bool = Field(
		default=False,
		description=(
				LLMMetadata.__fields__["is_chat_model"].field_info.description
				+ " Be sure to verify that you either pass an appropriate tokenizer "
				"that can convert prompts to properly formatted chat messages or a "
				"`messages_to_prompt` that does so."
		),
	)

	_model: Any = PrivateAttr()
	_tokenizer: Any = PrivateAttr()

	def __init__(
		self,
		model_name: str = DEFAULT_MINDSPORE_MODEL,
		tokenizer_name: str = DEFAULT_MINDSPORE_MODEL,
		context_window: int = DEFAULT_CONTEXT_WINDOW,
		max_new_tokens: int = DEFAULT_NUM_OUTPUTS,
		generate_kwargs: Optional[dict] = None,
		is_chat_model: bool = False,
		system_prompt: str = "",
		messages_to_prompt: Optional[Callable[[Sequence[ChatMessage]], str]] = None,
		completion_to_prompt: Optional[Callable[[str], str]] = None,
	):
		self._model = AutoModelForCausalLM.from_pretrained(
			pretrained_model_name_or_path=model_name,
			mirror='modelscope',
			ms_dtype=mindspore.float16,
		).eval()

		config_dict = self._model.config.to_dict()
		model_context_window = int(
			config_dict.get("max_position_embeddings", context_window)
		)
		if model_context_window and model_context_window < context_window:
			context_window = model_context_window

		self._tokenizer = AutoTokenizer.from_pretrained(
			pretrained_model_name_or_path=model_name,
			mirror='modelscope',
			max_length=context_window,
		)
		super().__init__(
			context_window=context_window,
			max_new_tokens=max_new_tokens,
			tokenizer_name=tokenizer_name,
			model_name=model_name,
			generate_kwargs=generate_kwargs or DEFAULT_MINDSPORE_GENERATE_KWARGS,
			is_chat_model=is_chat_model,
			system_prompt=system_prompt,
			messages_to_prompt=messages_to_prompt,
			completion_to_prompt=completion_to_prompt,
		)

	@property
	def metadata(self) -> LLMMetadata:
		"""Get LLM metadata."""
		return LLMMetadata(
			context_window=self.context_window,
			num_output=self.max_new_tokens,
			model_name=self.model_name,
			is_chat_model=self.is_chat_model,
		)

	@llm_completion_callback()
	def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
		r""" Get response from the Mindspore LLM. """
		inputs = self._tokenizer(prompt, return_tensors="ms")
		outputs = self._model.generate(**inputs, **self.generate_kwargs)
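		# Drop the prompt tokens so that only the newly generated ids are decoded.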
		outputs = outputs[:, inputs['input_ids'].shape[1]:]
		response_text = self._tokenizer.decode(outputs[0], skip_special_tokens=True)
		return CompletionResponse(text=response_text)

	@llm_completion_callback()
	def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
		r""" Get response from the Mindspore LLM. """
		inputs = self._tokenizer(prompt, return_tensors="ms")
		outputs = self._model.generate(**inputs, **self.generate_kwargs)
		outputs = outputs[:, inputs['input_ids'].shape[1]:]
		response_text = self._tokenizer.decode(outputs[0], skip_special_tokens=True)
		gen_tokens = ""
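		# The full response was generated above; yield it character by character to emulate streaming.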
		for token in response_text:
			gen_tokens += token
			yield CompletionResponse(text=gen_tokens, delta=token)
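
A minimal construction sketch; the model name is an assumption for illustration. Note that the constructor lowers context_window to the checkpoint's max_position_embeddings when that value is smaller:

from labridge.models.local.mindspore_models import MindsporeLLM

# Hypothetical model name; any causal LM that MindNLP can load should work.
llm = MindsporeLLM(
	model_name="Qwen/Qwen1.5-0.5B-Chat",
	max_new_tokens=256,
)
print(llm.metadata.context_window, llm.metadata.num_output)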

labridge.models.local.mindspore_models.MindsporeLLM.metadata: LLMMetadata property

Get LLM metadata.

labridge.models.local.mindspore_models.MindsporeLLM.complete(prompt, **kwargs)

Get response from the Mindspore LLM.

Source code in labridge\models\local\mindspore_models.py, lines 150-157
@llm_completion_callback()
def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
	r""" Get response from the Mindspore LLM. """
	inputs = self._tokenizer(prompt, return_tensors="ms")
	outputs = self._model.generate(**inputs, **self.generate_kwargs)
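	# Drop the prompt tokens so that only the newly generated ids are decoded.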
	outputs = outputs[:, inputs['input_ids'].shape[1]:]
	response_text = self._tokenizer.decode(outputs[0], skip_special_tokens=True)
	return CompletionResponse(text=response_text)
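
Usage follows the standard llama-index completion API. Because the prompt tokens are sliced off before decoding, the returned text contains only the newly generated continuation (llm is the instance from the construction sketch above):

response = llm.complete("MindSpore is a")
print(response.text)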

labridge.models.local.mindspore_models.MindsporeLLM.stream_complete(prompt, **kwargs)

Stream the response from the Mindspore LLM.

Source code in labridge\models\local\mindspore_models.py, lines 159-169
@llm_completion_callback()
def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
	r""" Get response from the Mindspore LLM. """
	inputs = self._tokenizer(prompt, return_tensors="ms")
	outputs = self._model.generate(**inputs, **self.generate_kwargs)
	outputs = outputs[:, inputs['input_ids'].shape[1]:]
	response_text = self._tokenizer.decode(outputs[0], skip_special_tokens=True)
	gen_tokens = ""
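	# The full response was generated above; yield it character by character to emulate streaming.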
	for token in response_text:
		gen_tokens += token
		yield CompletionResponse(text=gen_tokens, delta=token)
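
Note that this method generates the complete response first and then yields it character by character, so it emulates streaming over a finished generation rather than producing tokens incrementally. A consumption sketch:

for chunk in llm.stream_complete("Explain what MindNLP is."):
	print(chunk.delta, end="", flush=True)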