
Mindspore models

labridge.models.local.mindspore_models

labridge.models.local.mindspore_models.MindsporeEmbedding

Bases: BaseEmbedding

The embedding model based on the Mindspore framework and MindNLP.

ATTRIBUTES

cache_folder (Optional[str]): Cache folder for Hugging Face files.
normalize (bool): Whether to normalize embeddings.
query_instruction (Optional[str]): Instruction to prepend to query text.
text_instruction (Optional[str]): Instruction to prepend to text.
_embed_model (SentenceTransformer): The loaded embedding model.
_device (str): The deployed device.

Source code in labridge\models\local\mindspore_models.py, lines 172-253
class MindsporeEmbedding(BaseEmbedding):
	r"""
	The embedding model based on the Mindspore framework and MindNLP.

	Attributes:
		cache_folder (Optional[str]): Cache folder for Hugging Face files.
		normalize (bool): Whether to normalize embeddings.
		query_instruction (Optional[str]): Instruction to prepend to query text.
		text_instruction (Optional[str]): Instruction to prepend to text.
		_embed_model (SentenceTransformer): The loaded embedding model.
		_device (str): The deployed device.
	"""
	cache_folder: Optional[str] = Field(
		description="Cache folder for Hugging Face files."
	)
	normalize: bool = Field(
		default=True,
		description="Normalize embeddings or not."
	)
	query_instruction: Optional[str] = Field(
		description="Instruction to prepend to query text."
	)
	text_instruction: Optional[str] = Field(
		description="Instruction to prepend to text."
	)
	_embed_model: Any = PrivateAttr()
	_device: str = PrivateAttr()

	def __init__(
		self,
		model_name: str = DEFAULT_MINDSPORE_EMBEDDING,
		device: str = "Ascend",
		query_instruction: Optional[str] = None,
		text_instruction: Optional[str] = None,
		normalize: bool = True,
		embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE,
		cache_folder: Optional[str] = None,
	):
		super().__init__(
			model_name=model_name,
			embed_batch_size=embed_batch_size,
			normalize=normalize,
		)
		cache_folder = cache_folder or get_cache_dir()
		self._device = device

		self._embed_model = SentenceTransformer(
			model_name_or_path=model_name,
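			# Note: the SentenceTransformer itself is loaded on "CPU"; the device
			# argument is only recorded in self._device and not used below.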
			device="CPU",
			cache_folder=cache_folder,
			prompts={
				"query": query_instruction
						 or get_query_instruct_for_model_name(model_name),
				"text": text_instruction
						or get_text_instruct_for_model_name(model_name),
			}
		)

	def _get_query_embedding(self, query: str) -> Embedding:
		r""" Get the embeddings of a query from the Mindspore embedding model. """
		return self._embed(query, prompt_name="query")

	def _embed(
		self,
		sentences: str,
		prompt_name: Optional[str] = None,
	) -> Embedding:
		r""" Mindspore embedding. """
		embedding = self._embed_model.encode(
			sentences,
			prompt_name=prompt_name,
			batch_size=self.embed_batch_size,
			normalize_embeddings=self.normalize,
		)
		return list(embedding.numpy())

	async def _aget_query_embedding(self, query: str) -> Embedding:
		return self._get_query_embedding(query=query)

	def _get_text_embedding(self, text: str) -> Embedding:
		r""" Get the embeddings of a text from the Mindspore embedding model. """
		return self._embed(text, prompt_name="text")
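
For orientation, a minimal usage sketch; the checkpoint name below is an assumption for illustration, and any SentenceTransformer-compatible checkpoint that MindNLP can load should work:

from labridge.models.local.mindspore_models import MindsporeEmbedding

# Hypothetical checkpoint; substitute your own model name or local path.
embed_model = MindsporeEmbedding(
	model_name="BAAI/bge-small-zh-v1.5",
	device="Ascend",
	embed_batch_size=8,
)

# Queries are encoded with the "query" prompt, plain texts with the "text" prompt.
query_vec = embed_model.get_query_embedding("What does MindNLP provide?")
text_vec = embed_model.get_text_embedding("MindNLP is an NLP library built on MindSpore.")
print(len(query_vec), len(text_vec))  # both equal the model's embedding dimension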

labridge.models.local.mindspore_models.MindsporeLLM

Bases: CustomLLM

The LLM based on the Mindspore framework and MindNLP.

ATTRIBUTES

model_name (str): The model name to use, from HuggingFace or a local model directory.
tokenizer_name (str): The name of the tokenizer to use from HuggingFace.
context_window (int): The maximum number of tokens available for input.
max_new_tokens (int): The maximum number of tokens to generate.
generate_kwargs (dict): The kwargs to pass to the model during generation.
is_chat_model (bool): Whether the model is a chat model.
_model (AutoModelForCausalLM): The loaded model.
_tokenizer (AutoTokenizer): The loaded tokenizer.

Source code in labridge\models\local\mindspore_models.py, lines 41-169
class MindsporeLLM(CustomLLM):
	r"""
	The LLM based on the Mindspore framework and MindNLP.

	Attributes:
		model_name (str): The model name to use, from HuggingFace or a local model directory.
		tokenizer_name (str): The name of the tokenizer to use from HuggingFace.
		context_window (int): The maximum number of tokens available for input.
		max_new_tokens (int): The maximum number of tokens to generate.
		generate_kwargs (dict): The kwargs to pass to the model during generation.
		is_chat_model (bool): Whether the model is a chat model.
		_model (AutoModelForCausalLM): The loaded model.
		_tokenizer (AutoTokenizer): The loaded tokenizer.
	"""
	num_output: int = 1024

	model_name: str = Field(
		default=DEFAULT_MINDSPORE_MODEL,
		description=(
			"The model name to use from HuggingFace. "
		),
	)
	tokenizer_name: str = Field(
		default=DEFAULT_MINDSPORE_MODEL,
		description=(
			"The name of the tokenizer to use from HuggingFace. "
			"Unused if `tokenizer` is passed in directly."
		),
	)
	context_window: int = Field(
		default=DEFAULT_CONTEXT_WINDOW,
		description="The maximum number of tokens available for input.",
		gt=0,
	)
	max_new_tokens: int = Field(
		default=DEFAULT_NUM_OUTPUTS,
		description="The maximum number of tokens to generate.",
		gt=0,
	)
	generate_kwargs: dict = Field(
		default=DEFAULT_MINDSPORE_GENERATE_KWARGS,
		# default_factory=dict,
		description="The kwargs to pass to the model during generation.",
	)
	is_chat_model: bool = Field(
		default=False,
		description=(
				LLMMetadata.__fields__["is_chat_model"].field_info.description
				+ " Be sure to verify that you either pass an appropriate tokenizer "
				"that can convert prompts to properly formatted chat messages or a "
				"`messages_to_prompt` that does so."
		),
	)

	_model: Any = PrivateAttr()
	_tokenizer: Any = PrivateAttr()

	def __init__(
		self,
		model_name: str = DEFAULT_MINDSPORE_MODEL,
		tokenizer_name: str = DEFAULT_MINDSPORE_MODEL,
		context_window: int = DEFAULT_CONTEXT_WINDOW,
		max_new_tokens: int = DEFAULT_NUM_OUTPUTS,
		generate_kwargs: Optional[dict] = None,
		is_chat_model: bool = False,
		system_prompt: str = "",
		messages_to_prompt: Optional[Callable[[Sequence[ChatMessage]], str]] = None,
		completion_to_prompt: Optional[Callable[[str], str]] = None,
	):
		self._model = AutoModelForCausalLM.from_pretrained(
			pretrained_model_name_or_path=model_name,
			mirror='modelscope',
			ms_dtype=mindspore.float16,
		).eval()

		config_dict = self._model.config.to_dict()
		model_context_window = int(
			config_dict.get("max_position_embeddings", context_window)
		)
		if model_context_window and model_context_window < context_window:
			context_window = model_context_window

		self._tokenizer = AutoTokenizer.from_pretrained(
			pretrained_model_name_or_path=model_name,
			mirror='modelscope',
			max_length=context_window,
		)
		super().__init__(
			context_window=context_window,
			max_new_tokens=max_new_tokens,
			tokenizer_name=tokenizer_name,
			model_name=model_name,
			generate_kwargs=generate_kwargs or DEFAULT_MINDSPORE_GENERATE_KWARGS,
			is_chat_model=is_chat_model,
			system_prompt=system_prompt,
			messages_to_prompt=messages_to_prompt,
			completion_to_prompt=completion_to_prompt,
		)

	@property
	def metadata(self) -> LLMMetadata:
		"""Get LLM metadata."""
		return LLMMetadata(
			context_window=self.context_window,
			num_output=self.max_new_tokens,
			model_name=self.model_name,
			is_chat_model=self.is_chat_model,
		)

	@llm_completion_callback()
	def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
		r""" Get response from the Mindspore LLM. """
		inputs = self._tokenizer(prompt, return_tensors="ms")
		outputs = self._model.generate(**inputs, **self.generate_kwargs)
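		# Drop the prompt tokens so that only the newly generated ids are decoded.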
		outputs = outputs[:, inputs['input_ids'].shape[1]:]
		response_text = self._tokenizer.decode(outputs[0], skip_special_tokens=True)
		return CompletionResponse(text=response_text)

	@llm_completion_callback()
	def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
		r""" Get response from the Mindspore LLM. """
		inputs = self._tokenizer(prompt, return_tensors="ms")
		outputs = self._model.generate(**inputs, **self.generate_kwargs)
		outputs = outputs[:, inputs['input_ids'].shape[1]:]
		response_text = self._tokenizer.decode(outputs[0], skip_special_tokens=True)
		gen_tokens = ""
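		# The full response was generated above; yield it character by character to emulate streaming.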
		for token in response_text:
			gen_tokens += token
			yield CompletionResponse(text=gen_tokens, delta=token)
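
A minimal construction sketch; the model name is an assumption for illustration. Note that the constructor lowers context_window to the checkpoint's max_position_embeddings when that value is smaller:

from labridge.models.local.mindspore_models import MindsporeLLM

# Hypothetical model name; any causal LM that MindNLP can load should work.
llm = MindsporeLLM(
	model_name="Qwen/Qwen1.5-0.5B-Chat",
	max_new_tokens=256,
)
print(llm.metadata.context_window, llm.metadata.num_output)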

labridge.models.local.mindspore_models.MindsporeLLM.metadata: LLMMetadata property

Get LLM metadata.

labridge.models.local.mindspore_models.MindsporeLLM.complete(prompt, **kwargs)

Get response from the Mindspore LLM.

Source code in labridge\models\local\mindspore_models.py, lines 150-157
@llm_completion_callback()
def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
	r""" Get response from the Mindspore LLM. """
	inputs = self._tokenizer(prompt, return_tensors="ms")
	outputs = self._model.generate(**inputs, **self.generate_kwargs)
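	# Drop the prompt tokens so that only the newly generated ids are decoded.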
	outputs = outputs[:, inputs['input_ids'].shape[1]:]
	response_text = self._tokenizer.decode(outputs[0], skip_special_tokens=True)
	return CompletionResponse(text=response_text)
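
Usage follows the standard llama-index completion API. Because the prompt tokens are sliced off before decoding, the returned text contains only the newly generated continuation (llm is the instance from the construction sketch above):

response = llm.complete("MindSpore is a")
print(response.text)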

labridge.models.local.mindspore_models.MindsporeLLM.stream_complete(prompt, **kwargs)

Stream the response from the Mindspore LLM.

Source code in labridge\models\local\mindspore_models.py, lines 159-169
@llm_completion_callback()
def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
	r""" Get response from the Mindspore LLM. """
	inputs = self._tokenizer(prompt, return_tensors="ms")
	outputs = self._model.generate(**inputs, **self.generate_kwargs)
	outputs = outputs[:, inputs['input_ids'].shape[1]:]
	response_text = self._tokenizer.decode(outputs[0], skip_special_tokens=True)
	gen_tokens = ""
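	# The full response was generated above; yield it character by character to emulate streaming.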
	for token in response_text:
		gen_tokens += token
		yield CompletionResponse(text=gen_tokens, delta=token)
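
Note that this method generates the complete response first and then yields it character by character, so it emulates streaming over a finished generation rather than producing tokens incrementally. A consumption sketch:

for chunk in llm.stream_complete("Explain what MindNLP is."):
	print(chunk.delta, end="", flush=True)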