跳转至

Temporary paper retriever

labridge.func_modules.paper.retrieve.temporary_paper_retriever

labridge.func_modules.paper.retrieve.temporary_paper_retriever.RecentPaperRetriever

This class is the retriever that searches the recent papers store of a specific user.

PARAMETER DESCRIPTION
embed_model

The embedding model to use. If not specified, Settings.embed_model will be used.

TYPE: BaseEmbedding

final_use_context

Whether to use the context nodes as parts of the retrieved results.

TYPE: bool DEFAULT: True

first_top_k

The similarity_top_k used in the first retrieval pass. Refer to the method retrieve for details.

TYPE: int DEFAULT: None

secondary_top_k

The similarity_top_k used in the secondary retrieval pass. Refer to the method retrieve for details.

TYPE: int DEFAULT: None

Source code in labridge\func_modules\paper\retrieve\temporary_paper_retriever.py
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
class RecentPaperRetriever:
	r"""
	Retriever that searches the recent papers store of a specific user.

	Retrieval runs in two passes (see `retrieve`): the first pass matches `paper_info` to find
	the relevant papers, the secondary pass searches for `item_to_be_retrieved` only among the
	node ids returned by the first pass.

	Args:
		embed_model (BaseEmbedding): The embedding model to use. If not specified, the `Settings.embed_model` will be used.
		final_use_context (bool): Whether to use the context nodes as parts of the retrieved results.
		first_top_k (int): The `similarity_top_k` in the first retrieving.
			Defaults to `RECENT_PAPER_INFO_SIMILARITY_TOP_K`. Refer to the method `retrieve` for details.
		secondary_top_k (int): The `similarity_top_k` in the secondary retrieving.
			Defaults to `RECENT_PAPER_SIMILARITY_TOP_K`. Refer to the method `retrieve` for details.
	"""
	def __init__(
		self,
		embed_model: BaseEmbedding,
		final_use_context: bool = True,
		first_top_k: int = None,
		secondary_top_k: int = None,
	):
		# The store and the retriever are created by `retrieve` / `aretrieve`,
		# once the user id is known.
		self.paper_store = None
		self.paper_retriever = None
		self._embed_model = embed_model or Settings.embed_model
		self._final_use_context = final_use_context
		self._first_top_k = first_top_k or RECENT_PAPER_INFO_SIMILARITY_TOP_K
		self._relevant_top_k = secondary_top_k or RECENT_PAPER_SIMILARITY_TOP_K
		self.fs = fsspec.filesystem("file")

	def _add_context(
		self,
		content_nodes: List[NodeWithScore]
	) -> List[NodeWithScore]:
		r"""
		Add context nodes for the retrieved nodes.

		For every retrieved node, its previous and next nodes are fetched from the docstore and
		placed right before / after it, unless they are already part of the result.

		Args:
			content_nodes (List[NodeWithScore]): The retrieved nodes.

		Returns:
			List[NodeWithScore]: Concatenated nodes including context nodes.
		"""
		docstore = self.paper_store.vector_index.docstore
		# A set gives O(1) membership tests; it tracks both the retrieved nodes
		# and the context nodes added so far, so nothing is emitted twice.
		seen_ids = {node.node_id for node in content_nodes}
		final_nodes = []

		for node in content_nodes:
			prev_node_info = node.node.prev_node
			next_node_info = node.node.next_node
			if prev_node_info is not None and prev_node_info.node_id not in seen_ids:
				seen_ids.add(prev_node_info.node_id)
				prev_node = docstore.get_node(node_id=prev_node_info.node_id)
				final_nodes.append(NodeWithScore(node=prev_node))
			final_nodes.append(node)
			if next_node_info is not None and next_node_info.node_id not in seen_ids:
				seen_ids.add(next_node_info.node_id)
				next_node = docstore.get_node(node_id=next_node_info.node_id)
				final_nodes.append(NodeWithScore(node=next_node))
		return final_nodes

	def get_paper_retriever(self) -> VectorIndexRetriever:
		r"""
		Get the default paper retriever, with a node_type_filter.

		The retriever is built from the vector index of the current `paper_store`.

		Returns:
			VectorIndexRetriever: The paper retriever.
		"""
		paper_retriever = self.paper_store.vector_index.as_retriever(
			similarity_top_k=self._relevant_top_k,
			filters=MetadataFilters(
				filters=[self.node_type_filter]
			),
		)
		return paper_retriever

	@property
	def node_type_filter(self) -> MetadataFilter:
		r"""
		The node type filter that filters nodes with type `TMP_PAPER_DOC_NODE_TYPE`.

		Returns:
			MetadataFilter: The node type metadata filter.
		"""
		doc_node_filter = MetadataFilter(
			key=TMP_PAPER_NODE_TYPE_KEY,
			value=TMP_PAPER_DOC_NODE_TYPE,
			operator=FilterOperator.EQ,
		)
		return doc_node_filter

	def get_date_filter(self, date_list: List[str]) -> MetadataFilter:
		r"""
		Get the date filter that filters according to the creation date of nodes.

		Args:
			date_list (List[str]): The date candidates. Only nodes created in one of these dates will be retrieved.

		Returns:
			MetadataFilter: The date filter.
		"""
		date_filter = MetadataFilter(
			key=TMP_PAPER_DATE,
			value=date_list,
			operator=FilterOperator.ANY,
		)
		return date_filter

	def reset_retriever(self):
		r"""
		Reset the paper retriever:

		- reset the node_ids that confine the retrieving range.
		- reset the similarity_top_k.
		- reset the MetadataFilters.

		Nothing is done if no retriever has been created yet.

		Returns:
			None.
		"""
		if self.paper_retriever:
			self.paper_retriever._node_ids = None
			self.paper_retriever._similarity_top_k = self._first_top_k
			self.paper_retriever._filters = MetadataFilters(
				filters=[self.node_type_filter,]
			)

	def first_retrieve(self, paper_info: str) -> List[str]:
		r"""
		First retrieve: retrieve according to the paper_info.

		Args:
			paper_info (str): The information about the paper.

		Returns:
			List[str]: all the node ids of relevant papers.
		"""
		self.paper_retriever._similarity_top_k = self._first_top_k
		info_relevant_nodes = self.paper_retriever.retrieve(paper_info)
		confine_node_ids = self.paper_store.get_all_relevant_node_ids(
			node_ids=[node.node_id for node in info_relevant_nodes]
		)
		return confine_node_ids

	async def afirst_retrieve(self, paper_info: str) -> List[str]:
		r"""
		Asynchronous first retrieve: retrieve according to the paper_info.

		Args:
			paper_info (str): The information about the paper.

		Returns:
			List[str]: all the node ids of relevant papers.
		"""
		self.paper_retriever._similarity_top_k = self._first_top_k
		info_relevant_nodes = await self.paper_retriever.aretrieve(paper_info)
		confine_node_ids = self.paper_store.get_all_relevant_node_ids(
			node_ids=[node.node_id for node in info_relevant_nodes]
		)
		return confine_node_ids

	def secondary_retrieve(
		self,
		item_to_be_retrieved: str,
		confine_node_ids: List[str],
	) -> List[NodeWithScore]:
		r"""
		Secondary retrieve in the confined nodes range.

		Args:
			item_to_be_retrieved (str): The aspects to be retrieved in a paper.
			confine_node_ids (List[str]): The confined node ids.

		Returns:
			List[NodeWithScore]: The retrieved relevant nodes.
		"""
		self.paper_retriever._node_ids = confine_node_ids
		# The first pass leaves `_similarity_top_k` at `_first_top_k`; switch to the
		# secondary value, otherwise `secondary_top_k` would never take effect.
		self.paper_retriever._similarity_top_k = self._relevant_top_k
		nodes = self.paper_retriever.retrieve(item_to_be_retrieved)
		return nodes

	async def asecondary_retrieve(
		self,
		item_to_be_retrieved: str,
		confine_node_ids: List[str],
	) -> List[NodeWithScore]:
		r"""
		Asynchronous secondary retrieve in the confined nodes range.

		Args:
			item_to_be_retrieved (str): The aspects to be retrieved in a paper.
			confine_node_ids (List[str]): The confined node ids.

		Returns:
			List[NodeWithScore]: The retrieved relevant nodes.
		"""
		self.paper_retriever._node_ids = confine_node_ids
		# The first pass leaves `_similarity_top_k` at `_first_top_k`; switch to the
		# secondary value, otherwise `secondary_top_k` would never take effect.
		self.paper_retriever._similarity_top_k = self._relevant_top_k
		nodes = await self.paper_retriever.aretrieve(item_to_be_retrieved)
		return nodes

	def _prepare_retrieval(
		self,
		paper_info: str,
		user_id: str,
		start_date: str = None,
		end_date: str = None,
	) -> None:
		r"""
		Prepare the paper store and the paper retriever for a retrieval.
		Shared by `retrieve` and `aretrieve`.

		- (re)load the store and rebuild the retriever when the user changes.
		- put `paper_info` into the store if it is an existing local file that is not stored yet.
		- reset the retriever, and add a date filter if both dates are given.

		Args:
			paper_info (str): The paper information, possibly a file path.
			user_id (str): The user whose store is searched.
			start_date (str): Optional start date in the format 'Year-Month-Day'.
			end_date (str): Optional end date in the format 'Year-Month-Day'.

		Returns:
			None.
		"""
		is_local_file = self.fs.exists(paper_info)

		if self.paper_store is None or self.paper_store.user_id != user_id:
			self.paper_store = RecentPaperStore.from_user_id(
				user_id=user_id,
				embed_model=self._embed_model,
			)
			if is_local_file:
				print(f"Putting {paper_info} into storage.")
				self.paper_store.put(paper_file_path=paper_info)
			self.paper_retriever = self.get_paper_retriever()

		self.reset_retriever()

		# A file that the current store does not contain yet.
		if is_local_file and not self.paper_store.file_exists(file_path=paper_info):
			self.paper_store.put(paper_file_path=paper_info)

		# The date filter is only applied when both bounds are given.
		if start_date is not None and end_date is not None:
			date_list = parse_date_list(start_date_str=start_date, end_date_str=end_date)
			self.paper_retriever._filters = MetadataFilters(
				filters=[
					self.node_type_filter,
					self.get_date_filter(date_list=date_list),
				]
			)

	@dispatcher.span
	def retrieve(
		self,
		paper_info: str,
		item_to_be_retrieved: str,
		user_id: str,
		start_date: str = None,
		end_date: str = None,
		**kwargs: Any,
	) -> List[NodeWithScore]:
		r"""
		This tool is used to retrieve in the recent papers storage of a specific user.
		These information should be provided:
		1. The paper information, such as title or save path.
		2. The specific question that you want to obtain answer from the paper.
		3. The user id.

		Args:
			paper_info (str): This argument is necessary.
				It is the relevant information of the paper.
				For example, it can be the paper title, or its save path.
			item_to_be_retrieved (str): This argument is necessary.
				It denotes the specific question that you want to retrieve in a specific paper.
			user_id (str): This argument is necessary.
				The user_id of a lab member.
			start_date (str): This argument is optional. It denotes the start date in the format 'Year-Month-Day'.
				If both start_date and end_date are specified, only papers which are added to storage between the
				start_date and end_date will be retrieved.
			end_date: This argument is optional. It denotes the end date in the format 'Year-Month-Day'.
			**kwargs: Other keyword arguments will be ignored.

		Returns:
			The retrieved results.
		"""
		# This docstring is used as the corresponding tool description.
		self._prepare_retrieval(
			paper_info=paper_info,
			user_id=user_id,
			start_date=start_date,
			end_date=end_date,
		)

		# Pass 1 finds the node ids of the relevant papers, pass 2 searches inside them.
		node_ids_range = self.first_retrieve(paper_info=paper_info)
		relevant_nodes = self.secondary_retrieve(
			item_to_be_retrieved=item_to_be_retrieved,
			confine_node_ids=node_ids_range,
		)
		if self._final_use_context:
			relevant_nodes = self._add_context(content_nodes=relevant_nodes)

		return relevant_nodes

	@dispatcher.span
	async def aretrieve(
		self,
		paper_info: str,
		item_to_be_retrieved: str,
		user_id: str,
		start_date: str = None,
		end_date: str = None,
		**kwargs: Any,
	) -> List[NodeWithScore]:
		r"""
		This tool is used to retrieve in the recent papers storage of a specific user.
		These information should be provided:
		1. The paper information, such as title or save path.
		2. The specific question that you want to obtain answer from the paper.
		3. The user id.

		Args:
			paper_info (str): This argument is necessary.
				It is the relevant information of the paper.
				For example, it can be the paper title, or its save path.
			item_to_be_retrieved (str): This argument is necessary.
				It denotes the specific question that you want to retrieve in a specific paper.
			user_id (str): This argument is necessary.
				The user_id of a lab member.
			start_date (str): This argument is optional. It denotes the start date in the format 'Year-Month-Day'.
				If both start_date and end_date are specified, only papers which are added to storage between the
				start_date and end_date will be retrieved.
			end_date: This argument is optional. It denotes the end date in the format 'Year-Month-Day'.
			**kwargs: Other keyword arguments will be ignored.

		Returns:
			The retrieved results.
		"""
		# This docstring is used as the corresponding tool description.
		self._prepare_retrieval(
			paper_info=paper_info,
			user_id=user_id,
			start_date=start_date,
			end_date=end_date,
		)

		# Pass 1 finds the node ids of the relevant papers, pass 2 searches inside them.
		node_ids_range = await self.afirst_retrieve(paper_info=paper_info)
		relevant_nodes = await self.asecondary_retrieve(
			item_to_be_retrieved=item_to_be_retrieved,
			confine_node_ids=node_ids_range,
		)
		if self._final_use_context:
			relevant_nodes = self._add_context(content_nodes=relevant_nodes)
		return relevant_nodes

labridge.func_modules.paper.retrieve.temporary_paper_retriever.RecentPaperRetriever.node_type_filter: MetadataFilter property

The node type filter that filters nodes with type TMP_PAPER_DOC_NODE_TYPE.

RETURNS DESCRIPTION
MetadataFilter

The node type metadata filter.

TYPE: MetadataFilter

labridge.func_modules.paper.retrieve.temporary_paper_retriever.RecentPaperRetriever.afirst_retrieve(paper_info) async

First retrieve: retrieve according to the paper_info.

PARAMETER DESCRIPTION
paper_info

The information about the paper.

TYPE: str

RETURNS DESCRIPTION
List[str]

List[str]: all the node ids of relevant papers.

Source code in labridge\func_modules\paper\retrieve\temporary_paper_retriever.py
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
async def afirst_retrieve(self, paper_info: str) -> List[str]:
	r"""
	Asynchronous first pass: search the store with the paper information.

	Args:
		paper_info (str): The information about the paper.

	Returns:
		List[str]: all the node ids of relevant papers.
	"""
	retriever = self.paper_retriever
	# The first pass uses its own similarity_top_k.
	retriever._similarity_top_k = self._first_top_k
	matched_nodes = await retriever.aretrieve(paper_info)
	matched_ids = [hit.node_id for hit in matched_nodes]
	# Let the store expand the matched ids to the node ids of the relevant papers.
	return self.paper_store.get_all_relevant_node_ids(node_ids=matched_ids)

labridge.func_modules.paper.retrieve.temporary_paper_retriever.RecentPaperRetriever.aretrieve(paper_info, item_to_be_retrieved, user_id, start_date=None, end_date=None, **kwargs) async

This tool is used to retrieve in the recent papers storage of a specific user. These information should be provided: 1. The paper information, such as title or save path. 2. The specific question that you want to obtain answer from the paper. 3. The user id.

PARAMETER DESCRIPTION
paper_info

This argument is necessary. It is the relevant information of the paper. For example, it can be the paper title, or its save path.

TYPE: str

item_to_be_retrieved

This argument is necessary. It denotes the specific question that you want to retrieve in a specific paper.

TYPE: str

user_id

This argument is necessary. The user_id of a lab member.

TYPE: str

start_date

This argument is optional. It denotes the start date in the format 'Year-Month-Day'. If both start_date and end_date are specified, only papers which are added to storage between the start_date and end_date will be retrieved.

TYPE: str DEFAULT: None

end_date

This argument is optional. It denotes the end date in the format 'Year-Month-Day'.

TYPE: str DEFAULT: None

**kwargs

Other keyword arguments will be ignored.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
List[NodeWithScore]

The retrieved results.

Source code in labridge\func_modules\paper\retrieve\temporary_paper_retriever.py
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
@dispatcher.span
async def aretrieve(
	self,
	paper_info: str,
	item_to_be_retrieved: str,
	user_id: str,
	start_date: str = None,
	end_date: str = None,
	**kwargs: Any,
) -> List[NodeWithScore]:
	r"""
	This tool is used to retrieve in the recent papers storage of a specific user.
	These information should be provided:
	1. The paper information, such as title or save path.
	2. The specific question that you want to obtain answer from the paper.
	3. The user id.

	Args:
		paper_info (str): This argument is necessary.
			It is the relevant information of the paper.
			For example, it can be the paper title, or its save path.
		item_to_be_retrieved (str): This argument is necessary.
			It denotes the specific question that you want to retrieve in a specific paper.
		user_id (str): This argument is necessary.
			The user_id of a lab member.
		start_date (str): This argument is optional. It denotes the start date in the format 'Year-Month-Day'.
			If both start_date and end_date are specified, only papers which are added to storage between the
			start_date and end_date will be retrieved.
		end_date: This argument is optional. It denotes the end date in the format 'Year-Month-Day'.
		**kwargs: Other keyword arguments will be ignored.

	Returns:
		The retrieved results.
	"""
	# This docstring is used as the corresponding tool description.
	# (Re)load the store and rebuild the retriever when no store is loaded yet
	# or the loaded one belongs to another user.
	needs_new_store = self.paper_store is None or self.paper_store.user_id != user_id
	if needs_new_store:
		self.paper_store = RecentPaperStore.from_user_id(
			user_id=user_id,
			embed_model=self._embed_model,
		)
		if self.fs.exists(paper_info):
			self.paper_store.put(paper_file_path=paper_info)
		self.paper_retriever = self.get_paper_retriever()

	self.reset_retriever()

	# `paper_info` may be the path of a file that the store does not contain yet.
	if self.fs.exists(paper_info):
		if not self.paper_store.file_exists(file_path=paper_info):
			self.paper_store.put(paper_file_path=paper_info)

	# The date filter is only applied when both bounds are given.
	if start_date is not None and end_date is not None:
		candidate_dates = parse_date_list(start_date_str=start_date, end_date_str=end_date)
		self.paper_retriever._filters = MetadataFilters(
			filters=[
				self.node_type_filter,
				self.get_date_filter(date_list=candidate_dates),
			]
		)

	# Pass 1 finds the node ids of the relevant papers, pass 2 searches inside them.
	confined_ids = await self.afirst_retrieve(paper_info=paper_info)
	hits = await self.asecondary_retrieve(
		item_to_be_retrieved=item_to_be_retrieved,
		confine_node_ids=confined_ids,
	)
	if not self._final_use_context:
		return hits
	return self._add_context(content_nodes=hits)

labridge.func_modules.paper.retrieve.temporary_paper_retriever.RecentPaperRetriever.asecondary_retrieve(item_to_be_retrieved, confine_node_ids) async

Asynchronous secondary retrieve in the confined nodes range.

PARAMETER DESCRIPTION
item_to_be_retrieved

The aspects to be retrieved in a paper.

TYPE: str

confine_node_ids

The confined node ids.

TYPE: List[str]

RETURNS DESCRIPTION
List[NodeWithScore]

List[NodeWithScore]: The retrieved relevant nodes.

Source code in labridge\func_modules\paper\retrieve\temporary_paper_retriever.py
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
async def asecondary_retrieve(
	self,
	item_to_be_retrieved: str,
	confine_node_ids: List[str],
) -> List[NodeWithScore]:
	r"""
	Asynchronous secondary retrieve in the confined nodes range.

	Args:
		item_to_be_retrieved (str): The aspects to be retrieved in a paper.
		confine_node_ids (List[str]): The confined node ids.

	Returns:
		List[NodeWithScore]: The retrieved relevant nodes.
	"""
	self.paper_retriever._node_ids = confine_node_ids
	# The first pass leaves `_similarity_top_k` at `_first_top_k`; switch to the
	# secondary value, otherwise `secondary_top_k` would never take effect.
	self.paper_retriever._similarity_top_k = self._relevant_top_k
	nodes = await self.paper_retriever.aretrieve(item_to_be_retrieved)
	return nodes

labridge.func_modules.paper.retrieve.temporary_paper_retriever.RecentPaperRetriever.first_retrieve(paper_info)

First retrieve: retrieve according to the paper_info.

PARAMETER DESCRIPTION
paper_info

The information about the paper.

TYPE: str

RETURNS DESCRIPTION
List[str]

List[str]: all the node ids of relevant papers.

Source code in labridge\func_modules\paper\retrieve\temporary_paper_retriever.py
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
def first_retrieve(self, paper_info: str) -> List[str]:
	r"""
	First pass: search the store with the paper information.

	Args:
		paper_info (str): The information about the paper.

	Returns:
		List[str]: all the node ids of relevant papers.
	"""
	retriever = self.paper_retriever
	# The first pass uses its own similarity_top_k.
	retriever._similarity_top_k = self._first_top_k
	matched_nodes = retriever.retrieve(paper_info)
	matched_ids = [hit.node_id for hit in matched_nodes]
	# Let the store expand the matched ids to the node ids of the relevant papers.
	return self.paper_store.get_all_relevant_node_ids(node_ids=matched_ids)

labridge.func_modules.paper.retrieve.temporary_paper_retriever.RecentPaperRetriever.get_date_filter(date_list)

Get the date filter that filters according to the creation date of nodes.

PARAMETER DESCRIPTION
date_list

The date candidates. Only nodes created in one of these dates will be retrieved.

TYPE: List[str]

RETURNS DESCRIPTION
MetadataFilter

The date filter.

TYPE: MetadataFilter

Source code in labridge\func_modules\paper\retrieve\temporary_paper_retriever.py
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def get_date_filter(self, date_list: List[str]) -> MetadataFilter:
	r"""
	Build the metadata filter on the creation date of nodes.

	Args:
		date_list (List[str]): The date candidates. Only nodes created in one of these dates will be retrieved.

	Returns:
		MetadataFilter: The date filter.
	"""
	# `ANY` is applied to the `TMP_PAPER_DATE` metadata key with the candidate dates as value.
	return MetadataFilter(
		key=TMP_PAPER_DATE,
		value=date_list,
		operator=FilterOperator.ANY,
	)

labridge.func_modules.paper.retrieve.temporary_paper_retriever.RecentPaperRetriever.get_paper_retriever()

Get the default paper retriever, with a node_type_filter.

RETURNS DESCRIPTION
VectorIndexRetriever

The paper retriever.

TYPE: VectorIndexRetriever

Source code in labridge\func_modules\paper\retrieve\temporary_paper_retriever.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def get_paper_retriever(self) -> VectorIndexRetriever:
	r"""
	Build the default paper retriever on the vector index of the current store.

	The retriever starts with the secondary `similarity_top_k` and the node type filter only.

	Returns:
		VectorIndexRetriever: The paper retriever.
	"""
	type_only_filters = MetadataFilters(filters=[self.node_type_filter])
	index = self.paper_store.vector_index
	return index.as_retriever(
		similarity_top_k=self._relevant_top_k,
		filters=type_only_filters,
	)

labridge.func_modules.paper.retrieve.temporary_paper_retriever.RecentPaperRetriever.reset_retriever()

Reset the paper retriever:

  • reset the node_ids that confine the retrieving range.
  • reset the similarity_top_k.
  • reset the MetadataFilters.
RETURNS DESCRIPTION

None.

Source code in labridge\func_modules\paper\retrieve\temporary_paper_retriever.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
def reset_retriever(self):
	r"""
	Restore the paper retriever to its state before a retrieval:

	- clear the node_ids that confine the retrieving range.
	- set the similarity_top_k back to the first-pass value.
	- set the MetadataFilters back to the node type filter only.

	Nothing is done if there is no retriever yet.

	Returns:
		None.
	"""
	retriever = self.paper_retriever
	if not retriever:
		return
	retriever._node_ids = None
	retriever._similarity_top_k = self._first_top_k
	retriever._filters = MetadataFilters(filters=[self.node_type_filter])

labridge.func_modules.paper.retrieve.temporary_paper_retriever.RecentPaperRetriever.retrieve(paper_info, item_to_be_retrieved, user_id, start_date=None, end_date=None, **kwargs)

This tool is used to retrieve in the recent papers storage of a specific user. These information should be provided: 1. The paper information, such as title or save path. 2. The specific question that you want to obtain answer from the paper. 3. The user id.

PARAMETER DESCRIPTION
paper_info

This argument is necessary. It is the relevant information of the paper. For example, it can be the paper title, or its save path.

TYPE: str

item_to_be_retrieved

This argument is necessary. It denotes the specific question that you want to retrieve in a specific paper.

TYPE: str

user_id

This argument is necessary. The user_id of a lab member.

TYPE: str

start_date

This argument is optional. It denotes the start date in the format 'Year-Month-Day'. If both start_date and end_date are specified, only papers which are added to storage between the start_date and end_date will be retrieved.

TYPE: str DEFAULT: None

end_date

This argument is optional. It denotes the end date in the format 'Year-Month-Day'.

TYPE: str DEFAULT: None

**kwargs

Other keyword arguments will be ignored.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
List[NodeWithScore]

The retrieved results.

Source code in labridge\func_modules\paper\retrieve\temporary_paper_retriever.py
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
@dispatcher.span
def retrieve(
	self,
	paper_info: str,
	item_to_be_retrieved: str,
	user_id: str,
	start_date: str = None,
	end_date: str = None,
	**kwargs: Any,
) -> List[NodeWithScore]:
	r"""
	This tool is used to retrieve in the recent papers storage of a specific user.
	These information should be provided:
	1. The paper information, such as title or save path.
	2. The specific question that you want to obtain answer from the paper.
	3. The user id.

	Args:
		paper_info (str): This argument is necessary.
			It is the relevant information of the paper.
			For example, it can be the paper title, or its save path.
		item_to_be_retrieved (str): This argument is necessary.
			It denotes the specific question that you want to retrieve in a specific paper.
		user_id (str): This argument is necessary.
			The user_id of a lab member.
		start_date (str): This argument is optional. It denotes the start date in the format 'Year-Month-Day'.
			If both start_date and end_date are specified, only papers which are added to storage between the
			start_date and end_date will be retrieved.
		end_date: This argument is optional. It denotes the end date in the format 'Year-Month-Day'.
		**kwargs: Other keyword arguments will be ignored.

	Returns:
		The retrieved results.
	"""
	# This docstring is used as the corresponding tool description.
	# (Re)load the store and rebuild the retriever when no store is loaded yet
	# or the loaded one belongs to another user.
	needs_new_store = self.paper_store is None or self.paper_store.user_id != user_id
	if needs_new_store:
		self.paper_store = RecentPaperStore.from_user_id(
			user_id=user_id,
			embed_model=self._embed_model,
		)
		if self.fs.exists(paper_info):
			print(f"Putting {paper_info} into storage.")
			self.paper_store.put(paper_file_path=paper_info)
		self.paper_retriever = self.get_paper_retriever()

	self.reset_retriever()

	# `paper_info` may be the path of a file that the store does not contain yet.
	if self.fs.exists(paper_info):
		if not self.paper_store.file_exists(file_path=paper_info):
			self.paper_store.put(paper_file_path=paper_info)

	# The date filter is only applied when both bounds are given.
	if start_date is not None and end_date is not None:
		candidate_dates = parse_date_list(start_date_str=start_date, end_date_str=end_date)
		self.paper_retriever._filters = MetadataFilters(
			filters=[
				self.node_type_filter,
				self.get_date_filter(date_list=candidate_dates),
			]
		)

	# Pass 1 finds the node ids of the relevant papers, pass 2 searches inside them.
	confined_ids = self.first_retrieve(paper_info=paper_info)
	hits = self.secondary_retrieve(
		item_to_be_retrieved=item_to_be_retrieved,
		confine_node_ids=confined_ids,
	)
	if not self._final_use_context:
		return hits
	return self._add_context(content_nodes=hits)

labridge.func_modules.paper.retrieve.temporary_paper_retriever.RecentPaperRetriever.secondary_retrieve(item_to_be_retrieved, confine_node_ids)

Secondary retrieve in the confined nodes range.

PARAMETER DESCRIPTION
item_to_be_retrieved

The aspects to be retrieved in a paper.

TYPE: str

confine_node_ids

The confined node ids.

TYPE: List[str]

RETURNS DESCRIPTION
List[NodeWithScore]

List[NodeWithScore]: The retrieved relevant nodes.

Source code in labridge\func_modules\paper\retrieve\temporary_paper_retriever.py
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
def secondary_retrieve(
	self,
	item_to_be_retrieved: str,
	confine_node_ids: List[str],
) -> List[NodeWithScore]:
	r"""
	Secondary retrieve in the confined nodes range.

	Args:
		item_to_be_retrieved (str): The aspects to be retrieved in a paper.
		confine_node_ids (List[str]): The confined node ids.

	Returns:
		List[NodeWithScore]: The retrieved relevant nodes.
	"""
	self.paper_retriever._node_ids = confine_node_ids
	# The first pass leaves `_similarity_top_k` at `_first_top_k`; switch to the
	# secondary value, otherwise `secondary_top_k` would never take effect.
	self.paper_retriever._similarity_top_k = self._relevant_top_k
	nodes = self.paper_retriever.retrieve(item_to_be_retrieved)
	return nodes