跳转至

Paper retriever

labridge.tools.paper.temporary_papers.paper_retriever

labridge.tools.paper.temporary_papers.paper_retriever.RecentPaperRetrieveTool

Bases: RetrieverBaseTool

This tool retrieves from the recent papers store of a specific user. A multilevel retrieval strategy is used; for details, refer to the RecentPaperRetriever. A (start_date, end_date) pair can be provided to confine the retrieval range.

PARAMETER DESCRIPTION
embed_model

The embedding model to use. If not specified, the Settings.embed_model will be used.

TYPE: BaseEmbedding DEFAULT: None

first_top_k

The similarity_top_k used in the first retrieval stage. Defaults to RECENT_PAPER_INFO_SIMILARITY_TOP_K.

TYPE: int DEFAULT: RECENT_PAPER_INFO_SIMILARITY_TOP_K

secondary_top_k

The similarity_top_k used in the secondary retrieval stage. Defaults to RECENT_PAPER_SIMILARITY_TOP_K.

TYPE: int DEFAULT: RECENT_PAPER_SIMILARITY_TOP_K

Source code in labridge\tools\paper\temporary_papers\paper_retriever.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
class RecentPaperRetrieveTool(RetrieverBaseTool):
	r"""
	Retrieving tool over the recent papers store of a specific user.
	The retrieving itself is delegated to a `RecentPaperRetriever`, which uses a multilevel
	retrieving strategy; refer to that class for details.
	(start_date, end_date) can be provided to confine the retrieving range.

	Args:
		embed_model (BaseEmbedding): The embedding model to use. If not specified,
			the `Settings.embed_model` will be used.
		first_top_k (int): The similarity_top_k used in the first retrieving.
			Defaults to `RECENT_PAPER_INFO_SIMILARITY_TOP_K`.
		secondary_top_k (int): The similarity_top_k used in the secondary retrieving.
			Defaults to `RECENT_PAPER_SIMILARITY_TOP_K`.
		use_context (bool): Passed to the `RecentPaperRetriever` as `final_use_context`.
			Defaults to False.
	"""
	def __init__(
		self,
		embed_model: BaseEmbedding = None,
		first_top_k: int = RECENT_PAPER_INFO_SIMILARITY_TOP_K,
		secondary_top_k: int = RECENT_PAPER_SIMILARITY_TOP_K,
		use_context: bool = False,
	):
		# The same retriever instance serves both as the tool's retriever and as the
		# provider of the retrieve function handed to the base class.
		paper_retriever = RecentPaperRetriever(
			embed_model=embed_model,
			final_use_context=use_context,
			first_top_k=first_top_k,
			secondary_top_k=secondary_top_k,
		)
		super().__init__(
			name=RecentPaperRetrieveTool.__name__,
			retriever=paper_retriever,
			retrieve_fn=paper_retriever.retrieve,
		)

	def log(self, log_dict: dict) -> ToolLog:
		r"""
		Pack a description of one retrieving operation into a tool log.

		Args:
			log_dict (dict): The input keyword arguments and the retrieving logs.
				`user_id` and `item_to_be_retrieved` are required;
				`paper_file_path`, `start_date` and `end_date` are optional.

		Returns:
			ToolLog: The packed tool log. Only the system-side log is filled in,
				the user-side log is None.
		"""
		user = log_dict["user_id"]
		query = log_dict["item_to_be_retrieved"]
		target_path = log_dict.get("paper_file_path")
		date_range = (log_dict.get("start_date"), log_dict.get("end_date"))

		pieces = [
			f"Retrieve in the recent papers of the user: {user}.\n",
			f"retrieve string: {query}\n",
		]
		if target_path is not None:
			pieces.append(f"target paper file path: {target_path}\n")
		# The date range is only reported when both ends are given.
		if None not in date_range:
			begin, finish = date_range
			pieces.append(f"start_date: {begin}\nend_date: {finish}")

		system_log = {
			TOOL_OP_DESCRIPTION: "".join(pieces),
			TOOL_REFERENCES: None,
		}
		return ToolLog(
			tool_name=self.metadata.name,
			log_to_user=None,
			log_to_system=system_log,
		)

	def get_ref_info(self, nodes: List[NodeWithScore]) -> List[RefInfoBase]:
		r""" Return the reference infos of the retrieved nodes; this tool always returns an empty list. """
		no_refs: List[RefInfoBase] = []
		return no_refs

	def _retrieve(self, retrieve_kwargs: dict) -> List[NodeWithScore]:
		r""" Retrieve relevant nodes, passing `retrieve_kwargs` to the retriever as keyword arguments. """
		return self._retriever.retrieve(**retrieve_kwargs)

	async def _aretrieve(self, retrieve_kwargs: dict) -> List[NodeWithScore]:
		r""" Asynchronous counterpart of `_retrieve`, awaiting the retriever's `aretrieve`. """
		return await self._retriever.aretrieve(**retrieve_kwargs)

	def _nodes_to_tool_output(self, nodes: List[NodeWithScore]) -> Tuple[str, dict]:
		r"""
		Format the retrieved nodes as the tool output.

		The node contents are grouped by the id of each node's parent node, which the
		output presents as the storage path of the paper.

		Args:
			nodes (List[NodeWithScore]): The retrieved nodes.

		Returns:
			Tuple[str, dict]: The formatted contents and the output log (always an empty dict).
		"""
		grouped = {}
		for scored_node in nodes:
			source_path = scored_node.node.parent_node.node_id
			bucket = grouped.setdefault(source_path, [])
			bucket.append(scored_node.get_content(metadata_mode=MetadataMode.LLM))

		if not grouped:
			return "Have retrieved nothing.\n", {}

		sections = []
		for source_path, texts in grouped.items():
			header = f"Following contents are from the paper stored in {source_path}:\n"
			sections.append((header + "\n".join(texts)).strip())
		return "Have retrieved the following content: \n" + "\n\n".join(sections), {}

labridge.tools.paper.temporary_papers.paper_retriever.RecentPaperRetrieveTool.get_ref_info(nodes)

Get the reference infos from the retrieved nodes.

Source code in labridge\tools\paper\temporary_papers\paper_retriever.py
89
90
91
def get_ref_info(self, nodes: List[NodeWithScore]) -> List[RefInfoBase]:
	r""" Return the reference infos of the retrieved nodes; this tool always returns an empty list. """
	no_refs: List[RefInfoBase] = []
	return no_refs

labridge.tools.paper.temporary_papers.paper_retriever.RecentPaperRetrieveTool.log(log_dict)

Record the tool log.

PARAMETER DESCRIPTION
log_dict

Includes the input keyword arguments and the retrieving logs.

TYPE: dict

RETURNS DESCRIPTION
ToolLog

The packed tool log.

TYPE: ToolLog

Source code in labridge\tools\paper\temporary_papers\paper_retriever.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def log(self, log_dict: dict) -> ToolLog:
	r"""
	Pack a description of one retrieving operation into a tool log.

	Args:
		log_dict (dict): The input keyword arguments and the retrieving logs.
			`user_id` and `item_to_be_retrieved` are required;
			`paper_file_path`, `start_date` and `end_date` are optional.

	Returns:
		ToolLog: The packed tool log. Only the system-side log is filled in,
			the user-side log is None.
	"""
	user = log_dict["user_id"]
	query = log_dict["item_to_be_retrieved"]
	target_path = log_dict.get("paper_file_path")
	date_range = (log_dict.get("start_date"), log_dict.get("end_date"))

	pieces = [
		f"Retrieve in the recent papers of the user: {user}.\n",
		f"retrieve string: {query}\n",
	]
	if target_path is not None:
		pieces.append(f"target paper file path: {target_path}\n")
	# The date range is only reported when both ends are given.
	if None not in date_range:
		begin, finish = date_range
		pieces.append(f"start_date: {begin}\nend_date: {finish}")

	system_log = {
		TOOL_OP_DESCRIPTION: "".join(pieces),
		TOOL_REFERENCES: None,
	}
	return ToolLog(
		tool_name=self.metadata.name,
		log_to_user=None,
		log_to_system=system_log,
	)