LlamaIndex
Functionality includes the following integrations.
Neo4jVector
The Neo4j vector integration supports a number of operations:
- Create vectors from LlamaIndex documents
- Query vectors
- Query vectors with an additional graph-retrieval Cypher query
- Construct a vector store instance from existing graph data
- Hybrid search
- Metadata filtering
%pip install llama-index-llms-openai
%pip install llama-index-vector-stores-neo4jvector
%pip install llama-index-embeddings-openai
%pip install neo4j
import os
import openai
from llama_index.vector_stores.neo4jvector import Neo4jVectorStore
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
os.environ["OPENAI_API_KEY"] = "OPENAI_API_KEY"
openai.api_key = os.environ["OPENAI_API_KEY"]
# Neo4j connection details; embed_dim must match the embedding model's output dimension
username = "neo4j"
password = "pleaseletmein"
url = "bolt://localhost:7687"
embed_dim = 1536
neo4j_vector = Neo4jVectorStore(username, password, url, embed_dim)
# load documents
documents = SimpleDirectoryReader("./data/paul_graham").load_data()
from llama_index.core import StorageContext
storage_context = StorageContext.from_defaults(vector_store=neo4j_vector)
index = VectorStoreIndex.from_documents(
documents, storage_context=storage_context
)
query_engine = index.as_query_engine()
response = query_engine.query("What happened at interleaf?")
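The operation list above also mentions querying with an additional graph-retrieval Cypher query and constructing a vector store on top of existing graph data. Below is a minimal sketch of both, assuming the Neo4jVectorStore keyword arguments index_name, text_node_property and retrieval_query; the index name "existing_index" and the Cypher statement are illustrative.
# Reuse a vector index that already exists in Neo4j instead of ingesting documents again
existing_vector = Neo4jVectorStore(
    username,
    password,
    url,
    embed_dim,
    index_name="existing_index",  # name of the existing vector index in Neo4j
    text_node_property="text",  # node property that stores the chunk text
)
loaded_index = VectorStoreIndex.from_vector_store(existing_vector)
# Attach a custom Cypher retrieval query that post-processes every matched node;
# the query must return `text`, `score` and `metadata` columns
retrieval_query = (
    "RETURN node.text AS text, score, "
    "node {.*, text: Null, embedding: Null} AS metadata"
)
neo4j_vector_rq = Neo4jVectorStore(
    username, password, url, embed_dim, retrieval_query=retrieval_query
)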
Hybrid search
Hybrid search combines vector search with full-text search, reranking and deduplicating the results.
neo4j_vector_hybrid = Neo4jVectorStore(
username, password, url, embed_dim, hybrid_search=True
)
storage_context = StorageContext.from_defaults(
vector_store=neo4j_vector_hybrid
)
index = VectorStoreIndex.from_documents(
documents, storage_context=storage_context
)
query_engine = index.as_query_engine()
response = query_engine.query("What happened at interleaf?")
Metadata filtering
Metadata filtering enhances vector search by allowing searches to be refined based on specific node properties. This integrated approach ensures more accurate and relevant search results by leveraging both the similarity of vectors and the contextual attributes of the nodes.
from llama_index.core.vector_stores import (
MetadataFilter,
MetadataFilters,
FilterOperator,
)
filters = MetadataFilters(
filters=[
MetadataFilter(
key="theme", operator=FilterOperator.EQ, value="Fiction"
),
]
)
retriever = index.as_retriever(filters=filters)
retriever.retrieve("What is inception about?")
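Several filters can also be combined. The sketch below assumes FilterCondition from llama_index.core.vector_stores and uses a hypothetical numeric "year" metadata key; support for individual operators may vary by vector store.
from llama_index.core.vector_stores import FilterCondition
filters = MetadataFilters(
    filters=[
        MetadataFilter(key="theme", operator=FilterOperator.EQ, value="Fiction"),
        MetadataFilter(key="year", operator=FilterOperator.GT, value=1997),  # hypothetical metadata key
    ],
    condition=FilterCondition.AND,  # both filters must match
)
retriever = index.as_retriever(filters=filters)
retriever.retrieve("What is inception about?")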
Neo4jPropertyGraphStore
The Neo4j Property Graph Store integration is a wrapper around the Neo4j Python driver. It allows you to query and update a Neo4j database from LlamaIndex in a simplified way. Many integrations let you use the Neo4j property graph store as the source of data for LlamaIndex.
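Because the store wraps the driver, you can also run Cypher directly against the database through the property graph store interface. A minimal sketch using structured_query follows; the Cypher statement itself is illustrative.
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
graph_store = Neo4jPropertyGraphStore(
    username="neo4j",
    password="password",
    url="bolt://localhost:7687",
)
# Run an arbitrary Cypher statement through the wrapper
rows = graph_store.structured_query("MATCH (n) RETURN count(n) AS node_count")
print(rows)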
Property Graph Index
The knowledge graph index can be used to extract a graph representation of information from text and use it to construct a knowledge graph. The graph information can then be retrieved in a RAG application to produce more accurate responses.
%pip install llama-index llama-index-graph-stores-neo4j
from llama_index.core import SimpleDirectoryReader
from llama_index.core import PropertyGraphIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
documents = SimpleDirectoryReader("./data/paul_graham/").load_data()
graph_store = Neo4jPropertyGraphStore(
username="neo4j",
password="password",
url="bolt://localhost:7687",
)
# Extract graph from documents
index = PropertyGraphIndex.from_documents(
documents,
embed_model=OpenAIEmbedding(model_name="text-embedding-3-small"),
kg_extractors=[
SchemaLLMPathExtractor(
llm=OpenAI(model="gpt-3.5-turbo", temperature=0.0)
)
],
property_graph_store=graph_store,
show_progress=True,
)
# Define retriever
retriever = index.as_retriever(
include_text=False, # include source text in returned nodes, default True
)
results = retriever.retrieve("What happened at Interleaf and Viaweb?")
for record in results:
print(record.text)
# Question answering
query_engine = index.as_query_engine(include_text=True)
response = query_engine.query("What happened at Interleaf and Viaweb?")
print(str(response))
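If the graph has already been built and persisted in Neo4j, you can reconnect to it without re-running extraction. A short sketch, assuming the same graph store and embedding model as above:
# Rebuild an index object on top of the existing graph in Neo4j
existing_index = PropertyGraphIndex.from_existing(
    property_graph_store=graph_store,
    embed_model=OpenAIEmbedding(model_name="text-embedding-3-small"),
)
query_engine = existing_index.as_query_engine(include_text=True)
print(str(query_engine.query("What happened at Interleaf and Viaweb?")))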
Property Graph Construction Modules
LlamaIndex provides several graph construction modules. Property graph construction in LlamaIndex works by running a series of kg_extractors over each text chunk and attaching the extracted entities and relations as metadata to each llama-index node. You can use as many extractors as you like here, and all of them will be applied. Learn more in the documentation.
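For example, several extractors can be passed together. The sketch below reuses the documents and graph_store defined above and assumes the SimpleLLMPathExtractor and ImplicitPathExtractor modules that ship with LlamaIndex.
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.property_graph import (
    SimpleLLMPathExtractor,
    ImplicitPathExtractor,
)
kg_extractors = [
    # free-form (subject, relation, object) triples extracted by an LLM
    SimpleLLMPathExtractor(llm=OpenAI(model="gpt-3.5-turbo", temperature=0.0)),
    # relations implied by existing node relationships (e.g. document structure)
    ImplicitPathExtractor(),
]
index = PropertyGraphIndex.from_documents(
    documents,
    kg_extractors=kg_extractors,  # every extractor runs on every chunk
    property_graph_store=graph_store,
    show_progress=True,
)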
Below is an example of constructing a graph with a predefined schema.
%pip install llama-index llama-index-graph-stores-neo4j
from typing import Literal
from llama_index.core import SimpleDirectoryReader
from llama_index.core import PropertyGraphIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
# best practice to use upper-case
entities = Literal["PERSON", "PLACE", "ORGANIZATION"]
relations = Literal["HAS", "PART_OF", "WORKED_ON", "WORKED_WITH", "WORKED_AT"]
# define which entities can have which relations
validation_schema = {
"PERSON": ["HAS", "PART_OF", "WORKED_ON", "WORKED_WITH", "WORKED_AT"],
"PLACE": ["HAS", "PART_OF", "WORKED_AT"],
"ORGANIZATION": ["HAS", "PART_OF", "WORKED_WITH"],
}
kg_extractor = SchemaLLMPathExtractor(
llm=OpenAI(model="gpt-3.5-turbo", temperature=0.0),
possible_entities=entities,
possible_relations=relations,
kg_validation_schema=validation_schema,
# if false, allows for values outside of the schema
# useful for using the schema as a suggestion
strict=True,
)
graph_store = Neo4jPropertyGraphStore(
username="neo4j",
password="password",
url="bolt://localhost:7687",
)
documents = SimpleDirectoryReader("./data/paul_graham/").load_data()
index = PropertyGraphIndex.from_documents(
documents,
kg_extractors=[kg_extractor],
embed_model=OpenAIEmbedding(model_name="text-embedding-3-small"),
property_graph_store=graph_store,
show_progress=True,
)
Property Graph Querying Modules
A labeled property graph can be queried in several ways to retrieve nodes and paths. In LlamaIndex, you can combine multiple node retrieval methods at once! Learn more about the available methods in the documentation.
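For instance, the built-in sub-retrievers can be combined when calling as_retriever. The sketch below assumes the index built above and the VectorContextRetriever and LLMSynonymRetriever modules.
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.indices.property_graph import (
    VectorContextRetriever,
    LLMSynonymRetriever,
)
# Retrieve triples around nodes that are semantically similar to the query
vector_retriever = VectorContextRetriever(
    index.property_graph_store,
    embed_model=OpenAIEmbedding(model_name="text-embedding-3-small"),
    similarity_top_k=4,
    path_depth=1,
)
# Expand the query into keywords/synonyms and match them against node names
synonym_retriever = LLMSynonymRetriever(
    index.property_graph_store,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.0),
    path_depth=1,
)
retriever = index.as_retriever(
    sub_retrievers=[vector_retriever, synonym_retriever]
)
nodes = retriever.retrieve("What happened at Interleaf and Viaweb?")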
You can also define a custom graph retriever, as shown below.
from llama_index.core.retrievers import (
CustomPGRetriever,
VectorContextRetriever,
TextToCypherRetriever,
)
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.graph_stores import PropertyGraphStore
from llama_index.core.vector_stores.types import VectorStore
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.prompts import PromptTemplate
from llama_index.core.llms import LLM
from llama_index.postprocessor.cohere_rerank import CohereRerank
from typing import Optional, Any, Union
class MyCustomRetriever(CustomPGRetriever):
"""Custom retriever with cohere reranking."""
def init(
self,
## vector context retriever params
embed_model: Optional[BaseEmbedding] = None,
vector_store: Optional[VectorStore] = None,
similarity_top_k: int = 4,
path_depth: int = 1,
## text-to-cypher params
llm: Optional[LLM] = None,
text_to_cypher_template: Optional[Union[PromptTemplate, str]] = None,
## cohere reranker params
cohere_api_key: Optional[str] = None,
cohere_top_n: int = 2,
**kwargs: Any,
) -> None:
"""Uses any kwargs passed in from class constructor."""
self.vector_retriever = VectorContextRetriever(
self.graph_store,
include_text=self.include_text,
embed_model=embed_model,
vector_store=vector_store,
similarity_top_k=similarity_top_k,
path_depth=path_depth,
)
self.cypher_retriever = TextToCypherRetriever(
self.graph_store,
llm=llm,
text_to_cypher_template=text_to_cypher_template
## NOTE: you can attach other parameters here if you'd like
)
self.reranker = CohereRerank(
api_key=cohere_api_key, top_n=cohere_top_n
)
def custom_retrieve(self, query_str: str) -> str:
"""Define custom retriever with reranking.
Could return `str`, `TextNode`, `NodeWithScore`, or a list of those.
"""
nodes_1 = self.vector_retriever.retrieve(query_str)
nodes_2 = self.cypher_retriever.retrieve(query_str)
reranked_nodes = self.reranker.postprocess_nodes(
nodes_1 + nodes_2, query_str=query_str
)
## TMP: please change
final_text = "\n\n".join(
[n.get_content(metadata_mode="llm") for n in reranked_nodes]
)
return final_text
custom_sub_retriever = MyCustomRetriever(
index.property_graph_store,
include_text=True,
vector_store=index.vector_store,
cohere_api_key="...",
)
# NOTE: `llm` must be defined beforehand, e.g. an OpenAI LLM instance
query_engine = RetrieverQueryEngine.from_args(
    index.as_retriever(sub_retrievers=[custom_sub_retriever]), llm=llm
)
response = query_engine.query("Did the author like programming?")
print(str(response))
Documentation
Neo4j Query Engine Pack
This Neo4j Query Engine LlamaPack creates a Neo4j query engine and executes its query function. The pack offers options for creating several types of query engines, namely:
- Knowledge graph vector-based entity retrieval (the default if no query engine type option is provided)
- Knowledge graph keyword-based entity retrieval
- Knowledge graph hybrid entity retrieval
- Raw vector index retrieval
- Custom combo query engine (vector similarity + KG entity retrieval)
- KnowledgeGraphQueryEngine
- KnowledgeGraphRAGRetriever
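Below is a sketch of how such a pack is typically downloaded and run via download_llama_pack. The pack class name Neo4jQueryEnginePack and the constructor arguments shown here are assumptions based on common LlamaPack conventions, not confirmed by this page.
from llama_index.core import SimpleDirectoryReader
from llama_index.core.llama_pack import download_llama_pack
# Download the pack class from LlamaHub (pack name assumed)
Neo4jQueryEnginePack = download_llama_pack("Neo4jQueryEnginePack", "./neo4j_pack")
docs = SimpleDirectoryReader("./data/paul_graham").load_data()
# Constructor arguments are illustrative assumptions
neo4j_pack = Neo4jQueryEnginePack(
    username="neo4j",
    password="pleaseletmein",
    url="bolt://localhost:7687",
    database="neo4j",
    docs=docs,
)
response = neo4j_pack.run("What happened at Interleaf?")
print(str(response))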