# knightutils/llamaindex_cloud_rag/rag.py

from llama_index.core import Settings
from llama_index.llms.dashscope import DashScope
from llama_index.indices.managed.dashscope import DashScopeCloudIndex
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.postprocessor.dashscope_rerank import DashScopeRerank

# The parameters below must be set manually when building the query engine in
# this example; tune them according to the results you observe.
top_n = 1  # Number of most semantically relevant results returned after reranking.
similarity_cutoff = 0.4  # Minimum similarity threshold used when filtering retrieved results.
similarity_top_k = 5  # Number of highest-similarity results the query engine retrieves.
Settings.llm = DashScope(model_name="qwen-max")  # LLM the query engine calls to generate answers.

# Question-and-answer templates used by the RAG application in this example;
# adjust them to suit your needs.
#   init_chat           - greeting / input prompt shown before every question.
#   resp_with_no_answer - fallback reply when the knowledge base has no answer.
#   prompt_template     - wraps the user question ({0}) and tells the model to
#                         return the fallback reply ({1}) when it cannot answer.
init_chat = (
    "\n您好，我是AI助手，可以回答关于百炼系列产品的提问。"
    "有什么可以帮您的？（请输入问题，退出请输入'q'）\n> "
)
resp_with_no_answer = "很抱歉，知识库未提供相关信息。\n"
prompt_template = (
    "回答如下问题: {0}\n"
    "如果根据提供的信息无法回答，请返回：{1}"
)


def prettify_rag(resp) -> str:
    """Format a query response for display.

    Args:
        resp: Response object exposing ``response`` (the generated answer)
            and ``source_nodes`` (an iterable of retrieved nodes, each with
            a ``text`` attribute).

    Returns:
        The answer section followed by one section per source node showing
        the related knowledge-base text. With no source nodes, only the
        answer section is returned.
    """
    # Collect the sections and join once instead of growing a string with
    # repeated ``+=`` inside an index-based loop.
    sections = ["\n回答：{0}\n".format(resp.response)]
    sections.extend(
        "\n产品知识库中的相关文本：\n{0}\n".format(node.text)
        for node in resp.source_nodes
    )
    return "".join(sections)


# Build a query engine on top of the vector index of the cloud knowledge base.
# The engine takes an end user's question, retrieves related text chunks from
# the cloud knowledge base, combines the question with the retrieved chunks as
# input to the LLM, and generates an answer.
# The RAG application is an interactive console loop. If no related chunk is
# retrieved, the fallback message is printed; the prompt template also asks the
# model to return that message when the retrieved chunks cannot answer the
# question.
if __name__ == '__main__':
    index = DashScopeCloudIndex("my_first_index")  # Load the knowledge-base index already created on the Bailian platform.
    query_engine = index.as_query_engine(  # Build the query engine.
        similarity_top_k=similarity_top_k,
        # The default retrieval results may not meet requirements, so this
        # example post-processes them with node_postprocessors.
        node_postprocessors=[
            # Drop results below the minimum similarity threshold.
            SimilarityPostprocessor(similarity_cutoff=similarity_cutoff),
            # Rerank the results and keep the most semantically relevant ones.
            DashScopeRerank(top_n=top_n, model="gte-rerank"),
        ],
        response_mode="tree_summarize",
    )
    while True:
        try:
            # Strip surrounding whitespace so inputs such as "q " still quit.
            user_prompt = input(init_chat).strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave the loop instead of crashing
            # with a traceback.
            print()
            break
        if user_prompt in ('q', 'Q'):
            break
        if not user_prompt:
            # Nothing was asked; prompt again rather than sending an empty
            # question to the query engine.
            continue
        resp = query_engine.query(prompt_template.format(user_prompt, resp_with_no_answer))
        if not resp.source_nodes:
            # No related context was found: print the fallback message.
            output = resp_with_no_answer
        else:
            output = prettify_rag(resp)
        print(output)