Files
knightutils/llamaindex_cloud_rag/create_cloud_index.py
2025-08-27 22:22:18 +08:00

28 lines
1.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from llama_index.core import SimpleDirectoryReader
from llama_index.readers.dashscope.base import DashScopeParse
from llama_index.readers.dashscope.utils import ResultType
from llama_index.indices.managed.dashscope import DashScopeCloudIndex
def read_parse_upload_local_documents(dir, num_workers=1):
"""读取、解析、上传本地文件到百炼数据管理平台。
Args:
dir (str): 本地文件存储的路径。
num_workers (int, optional): 执行的并发数。
Returns:
已上传到云端的文件列表
"""
parse = DashScopeParse(result_type=ResultType.DASHSCOPE_DOCMIND)
file_extractor = {'.txt': parse, '.docx': parse, ".pdf": parse} # 设置需要读取解析的文件格式,请根据实际需求调整
documents = SimpleDirectoryReader(input_dir=dir, file_extractor=file_extractor).load_data(num_workers=num_workers)
return documents
if __name__ == '__main__':
dir = "./docs/" # 本例中业务相关文件存储在当前路径下的docs文件夹请根据实际情况调整。
documents = read_parse_upload_local_documents(dir)
cloud_index_name = "my_first_index" # 设置云端知识库索引名称
index = DashScopeCloudIndex.from_documents(documents, cloud_index_name, verbose=True) # 创建云端知识库索引