Files
knightutils/llamaindex_cloud_rag/create_cloud_index.py

28 lines
1.3 KiB
Python
Raw Normal View History

2025-08-27 22:22:18 +08:00
from llama_index.core import SimpleDirectoryReader
from llama_index.readers.dashscope.base import DashScopeParse
from llama_index.readers.dashscope.utils import ResultType
from llama_index.indices.managed.dashscope import DashScopeCloudIndex
def read_parse_upload_local_documents(
    dir: str,
    num_workers: int = 1,
    file_extensions=(".txt", ".docx", ".pdf"),
):
    """Read, parse and upload local files to the Bailian data-management platform.

    Args:
        dir (str): Path of the local directory that holds the files.
            (The name shadows the ``dir`` builtin; it is kept so existing
            keyword callers keep working.)
        num_workers (int, optional): Concurrency passed to
            ``SimpleDirectoryReader.load_data``. Defaults to 1.
        file_extensions (Iterable[str] | str, optional): File suffixes,
            including the leading dot, that are routed through the DashScope
            parser. Adjust to the formats you actually need. Defaults to
            ``(".txt", ".docx", ".pdf")``.

    Returns:
        The documents produced by ``SimpleDirectoryReader.load_data`` — per
        the original author's note, the list of files already uploaded to
        the cloud.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(file_extensions, str):
        file_extensions = (file_extensions,)

    parser = DashScopeParse(result_type=ResultType.DASHSCOPE_DOCMIND)
    # Every listed suffix is handled by the same DashScope parser instance.
    file_extractor = dict.fromkeys(file_extensions, parser)

    reader = SimpleDirectoryReader(input_dir=dir, file_extractor=file_extractor)
    return reader.load_data(num_workers=num_workers)
if __name__ == '__main__':
    # In this example the business documents live in the "docs" folder under
    # the current working directory; adjust the path to your own layout.
    docs_dir = "./docs/"  # renamed from `dir` to avoid shadowing the builtin
    documents = read_parse_upload_local_documents(docs_dir)

    # Name of the cloud knowledge-base index to create.
    cloud_index_name = "my_first_index"

    # Create the cloud knowledge-base index from the parsed documents.
    index = DashScopeCloudIndex.from_documents(
        documents,
        cloud_index_name,
        verbose=True,
    )