save all
This commit is contained in:
28
llamaindex_cloud_rag/create_cloud_index.py
Normal file
28
llamaindex_cloud_rag/create_cloud_index.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from llama_index.core import SimpleDirectoryReader
|
||||
from llama_index.readers.dashscope.base import DashScopeParse
|
||||
from llama_index.readers.dashscope.utils import ResultType
|
||||
from llama_index.indices.managed.dashscope import DashScopeCloudIndex
|
||||
|
||||
|
||||
def read_parse_upload_local_documents(dir, num_workers=1):
    """Read, parse, and upload local files to the Bailian data management platform.

    Every `.txt`, `.docx`, and `.pdf` file found under `dir` is routed through
    a `DashScopeParse` reader configured with the DASHSCOPE_DOCMIND result
    type.

    Args:
        dir (str): Path of the directory that holds the local files.
        num_workers (int, optional): Concurrency level passed to `load_data`.

    Returns:
        The documents returned by `SimpleDirectoryReader.load_data`, i.e. the
        list of files that have been uploaded to the cloud.
    """
    docmind_parser = DashScopeParse(result_type=ResultType.DASHSCOPE_DOCMIND)

    # File formats that should be read and parsed; adjust to your actual needs.
    supported_suffixes = ('.txt', '.docx', ".pdf")
    file_extractor = {suffix: docmind_parser for suffix in supported_suffixes}

    reader = SimpleDirectoryReader(input_dir=dir, file_extractor=file_extractor)
    return reader.load_data(num_workers=num_workers)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # In this example the business files live in the `docs` folder under the
    # current path; adjust to your actual setup.
    docs_dir = "./docs/"
    # Name of the cloud knowledge-base index to create.
    cloud_index_name = "my_first_index"

    uploaded_documents = read_parse_upload_local_documents(docs_dir)

    # Create the cloud knowledge-base index from the uploaded documents.
    index = DashScopeCloudIndex.from_documents(
        uploaded_documents,
        cloud_index_name,
        verbose=True,
    )
|
||||
Reference in New Issue
Block a user