save all
This commit is contained in:
107
local_rag/upload_file.py
Normal file
107
local_rag/upload_file.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#####################################
|
||||
####### 上传文件 #######
|
||||
#####################################
|
||||
import gradio as gr
|
||||
import os
|
||||
import shutil
|
||||
import pandas as pd
|
||||
STRUCTURED_FILE_PATH = "File/Structured"
|
||||
UNSTRUCTURED_FILE_PATH = "File/Unstructured"
|
||||
# 刷新非结构化类目
|
||||
def refresh_label():
    """List the entries currently stored under the unstructured-file root.

    Returns:
        The list produced by ``os.listdir(UNSTRUCTURED_FILE_PATH)``; each
        entry name is used by this module as a category label.
    """
    label_names = os.listdir(UNSTRUCTURED_FILE_PATH)
    return label_names
|
||||
|
||||
# 刷新结构化数据表
|
||||
def refresh_data_table():
    """List the entries currently stored under the structured-file root.

    Returns:
        The list produced by ``os.listdir(STRUCTURED_FILE_PATH)``; each
        entry name is used by this module as a data-table name.
    """
    table_names = os.listdir(STRUCTURED_FILE_PATH)
    return table_names
|
||||
|
||||
# 上传非结构化数据
|
||||
def upload_unstructured_file(files, label_name):
    """Move uploaded files into a newly created category folder.

    A folder named ``label_name`` is created under ``UNSTRUCTURED_FILE_PATH``
    and every uploaded file is moved into it under its own base name. The
    outcome (success or the reason for refusal/failure) is reported to the
    user through ``gr.Info``; nothing is returned.

    Args:
        files: Iterable of upload objects whose ``.name`` attribute is the
            path of the uploaded file on disk. ``None`` or an empty
            collection is treated as "nothing uploaded".
        label_name: Name of the category to create. ``None`` or an empty
            string is rejected.
    """
    if not files:
        gr.Info("请上传文件")
        return
    if not label_name:
        gr.Info("请输入类目名称")
        return

    label_dir = os.path.join(UNSTRUCTURED_FILE_PATH, label_name)
    # Ask the filesystem directly instead of scanning os.listdir(): this
    # also works when the root folder does not exist yet and catches
    # collisions on case-insensitive filesystems.
    if os.path.exists(label_dir):
        gr.Info(f"{label_name}类目已存在")
        return

    try:
        # makedirs also creates the root folder on a fresh installation.
        os.makedirs(label_dir, exist_ok=True)
        for file in files:
            source_path = file.name
            destination_path = os.path.join(
                label_dir, os.path.basename(source_path)
            )
            shutil.move(source_path, destination_path)
    except (shutil.Error, FileExistsError):
        # Raised when the destination is already occupied.
        gr.Info("请勿重复上传")
    except Exception as exc:
        # Report the real cause rather than blaming a duplicate upload.
        gr.Info(f"上传失败:{exc}")
    else:
        gr.Info(f"文件已上传至{label_name}类目中,请前往创建知识库")
|
||||
|
||||
# 上传结构化数据
|
||||
def upload_structured_file(files, label_name):
    """Convert uploaded spreadsheets into text files inside a new data table.

    A folder named ``label_name`` is created under ``STRUCTURED_FILE_PATH``.
    Each uploaded ``.xlsx`` / ``.csv`` file is moved into that folder, loaded
    with pandas, rewritten as ``<base name>.txt`` with one
    ``【col:value,col:value,...】`` record per row (rows separated by a
    newline, no trailing newline), and the moved spreadsheet is then deleted.
    The outcome is reported to the user through ``gr.Info``; nothing is
    returned.

    Args:
        files: Iterable of upload objects whose ``.name`` attribute is the
            path of the uploaded file on disk. ``None`` or an empty
            collection is treated as "nothing uploaded".
        label_name: Name of the data table to create. ``None`` or an empty
            string is rejected.
    """
    if not files:
        gr.Info("请上传文件")
        return
    if not label_name:
        gr.Info("请输入数据表名称")
        return

    table_dir = os.path.join(STRUCTURED_FILE_PATH, label_name)
    # Ask the filesystem directly instead of scanning os.listdir(): this
    # also works when the root folder does not exist yet and catches
    # collisions on case-insensitive filesystems.
    if os.path.exists(table_dir):
        gr.Info(f"{label_name}数据表已存在")
        return

    readers = {".xlsx": pd.read_excel, ".csv": pd.read_csv}
    source_paths = [file.name for file in files]
    # Validate every extension before touching the disk. Previously an
    # unsupported file either hit an unbound DataFrame or silently reused
    # the DataFrame of the file processed just before it.
    unsupported = [
        os.path.basename(path)
        for path in source_paths
        if os.path.splitext(path)[1].lower() not in readers
    ]
    if unsupported:
        gr.Info(f"仅支持.xlsx和.csv文件:{','.join(unsupported)}")
        return

    try:
        # makedirs also creates the root folder on a fresh installation.
        os.makedirs(table_dir, exist_ok=True)
        for source_path in source_paths:
            file_name = os.path.basename(source_path)
            stem, extension = os.path.splitext(file_name)
            staged_path = os.path.join(table_dir, file_name)
            shutil.move(source_path, staged_path)

            df = readers[extension.lower()](staged_path)
            columns = df.columns
            records = [
                "【" + ",".join(f"{col}:{row[col]}" for col in columns) + "】"
                for _, row in df.iterrows()
            ]
            txt_path = os.path.join(table_dir, stem + ".txt")
            # Explicit UTF-8: the records contain non-ASCII brackets and
            # the platform default encoding may be unable to encode them.
            with open(txt_path, "w", encoding="utf-8") as txt_file:
                txt_file.write("\n".join(records))

            # Only the generated text file is kept in the table folder.
            os.remove(staged_path)
    except (shutil.Error, FileExistsError):
        # Raised when the destination is already occupied.
        gr.Info("请勿重复上传")
    except Exception as exc:
        # Report the real cause rather than blaming a duplicate upload.
        gr.Info(f"上传失败:{exc}")
    else:
        gr.Info(f"文件已上传至{label_name}数据表中,请前往创建知识库")
|
||||
|
||||
# 实时更新结构化数据表
|
||||
def update_datatable():
    """Build a Gradio update carrying the current structured data tables.

    Returns:
        The result of ``gr.update`` with ``choices`` set to the entries
        listed under ``STRUCTURED_FILE_PATH``.
    """
    current_tables = os.listdir(STRUCTURED_FILE_PATH)
    return gr.update(choices=current_tables)
|
||||
|
||||
|
||||
# 实时更新非结构化类目
|
||||
def update_label():
    """Build a Gradio update carrying the current unstructured categories.

    Returns:
        The result of ``gr.update`` with ``choices`` set to the entries
        listed under ``UNSTRUCTURED_FILE_PATH``.
    """
    current_labels = os.listdir(UNSTRUCTURED_FILE_PATH)
    return gr.update(choices=current_labels)
|
||||
|
||||
# 删除类目
|
||||
def delete_label(label_name):
    """Delete the folders of the given unstructured categories.

    Args:
        label_name: Iterable of category names, or ``None`` (in which case
            nothing happens). Names without a matching path under
            ``UNSTRUCTURED_FILE_PATH`` are skipped silently.
    """
    if label_name is None:
        return

    for category in label_name:
        category_dir = os.path.join(UNSTRUCTURED_FILE_PATH, category)
        if not os.path.exists(category_dir):
            continue
        # Remove the category folder with everything in it, then notify.
        shutil.rmtree(category_dir)
        gr.Info(f"{category}类目已删除")
|
||||
|
||||
# 删除数据表
|
||||
def delete_data_table(table_name):
    """Delete the folders of the given structured data tables.

    Args:
        table_name: Iterable of data-table names, or ``None`` (in which case
            nothing happens). Names without a matching path under
            ``STRUCTURED_FILE_PATH`` are skipped silently.
    """
    if table_name is None:
        return

    for selected in table_name:
        table_dir = os.path.join(STRUCTURED_FILE_PATH, selected)
        if not os.path.exists(table_dir):
            continue
        # Remove the table folder with everything in it, then notify.
        shutil.rmtree(table_dir)
        gr.Info(f"{selected}数据表已删除")
|
||||
Reference in New Issue
Block a user