# knightutils/local_rag/upload_file.py

#####################################
#######       上传文件         #######
#####################################
import gradio as gr
import os
import shutil
import pandas as pd
# Root directory for structured uploads; each data table is stored in its own
# sub-directory. The path is relative, so it resolves against the process's
# current working directory.
STRUCTURED_FILE_PATH = "File/Structured"
# Root directory for unstructured uploads; each category is stored in its own
# sub-directory. Also relative to the current working directory.
UNSTRUCTURED_FILE_PATH = "File/Unstructured"
# 刷新非结构化类目
def refresh_label():
    """Return the names of the existing unstructured-data categories.

    Every entry found directly under ``UNSTRUCTURED_FILE_PATH`` is reported
    as a category name.

    Returns:
        list[str]: Entry names in sorted order, or an empty list when the
        storage directory has not been created yet.
    """
    try:
        # os.listdir() yields entries in arbitrary order; sort them so the
        # caller always sees a stable listing.
        return sorted(os.listdir(UNSTRUCTURED_FILE_PATH))
    except FileNotFoundError:
        # Nothing has been uploaded yet, so there are no categories.
        return []

# 刷新结构化数据表
def refresh_data_table():
    """Return the names of the existing structured data tables.

    Every entry found directly under ``STRUCTURED_FILE_PATH`` is reported as
    a data-table name.

    Returns:
        list[str]: Entry names in sorted order, or an empty list when the
        storage directory has not been created yet.
    """
    try:
        # os.listdir() yields entries in arbitrary order; sort them so the
        # caller always sees a stable listing.
        return sorted(os.listdir(STRUCTURED_FILE_PATH))
    except FileNotFoundError:
        # Nothing has been uploaded yet, so there are no data tables.
        return []

# 上传非结构化数据
def upload_unstructured_file(files, label_name):
    """Store uploaded files as a new unstructured-data category.

    Creates ``UNSTRUCTURED_FILE_PATH/<label_name>`` and moves every uploaded
    file into it. The outcome is reported to the user through ``gr.Info``;
    nothing is returned.

    Args:
        files: The uploaded file(s). Each item is either a filesystem path
            string or an object whose ``name`` attribute holds that path. A
            single item (not wrapped in a list) is accepted as well.
        label_name: Name of the category to create. It must be a plain
            directory name: no path separators, and not ``.`` or ``..``.
    """
    if not files:
        # Covers both "nothing selected" (None) and an empty selection.
        gr.Info("请上传文件")
        return
    if not label_name or not label_name.strip():
        gr.Info("请输入类目名称")
        return
    # The label is used as a directory name; refuse anything that would
    # resolve to the storage root itself or to a location outside it.
    if label_name in (".", "..") or "/" in label_name or "\\" in label_name:
        gr.Info(f"{label_name}不是合法的类目名称")
        return
    if not isinstance(files, (list, tuple)):
        files = [files]

    category_dir = os.path.join(UNSTRUCTURED_FILE_PATH, label_name)
    # Check whether the category already exists.
    if os.path.exists(category_dir):
        gr.Info(f"{label_name}类目已存在")
        return

    try:
        # makedirs also creates the storage root on first use. It raises
        # FileExistsError if the category appeared since the check above.
        os.makedirs(category_dir)
        for item in files:
            # Accept both plain path strings and file objects exposing .name.
            source_path = getattr(item, "name", item)
            target_path = os.path.join(category_dir, os.path.basename(source_path))
            shutil.move(source_path, target_path)
    except FileExistsError:
        gr.Info("请勿重复上传")
        return
    except OSError as exc:
        # shutil.Error is an OSError subclass, so failed moves land here too.
        gr.Info(f"上传失败：{exc}")
        return
    gr.Info(f"文件已上传至{label_name}类目中，请前往创建知识库")

# 上传结构化数据
def upload_structured_file(files, label_name):
    """Store uploaded spreadsheets as a new structured data table.

    Creates ``STRUCTURED_FILE_PATH/<label_name>``, and for every uploaded
    ``.xlsx`` / ``.csv`` file writes a ``<stem>.txt`` file there in which each
    row is rendered as ``【col:value,...】``. The uploaded spreadsheet itself
    is removed once it has been converted. The outcome is reported to the
    user through ``gr.Info``; nothing is returned.

    Args:
        files: The uploaded file(s). Each item is either a filesystem path
            string or an object whose ``name`` attribute holds that path. A
            single item (not wrapped in a list) is accepted as well.
        label_name: Name of the data table to create. It must be a plain
            directory name: no path separators, and not ``.`` or ``..``.
    """
    if not files:
        # Covers both "nothing selected" (None) and an empty selection.
        gr.Info("请上传文件")
        return
    if not label_name or not label_name.strip():
        gr.Info("请输入数据表名称")
        return
    # The label is used as a directory name; refuse anything that would
    # resolve to the storage root itself or to a location outside it.
    if label_name in (".", "..") or "/" in label_name or "\\" in label_name:
        gr.Info(f"{label_name}不是合法的数据表名称")
        return
    if not isinstance(files, (list, tuple)):
        files = [files]

    table_dir = os.path.join(STRUCTURED_FILE_PATH, label_name)
    # Check whether the data table already exists.
    if os.path.exists(table_dir):
        gr.Info(f"{label_name}数据表已存在")
        return

    # Accept both plain path strings and file objects exposing .name.
    source_paths = [getattr(item, "name", item) for item in files]
    readers = {".xlsx": pd.read_excel, ".csv": pd.read_csv}
    # Reject unsupported files before touching the filesystem, so a bad
    # upload neither leaves a half-built table behind nor reuses the
    # DataFrame parsed from a previous file.
    for source_path in source_paths:
        if os.path.splitext(source_path)[1].lower() not in readers:
            gr.Info(f"不支持的文件类型：{os.path.basename(source_path)}")
            return

    try:
        # makedirs also creates the storage root on first use. It raises
        # FileExistsError if the table appeared since the check above.
        os.makedirs(table_dir)
        for source_path in source_paths:
            file_name = os.path.basename(source_path)
            stem, extension = os.path.splitext(file_name)
            spreadsheet_path = os.path.join(table_dir, file_name)
            shutil.move(source_path, spreadsheet_path)
            df = readers[extension.lower()](spreadsheet_path)
            text_path = os.path.join(table_dir, stem + ".txt")
            # Explicit UTF-8 so the output does not depend on the platform's
            # default encoding.
            with open(text_path, "w", encoding="utf-8") as text_file:
                text_file.write(_structured_rows_to_text(df))
            # Only the text rendering is kept in the table directory.
            os.remove(spreadsheet_path)
    except FileExistsError:
        gr.Info("请勿重复上传")
        return
    except Exception as exc:
        # UI callback boundary: the spreadsheet parsers raise many unrelated
        # exception types, so report any failure instead of guessing at it.
        gr.Info(f"上传失败：{exc}")
        return
    gr.Info(f"文件已上传至{label_name}数据表中，请前往创建知识库")


def _structured_rows_to_text(df):
    """Render each row of *df* as ``【col:value,...】``, one row per line."""
    columns = df.columns
    lines = []
    for _, row in df.iterrows():
        fields = ",".join(f"{col}:{row[col]}" for col in columns)
        lines.append(f"【{fields}】")
    # Rows are newline-separated with no trailing newline after the last one.
    return "\n".join(lines)

# 实时更新结构化数据表
def update_datatable():
    """Build a Gradio update that refreshes the data-table choices.

    Returns:
        The result of ``gr.update(choices=...)``, where the choices are the
        sorted entry names under ``STRUCTURED_FILE_PATH`` (an empty list when
        that directory does not exist yet).
    """
    try:
        # Sorted because os.listdir() returns entries in arbitrary order.
        table_names = sorted(os.listdir(STRUCTURED_FILE_PATH))
    except FileNotFoundError:
        # Nothing has been uploaded yet, so there is nothing to offer.
        table_names = []
    return gr.update(choices=table_names)


# 实时更新非结构化类目
def update_label():
    """Build a Gradio update that refreshes the category choices.

    Returns:
        The result of ``gr.update(choices=...)``, where the choices are the
        sorted entry names under ``UNSTRUCTURED_FILE_PATH`` (an empty list
        when that directory does not exist yet).
    """
    try:
        # Sorted because os.listdir() returns entries in arbitrary order.
        category_names = sorted(os.listdir(UNSTRUCTURED_FILE_PATH))
    except FileNotFoundError:
        # Nothing has been uploaded yet, so there is nothing to offer.
        category_names = []
    return gr.update(choices=category_names)

# 删除类目
def delete_label(label_name):
    """Delete the selected unstructured-data categories.

    Each existing ``UNSTRUCTURED_FILE_PATH/<label>`` directory is removed
    recursively and the deletion is reported through ``gr.Info``.

    Args:
        label_name: Category names to delete: a list of names, a single
            name, or ``None`` / empty when nothing is selected.
    """
    if not label_name:
        return
    if isinstance(label_name, str):
        # A bare string would otherwise be iterated character by character.
        label_name = [label_name]
    for label in label_name:
        # Only plain directory names are accepted. "", "." and ".." (or a
        # name containing a path separator) would make rmtree remove the
        # storage root itself or something outside of it.
        if not label or label in (".", "..") or "/" in label or "\\" in label:
            continue
        folder_path = os.path.join(UNSTRUCTURED_FILE_PATH, label)
        if os.path.exists(folder_path):
            shutil.rmtree(folder_path)
            gr.Info(f"{label}类目已删除")

# 删除数据表
def delete_data_table(table_name):
    """Delete the selected structured data tables.

    Each existing ``STRUCTURED_FILE_PATH/<table>`` directory is removed
    recursively and the deletion is reported through ``gr.Info``.

    Args:
        table_name: Data-table names to delete: a list of names, a single
            name, or ``None`` / empty when nothing is selected.
    """
    if not table_name:
        return
    if isinstance(table_name, str):
        # A bare string would otherwise be iterated character by character.
        table_name = [table_name]
    for table in table_name:
        # Only plain directory names are accepted. "", "." and ".." (or a
        # name containing a path separator) would make rmtree remove the
        # storage root itself or something outside of it.
        if not table or table in (".", "..") or "/" in table or "\\" in table:
            continue
        folder_path = os.path.join(STRUCTURED_FILE_PATH, table)
        if os.path.exists(folder_path):
            shutil.rmtree(folder_path)
            gr.Info(f"{table}数据表已删除")