from io import BytesIO, StringIO

import pandas as pd
import requests
from fastapi import FastAPI

app = FastAPI()

def get_download_link_dify(df):
    """Save the DataFrame as an Excel document in a Dify dataset and return its download link."""
    # Dify API configuration
    BASE_URL = "http://redmindgpt.redmindtechnologies.com:81/v1"
    DATASET_ID = "084ae979-d101-414b-8854-9bbf5d3a442e"
    API_KEY = "dataset-feqz5KrqHkFRdWbh2DInt58L"

    # Endpoint URL
    url = f"{BASE_URL}/datasets/{DATASET_ID}/document/create-by-file"
    print(url)
    # Headers
    headers = {
        "Authorization": f"Bearer {API_KEY}"
    }

    # Data payload (form data as a plain text string)
    data_payload = {
        "data": """
        {
            "indexing_technique": "high_quality",
            "process_rule": {
                "rules": {
                    "pre_processing_rules": [
                        {"id": "remove_extra_spaces", "enabled": true},
                        {"id": "remove_urls_emails", "enabled": true}
                    ],
                    "segmentation": {
                        "separator": "###",
                        "max_tokens": 500
                    }
                },
                "mode": "custom"
            }
        }
        """
    }

    # Convert DataFrame to binary (in-memory)
    file_buffer = dataframe_to_binary(df)
    
    files = {
        "file": ("output.xlsx", file_buffer, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
    }

    # Upload the file to the Dify dataset
    response = requests.post(url, headers=headers, data=data_payload, files=files)
    print(response)
    response.raise_for_status()
    document_id = response.json()["document"]["id"]

    # Fetch the uploaded document's file metadata to obtain its download URL
    url = f"{BASE_URL}/datasets/{DATASET_ID}/documents/{document_id}/upload-file"

    response = requests.get(url, headers=headers)
    print(response)

    download_url = response.json().get("download_url")
    # Strip the "download/" path segment from the returned URL
    download_url = download_url.replace("download/", "")
    return download_url

def dataframe_to_binary(df):
    """Serialize the DataFrame to an in-memory Excel (.xlsx) file and return the buffer."""
    # Write the DataFrame into a BytesIO buffer as an Excel file
    output = BytesIO()
    df.to_excel(output, index=False, engine="openpyxl")

    # Move the cursor back to the beginning of the stream so it can be read on upload
    output.seek(0)
    return output


# FastAPI Endpoints
@app.get("/")
def greet_json():
    # Run Data Processing
    #process_and_store(pdf_path=pdf_file, pptx_path=pptx_file)
    return {"Document store": "created!"}

@app.get("/save_file_dify")
def save_file_dify(csv_data: str):    
    
    # Split into lines
    lines = csv_data.split("\n")

    # Find the max number of columns
    max_cols = max(line.count(",") + 1 for line in lines if line.strip())

    # Normalize all rows to have the same number of columns
    fixed_lines = [line + "," * (max_cols - line.count(",") - 1) for line in lines]

    # Reconstruct CSV string
    fixed_csv_data = "\n".join(fixed_lines)
    
    # Convert CSV string to DataFrame
    df = pd.read_csv(StringIO(fixed_csv_data))

    
    #save in dify dataset and return download link
    download_link = get_download_link_dify(df)
        
    return download_link
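

# Example client call (a sketch, not part of the app). It assumes this file is served
# as app.py with something like `uvicorn app:app --port 7860`; host, port, and module
# name are assumptions, so adjust them to your deployment.
#
#   import requests
#
#   csv_text = "name,qty\nwidget,3\ngadget"  # ragged rows are padded server-side
#   resp = requests.get(
#       "http://localhost:7860/save_file_dify",
#       params={"csv_data": csv_text},
#   )
#   print(resp.json())  # the Dify download URL returned as a JSON string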