File size: 1,695 Bytes
57535ba
9663a4b
57535ba
 
 
 
 
 
 
 
 
 
 
 
 
9663a4b
57535ba
 
9663a4b
57535ba
 
 
 
 
 
 
 
9663a4b
57535ba
9663a4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57535ba
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from umap_reducer import UMAPReducer
from embeddings_encoder import EmbeddingsEncoder
from flask import Flask, request, render_template, jsonify, make_response
from flask_cors import CORS
import os
from dotenv import load_dotenv
import feedparser
import json
from dateutil import parser
import re
import numpy as np
import gzip

# Load variables from a .env file (if present) into the process environment
# before anything below is constructed.
load_dotenv()


# Flask application; static assets are served under the /static URL prefix.
app = Flask(__name__, static_url_path='/static')
# Module-level instances shared by the request handlers in this file.
# NOTE(review): run_umap mutates `reducer` via setParams on each request, so
# these are presumably not safe under concurrent requests — confirm.
reducer = UMAPReducer()
encoder = EmbeddingsEncoder()
# Enable cross-origin requests for the app using flask_cors defaults.
CORS(app)


@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page


@app.route('/run-umap', methods=['POST'])
def run_umap():
    """Encode sentences, reduce them with UMAP and cluster the projection.

    Expects a JSON request body shaped like::

        {"data": {"sentences": ..., "umap_options": ..., "cluster_options": ...}}

    Returns:
        On success, a gzip-compressed JSON document with the keys
        ``embeddings`` (the reduced embeddings as nested lists) and
        ``clusters`` (the cluster labels as a list), sent with
        ``Content-Encoding: gzip``.
        On a malformed request body, ``{"error": ...}`` with status 400.
        On a failure while encoding/reducing/clustering, ``{"error": ...}``
        with status 500.
    """
    # silent=True yields None instead of raising, so every bad-input case is
    # reported through the same JSON error shape below.
    input_data = request.get_json(silent=True)
    data = input_data.get('data') if isinstance(input_data, dict) else None
    if not isinstance(data, dict):
        return jsonify(
            {"error": "request body must be JSON with a 'data' object"}
        ), 400

    required = ('sentences', 'umap_options', 'cluster_options')
    missing = [key for key in required if key not in data]
    if missing:
        return jsonify(
            {"error": "missing field(s) in 'data': " + ", ".join(missing)}
        ), 400

    sentences = data['sentences']
    umap_options = data['umap_options']
    cluster_options = data['cluster_options']

    print("input options:", umap_options, cluster_options)
    try:
        embeddings = encoder.encode(sentences)
        # UMAP embeddings
        reducer.setParams(umap_options, cluster_options)
        umap_embeddings = reducer.embed(embeddings)
        # HDBScan cluster analysis
        clusters = reducer.clusterAnalysis(umap_embeddings)
        body = json.dumps(
            {
                "embeddings": umap_embeddings.tolist(),
                "clusters": clusters.labels_.tolist(),
            }
        )
        content = gzip.compress(body.encode('utf8'), compresslevel=5)
    except Exception as e:
        # Keep the traceback in the server log; the client only gets the
        # message. A failure here is a server-side error, not a "201 Created".
        app.logger.exception("run-umap failed")
        return jsonify({"error": str(e)}), 500

    response = make_response(content)
    # The payload is JSON once decompressed; without this Flask would label
    # the raw bytes with its default text/html content type.
    response.headers['Content-Type'] = 'application/json'
    response.headers['Content-Length'] = len(content)
    response.headers['Content-Encoding'] = 'gzip'
    return response


if __name__ == '__main__':
    # Listen on all interfaces; the port comes from the PORT environment
    # variable and falls back to 7860 when it is unset.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port)