albertmartinez committed on
Commit
fb3abe1
·
0 Parent(s):

initial commit

Browse files
Files changed (5) hide show
  1. .gitattributes +35 -0
  2. .gitignore +99 -0
  3. README.md +12 -0
  4. app.py +61 -0
  5. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python build
2
+ .eggs/
3
+ gradio.egg-info
4
+ dist/
5
+ dist-lite/
6
+ *.pyc
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+ build/
11
+ !js/build/
12
+ !js/build/dist/
13
+ __tmp/*
14
+ *.pyi
15
+ !gradio/stubs/**/*.pyi
16
+ py.typed
17
+ .ipynb_checkpoints/
18
+ .python-version
19
+ =23.2
20
+
21
+ # JS build
22
+ gradio/templates/*
23
+ gradio/node/*
24
+ gradio/_frontend_code/*
25
+ js/gradio-preview/test/*
26
+
27
+ # Secrets
28
+ .env
29
+
30
+ # Gradio run artifacts
31
+ *.db
32
+ *.sqlite3
33
+ gradio/launches.json
34
+ gradio/hash_seed.txt
35
+ .gradio/
36
+
37
+ tmp.zip
38
+
39
+ # Tests
40
+ .coverage
41
+ coverage.xml
42
+ test.txt
43
+ **/snapshots/**/*.png
44
+ playwright-report/
45
+ .hypothesis
46
+ .lite-perf.json
47
+
48
+ # Demos
49
+ demo/tmp.zip
50
+ demo/files/*.avi
51
+ demo/files/*.mp4
52
+ demo/all_demos/demos/*
53
+ demo/all_demos/requirements.txt
54
+ demo/*/config.json
55
+ demo/annotatedimage_component/*.png
56
+ demo/fake_diffusion_with_gif/*.gif
57
+ demo/cancel_events/cancel_events_output_log.txt
58
+ demo/unload_event_test/output_log.txt
59
+ demo/stream_video_out/output_*.ts
60
+ demo/stream_video_out/output_*.mp4
61
+ demo/stream_audio_out/*.mp3
62
+
63
+ # Etc
64
+ .idea/*
65
+ .DS_Store
66
+ *.bak
67
+ workspace.code-workspace
68
+ *.h5
69
+
70
+ # dev containers
71
+ .pnpm-store/
72
+
73
+ # log files
74
+ .pnpm-debug.log
75
+
76
+ # Local virtualenv for devs
77
+ .venv*
78
+
79
+ # FRP
80
+ gradio/frpc_*
81
+ .vercel
82
+
83
+ # js
84
+ node_modules
85
+ public/build/
86
+ test-results
87
+ client/js/dist/*
88
+ client/js/test.js
89
+ .config/test.py
90
+ .svelte-kit
91
+
92
+
93
+ # storybook
94
+ storybook-static
95
+ build-storybook.log
96
+ js/storybook/theme.css
97
+
98
+ # playwright
99
+ .config/playwright/.cache
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Sentence Transformers
3
+ emoji: 🏢
4
+ colorFrom: green
5
+ colorTo: gray
6
+ sdk: gradio
7
+ sdk_version: 5.3.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import gradio as gr
3
+ import pandas as pd
4
+ import polars as pl
5
+ from datasets import Dataset
6
+ from sentence_transformers import SentenceTransformer
7
+ from sentence_transformers.util import paraphrase_mining
8
+ import torch
9
+
10
+
11
def upload_file(filepath):
    """Build the button pair shown once a file has been uploaded.

    Args:
        filepath: Path of the uploaded file; its final path component is
            used in the download button's label and the full path is used
            as the download button's value.

    Returns:
        A two-element list: a hidden ``gr.UploadButton`` followed by a
        visible ``gr.DownloadButton`` labelled ``"Download <file name>"``.
    """
    download_label = f"Download {Path(filepath).name}"
    hidden_upload = gr.UploadButton(visible=False)
    visible_download = gr.DownloadButton(
        label=download_label,
        value=filepath,
        visible=True,
    )
    return [hidden_upload, visible_download]
14
+
15
+
16
_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Lazily-created model shared by every upload so the weights are loaded
# only once per process instead of once per request.
_model = None


def _get_model():
    """Return the shared SentenceTransformer, creating it on first use."""
    global _model
    if _model is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # NOTE(review): backend="openvino" combined with device="cuda" and
        # trust_remote_code=True are kept exactly as in the original call;
        # confirm that the OpenVINO backend accepts a CUDA device and that
        # remote code is really required for this model.
        _model = SentenceTransformer(
            _MODEL_NAME,
            backend="openvino",
            device=device,
            trust_remote_code=True,
        )
    return _model


def getData(path):
    """Mine near-duplicate sentences from a single-column CSV file.

    The CSV is read with one column named ``text`` (malformed lines are
    skipped), every row is compared against every other row with
    ``paraphrase_mining``, and pairs whose rounded score exceeds 0.96 are
    kept, best score first.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A two-element list ``[uploaded, pairs]`` of polars DataFrames:
        ``uploaded`` holds the parsed rows, ``pairs`` has the columns
        ``score``, ``sentence_1`` and ``sentence_2``.
    """
    data = Dataset.from_pandas(pd.read_csv(path, on_bad_lines="skip", names=["text"]))
    uploaded = pl.from_arrow(data.data.table)
    row_count = len(data)

    # With fewer than two rows there is nothing to pair up: the mining call
    # would receive corpus_chunk_size=0 (zero rows) or return no pairs (one
    # row), and the column rename below would then fail on a frame without
    # columns. Return an empty, correctly-shaped result instead.
    if row_count < 2:
        no_pairs = pl.DataFrame(
            schema={
                "score": pl.Float32,
                "sentence_1": pl.Utf8,
                "sentence_2": pl.Utf8,
            }
        )
        return [uploaded, no_pairs]

    paraphrases = paraphrase_mining(
        _get_model(),
        list(data["text"]),  # hand the encoder a plain list of sentences
        corpus_chunk_size=row_count,
        show_progress_bar=True,
        batch_size=1024,
        max_pairs=row_count ** 2,
    )

    # Each mined entry is (score, index_1, index_2); pandas names the
    # columns 0, 1, 2, which become the strings "0", "1", "2" in polars.
    pairs = pl.from_pandas(pd.DataFrame(paraphrases))
    pairs = pairs.rename({"0": "score", "1": "sentence_1", "2": "sentence_2"})

    texts = pl.DataFrame(data.to_pandas())["text"]

    # Replace the row indices with the sentences they point to, then keep
    # only the high-scoring pairs.
    pairs = pairs.with_columns([
        pl.col("score").round(3).cast(pl.Float32),
        texts[pairs["sentence_1"].cast(pl.Int32)].alias("sentence_1"),
        texts[pairs["sentence_2"].cast(pl.Int32)].alias("sentence_2"),
    ]).filter(pl.col("score") > 0.96).sort(["score"], descending=True)

    return [uploaded, pairs]
49
+
50
+
51
# UI: one column holding the CSV upload button, a table of the uploaded
# rows, and a table of the mined paraphrase pairs.
with gr.Blocks() as demo:
    with gr.Column():
        upload_button = gr.UploadButton(
            label="upload csv",
            file_types=['.csv'],
            file_count="single",
        )
        output_data = gr.Dataframe(
            headers=["text"],
            col_count=1,
            label="Uploaded Data",
        )
        output_paraphrases = gr.Dataframe(
            headers=["score", "sentence_1", "sentence_2"],
            type="polars",
            label="Paraphrase Mining Results",
        )

    # Uploading a file runs getData and fills both tables with its results.
    upload_button.upload(
        fn=getData,
        inputs=upload_button,
        outputs=[output_data, output_paraphrases],
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ pandas
4
+ polars
5
+ datasets
6
+ sentence-transformers[openvino,onnx-gpu,onnx]