Shakshi3104 committed on
Commit
c2e0e94
·
unverified ·
2 Parent(s): cb0a298 6b670b2

Merge pull request #2 from Shakshi3104/feature

Browse files
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Image for the Cobalt Gradio demo, based on the official Python 3.11 image.
FROM python:3.11

# Copy the dependency manifest on its own so the pip layer below is rebuilt
# only when requirements.txt changes.
COPY requirements.txt requirements.txt

# Install system libraries.
# `apt-get update` and `apt-get install` run in the same layer so a cached,
# stale package index is never combined with a fresh install step; the index
# is removed afterwards so it does not end up in the image.
RUN apt-get update \
    && apt-get install -y \
        libgl1-mesa-dev \
        libssl-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies; --no-cache-dir keeps pip's download cache out
# of the image layer.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# NOTE(review): this bakes the contents of .env into an image layer. If the
# file holds credentials, prefer supplying them at runtime (e.g. Compose
# `env_file`) — confirm how the app reads its configuration before changing.
COPY .env .env
COPY model/ model/
# COPY view/ view/
COPY app.py app.py

# Disable Python's output buffering so logs show up immediately.
ENV PYTHONUNBUFFERED=1

# Exec form: the process receives signals directly instead of via /bin/sh.
CMD ["gradio", "app.py"]
README.md CHANGED
@@ -2,6 +2,16 @@
2
 
3
  Cobalt is a demo app for hybrid search with vector and surface search using [Ruri](https://huggingface.co/cl-nagoya/ruri-large), [BM25](https://github.com/dorianbrown/rank_bm25) and [Voyager](https://spotify.github.io/voyager/). The name cobalt is derived from the word 瑠璃 (Ruri), which refers to cobalt glass.
4
 
 
 
 
 
 
 
 
 
 
 
5
  ## Usage
6
 
7
  ```python
 
2
 
3
  Cobalt is a demo app for hybrid search with vector and surface search using [Ruri](https://huggingface.co/cl-nagoya/ruri-large), [BM25](https://github.com/dorianbrown/rank_bm25) and [Voyager](https://spotify.github.io/voyager/). The name cobalt is derived from the word 瑠璃 (Ruri), which refers to cobalt glass.
4
 
5
+ ## Demo
6
+ This demo app is built with Gradio.
7
+
8
+ ```bash
9
+ docker compose up --build
10
+ ```
11
+ Then open http://localhost:7860/ in your browser.
12
+
13
+ ![](./materials/cobalt-gradio-demo.png)
14
+
15
  ## Usage
16
 
17
  ```python
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+ from model.search.hybrid import HybridSearchClient
5
+ from model.data.notion_db import fetch_sakurap_corpus
6
+
7
+
def search(search_client: "HybridSearchClient"):
    """Build the query callback used by the Gradio submit event.

    Args:
        search_client: Object exposing ``search_top_n(query)``. The call must
            return an indexable collection whose first element is a
            ``pandas.DataFrame`` with the columns ``rank``, ``title``,
            ``content``, ``rank_sparse`` and ``rank_dense``.

    Returns:
        A function ``_search(query) -> pandas.DataFrame`` producing the table
        shown in the UI, with display-friendly column names.
    """
    source_columns = ["rank", "title", "content", "rank_sparse", "rank_dense"]
    display_columns = ["rank", "title", "rap lyric", "rank: surface", "rank: vector"]

    def _search(query: str) -> pd.DataFrame:
        results = search_client.search_top_n(query)
        if len(results) == 0:
            # Nothing came back: show an empty table instead of raising.
            return pd.DataFrame(columns=display_columns)

        # Work on a copy so the frame owned by the client is never modified;
        # otherwise a reused frame would have its ranks bumped on every query.
        result = results[0][source_columns].copy()
        # Shift the rank by one for display (presumably zero-based upstream).
        result["rank"] = result["rank"] + 1
        result.columns = display_columns
        return result

    return _search
18
+
19
+
if __name__ == "__main__":
    # Script entry point: build the search client once, then serve the UI.

    # Load dataset
    sakurap_df = fetch_sakurap_corpus("./data/sakurap_corpus.csv")
    # Initialize search client; "content" names the column handed to the
    # client alongside the dataframe.
    search_client = HybridSearchClient.from_dataframe(sakurap_df, "content")

    with gr.Blocks() as search_interface:
        # Page header.
        gr.Markdown("""
        # 💎 Cobalt
        Demo app for hybrid search with vector and surface search using [Ruri](https://huggingface.co/cl-nagoya/ruri-large), [BM25](https://github.com/dorianbrown/rank_bm25) and [Voyager](https://spotify.github.io/voyager/).
        """)
        # Input query
        search_query = gr.Textbox(label="Query", submit_btn=True)

        gr.Markdown("""
        ## Search Results

        """)
        # Search result: five columns matching the frame returned by the
        # search callback; the third column is rendered as markdown and given
        # most of the table width.
        result_table = gr.DataFrame(label="Result",
                                    column_widths=["5%", "20%", "65%", "5%", "5%"],
                                    wrap=True,
                                    datatype=["str", "str", "markdown", "str", "str"],
                                    interactive=False)

        # Event handler: submitting the textbox runs the search and fills the table.
        search_query.submit(fn=search(search_client), inputs=search_query, outputs=result_table)

    # App launch; binding to 0.0.0.0 makes the server reachable from outside
    # the container.
    search_interface.queue()
    search_interface.launch(server_name="0.0.0.0")
compose.yml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
# Compose definition for running the Cobalt demo in a single container.
services:
  cobalt:
    container_name: cobalt-hybrid-search
    # Build from the repository root; the file named "Dockerfile" is the default.
    build: .
    ports:
      # host:container — the port the README tells users to open.
      - "7860:7860"
    volumes:
      # Bind-mount the local model directory over /model in the container.
      - type: bind
        source: ./model
        target: /model
cli_example.py → example.py RENAMED
File without changes
materials/cobalt-gradio-demo.png ADDED