File size: 4,544 Bytes
fbc7e49
d6ac230
a5c05cc
 
e800a62
 
36d4625
a5c05cc
7ffc9ec
 
 
 
 
a5c05cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52a9cd3
 
a5c05cc
 
 
 
 
 
81d4c87
36d4625
a5c05cc
e800a62
a5c05cc
36d4625
a5c05cc
52a9cd3
a5c05cc
36d4625
 
 
 
 
 
 
 
 
a5c05cc
 
36d4625
c85c204
36d4625
 
a5c05cc
fbc7e49
a5c05cc
 
 
 
 
 
81d4c87
a5c05cc
 
 
 
 
e800a62
a5c05cc
e800a62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5c05cc
 
c927e38
a5c05cc
36d4625
 
 
 
 
 
 
 
 
 
 
 
 
c927e38
a5c05cc
 
afc3612
a5c05cc
e800a62
36d4625
a5c05cc
 
7ffc9ec
afc3612
e800a62
02b7760
c927e38
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import gradio as gr
import os
from langchain_community.document_loaders import JSONLoader
from langchain_community.vectorstores import Qdrant
from qdrant_client.http import models as rest
from qdrant_client import QdrantClient, models
from langchain_community.embeddings import HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings
from sentence_transformers.cross_encoder import CrossEncoder
from groq import Groq

# Groq client used for the optional "better recipe" LLM pass in run_query.
# Reads the key from the GROQ_API environment variable (None if unset —
# requests will then fail at call time, not here).
client = Groq(
    api_key=os.environ.get("GROQ_API"),
)

# Path to the recipe dataset consumed by JSONLoader below.
json_path = "format_food.json"

def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy selected recipe fields from a source record into document metadata.

    Passed to JSONLoader so each loaded document carries its title, cuisine,
    cooking time, and instruction steps. Keys absent from ``record`` are
    stored as ``None``. Mutates and returns ``metadata``.
    """
    for field in ("title", "cuisine", "time", "instructions"):
        metadata[field] = record.get(field)
    return metadata

def reranking_results(query, top_k_results, rerank_model):
    """Re-rank retrieved documents against the query with a cross-encoder.

    Each candidate document is rendered as "<title>, <page_content>" before
    being handed to ``rerank_model.rank``; the model's ranked output is
    returned unchanged (``return_documents=True`` keeps the text attached
    to each result).
    """
    candidates = []
    for doc in top_k_results:
        candidates.append(f"{doc.metadata['title']}, {doc.page_content}")
    return rerank_model.rank(query, candidates, return_documents=True)


# Load recipes: each `.dishes[].dish` entry's "doc" field becomes the page
# content, with title/cuisine/time/instructions copied into metadata by
# metadata_func above.
loader = JSONLoader(
    file_path=json_path,
    jq_schema='.dishes[].dish',
    text_content=False,
    content_key='doc',
    metadata_func=metadata_func
)

data = loader.load()
# Deduplicated cuisines for the UI filter. Sorted so the dropdown order is
# deterministic across restarts (a bare list(set(...)) shuffles between runs);
# key=str keeps the sort safe if any record is missing its cuisine (None).
country_list = sorted({item.metadata['cuisine'] for item in data}, key=str)

# Embedding model: small BGE variant on CPU with normalized vectors, so
# similarity scores behave like cosine similarity.
model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
hf_embedding = HuggingFaceBgeEmbeddings(
    model_name=model_name, 
    model_kwargs=model_kwargs, 
    encode_kwargs=encode_kwargs
)

# In-memory vector store — re-embedded and rebuilt on every app start.
qdrant = Qdrant.from_documents(
    data,
    hf_embedding,
    location=":memory:",  # Local mode with in-memory storage only
    collection_name="my_documents",
)

def format_to_markdown(response_list):
    """Render a list of instruction steps as a Markdown bullet list.

    Pure function: the previous version assigned back into
    ``response_list[0]``, mutating the caller's list (which in run_query is
    the document's own metadata, so repeated queries kept stacking "- "
    prefixes), and raised IndexError on an empty list. This version does
    neither and returns "" for an empty input.
    """
    return "\n".join(f"- {step}" for step in response_list)

def run_query(query: str, groq: bool, countries: str = "None"):
    """Search the recipe vector store and build three Markdown panels.

    Args:
        query: Free-text description of the desired meal.
        groq: When True, ask the Groq-hosted Llama model to expand the
            best recipe; otherwise return a prompt to tick the box.
        countries: Cuisine to filter on. The literal string "None" disables
            the filter (it matches the dropdown's sentinel entry).

    Returns:
        Tuple of (title_and_description, recipe, groq_update) Markdown
        strings, one per output panel in the UI.
    """
    print("Running Query")
    if countries != "None":
        # Qdrant payload filter; langchain nests document metadata under
        # the "metadata." prefix in the stored payload.
        countries_select = models.Filter(
            must=[
                models.FieldCondition(
                    key="metadata.cuisine",
                    match=models.MatchValue(value=countries),
                )
            ]
        )
    else:
        countries_select = None

    answer = qdrant.similarity_search(
        query=query, 
        k=10, 
        filter=countries_select
    )
    # Guard: an empty hit list (e.g. the filter matches no documents)
    # previously raised IndexError on answer[0].
    if not answer:
        no_hit = "# Best Choice:\nNo matching dishes found — try a different query or country filter."
        return no_hit, "", ""
    best = answer[0]
    title_and_description = f"# Best Choice:\nA {best.metadata['title']}: {best.page_content}"
    instructions = format_to_markdown(best.metadata['instructions'])
    recipe = f"# Standard Method\n## Cooking time:\n{best.metadata['time']}\n\n## Recipe:\n{instructions}"
    print("Returning query")
    if groq:
        chat_completion = client.chat.completions.create(
            messages=[
            {
                    "role": "user",
                    "content": f"please write a more detailed recipe for the following recipe:\n{recipe}\n\n please return it in the same format.",
                }
            ],
            model="Llama3-70b-8192",
        )
        groq_update = "# Groq Update\n"+chat_completion.choices[0].message.content
    else:
        groq_update = "# Groq Update \nPlease select the tick box if you need more information."
    return title_and_description, recipe, groq_update

# --- Gradio UI: one query box, a cuisine filter, an LLM toggle, and three
# Markdown output panels wired to run_query. -------------------------------
with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the output.")
    query_box = gr.Textbox(placeholder="What sort of meal are you after?")
    cuisine_filter = gr.Dropdown(['None'] + country_list, label='Filter on countries', value='None')
    use_groq = gr.Checkbox(value=False, label="Use Llama for a better recipe?")
    best_choice_md = gr.Markdown(label="Title and description")
    recipe_md = gr.Markdown(label="Recipe")
    groq_md = gr.Markdown(label="Updated Recipe")
    run_btn = gr.Button("Run")
    run_btn.click(
        fn=run_query,
        inputs=[query_box, use_groq, cuisine_filter],
        outputs=[best_choice_md, recipe_md, groq_md],
    )

demo.launch()