Spaces:

Amirizaniani
/

AuditLLM

Runtime error

App Files Community

Amirizaniani committed on Feb 28, 2024

Commit

313e518

verified ·

1 Parent(s): 3011d90

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -177

app.py CHANGED Viewed

@@ -1,126 +1,43 @@
 import gradio as gr
 from dotenv import load_dotenv
-from langchain.chains import LLMChain
-from langchain_community.llms import CTransformers
-from langchain_core.prompts import PromptTemplate
-from sentence_transformers import SentenceTransformer
-from sklearn.cluster import KMeans
-from nltk.tokenize import word_tokenize
-import numpy as np
-import scipy.spatial
-from scipy.spatial.distance import cosine
 load_dotenv()
 def generate_prompts(user_input):
     prompt_template = PromptTemplate(
         input_variables=["Question"],
-        template= f"Your task is to formulate 5 unique queries for each given question. These queries must adhere to the criteria of relevance and diversity.write the questions in seperate lines.{user_input} "
     )
-    config = {'max_new_tokens': 2048, 'temperature': 0.7, 'context_length': 4096}
     llm = CTransformers(model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
-                        config=config,
-                        threads=os.cpu_count())
     hub_chain = LLMChain(prompt = prompt_template, llm = llm)
     input_data = {"Question": user_input}
-    # Here you would integrate your prompt template with your model
-    # For demonstration, this is just a placeholder
-    generated_prompts = hub_chain.run(input_data)  # Modify this part based on how you run the model
     questions_list = generated_prompts.split('\n')
     formatted_questions = "\n".join(f"Question: {question}" for i, question in enumerate(questions_list) if question.strip())
     questions_list = formatted_questions.split("Question:")[1:]
     return questions_list
-def answer_question(prompt, model_name):
     prompt_template = PromptTemplate(
         input_variables=["Question"],
-        template=f"Give a short answer to this question '{prompt}' and do not consider the number behind it."
     )
-    config = {'max_new_tokens': 512, 'temperature': 0.7, 'context_length': 512}
-    llm = CTransformers(model=model_name, #"TheBloke/Llama-2-7B-Chat-GGML",
-                        config=config,
-                        threads=os.cpu_count())
     hub_chain = LLMChain(prompt = prompt_template, llm = llm)
     input_data = {"Question": prompt}
     generated_answer = hub_chain.run(input_data)
     return generated_answer
-def calculate_similarity(word, other_words, model, threshold=0.5):
-    embeddings_word = model.encode([word])
-    embeddings_other_words = model.encode(other_words)
-    for i, embedding in enumerate(embeddings_other_words):
-        similarity = 1 - scipy.spatial.distance.cosine(embeddings_word[0], embedding)
-        if similarity > threshold and similarity < 0.85:
-            return i, similarity
-    return None, None
-def highlight_words_within_cluster(sentences, model, exclude_words):
-    # Create a dictionary to map words to color codes
-    word_to_color = {}
-    color_codes = [
-    "\033[41m",  # Background Red
-    "\033[42m",  # Background Green
-    "\033[43m",  # Background Yellow
-    "\033[44m",  # Background Blue
-    "\033[45m",  # Background Purple
-    "\033[46m",  # Background Cyan
-    "\033[100m", # Background Dark Gray
-    "\033[101m", # Background Light Red
-    "\033[102m", # Background Light Green
-    "\033[103m", # Background Light Yellow
-    "\033[104m", # Background Light Blue
-    "\033[105m", # Background Light Purple
-    "\033[106m", # Background Light Cyan
-    "\033[47m"   # Background Gray
-    ]
-    html_color_codes = ["red", "green", "blue", "purple", "cyan", "fuchsia", "lime", "maroon", "olive", "navy", "teal", "gray"]
-    color_index = 0
-    highlighted_sentences = []
-    for sentence in sentences:
-        words = word_tokenize(sentence)
-        other_sentences = [s for s in sentences if s != sentence]
-        all_other_words = [word for s in other_sentences for word in word_tokenize(s) if word.lower() not in exclude_words and word.isalnum()]
-        highlighted_words = []
-        for word in words:
-            if word.lower() not in exclude_words and word.isalnum():
-                match_index, similarity = calculate_similarity(word, all_other_words, model)
-                if match_index is not None:
-                    # Assign color to the word if not already assigned
-                    if word not in word_to_color:
-                        word_to_color[word] = html_color_codes[color_index % len(html_color_codes)]
-                        color_index += 1
-                    # Highlight the word
-                    #highlighted_word = f"{word_to_color[word]}{word}\033[0m"
-                    highlighted_word = "<span style='color: "+ word_to_color[word] +"'>"+ word +"</span>"
-                else:
-                    highlighted_word = word
-                highlighted_words.append(highlighted_word)
-            else:
-                highlighted_words.append(word)
-        highlighted_sentences.append(' '.join(highlighted_words))
-    return highlighted_sentences
-# Rest of the code, including the cluster_sentences function, remains the same
-def cluster_sentences(sentences, model, num_clusters=3):
-    embeddings = model.encode(sentences)
-    kmeans = KMeans(n_clusters=num_clusters)
-    kmeans.fit(embeddings)
-    return kmeans.labels_
-model = SentenceTransformer('all-mpnet-base-v2')
-exclude_words = {"a", "the", "for", "from", "of", "in", "over", "as", "on", "is", "am", "have", "an", "has", "had", "and", "by", "it", "its", "those", "these", "above", "to"}
 text_list = []
@@ -128,44 +45,12 @@ def updateChoices(prompt):
     newChoices = generate_prompts(prompt)
     return gr.CheckboxGroup(choices=newChoices)
-def setTextVisibility(cbg, model_name_input):
-    sentences = []
-    result = []
-    model = SentenceTransformer('all-mpnet-base-v2')
-    exclude_words = {"a", "the", "for", "from", "of", "in", "over", "as", "on", "is", "am", "have", "an", "has", "had", "and", "by", "it", "its", "those", "these", "above", "to"}
-    sentences_org = ["In a quaint little town nestled in the heart of the mountains, a small bakery famous for its artisanal breads and pastries had a line of customers stretching out the door, eagerly waiting to savor the freshly baked goods that were known far and wide for their delightful flavors.",
-                "Within a picturesque mountain village, there stood a renowned bakery, celebrated for its handcrafted bread and sweet treats, attracting a long queue of patrons each morning, all keen to enjoy the baked delicacies that had gained widespread acclaim for their exceptional taste.",
-                "A charming bakery, located in a small mountainous hamlet, renowned for producing exquisite handmade pastries and bread, was bustling with a crowd of eager customers lined up outside, each anticipating the chance to indulge in the famous baked items celebrated for their extraordinary deliciousness.",
-                "In a cozy, mountain-encircled village, a beloved bakery was the center of attraction, known for its traditional baking methods and delightful pastries, drawing a consistent stream of people waiting outside, all desiring to experience the renowned flavors that made the bakery's products distinctively mouth-watering."]
-    for text in cbg:
-         sentences.append(answer_question(text, model_name_input))
-    # Step 1: Cluster the sentences
-    num_clusters = 1
-    sentence_clusters = cluster_sentences(sentences, model, num_clusters)
-    # Step 2: Highlight similar words within each cluster
-    clustered_sentences = [[] for _ in range(num_clusters)]
-    for sentence, cluster_id in zip(sentences, sentence_clusters):
-        clustered_sentences[cluster_id].append(sentence)
-    highlighted_clustered_sentences = []
-    for cluster in clustered_sentences:
-        highlighted_clustered_sentences.extend(highlight_words_within_cluster(cluster, model, exclude_words))
-    for idx, sentence in enumerate(highlighted_clustered_sentences):
-        result.append("<p><strong>"+ cbg[idx] +"</strong></p><p>"+ sentence +"</p><br/>")
-    return result
-    # update_show = [gr.Textbox(visible=True, label=text, value=answer_question(text, model_name_input)) for text in cbg]
-    # update_hide = [gr.Textbox(visible=False, label="") for _ in range(10-len(cbg))]
-    # return update_show + update_hide
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.HTML("""
     <div style="text-align: center; max-width: 1240px; margin: 0 auto;">
@@ -174,55 +59,28 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     </h1>
     <hr style="margin-bottom:5px; margin-top:5px;">
     </div>
     """)
-    with gr.Tab("Live Mode"):
-        with gr.Row():
-            model_name_input = gr.Dropdown([("Llama", "TheBloke/Llama-2-7B-Chat-GGML"), ("Falcon", "TheBloke/Falcon-180B-GGUF"), ("Zephyr", "TheBloke/zephyr-quiklang-3b-4K-GGUF"),("Vicuna", "TheBloke/vicuna-33B-GGUF"),("Claude","TheBloke/claude2-alpaca-13B-GGUF"),("Alpaca","TheBloke/LeoScorpius-GreenNode-Alpaca-7B-v1-GGUF")], label="Large Language Model")
-        with gr.Row():
-            prompt_input = gr.Textbox(label="Enter your question", placeholder="Enter Your Question")
-        with gr.Row():
-            generate_button = gr.Button("Generate", variant="primary", min_width=300)
-        with gr.Column():
-            cbg = gr.CheckboxGroup(choices=[], label="List of the prompts", interactive=True)
-        generate_button.click(updateChoices, inputs=[prompt_input], outputs=[cbg])
-        with gr.Row() as exec:
-            btnExec = gr.Button("Execute", variant="primary", min_width=200)
-        with gr.Column() as texts:
-            for i in range(10):
-                text = gr.Textbox(label="_", visible=False)
-                text_list.append(text)
-        with gr.Column():
-            html_result = gr.HTML("""<div style="color: red"></div>""")
-        #btnExec.click(setTextVisibility, inputs=[cbg, model_name_input], outputs=text_list)
-        btnExec.click(setTextVisibility, inputs=[cbg, model_name_input], outputs=html_result)
-        gr.HTML("""
-        <div style="text-align: center; font-size: 24px; font-weight: bold;">Similarity Score: 76%</div>
-                """)
-        clear = gr.ClearButton(link = "http://127.0.0.1:7865")
-    with gr.Tab("Batch Mode"):
-        with gr.Row():
-            model_name_input = gr.Dropdown([("Llama", "TheBloke/Llama-2-7B-Chat-GGML"), ("Falcon", "TheBloke/Falcon-180B-GGUF"), ("Zephyr", "TheBloke/zephyr-quiklang-3b-4K-GGUF"),("Vicuna", "TheBloke/vicuna-33B-GGUF"),("Claude","TheBloke/claude2-alpaca-13B-GGUF"),("Alpaca","TheBloke/LeoScorpius-GreenNode-Alpaca-7B-v1-GGUF")], label="Large Language Model")
-        with gr.Row():
-            prompt_input = gr.Textbox(label="Enter your question", placeholder="Enter Your Question")
-        with gr.Row():
-            prompt_input = gr.Textbox(label="RELAVENCY", placeholder="Relavancy")
-            prompt_input = gr.Textbox(label="Diversity", placeholder="Diversity")
-        with gr.Row():
-            prompt_input = gr.Textbox(label="Enter your email address", placeholder="Enter Your Email Address")
-        with gr.Row():
-            generate_button = gr.Button("Submit", variant="primary")
 # Launch the Gradio app
 demo.launch(share=True)

 import gradio as gr
 from dotenv import load_dotenv
+from langchain import PromptTemplate, LLMChain, HuggingFaceHub
+from langchain.llms import CTransformers
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import pipeline
+from langchain.llms.huggingface_pipeline import HuggingFacePipeline
 load_dotenv()
 def generate_prompts(user_input):
     prompt_template = PromptTemplate(
         input_variables=["Question"],
+        template=f"Just list 10 question prompts for {user_input} and don't put number before each of the prompts."
     )
+    config = {'max_new_tokens': 64, 'temperature': 0.7, 'context_length': 64}
     llm = CTransformers(model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
+                        config=config)
     hub_chain = LLMChain(prompt = prompt_template, llm = llm)
     input_data = {"Question": user_input}
+    generated_prompts = hub_chain.run(input_data)
     questions_list = generated_prompts.split('\n')
     formatted_questions = "\n".join(f"Question: {question}" for i, question in enumerate(questions_list) if question.strip())
     questions_list = formatted_questions.split("Question:")[1:]
     return questions_list
+def answer_question(prompt):
     prompt_template = PromptTemplate(
         input_variables=["Question"],
+        template=f"give one answer for {prompt} and do not consider the number behind it."
     )
+    config = {'max_new_tokens': 64, 'temperature': 0.7, 'context_length': 64}
+    llm = CTransformers(model="TheBloke/Llama-2-7B-Chat-GGML",
+                        config=config)
     hub_chain = LLMChain(prompt = prompt_template, llm = llm)
     input_data = {"Question": prompt}
     generated_answer = hub_chain.run(input_data)
     return generated_answer
 text_list = []
     newChoices = generate_prompts(prompt)
     return gr.CheckboxGroup(choices=newChoices)
+def setTextVisibility(cbg):
+    update_show = [gr.Textbox(visible=True, label=text, value=answer_question(text)) for text in cbg]
+    update_hide = [gr.Textbox(visible=False, label="") for _ in range(10-len(cbg))]
+    return update_show + update_hide
+with gr.Blocks() as demo:
     gr.HTML("""
     <div style="text-align: center; max-width: 1240px; margin: 0 auto;">
     </h1>
     <hr style="margin-bottom:5px; margin-top:5px;">
     </div>
     """)
+    with gr.Row():
+        prompt_input = gr.Textbox(label="Enter your question", placeholder="Enter Your Question")
+    with gr.Row():
+        generate_button = gr.Button("Generate")
+    with gr.Column():
+        cbg = gr.CheckboxGroup(choices=[], label="List of the prompts", interactive=True)
+    generate_button.click(updateChoices, inputs=[prompt_input], outputs=[cbg])
+    with gr.Row(variant="compact") as exec:
+        btnExec = gr.Button("Execute")
+    with gr.Column() as texts:
+        for i in range(10):
+            text = gr.Textbox(label="_", visible=False)
+            text_list.append(text)
+    btnExec.click(setTextVisibility, inputs=cbg, outputs=text_list)
+    Clear = gr.ClearButton([prompt_input, cbg, text], scale=1)
 # Launch the Gradio app
 demo.launch(share=True)