Yoxas committed
Commit 921072e · verified · 1 Parent(s): 079a471

Update app.py

Files changed (1): app.py +29 -153
app.py CHANGED
@@ -1,156 +1,32 @@
  import gradio as gr
- from datasets import load_dataset
-
- import os
- import spaces
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
- import torch
- from threading import Thread
- from sentence_transformers import SentenceTransformer
- import numpy as np
-
- token = os.environ["HF_TOKEN"]
- ST = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
-
- dataset = load_dataset("Yoxas/statistical_literacyv2")
-
- data = dataset["train"]
-
- # Convert string embeddings to numpy arrays and ensure they are 2D
- def convert_and_ensure_2d_embeddings(example):
-     embedding_str = example['embedding']
-     embedding_str = embedding_str.replace('\n', ' ').replace('...', '')
-     embedding_list = list(map(float, embedding_str.strip("[]").split()))
-     embeddings = np.array(embedding_list, dtype=np.float32)
-     # Ensure the embeddings are 2-dimensional
-     if embeddings.ndim == 1:
-         embeddings = embeddings.reshape(1, -1)
-     return {'embedding': embeddings}
-
- # Apply the function to ensure embeddings are 2-dimensional and of type float32
- data = data.map(convert_and_ensure_2d_embeddings)
-
- # Flatten embeddings if they are nested 2D arrays
- def flatten_embeddings(example):
-     embedding = np.array(example['embedding'], dtype=np.float32)
-     if embedding.ndim == 2 and embedding.shape[0] == 1:
-         embedding = embedding.flatten()
-     return {'embedding': embedding}
-
- data = data.map(flatten_embeddings)
-
- # Extract embeddings and convert to numpy array
- embeddings = np.vstack([example['embedding'] for example in data])
-
- # Add FAISS index
- data = data.add_faiss_index_from_external_arrays("embedding", embeddings)
-
- model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-
- # Use quantization to lower GPU usage
- bnb_config = BitsAndBytesConfig(
-     load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
- )
-
- tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
- model = AutoModelForCausalLM.from_pretrained(
-     model_id,
-     torch_dtype=torch.bfloat16,
-     device_map="auto",
-     quantization_config=bnb_config,
-     token=token
  )
- terminators = [
-     tokenizer.eos_token_id,
-     tokenizer.convert_tokens_to_ids("<|eot_id|>")
- ]
-
- SYS_PROMPT = """You are an assistant for answering questions.
- You are given the extracted parts of a long document and a question. Provide a conversational answer.
- If you don't know the answer, just say "I do not know." Don't make up an answer."""
-
- def search(query: str, k: int = 3):
-     """A function that embeds a new query and returns the most probable results."""
-     embedded_query = ST.encode(query)  # Embed new query
-     scores, retrieved_examples = data.get_nearest_examples(  # Retrieve results
-         "embedding", embedded_query,  # Compare our new embedded query with the dataset embeddings
-         k=k  # Get only top k results
-     )
-     return scores, retrieved_examples
-
- def format_prompt(prompt, retrieved_documents, k):
-     """Using the retrieved documents we will prompt the model to generate our responses."""
-     PROMPT = f"Question:{prompt}\nContext:"
-     for idx in range(k):
-         PROMPT += f"{retrieved_documents['text'][idx]}\n"
-     return PROMPT

- @spaces.GPU(duration=150)
- def talk(prompt, history):
-     k = 1  # Number of retrieved documents
-     scores, retrieved_documents = search(prompt, k)
-     formatted_prompt = format_prompt(prompt, retrieved_documents, k)
-     formatted_prompt = formatted_prompt[:2000]  # To avoid GPU OOM
-     messages = [{"role": "system", "content": SYS_PROMPT}, {"role": "user", "content": formatted_prompt}]
-     # Tell the model to generate
-     input_ids = tokenizer.apply_chat_template(
-         messages,
-         add_generation_prompt=True,
-         return_tensors="pt"
-     ).to(model.device)
-     outputs = model.generate(
-         input_ids,
-         max_new_tokens=1024,
-         eos_token_id=terminators,
-         do_sample=True,
-         temperature=0.6,
-         top_p=0.9,
-     )
-     streamer = TextIteratorStreamer(
-         tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
-     )
-     generate_kwargs = dict(
-         input_ids=input_ids,
-         streamer=streamer,
-         max_new_tokens=1024,
-         do_sample=True,
-         top_p=0.95,
-         temperature=0.75,
-         eos_token_id=terminators,
-     )
-     t = Thread(target=model.generate, kwargs=generate_kwargs)
-     t.start()
-
-     outputs = []
-     for text in streamer:
-         outputs.append(text)
-         print(outputs)
-         yield "".join(outputs)
-
- TITLE = "# RAG"
-
- DESCRIPTION = """
- A RAG pipeline with a chatbot feature
- Resources used to build this project:
- * Embedding model: https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1
- * Dataset: https://huggingface.co/datasets/not-lain/wikipedia
- * FAISS docs: https://huggingface.co/docs/datasets/v2.18.0/en/package_reference/main_classes#datasets.Dataset.add_faiss_index
- * Chatbot: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
- """
-
- demo = gr.ChatInterface(
-     fn=talk,
-     chatbot=gr.Chatbot(
-         show_label=True,
-         show_share_button=True,
-         show_copy_button=True,
-         likeable=True,
-         layout="bubble",
-         bubble_full_width=False,
-     ),
-     theme="Soft",
-     examples=[["what's anarchy?"]],
-     title=TITLE,
-     description=DESCRIPTION,
- )
- demo.launch(debug=True)
 
  import gradio as gr
+ import pandas as pd
+ from transformers import pipeline
+
+ # Load CSV data
+ data = pd.read_csv('documents.csv')
+
+ # Load a transformer model (you can choose a suitable model from Hugging Face)
+ # For this example, we'll use a simple QA model
+ qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
+
+ # Function to retrieve the relevant document and generate a response
+ def retrieve_and_generate(question):
+     # Combine all abstracts into a single string (you can improve this by better retrieval methods)
+     abstracts = " ".join(data['Abstract'].fillna("").tolist())
+
+     # Retrieve the most relevant section from the combined abstracts
+     response = qa_model(question=question, context=abstracts)
+
+     return response['answer']
+
+ # Create a Gradio interface
+ interface = gr.Interface(
+     fn=retrieve_and_generate,
+     inputs=gr.inputs.Textbox(lines=2, placeholder="Ask a question about the documents..."),
+     outputs="text",
+     title="RAG Chatbot",
+     description="Ask questions about the documents in the CSV file."
  )

+ # Launch the Gradio app
+ interface.launch()
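
The new app.py passes every abstract to the question-answering pipeline as one long context, and its own comment notes that retrieval could be improved. Below is a minimal sketch of one such improvement, assuming the same documents.csv with an Abstract column as in the diff; the TF-IDF ranking step, the scikit-learn dependency, and the helper names are illustrative assumptions, not part of the commit.

# Sketch only: rank abstracts with TF-IDF before running the same QA pipeline.
# Assumes documents.csv with an 'Abstract' column, as in the commit above;
# scikit-learn and the names below are not part of the commit.
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline

data = pd.read_csv("documents.csv")
abstracts = data["Abstract"].fillna("").tolist()

# Build a TF-IDF index over the abstracts once at startup.
vectorizer = TfidfVectorizer(stop_words="english")
doc_matrix = vectorizer.fit_transform(abstracts)  # one row per abstract

qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")

def retrieve_and_generate(question, k=3):
    # Rank abstracts by cosine similarity to the question and keep the top k.
    query_vec = vectorizer.transform([question])
    scores = cosine_similarity(query_vec, doc_matrix)[0]
    top_idx = scores.argsort()[::-1][:k]
    context = " ".join(abstracts[i] for i in top_idx)
    # Extract the answer from the narrowed context only.
    return qa_model(question=question, context=context)["answer"]

The resulting retrieve_and_generate could be wired into gr.Interface exactly as in the new app.py, with only the fn argument unchanged in shape.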