abhi1nandy2 committed
Commit b9fcfca · verified · 1 Parent(s): 113e4ab

Update app.py

Files changed (1):
  1. app.py +25 -42
app.py CHANGED
@@ -32,46 +32,29 @@ SYSTEM_MESSAGE = (
      "Context: " + " ".join(text_list)
  )

- # Use a model that is both lightweight and includes a proper chat configuration.
- client = InferenceClient("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ")
-
- def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSAGE,
-             max_tokens=100, temperature=0.7, top_p=0.95):
-     messages = [{"role": "system", "content": system_message}]
-     for q, a in history:
-         messages.append({"role": "user", "content": "Question: " + q})
-         messages.append({"role": "assistant", "content": "Answer: " + a})
-     messages.append({"role": "user", "content": message})
-     try:
-         # Enable streaming mode to receive output faster.
-         response_stream = client.chat_completion(
-             messages,
-             max_tokens=max_tokens,
-             temperature=temperature,
-             top_p=top_p,
-             stream=True,
-         )
-         output = ""
-         for chunk in response_stream:
-             if hasattr(chunk, "choices") and chunk.choices:
-                 part = chunk.choices[0].message.get("content", "")
-                 output += part
-         return output.strip()
-     except Exception as e:
-         print(f"An error occurred: {e}")
-         return str(e)
-
- initial_message = [("user", "Yo who dis Abhilash?")]
- markdown_note = "## Ask Anything About Me! (Might show a tad bit of hallucination!)"
-
- demo = gr.Blocks()
- with demo:
-     gr.Markdown(markdown_note)
-     gr.ChatInterface(
-         fn=respond,
-         # examples=["Yo who dis Abhilash?", "What is Abhilash's most recent publication?"],
-         additional_inputs=[],
-     )
+ # Create a Hugging Face Inference client using a CPU-friendly model.
+ # Here we use 'google/flan-t5-base' as an example; you can adjust the model if needed.
+ client = InferenceClient(model="google/flan-t5-base")
+
+ def answer_query(query):
+     # Compose a prompt using the system message, user query, and a reminder for a short answer.
+     prompt = SYSTEM_MESSAGE + "\nUser: " + query + "\nAnswer in less than 30 words:"
+     # Generate answer with a limit on new tokens to ensure brevity.
+     result = client.text_generation(prompt, max_new_tokens=60)
+     # Handle both list or direct string responses from the inference client.
+     if isinstance(result, list):
+         answer = result[0].get("generated_text", "")
+     else:
+         answer = result
+     return answer.strip()
+
+ iface = gr.Interface(
+     fn=answer_query,
+     inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
+     outputs="text",
+     title="Homepage QA Chatbot",
+     description="A chatbot answering queries about the homepage using pre-fetched context."
+ )

- if __name__ == "__main__":
-     demo.launch()
+ if __name__ == '__main__':
+     iface.launch()
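
A note for reviewers on the removed block: the old streaming loop read chunk.choices[0].message, but when chat_completion is called with stream=True, huggingface_hub delivers incremental text on choices[0].delta.content, so the loop would typically accumulate an empty string. A minimal corrected consumption loop would look like the sketch below (same model name and max_tokens as the removed code; whether the serverless Inference API actually serves that GPTQ checkpoint is a separate question, and plausibly part of why this commit swaps models):

from huggingface_hub import InferenceClient

client = InferenceClient("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ")
messages = [{"role": "user", "content": "Yo who dis Abhilash?"}]

output = ""
for chunk in client.chat_completion(messages, max_tokens=100, stream=True):
    # Streamed chunks carry incremental text on .delta, not .message.
    if chunk.choices:
        output += chunk.choices[0].delta.content or ""
print(output.strip())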
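
And a quick way to smoke-test the added text_generation path outside Gradio. With default arguments (stream=False, details=False), recent huggingface_hub versions return a plain str here, so the isinstance(result, list) branch in answer_query is a defensive fallback rather than the common case. The system message below is a stand-in, since the real SYSTEM_MESSAGE is assembled from scraped homepage text; note too that flan-t5-base is a text2text model, so whether the text-generation route accepts it depends on the backend:

from huggingface_hub import InferenceClient

client = InferenceClient(model="google/flan-t5-base")

# Stand-in for SYSTEM_MESSAGE, which app.py builds from scraped homepage text.
system_message = "You answer questions about Abhilash's homepage. Context: ..."
query = "Yo who dis Abhilash?"
prompt = system_message + "\nUser: " + query + "\nAnswer in less than 30 words:"

result = client.text_generation(prompt, max_new_tokens=60)
print(result.strip() if isinstance(result, str) else result)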