abhi1nandy2 committed
Commit b9fcfca · verified · 1 Parent(s): 113e4ab

Update app.py

Files changed (1):
  1. app.py +25 -42
app.py CHANGED
@@ -32,46 +32,29 @@ SYSTEM_MESSAGE = (
      "Context: " + " ".join(text_list)
  )

- # Use a model that is both lightweight and includes a proper chat configuration.
- client = InferenceClient("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ")
-
- def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSAGE,
-             max_tokens=100, temperature=0.7, top_p=0.95):
-     messages = [{"role": "system", "content": system_message}]
-     for q, a in history:
-         messages.append({"role": "user", "content": "Question: " + q})
-         messages.append({"role": "assistant", "content": "Answer: " + a})
-     messages.append({"role": "user", "content": message})
-     try:
-         # Enable streaming mode to receive output faster.
-         response_stream = client.chat_completion(
-             messages,
-             max_tokens=max_tokens,
-             temperature=temperature,
-             top_p=top_p,
-             stream=True,
-         )
-         output = ""
-         for chunk in response_stream:
-             if hasattr(chunk, "choices") and chunk.choices:
-                 part = chunk.choices[0].message.get("content", "")
-                 output += part
-         return output.strip()
-     except Exception as e:
-         print(f"An error occurred: {e}")
-         return str(e)
-
- initial_message = [("user", "Yo who dis Abhilash?")]
- markdown_note = "## Ask Anything About Me! (Might show a tad bit of hallucination!)"
-
- demo = gr.Blocks()
- with demo:
-     gr.Markdown(markdown_note)
-     gr.ChatInterface(
-         fn=respond,
-         # examples=["Yo who dis Abhilash?", "What is Abhilash's most recent publication?"],
-         additional_inputs=[],
-     )
+ # Create a Hugging Face Inference client using a CPU-friendly model.
+ # Here we use 'google/flan-t5-base' as an example; you can adjust the model if needed.
+ client = InferenceClient(model="google/flan-t5-base")
+
+ def answer_query(query):
+     # Compose a prompt using the system message, user query, and a reminder for a short answer.
+     prompt = SYSTEM_MESSAGE + "\nUser: " + query + "\nAnswer in less than 30 words:"
+     # Generate answer with a limit on new tokens to ensure brevity.
+     result = client.text_generation(prompt, max_new_tokens=60)
+     # Handle both list or direct string responses from the inference client.
+     if isinstance(result, list):
+         answer = result[0].get("generated_text", "")
+     else:
+         answer = result
+     return answer.strip()
+
+ iface = gr.Interface(
+     fn=answer_query,
+     inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
+     outputs="text",
+     title="Homepage QA Chatbot",
+     description="A chatbot answering queries about the homepage using pre-fetched context."
+ )

- if __name__ == "__main__":
-     demo.launch()
+ if __name__ == '__main__':
+     iface.launch()
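
A note for reviewers on the removed block: the old streaming loop read chunk.choices[0].message, but when chat_completion is called with stream=True, huggingface_hub delivers incremental text on choices[0].delta.content, so the loop would typically accumulate an empty string. A minimal corrected consumption loop would look like the sketch below (same model name and max_tokens as the removed code; whether the serverless Inference API actually serves that GPTQ checkpoint is a separate question, and plausibly part of why this commit swaps models):

from huggingface_hub import InferenceClient

client = InferenceClient("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ")
messages = [{"role": "user", "content": "Yo who dis Abhilash?"}]

output = ""
for chunk in client.chat_completion(messages, max_tokens=100, stream=True):
    # Streamed chunks carry incremental text on .delta, not .message.
    if chunk.choices:
        output += chunk.choices[0].delta.content or ""
print(output.strip())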
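
And a quick way to smoke-test the added text_generation path outside Gradio. With default arguments (stream=False, details=False), recent huggingface_hub versions return a plain str here, so the isinstance(result, list) branch in answer_query is a defensive fallback rather than the common case. The system message below is a stand-in, since the real SYSTEM_MESSAGE is assembled from scraped homepage text; note too that flan-t5-base is a text2text model, so whether the text-generation route accepts it depends on the backend:

from huggingface_hub import InferenceClient

client = InferenceClient(model="google/flan-t5-base")

# Stand-in for SYSTEM_MESSAGE, which app.py builds from scraped homepage text.
system_message = "You answer questions about Abhilash's homepage. Context: ..."
query = "Yo who dis Abhilash?"
prompt = system_message + "\nUser: " + query + "\nAnswer in less than 30 words:"

result = client.text_generation(prompt, max_new_tokens=60)
print(result.strip() if isinstance(result, str) else result)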