gemma-3-chat-api

Sleeping

Pamudu13 committed on Apr 3

Commit

47994b5

verified ·

1 Parent(s): 2e84d85

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,13 +11,18 @@ app = Flask(__name__, template_folder=os.getcwd())
 # Default settings
 class ChatConfig:
-    MODEL = "google/gemma-3-27b-it"
     DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
     DEFAULT_MAX_TOKENS = 512
     DEFAULT_TEMP = 0.3
     DEFAULT_TOP_P = 0.95
-client = InferenceClient(ChatConfig.MODEL)
 embed_model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder="/tmp")
 vector_dim = 384  # Embedding size
 index = faiss.IndexFlatL2(vector_dim)  # FAISS index

 # Default settings
 class ChatConfig:
+    MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
     DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
     DEFAULT_MAX_TOKENS = 512
     DEFAULT_TEMP = 0.3
     DEFAULT_TOP_P = 0.95
+# Read the Hugging Face API token from the HUGGINGFACE_TOKEN environment variable
+HF_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
+client = InferenceClient(
+    ChatConfig.MODEL,
+    token=HF_TOKEN  # Supplied via the environment; do not hard-code the token here
+)
 embed_model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder="/tmp")
 vector_dim = 384  # Embedding size
 index = faiss.IndexFlatL2(vector_dim)  # FAISS index