ccm committed on
Commit 9fb5cd1 · verified · 1 Parent(s): 6f5ab24

Update app.py

Files changed (1)
  1. app.py +14 -24
app.py CHANGED
@@ -4,6 +4,7 @@ import transformers  # LLM Loading
 import langchain_community.vectorstores  # Vectorstore for publications
 import langchain_huggingface  # Embeddings
 
+
 # Greeting message
 GREETING = (
     "Howdy! I'm an AI agent that uses "
@@ -19,7 +20,9 @@ LLM_MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
 PUBLICATIONS_TO_RETRIEVE = 10
 
 
-def embedding(device: str = "cuda", normalize_embeddings: bool = False) -> langchain_huggingface.HuggingFaceEmbeddings:
+def embedding(
+    device: str = "cuda", normalize_embeddings: bool = False
+) -> langchain_huggingface.HuggingFaceEmbeddings:
     """Loads embedding model with specified device and normalization."""
     return langchain_huggingface.HuggingFaceEmbeddings(
         model_name=EMBEDDING_MODEL_NAME,
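
For reference, the embedding() helper reformatted in this hunk wraps the standard langchain_huggingface loader. A minimal usage sketch, assuming the documented HuggingFaceEmbeddings keywords (model_kwargs for the device, encode_kwargs for normalization) and a placeholder model name, since EMBEDDING_MODEL_NAME and the rest of the function body fall outside this hunk:

    import langchain_huggingface

    # Placeholder value; app.py defines its own EMBEDDING_MODEL_NAME constant.
    EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

    emb = langchain_huggingface.HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL_NAME,
        model_kwargs={"device": "cpu"},  # "cuda" in the function's default
        encode_kwargs={"normalize_embeddings": True},
    )
    vector = emb.embed_query("sample question about a publication")
    print(len(vector))  # 384 dimensions for this placeholder model
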
@@ -43,8 +46,12 @@ def load_publication_vectorstore() -> langchain_community.vectorstores.FAISS:
 
 # Load vectorstore and models
 publication_vectorstore = load_publication_vectorstore()
-tokenizer = transformers.AutoTokenizer.from_pretrained(LLM_MODEL_NAME, trust_remote_code=True)
-streamer = transformers.TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+tokenizer = transformers.AutoTokenizer.from_pretrained(
+    LLM_MODEL_NAME, trust_remote_code=True
+)
+streamer = transformers.TextIteratorStreamer(
+    tokenizer, skip_prompt=True, skip_special_tokens=True
+)
 chatmodel = transformers.AutoModelForCausalLM.from_pretrained(
     LLM_MODEL_NAME, device_map="auto", torch_dtype="auto", trust_remote_code=True
 )
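
Note that app.py still builds a TextIteratorStreamer here although the rewritten reply() below no longer streams. For context, the usual transformers streaming pattern runs generate() on a background thread while iterating the streamer; a hedged sketch reusing the tokenizer, streamer, and chatmodel objects from this hunk (stream_reply is a hypothetical helper, not in the file):

    import threading

    def stream_reply(prompt: str):
        # Hypothetical helper, not part of app.py: encode the prompt, start
        # generation in a background thread, and yield text as it decodes.
        inputs = tokenizer([prompt], return_tensors="pt").to(chatmodel.device)
        thread = threading.Thread(
            target=chatmodel.generate,
            kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512),
        )
        thread.start()
        for chunk in streamer:
            yield chunk
        thread.join()
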
@@ -81,28 +88,11 @@ def reply(message: str, history: list[str]) -> str:
     Generates a response to the user’s message.
     """
     # Preprocess message
-    message = preprocess(message, PUBLICATIONS_TO_RETRIEVE)
-    history_formatted = [
-        {"role": role, "content": message_pair[idx]}
-        for message_pair in history
-        for idx, role in enumerate(["user", "assistant"])
-        if message_pair[idx] is not None
-    ] + [{"role": "user", "content": message}]
-
-    # Tokenize and prepare model input
-    text = tokenizer.apply_chat_template(
-        history_formatted, tokenize=False, add_generation_prompt=True
-    )
-    model_inputs = tokenizer([text], return_tensors="pt").to("cuda")
 
-    # Generate response directly
-    output_tokens = chatmodel.generate(
-        **model_inputs, max_new_tokens=512
-    )
-
-    # Decode the output tokens
-    response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
-    return response
+    pipe = transformers.pipeline("text-generation", model="Qwen/Qwen2.5-7B-Instruct")
+
+    message = preprocess(message, PUBLICATIONS_TO_RETRIEVE)
+    return pipe(message, max_length=512)[0]["generated_text"]
 
 
 # Example Queries for Interface
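
The rewritten reply() constructs a fresh transformers.pipeline on every call, which reloads the 7B checkpoint each time, and max_length=512 counts prompt tokens as well as the completion. For comparison, a hedged sketch that reuses the chatmodel and tokenizer already loaded at module scope (pipeline() accepts preloaded model and tokenizer objects; reply_once is a hypothetical name):

    # Sketch, not part of the commit: build the pipeline once at import time.
    pipe = transformers.pipeline(
        "text-generation", model=chatmodel, tokenizer=tokenizer
    )

    def reply_once(message: str) -> str:
        # max_new_tokens caps only the generated continuation, and
        # return_full_text=False drops the echoed prompt from the output.
        out = pipe(message, max_new_tokens=512, return_full_text=False)
        return out[0]["generated_text"]
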