ccm committed on
Commit 9fb5cd1 · verified · 1 Parent(s): 6f5ab24

Update app.py

Files changed (1)
  1. app.py +14 -24
app.py CHANGED
@@ -4,6 +4,7 @@ import transformers  # LLM Loading
 import langchain_community.vectorstores  # Vectorstore for publications
 import langchain_huggingface  # Embeddings
 
+
 # Greeting message
 GREETING = (
     "Howdy! I'm an AI agent that uses "
@@ -19,7 +20,9 @@ LLM_MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
 PUBLICATIONS_TO_RETRIEVE = 10
 
 
-def embedding(device: str = "cuda", normalize_embeddings: bool = False) -> langchain_huggingface.HuggingFaceEmbeddings:
+def embedding(
+    device: str = "cuda", normalize_embeddings: bool = False
+) -> langchain_huggingface.HuggingFaceEmbeddings:
     """Loads embedding model with specified device and normalization."""
     return langchain_huggingface.HuggingFaceEmbeddings(
         model_name=EMBEDDING_MODEL_NAME,
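
For reference, the embedding() helper reformatted in this hunk wraps the standard langchain_huggingface loader. A minimal usage sketch, assuming the documented HuggingFaceEmbeddings keywords (model_kwargs for the device, encode_kwargs for normalization) and a placeholder model name, since EMBEDDING_MODEL_NAME and the rest of the function body fall outside this hunk:

    import langchain_huggingface

    # Placeholder value; app.py defines its own EMBEDDING_MODEL_NAME constant.
    EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

    emb = langchain_huggingface.HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL_NAME,
        model_kwargs={"device": "cpu"},  # "cuda" in the function's default
        encode_kwargs={"normalize_embeddings": True},
    )
    vector = emb.embed_query("sample question about a publication")
    print(len(vector))  # 384 dimensions for this placeholder model
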
@@ -43,8 +46,12 @@ def load_publication_vectorstore() -> langchain_community.vectorstores.FAISS:
 
 # Load vectorstore and models
 publication_vectorstore = load_publication_vectorstore()
-tokenizer = transformers.AutoTokenizer.from_pretrained(LLM_MODEL_NAME, trust_remote_code=True)
-streamer = transformers.TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+tokenizer = transformers.AutoTokenizer.from_pretrained(
+    LLM_MODEL_NAME, trust_remote_code=True
+)
+streamer = transformers.TextIteratorStreamer(
+    tokenizer, skip_prompt=True, skip_special_tokens=True
+)
 chatmodel = transformers.AutoModelForCausalLM.from_pretrained(
     LLM_MODEL_NAME, device_map="auto", torch_dtype="auto", trust_remote_code=True
 )
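
Note that app.py still builds a TextIteratorStreamer here although the rewritten reply() below no longer streams. For context, the usual transformers streaming pattern runs generate() on a background thread while iterating the streamer; a hedged sketch reusing the tokenizer, streamer, and chatmodel objects from this hunk (stream_reply is a hypothetical helper, not in the file):

    import threading

    def stream_reply(prompt: str):
        # Hypothetical helper, not part of app.py: encode the prompt, start
        # generation in a background thread, and yield text as it decodes.
        inputs = tokenizer([prompt], return_tensors="pt").to(chatmodel.device)
        thread = threading.Thread(
            target=chatmodel.generate,
            kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512),
        )
        thread.start()
        for chunk in streamer:
            yield chunk
        thread.join()
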
@@ -81,28 +88,11 @@ def reply(message: str, history: list[str]) -> str:
     Generates a response to the user’s message.
     """
     # Preprocess message
-    message = preprocess(message, PUBLICATIONS_TO_RETRIEVE)
-    history_formatted = [
-        {"role": role, "content": message_pair[idx]}
-        for message_pair in history
-        for idx, role in enumerate(["user", "assistant"])
-        if message_pair[idx] is not None
-    ] + [{"role": "user", "content": message}]
-
-    # Tokenize and prepare model input
-    text = tokenizer.apply_chat_template(
-        history_formatted, tokenize=False, add_generation_prompt=True
-    )
-    model_inputs = tokenizer([text], return_tensors="pt").to("cuda")
 
-    # Generate response directly
-    output_tokens = chatmodel.generate(
-        **model_inputs, max_new_tokens=512
-    )
-
-    # Decode the output tokens
-    response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
-    return response
+    pipe = transformers.pipeline("text-generation", model="Qwen/Qwen2.5-7B-Instruct")
+
+    message = preprocess(message, PUBLICATIONS_TO_RETRIEVE)
+    return pipe(message, max_length=512)[0]["generated_text"]
 
 
 # Example Queries for Interface
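
The rewritten reply() constructs a fresh transformers.pipeline on every call, which reloads the 7B checkpoint each time, and max_length=512 counts prompt tokens as well as the completion. For comparison, a hedged sketch that reuses the chatmodel and tokenizer already loaded at module scope (pipeline() accepts preloaded model and tokenizer objects; reply_once is a hypothetical name):

    # Sketch, not part of the commit: build the pipeline once at import time.
    pipe = transformers.pipeline(
        "text-generation", model=chatmodel, tokenizer=tokenizer
    )

    def reply_once(message: str) -> str:
        # max_new_tokens caps only the generated continuation, and
        # return_full_text=False drops the echoed prompt from the output.
        out = pipe(message, max_new_tokens=512, return_full_text=False)
        return out[0]["generated_text"]
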