snsynth committed on
Commit
8cec7ed
·
1 Parent(s): 2d994f7

add probability calculation

Browse files
Files changed (1) hide show
  1. rag_app/rag_2.py +10 -18
rag_app/rag_2.py CHANGED
@@ -36,14 +36,14 @@ def completion_to_prompt(completion):
36
  llm = LlamaCPP(
37
  model_path="models/Llama-3.2-1B-Instruct-Q4_K_M.gguf",
38
  temperature=0.1,
39
- max_new_tokens=256,
40
  context_window=16384,
41
  model_kwargs={"n_gpu_layers":-1, 'logits_all': False},
42
  messages_to_prompt=messages_to_prompt,
43
  completion_to_prompt=completion_to_prompt,)
44
 
45
  llm2 = Llama(model_path="models/Llama-3.2-1B-Instruct-Q4_K_M.gguf",
46
- n_gpu_layers=-1, n_ctx=8000)
47
 
48
 
49
  embedding_model = HuggingFaceEmbedding(
@@ -92,24 +92,16 @@ def is_relevant(query, index, threshold=0.7):
92
 
93
  def get_sequence_probability(llm, input_sequence):
94
  input_tokens = llm.tokenize(input_sequence.encode("utf-8"))
95
- sequence_logits = []
96
- sequence_logprobs = []
97
-
98
- eval_tokens = input_tokens[:1]
99
-
100
- for token in input_tokens[1:]:
101
- print("evaluating tokens for calculating log probs")
102
- llm.eval(eval_tokens)
103
 
104
- probs = llm.logits_to_logprobs(llm.eval_logits)
105
- sequence_logits.append(llm.eval_logits[-1][token])
106
- sequence_logprobs.append(probs[-1][token])
107
- eval_tokens.append(token)
108
-
109
- total_log_prob = sum(sequence_logprobs)
110
  sequence_probability = math.exp(total_log_prob)
111
  return sequence_probability
112
-
113
 
114
  def answer_question(query):
115
  if is_harmful(query):
@@ -142,7 +134,7 @@ def answer_question(query):
142
  retriever=retriever,
143
  node_postprocessors=[reranker],
144
  )
145
- response = keyword_query_engine.query(query)
146
  response_text = str(response)
147
  response_prob = get_sequence_probability(llm2, response_text)
148
  print(f"Output probability: {response_prob}")
 
36
  llm = LlamaCPP(
37
  model_path="models/Llama-3.2-1B-Instruct-Q4_K_M.gguf",
38
  temperature=0.1,
39
+ max_new_tokens=128,
40
  context_window=16384,
41
  model_kwargs={"n_gpu_layers":-1, 'logits_all': False},
42
  messages_to_prompt=messages_to_prompt,
43
  completion_to_prompt=completion_to_prompt,)
44
 
45
  llm2 = Llama(model_path="models/Llama-3.2-1B-Instruct-Q4_K_M.gguf",
46
+ n_gpu_layers=-1, n_ctx=8000, logits_all=True)
47
 
48
 
49
  embedding_model = HuggingFaceEmbedding(
 
92
 
93
  def get_sequence_probability(llm, input_sequence):
94
  input_tokens = llm.tokenize(input_sequence.encode("utf-8"))
95
+ llm.eval(input_tokens)
96
+ probs = llm.logits_to_logprobs(llm.eval_logits)
97
+ total_log_prob = 0.0
98
+ for i, token in enumerate(input_tokens):
99
+ token_log_prob = probs[i, token]
100
+ total_log_prob += token_log_prob
 
 
101
 
 
 
 
 
 
 
102
  sequence_probability = math.exp(total_log_prob)
103
  return sequence_probability
104
+
105
 
106
  def answer_question(query):
107
  if is_harmful(query):
 
134
  retriever=retriever,
135
  node_postprocessors=[reranker],
136
  )
137
+ response = keyword_query_engine.query(f"Answer in less than 100 words: \nQuery:{query}")
138
  response_text = str(response)
139
  response_prob = get_sequence_probability(llm2, response_text)
140
  print(f"Output probability: {response_prob}")