RakeshUtekar committed
Commit 73cd0d4 · verified · 1 Parent(s): 85c57d3

Update generate.py

Files changed (1)
  1. generate.py +46 -25
generate.py CHANGED
@@ -1,38 +1,59 @@
 import os
-
-import openai
 from dotenv import load_dotenv
 
-# Load environment variables from .env file
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+
+# Load environment variables if needed
 load_dotenv()
 
-# Retrieve the OpenAI API key from the environment variable
-openai.api_key = os.getenv('api_key')
+# Use the Qwen2.5-7B-Instruct-1M model from Hugging Face
+MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct-1M"
 
-def generate_response(retrieved_texts, query, max_tokens=2000):
-    """
-    Generates a response based on the retrieved texts and query.
+# Initialize tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    device_map="auto",  # or "cpu", "cuda", etc. as appropriate
+    trust_remote_code=True
+)
 
+# Create pipeline
+qwen_pipeline = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer
+)
+
+def generate_response(retrieved_texts, query, max_new_tokens=512):
+    """
+    Generates a response based on the retrieved texts and query using the Qwen pipeline.
     Args:
-        retrieved_texts (list): List of retrieved text strings.
-        query (str): Query string.
-        max_tokens (int): Maximum number of tokens for the response.
+        retrieved_texts (list): List of retrieved text strings.
+        query (str): The user's query string.
+        max_new_tokens (int): Maximum number of tokens for the generated answer.
 
     Returns:
-        str: Generated response.
+        str: Generated response.
     """
+    # Construct a simple prompt using your retrieved context
     context = "\n".join(retrieved_texts)
-    prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
-
-    response = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": prompt}
-        ],
-        max_tokens=max_tokens,
-        n=1,
-        stop=None,
-        temperature=0.5,
+    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
+
+    # Generate the text
+    result = qwen_pipeline(
+        prompt,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,   # or False if you prefer deterministic output
+        temperature=0.7,  # adjust as needed
     )
-    return response.choices[0].message['content']
+
+    # Extract the generated text from the pipeline's output
+    generated_text = result[0]["generated_text"]
+
+    # Optional: Clean up the output to isolate the answer portion
+    if "Answer:" in generated_text:
+        answer_part = generated_text.split("Answer:")[-1].strip()
+    else:
+        answer_part = generated_text
+
+    return answer_part
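
For reference, a minimal sketch of how the updated function might be called (the docs list and question below are hypothetical placeholders, not part of the commit; importing the module loads the 7B model, which needs a GPU or substantial RAM):

# Hypothetical usage of the updated generate_response, assuming generate.py is importable
from generate import generate_response

# Placeholder passages standing in for real retrieval results
docs = [
    "Paris is the capital of France.",
    "France is a country in Western Europe.",
]

# Ask a question grounded in the retrieved context
print(generate_response(docs, "What is the capital of France?", max_new_tokens=128))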