from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"

# Load the tokenizer and model; device_map="auto" places the weights on the
# available GPU(s) and falls back to CPU if none is found.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
)

# Wrap the model and tokenizer in a text-generation pipeline for inference.
qwen_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)


def generate_response(retrieved_texts, query, max_new_tokens=500):
    """
    Generates a response based on the retrieved texts and query using Qwen.

    Args:
        retrieved_texts (list): List of retrieved text strings (e.g., from BLIP).
        query (str): The user's question about the image.
        max_new_tokens (int): Maximum number of tokens to generate for the answer.

    Returns:
        str: The generated answer.
    """
    # Join the retrieved descriptions into one context block and build the prompt.
    context = "\n".join(retrieved_texts)
    prompt = f"This is the detail about the image:\n{context}\n\nQuestion: {query}\nAnswer:"

    result = qwen_pipeline(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
    )

    # By default the pipeline returns the prompt followed by the completion,
    # so keep only the text after the final "Answer:" marker.
    full_generation = result[0]["generated_text"]
    if "Answer:" in full_generation:
        final_answer = full_generation.split("Answer:")[-1].strip()
    else:
        final_answer = full_generation

    return final_answer
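

# --- Alternative: chat-template prompting (a sketch, not the original code) ---
# Qwen2.5-7B-Instruct is a chat model, so the tokenizer's chat template can be
# applied instead of the raw "Question:/Answer:" prompt above.
# generate_response_chat is a hypothetical variant added here for illustration.
def generate_response_chat(retrieved_texts, query, max_new_tokens=500):
    context = "\n".join(retrieved_texts)
    messages = [
        {"role": "user",
         "content": f"This is the detail about the image:\n{context}\n\nQuestion: {query}"},
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    result = qwen_pipeline(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,  # return only the newly generated completion
    )
    return result[0]["generated_text"].strip()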
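
# --- Example usage ---
# A minimal sketch: the captions below stand in for real retrieval output
# (e.g., BLIP captions of an indexed image) and are illustrative placeholders.
if __name__ == "__main__":
    retrieved = [
        "a brown dog running across a grassy field",
        "the dog is carrying a yellow ball in its mouth",
    ]
    print(generate_response(retrieved, "What is the dog holding?"))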