from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Replace with your target Qwen model on Hugging Face
MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"

# Initialize tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",        # or "cuda", etc. if you want to specify
    trust_remote_code=True
)

# Create pipeline
qwen_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer
)

def generate_response(retrieved_texts, query, max_new_tokens=500):
    """
    Generates a response based on the retrieved texts and query using Qwen.
    Args:
        retrieved_texts (list): List of retrieved text strings (e.g., from BLIP).
        query (str): The user's question about the image.
        max_new_tokens (int): Maximum tokens to generate for the answer.
    Returns:
        str: The generated answer.
    """
    # Construct a prompt that includes the image details as context
    context = "\n".join(retrieved_texts)
    prompt = f"This is the detail about the image:\n{context}\n\nQuestion: {query}\nAnswer:"

    # Generate the text
    result = qwen_pipeline(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,       # set to False (and drop temperature) for deterministic, greedy decoding
        temperature=0.7,      # tweak as needed when sampling
    )

    # The pipeline returns a list of dicts with key "generated_text"
    full_generation = result[0]["generated_text"]

    # Optionally parse out the final answer if the model repeats the prompt
    if "Answer:" in full_generation:
        final_answer = full_generation.split("Answer:")[-1].strip()
    else:
        final_answer = full_generation

    return final_answer
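

# Example usage (a minimal, hypothetical sketch: the captions and the question
# below are illustrative placeholders, not outputs of any real retrieval step):
if __name__ == "__main__":
    retrieved = [
        "A golden retriever running along a beach at sunset.",
        "The dog is carrying a red ball in its mouth.",
    ]
    print(generate_response(retrieved, "What is the dog carrying?"))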