File size: 5,661 Bytes
206bc94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6f5ab24
 
 
 
 
b884aa9
206bc94
 
9fb5cd1
206bc94
 
 
 
b884aa9
206bc94
 
b884aa9
206bc94
 
b884aa9
206bc94
 
6f5ab24
206bc94
 
 
 
 
 
 
 
 
 
6f5ab24
206bc94
b9f86a4
 
 
 
 
206bc94
b884aa9
 
206bc94
b884aa9
6f5ab24
206bc94
 
 
 
 
 
b884aa9
6f5ab24
206bc94
 
 
b884aa9
 
206bc94
 
 
 
 
6f5ab24
 
b884aa9
206bc94
 
 
dc294fb
b884aa9
 
 
316ac93
b884aa9
6f5ab24
b884aa9
206bc94
 
 
 
 
 
 
9fb5cd1
b9f86a4
 
 
 
 
 
 
b884aa9
b9f86a4
dc294fb
6f5ab24
d6de8db
 
 
 
 
 
 
 
 
 
 
 
 
 
b884aa9
6f5ab24
b884aa9
 
62841b2
81fddf5
b884aa9
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
"""
This script sets up a Gradio interface for querying an AI assistant about additive manufacturing research.
It uses a vectorstore to retrieve relevant research excerpts and a language model to generate responses.

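Modules:
    - gradio: Interface handling
    - spaces: For GPU
    - transformers: Imported for the earlier pipeline-based LLM; not used by the current code
    - llama_cpp: LLM loading and inference (GGUF model)
    - langchain_community.vectorstores: Vectorstore for publications
    - langchain_huggingface: Embeddings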

Constants:
    - PUBLICATIONS_TO_RETRIEVE: The number of publications to retrieve for the prompt
    - RAG_TEMPLATE: The template for the RAG prompt

Functions:
    - preprocess(query: str) -> str: Generates a prompt based on the top k documents matching the query.
    - reply(message: str, history: list[str]) -> str: Generates a response to the user’s message.

Example Queries:
    - "What is multi-material 3D printing?"
    - "How is additive manufacturing being applied in aerospace?"
    - "Tell me about innovations in metal 3D printing techniques."
    - "What are some sustainable materials for 3D printing?"
    - "What are the biggest challenges with support structures in additive manufacturing?"
    - "How is 3D printing impacting the medical field?"
    - "What are some common applications of additive manufacturing in industry?"
    - "What are the benefits and limitations of using polymers in 3D printing?"
    - "Tell me about the environmental impacts of additive manufacturing."
    - "What are the primary limitations of current 3D printing technologies?"
    - "How are researchers improving the speed of 3D printing processes?"
    - "What are the best practices for managing post-processing in additive manufacturing?"
"""

import gradio  # Interface handling
import spaces  # For GPU
import transformers  # LLM Loading
import llama_cpp  # GGUF LLM loading and inference
import langchain_community.vectorstores  # Vectorstore for publications
import langchain_huggingface  # Embeddings

# How many publication excerpts are pulled from the vectorstore for each prompt
PUBLICATIONS_TO_RETRIEVE = 5

# Prompt skeleton for retrieval-augmented generation. It is filled in with
# str.format using the fields {research_excerpts} and {query}. The first two
# lines end in a space before the newline; that is kept as-is so the text sent
# to the model stays byte-identical.
RAG_TEMPLATE = (
    "You are an AI assistant who enjoys helping users learn about research. \n"
    "Answer the USER_QUERY on additive manufacturing research using the RESEARCH_EXCERPTS. \n"
    "Provide a concise ANSWER based on these excerpts. Avoid listing references.\n"
    "\n"
    "===== RESEARCH_EXCERPTS =====\n"
    "{research_excerpts}\n"
    "\n"
    "===== USER_QUERY =====\n"
    "{query}\n"
    "\n"
    "===== ANSWER =====\n"
)

# Embedding model handed to the vectorstore: all-MiniLM-L12-v2 on the CUDA
# device, with embedding normalization turned off.
_publication_embeddings = langchain_huggingface.HuggingFaceEmbeddings(
    model_name="all-MiniLM-L12-v2",
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": False},
)

# Load the FAISS vectorstore of SFF publications from the local
# "publication_vectorstore" folder.
# NOTE(review): allow_dangerous_deserialization=True lets load_local unpickle
# the stored index data. That is only acceptable while the folder is produced
# and shipped by this project - never point it at untrusted files.
publication_vectorstore = langchain_community.vectorstores.FAISS.load_local(
    folder_path="publication_vectorstore",
    embeddings=_publication_embeddings,
    allow_dangerous_deserialization=True,
)

# Size of the LLM context window, in tokens. llama_cpp defaults to 512, which
# a prompt holding PUBLICATIONS_TO_RETRIEVE excerpts plus the generated answer
# can exceed.
# NOTE(review): 4096 is a judgement call - tune it to the excerpt length stored
# in the vectorstore and to the memory available.
LLM_CONTEXT_TOKENS = 4096

# Create the callable LLM: a GGUF build of Qwen2.5-7B-Instruct (Q4_K_M
# quantization) fetched from the Hugging Face Hub by llama_cpp. This replaces
# the earlier transformers text-generation pipeline.
llm = llama_cpp.Llama.from_pretrained(
    repo_id="bartowski/Qwen2.5-7B-Instruct-GGUF",
    filename="Qwen2.5-7B-Instruct-Q4_K_M.gguf",
    n_ctx=LLM_CONTEXT_TOKENS,
)


def preprocess(query: str) -> str:
    """
    Builds the RAG prompt for a query from the best-matching publications.

    Args:
        query (str): The user's query.

    Returns:
        str: RAG_TEMPLATE filled in with the retrieved research excerpts and
            the user's query.
    """

    # Similarity search for the PUBLICATIONS_TO_RETRIEVE closest documents
    matches = publication_vectorstore.search(
        query, search_type="similarity", k=PUBLICATIONS_TO_RETRIEVE
    )

    # Wrap each document's text in quotes and ellipses, one excerpt per entry
    quoted_excerpts = []
    for match in matches:
        quoted_excerpts.append(f'"... {match.page_content}..."')

    # Separate excerpts with a blank line and fill in the template
    excerpt_section = "\n\n".join(quoted_excerpts)
    prompt = RAG_TEMPLATE.format(research_excerpts=excerpt_section, query=query)

    # Echo the final prompt to stdout for debugging
    print(prompt)

    return prompt


@spaces.GPU
def reply(message: str, history: list[str]) -> str:
    """
    Generates a response to the user's message.

    The message is turned into a RAG prompt by preprocess() and completed by
    the module-level llm.

    Args:
        message (str): The user's message or query.
        history (list[str]): The conversation history passed in by the chat
            interface. It is not used: each answer is generated from the
            current message alone.

    Returns:
        str: The generated response from the language model.
    """

    # llama_cpp stops after 16 tokens unless max_tokens is given, which cuts
    # answers off mid-sentence. Allow up to 512 tokens, the same budget the
    # earlier transformers pipeline used (max_new_tokens=512).
    completion = llm(preprocess(message), max_tokens=512)

    return completion["choices"][0]["text"]

# Example queries offered in the chat interface (passed as `examples` to
# gradio.ChatInterface below). The same list appears in the module docstring,
# so keep the two in sync when editing.
EXAMPLE_QUERIES = [
    "What is multi-material 3D printing?",
    "How is additive manufacturing being applied in aerospace?",
    "Tell me about innovations in metal 3D printing techniques.",
    "What are some sustainable materials for 3D printing?",
    "What are the biggest challenges with support structures in additive manufacturing?",
    "How is 3D printing impacting the medical field?",
    "What are some common applications of additive manufacturing in industry?",
    "What are the benefits and limitations of using polymers in 3D printing?",
    "Tell me about the environmental impacts of additive manufacturing.",
    "What are the primary limitations of current 3D printing technologies?",
    "How are researchers improving the speed of 3D printing processes?",
    "What are the best practices for managing post-processing in additive manufacturing?",
]

# Chat window for the interface, with its label, share button and copy button
# hidden and full-width bubbles turned off
chat_window = gradio.Chatbot(
    show_label=False,
    show_share_button=False,
    show_copy_button=False,
    bubble_full_width=False,
)

# Build the chat interface around reply(), offering the example queries
# without caching them
demo = gradio.ChatInterface(
    reply,
    examples=EXAMPLE_QUERIES,
    cache_examples=False,
    chatbot=chat_window,
)

# Run the Gradio interface in debug mode
demo.launch(debug=True)