Robin Genolet committed
Commit 90d439d · 1 parent: 6355832

feat: use langchain

Changed files:
- app.py (+2 -1)
- utils/epfl_meditron_utils.py (+16 -3)
app.py CHANGED
@@ -83,7 +83,8 @@ def display_streamlit_sidebar():
     st.sidebar.write('**Parameters**')
     form = st.sidebar.form("config_form", clear_on_submit=True)
 
-
+    model_option = form.selectbox("Quickly select a model", ("llama", "meditron"))
+    model_repo_id = form.text_input(label="Repo", value=model_option)#value=st.session_state["model_repo_id"])
     model_filename = form.text_input(label="File name", value=st.session_state["model_filename"])
     model_type = form.text_input(label="Model type", value=st.session_state["model_type"])
     gpu_layers = form.slider('GPU Layers', min_value=0,
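The selectbox values ("llama", "meditron") are fed straight into the Repo field, so whatever the user picks becomes the repo id verbatim. A minimal alternative sketch that maps the friendly names to concrete GGUF repos; the preset repo ids below are assumptions for illustration, not taken from this commit:

import streamlit as st

# Hypothetical preset table (repo ids are assumed for illustration only).
MODEL_PRESETS = {
    "llama": "TheBloke/Llama-2-7B-Chat-GGUF",
    "meditron": "TheBloke/meditron-7B-GGUF",
}

form = st.sidebar.form("config_form", clear_on_submit=True)
model_option = form.selectbox("Quickly select a model", tuple(MODEL_PRESETS))
# Pre-fill the Repo field from the preset instead of reusing the friendly name.
model_repo_id = form.text_input(label="Repo", value=MODEL_PRESETS[model_option])
submitted = form.form_submit_button("Save")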
utils/epfl_meditron_utils.py CHANGED
@@ -1,6 +1,8 @@
 from ctransformers import AutoModelForCausalLM, AutoTokenizer
 from transformers import pipeline
 import streamlit as st
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
 
 # Simple inference example
 # output = llm(
@@ -12,14 +14,25 @@ import streamlit as st
 
 prompt_format = "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
 
+
+template = """Question: {question}
+
+Answer:"""
+
+
+
+
 def get_llm_response(repo, filename, model_type, gpu_layers, prompt):
     print("Loading model")
-
+    llm = AutoModelForCausalLM.from_pretrained(repo, model_file=filename, model_type=model_type, gpu_layers=gpu_layers)
     print("Model loaded")
 
     #llm_prompt = prompt_format.format(system_message=system_prompt, prompt=prompt)
     print(f"LLM prompt: {prompt}")
-
-
+
+    prompt = PromptTemplate(template=template, input_variables=["question"])
+
+    llm_chain = LLMChain(prompt=prompt, llm=llm)
+    response = llm_chain.run(prompt)
 
     return response
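As committed, get_llm_response hands the raw ctransformers model object to LLMChain and passes the PromptTemplate itself to run(). A minimal sketch of the intended wiring, assuming LangChain's CTransformers wrapper (not what this commit uses) and passing the question string as the chain input:

from langchain.chains import LLMChain
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate

template = """Question: {question}

Answer:"""

def get_llm_response_sketch(repo, filename, model_type, gpu_layers, question):
    # Load the GGUF/GGML model through LangChain's ctransformers integration.
    llm = CTransformers(
        model=repo,
        model_file=filename,
        model_type=model_type,
        config={"gpu_layers": gpu_layers},
    )
    prompt = PromptTemplate(template=template, input_variables=["question"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    # run() takes the value of the single input variable, i.e. the question text,
    # and returns the generated answer as a string.
    return llm_chain.run(question)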