import streamlit as st
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.llms.ollama import Ollama
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core import Settings
import time
# Page settings
st.title("Aplikacja z LlamaIndex")
db = chromadb.PersistentClient(path="./abc")
chroma_collection = db.get_or_create_collection("pomoc_ukrainie")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5", device="cpu")
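# The "pomoc_ukrainie" collection is assumed to already hold embedded documents.
# As a sketch only (the "./docs" path and chunk_size are assumptions, and this
# helper is never called by the app), the SentenceSplitter / IngestionPipeline
# imports above could be used to populate it like this:
def build_collection(docs_path: str = "./docs") -> None:
    from llama_index.core import SimpleDirectoryReader
    documents = SimpleDirectoryReader(docs_path).load_data()
    pipeline = IngestionPipeline(
        # Split documents into sentence-based chunks, embed them, and write
        # the resulting nodes straight into the Chroma-backed vector store.
        transformations=[SentenceSplitter(chunk_size=512), embed_model],
        vector_store=vector_store,
    )
    pipeline.run(documents=documents)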
# Create the index from the existing vector store
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
# Set up the LLM for the query engine: load the base model's tokenizer and the 4-bit quantized model
from transformers import AutoTokenizer
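# The HuggingFaceLLM call below keeps a commented-out model_kwargs entry that
# expects a quantization_config. A minimal sketch of one (assuming the
# bitsandbytes package is installed) would be:
# from transformers import BitsAndBytesConfig
# quantization_config = BitsAndBytesConfig(load_in_4bit=True)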
llm = HuggingFaceLLM(
    model_name="PrunaAI/eryk-mazus-polka-1.1b-bnb-4bit-smashed",  # small 1.1B Polish model, 4-bit quantized
    tokenizer=AutoTokenizer.from_pretrained("eryk-mazus/polka-1.1b"),
    trust_remote_code=True,
    device_map="auto",
    # model_kwargs={
    #     "quantization_config": quantization_config,
    # },
)
# Query engine
query_engine = index.as_query_engine(
    llm=llm,
    response_mode="compact",
)
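# Note: the Settings and Ollama imports above are not used in this path. As an
# optional sketch, Settings could register the models globally instead of
# passing them to as_query_engine, and Ollama could serve as an alternative
# local LLM (the "llama3" model name is an assumption; it must be pulled with
# Ollama first):
# Settings.embed_model = embed_model
# Settings.llm = Ollama(model="llama3", request_timeout=120.0)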
# App GUI
# Store LLM-generated responses in the session state
if "messages" not in st.session_state.keys():
    st.session_state.messages = [{"role": "assistant", "content": "Zadaj mi pytanie..."}]
# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])
# User-provided prompt
if prompt := st.chat_input():
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)
# Generate a new response if the last message is not from the assistant
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Czekaj, odpowiedź jest generowana.."):
            start_time = time.time()  # Start timing
            response = query_engine.query(prompt)
            end_time = time.time()  # End timing
            generation_time = end_time - start_time
            # Build the message content from the response text and the retrieval score
            content = str(response.response)  # Make sure the response is a string
            if hasattr(response, 'source_nodes') and response.source_nodes:  # Check that source_nodes exists
                # Append the score of the first retrieved node (if present)
                content += f"\nScore: {response.source_nodes[0].score:.4f}"
            # Append the generation time
            content += f"\nCzas generowania: {generation_time:.2f} sekund"
            st.write(content)  # Display the full content in Streamlit
            message = {"role": "assistant", "content": content}  # Store the full content in the message
            st.session_state.messages.append(message)
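# To launch the app (assuming this file is saved as app.py):
#   streamlit run app.py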