Spaces:

clui
/

PICK

Running

App Files Community

clui committed on Apr 16

Commit

d1fcdad

verified ·

1 Parent(s): 7ef7fc8

Add generation time

Browse files

Files changed (1) hide show

app.py +8 -8

app.py CHANGED Viewed

@@ -10,8 +10,7 @@ from llama_index.llms.ollama import Ollama
 from llama_index.llms.huggingface import HuggingFaceLLM
 from llama_index.core import Settings
-from transformers import BitsAndBytesConfig
 # Ustawienia strony
 st.title("Aplikacja z LlamaIndex")
@@ -29,11 +28,6 @@ index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model
 # Load the correct tokenizer and LLM
 from transformers import AutoTokenizer
-# quantization_config = BitsAndBytesConfig(
-#     load_in_4bit=True,
-#     bnb_4bit_compute_dtype="float16"
-# )
 llm = HuggingFaceLLM(
     model_name="eryk-mazus/polka-1.1b", # Mały model 1.3B
     tokenizer=AutoTokenizer.from_pretrained("eryk-mazus/polka-1.1b"),
@@ -71,14 +65,20 @@ if input := st.chat_input():
 if st.session_state.messages[-1]["role"] != "assistant":
     with st.chat_message("assistant"):
         with st.spinner("Czekaj, odpowiedź jest generowana.."):
             response = query_engine.query(input)
             # Zbuduj treść wiadomości z odpowiedzią i score
             content = str(response.response)  # Upewnij się, że response jest stringiem
             if hasattr(response, 'source_nodes') and response.source_nodes:  # Sprawdź, czy source_nodes istnieje
                 # Dodaj score pierwszego węzła (jeśli istnieje)
                 content += f"\nScore: {response.source_nodes[0].score:.4f}"  # Dodaj score
             st.write(content)  # Wyświetl całą treść w Streamlit
     message = {"role": "assistant", "content": content}  # Zapisz całą treść w wiadomości

 from llama_index.llms.huggingface import HuggingFaceLLM
 from llama_index.core import Settings
+import time
 # Ustawienia strony
 st.title("Aplikacja z LlamaIndex")
 # Load the correct tokenizer and LLM
 from transformers import AutoTokenizer
 llm = HuggingFaceLLM(
     model_name="eryk-mazus/polka-1.1b", # Mały model 1.3B
     tokenizer=AutoTokenizer.from_pretrained("eryk-mazus/polka-1.1b"),
 if st.session_state.messages[-1]["role"] != "assistant":
     with st.chat_message("assistant"):
         with st.spinner("Czekaj, odpowiedź jest generowana.."):
+            start_time = time.time() # Start timing
             response = query_engine.query(input)
+            end_time = time.time() # End timing
+            generation_time = end_time - start_time
             # Zbuduj treść wiadomości z odpowiedzią i score
             content = str(response.response)  # Upewnij się, że response jest stringiem
             if hasattr(response, 'source_nodes') and response.source_nodes:  # Sprawdź, czy source_nodes istnieje
                 # Dodaj score pierwszego węzła (jeśli istnieje)
                 content += f"\nScore: {response.source_nodes[0].score:.4f}"  # Dodaj score
+            # Add generation time
+            content += f"\nCzas generowania: {generation_time:.2f} sekund"
             st.write(content)  # Wyświetl całą treść w Streamlit
     message = {"role": "assistant", "content": content}  # Zapisz całą treść w wiadomości