from langchain_ollama import OllamaLLM
from langchain_huggingface import HuggingFaceEmbeddings
import streamlit as st


@st.cache_resource
def initialize_llm(model_name, temperature, top_p, max_tokens):
    """Create and cache an OllamaLLM client configured with sampling parameters.

    Args:
        model_name: Name of the Ollama model to run (e.g. "llama3").
        temperature: Sampling temperature (0 = deterministic, 1 = max randomness).
        top_p: Nucleus-sampling probability mass for controlling diversity.
        max_tokens: Maximum number of tokens to generate in the output.

    Returns:
        A configured OllamaLLM instance, cached by Streamlit across reruns.
    """
    # NOTE: Ollama's option for capping output length is `num_predict`;
    # `max_tokens` is not a recognized OllamaLLM field, so passing it
    # directly would leave the length cap silently unapplied.
    llm = OllamaLLM(
        model=model_name,
        base_url="https://deepak7376-ollama-server.hf.space",
        temperature=temperature,  # controls randomness
        num_predict=max_tokens,   # limit the number of tokens in the output
        top_p=top_p,              # nucleus sampling for controlling diversity
    )
    return llm


@st.cache_resource
def initialize_embeddings():
    """Create and cache a HuggingFace sentence-embedding model (all-MiniLM-L6-v2)."""
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return embeddings