Spaces:
Running
Running
File size: 993 Bytes
7d9087b 7f98036 e50c54a 7d9087b e50c54a 7f98036 7d9087b e50c54a 7d9087b e50c54a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
from langchain_ollama import OllamaLLM
from llama_cpp import Llama
from langchain_huggingface import HuggingFaceEmbeddings
import streamlit as st
@st.cache_resource
def initialize_llm(model_name, temperature, top_p, max_tokens):
    """Load and cache the chat LLM.

    Downloads (once) and instantiates a quantized DeepSeek-R1 distill model
    via llama-cpp-python. The result is cached by Streamlit, so repeated
    reruns with the same arguments reuse the same model instance.

    Args:
        model_name: Currently UNUSED — the repo/filename below are hard-coded.
            Kept in the signature for interface compatibility (and it still
            participates in the st.cache_resource cache key).
        temperature: Currently UNUSED by this loader; sampling parameters
            would need to be passed at generation time, not load time.
        top_p: Currently UNUSED — same reason as `temperature`.
        max_tokens: Used as the model context-window size (`n_ctx`).
            NOTE(review): n_ctx is the total context length, not an output
            token limit — confirm this is the intended meaning of max_tokens.

    Returns:
        A `llama_cpp.Llama` instance ready for inference.
    """
    llm = Llama.from_pretrained(
        repo_id="bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
        filename="DeepSeek-R1-Distill-Qwen-1.5B-IQ4_XS.gguf",
        n_ctx=max_tokens,
    )
    return llm
@st.cache_resource
def initialize_embeddings():
    """Build and cache the sentence-embedding model.

    Uses the lightweight all-MiniLM-L6-v2 sentence-transformer; cached by
    Streamlit so the model is only loaded once per process.

    Returns:
        A `HuggingFaceEmbeddings` instance.
    """
    return HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|