File size: 993 Bytes
7d9087b
7f98036
e50c54a
7d9087b
 
 
 
e50c54a
7f98036
 
 
 
 
 
 
 
 
 
 
 
 
7d9087b
e50c54a
7d9087b
 
e50c54a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from langchain_ollama import OllamaLLM
from llama_cpp import Llama
from langchain_huggingface import HuggingFaceEmbeddings
import streamlit as st


@st.cache_resource
def initialize_llm(model_name, temperature, top_p, max_tokens):
    """Load and cache a local GGUF model via llama-cpp.

    Parameters
    ----------
    model_name : str
        Currently unused: the GGUF repo and file below are hard-coded.
        Kept for interface compatibility with the previous Ollama-backed
        implementation (and any existing callers).
    temperature : float
        Sampling temperature. llama-cpp applies this per generation call,
        not at construction, so it is not consumed here — pass it when
        invoking the returned model.
    top_p : float
        Nucleus-sampling parameter; same per-call note as `temperature`.
    max_tokens : int
        Used as the model context window (``n_ctx``).

    Returns
    -------
    Llama
        The loaded llama-cpp model, cached across Streamlit reruns by
        ``st.cache_resource``.
    """
    # NOTE(review): model_name/temperature/top_p are accepted but ignored at
    # load time; apply sampling params at inference. Confirm callers expect this.
    llm = Llama.from_pretrained(
        repo_id="bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
        filename="DeepSeek-R1-Distill-Qwen-1.5B-IQ4_XS.gguf",
        n_ctx=max_tokens,
    )
    return llm

@st.cache_resource
def initialize_embeddings():
    """Build and cache the sentence-embedding model (all-MiniLM-L6-v2).

    Cached by ``st.cache_resource`` so the model is loaded once per
    Streamlit session, not on every rerun.
    """
    return HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")