Spaces:

DeepVen
/

rag-test-venkat

Paused

File size: 1,753 Bytes

84947fc
27b1266
84947fc
 
93bc725
 
fa2ae65
84947fc
 
 
 
 
 
93bc725
 
84947fc
 
93bc725
84947fc
fa2ae65
84947fc
4e6886d
 
 
 
 
a953553
fa2ae65
 
 
84947fc
 
 
 
93bc725
84947fc
fa2ae65
92cbd75
84947fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93bc725

from fastapi import FastAPI
from transformers import pipeline
from txtai.embeddings import Embeddings
from txtai.pipeline import Extractor
from llama_cpp import Llama

from huggingface_hub import hf_hub_download

# NOTE: docs_url="/" serves FastAPI's interactive API docs at the root path of
# the app (instead of the default "/docs"), so the docs double as the landing
# page for the app on Spaces.
app = FastAPI(docs_url="/")

# Create embeddings model with content support
# embeddings = Embeddings({"path": "sentence-transformers/all-MiniLM-L6-v2", "content": True})
# embeddings.load('index')

# Create extractor instance
#extractor = Extractor(embeddings, "google/flan-t5-base")

# pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")

# model_name_or_path = "TheBloke/Llama-2-7B-GGUF"
# model_basename = "llama-2-7b.Q4_0.gguf"

# Hugging Face Hub repository and file holding the quantized GGUF weights.
# The repo id must include the owner namespace ("TheBloke/..."), exactly like
# the commented-out 7B configuration above; a bare "Llama-2-13B-GGUF" does not
# resolve to that repository and makes the download fail at startup.
model_name_or_path = "TheBloke/Llama-2-13B-GGUF"
model_basename = "llama-2-13b.Q3_K_S.gguf"

# Fetch the weights file from the Hub and keep the local path it was stored at.
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

# Load the model once at import time; the /generate endpoint reuses this instance.
llm = Llama(model_path=model_path)

@app.get("/generate")
def generate(text: str):
    """
    Generate a completion for ``text`` with the Llama 2 model loaded at startup.

    ``text`` is read from the query string and passed to the model unchanged
    as the prompt. The model's result is returned as-is under the ``"output"``
    key.
    """
    # NOTE: FastAPI publishes this docstring as the endpoint description on the
    # docs page, so it must describe the model that is actually loaded.
    return {"output": llm(text)}


def prompt(question):
    """Return the instruction prompt for *question*.

    The result ends with an open ``Context: `` label; nothing is appended
    after it here.
    """
    # The continuation lines keep their leading spaces on purpose: they are
    # part of the string that is returned.
    template = """Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
            Question: {question}
            Context: """
    return template.format(question=question)


def search(query, question=None):
    """Run the extractor for *query* and return its answer.

    Args:
        query: Value passed as the second element of the extractor's input
            tuple.
        question: Text inserted into the prompt template built by
            ``prompt()``. Falls back to ``query`` when empty or ``None``.

    Returns:
        Element ``[0][1]`` of the extractor's result, i.e. the second field
        of its first row.

    Raises:
        NameError: If no module-level ``extractor`` exists. The code that
            creates it is currently commented out in this file.
    """
    # Default question to query if empty
    if not question:
        question = query

    # Resolve the module-level extractor explicitly so that a missing one is
    # reported with an actionable message. The exception type is kept as
    # NameError, which is what the bare reference raised before.
    try:
        run_extractor = extractor
    except NameError as err:
        raise NameError(
            "search() needs a module-level txtai `extractor`; create it with "
            "Extractor(embeddings, <model path>) before calling search()"
        ) from err

    return run_extractor([("answer", query, prompt(question), False)])[0][1]


# @app.get("/rag")
# def rag(question: str):
#     # question = "what is the document about?"
#     answer = search(question)
#     # print(question, answer)
#     return {answer}