Spaces:
Paused
Paused
File size: 1,753 Bytes
84947fc 27b1266 84947fc 93bc725 fa2ae65 84947fc 93bc725 84947fc 93bc725 84947fc fa2ae65 84947fc 4e6886d a953553 fa2ae65 84947fc 93bc725 84947fc fa2ae65 92cbd75 84947fc 93bc725 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from fastapi import FastAPI
from transformers import pipeline
from txtai.embeddings import Embeddings
from txtai.pipeline import Extractor
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# NOTE - we configure docs_url to serve the interactive Docs at the root path
# of the app. This way, we can use the docs as a landing page for the app on Spaces.
app = FastAPI(docs_url="/")

# Retrieval setup (currently disabled): a txtai embeddings index with content
# support and an Extractor built on top of it. `search()` further down uses
# `extractor`, so these lines must be re-enabled before it can be called.
# embeddings = Embeddings({"path": "sentence-transformers/all-MiniLM-L6-v2", "content": True})
# embeddings.load('index')
# extractor = Extractor(embeddings, "google/flan-t5-base")

# Earlier model choices, kept for reference.
# pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")
# model_name_or_path = "TheBloke/Llama-2-7B-GGUF"
# model_basename = "llama-2-7b.Q4_0.gguf"

# Hub repo ids have the form "<namespace>/<repo_name>". The bare name
# "Llama-2-13B-GGUF" does not point at the quantized GGUF repo, so the
# namespace is spelled out, matching the 7B setting above.
model_name_or_path = "TheBloke/Llama-2-13B-GGUF"
model_basename = "llama-2-13b.Q3_K_S.gguf"

# Fetch the weights file from the Hub and load the model once, at import
# time, so every request shares the same `llm` instance.
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
llm = Llama(model_path=model_path)
@app.get("/generate")
def generate(text: str):
    """
    Generate a completion for `text` with the Llama model loaded at startup.

    The prompt is read from the `text` query parameter. The response is a
    JSON object whose `output` field holds the raw result of calling the
    model on that prompt.
    """
    # This docstring is shown as the endpoint description on the docs page,
    # so it is kept model-agnostic rather than naming a specific quantization.
    return {"output": llm(text)}
def prompt(question):
    """
    Build the extraction prompt for `question`.

    The result has three lines: a fixed instruction, ``Question: <question>``
    and a trailing ``Context: `` label (with its trailing space) after which
    the caller's context is expected to be appended.
    """
    instruction = (
        "Answer the following question using only the context below. "
        "Say 'no answer' when the question can't be answered."
    )
    # Explicit newlines keep the prompt text independent of how this
    # function body happens to be indented in the source file.
    return "\n".join((instruction, f"Question: {question}", "Context: "))
def search(query, question=None):
    """
    Run the extractor for `query` and return the extracted answer.

    Args:
        query: text handed to the extractor as the search query.
        question: question embedded in the prompt; when empty or None,
            `query` is used as the question as well.

    Returns:
        Element ``[0][1]`` of the extractor's result, i.e. the second field
        of the first returned row.
    """
    # NOTE(review): `extractor` is a module-level name; in the visible source
    # its creation is commented out, so calling this raises NameError until
    # the extractor setup is restored.

    # Default question to query if empty
    if not question:
        question = query

    # A single queue entry is submitted. The tuple layout is presumably
    # (name, query, question, snippet) as in txtai's Extractor - confirm.
    rows = extractor([("answer", query, prompt(question), False)])
    return rows[0][1]
# @app.get("/rag")
# def rag(question: str):
# # question = "what is the document about?"
# answer = search(question)
# # print(question, answer)
# return {answer}
|