Update app.py
app.py CHANGED
@@ -1,19 +1,18 @@
 from langchain_core.prompts import PromptTemplate
 from langchain.chains import create_retrieval_chain
 from langchain.chains.combine_documents import create_stuff_documents_chain
-import gradio as gr
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+# import gradio as gr
 import numpy as np
 from langchain_ollama import OllamaLLM
 from langchain_huggingface import HuggingFaceEmbeddings
-from langchain_community.llms import HuggingFacePipeline
+# from langchain_community.llms import HuggingFacePipeline
 from load_document import load_data
 from split_document import split_docs
 from embed_docs import embed_docs
 from retrieve import retrieve
 from datetime import datetime
-
-
+from js import js
+from theme import theme
 import os
 import glob
 from fastapi import FastAPI, Query, Request
@@ -40,34 +39,13 @@ embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6
 
 def fetch_doc():
     # Adjust the path as needed, e.g., './' for current directory
-    pdf_files = glob.glob("
-
-    # If you want to include subdirectories:
-    # pdf_files = glob.glob("**/*.pdf", recursive=True)
+    pdf_files = glob.glob("*.pdf")
 
     return pdf_files
 
 # # Define llm
 hf_token = os.environ.get("HF_TOKEN").strip() # Ensure to set your Hugging Face token in the environment variable HF_TOKEN
-
-# #llm = OllamaLLM(model="mistral:7b-instruct", base_url="http://host.docker.internal:11434")
-model_id = "google/gemma-2b-it"
-
-# # Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
-model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu", torch_dtype="auto", token=hf_token)
-
-# # Create text generation pipeline
-hf_pipe = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    max_new_tokens=512,
-    temperature=0.7,
-    top_p=0.9,
-    do_sample=True
-)
-llm = HuggingFacePipeline(pipeline=hf_pipe)
+llm = OllamaLLM(model="mistral:7b-instruct")
 
 pdf_files = fetch_doc() #Fetch Dataset
 chunks = None
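
For context on the swap above: the commit replaces the local transformers pipeline (google/gemma-2b-it wrapped in HuggingFacePipeline) with OllamaLLM, which only issues requests to an Ollama server (by default http://localhost:11434; the removed commented-out line shows an earlier base_url override pointing at host.docker.internal). Below is a minimal sketch of how the new llm presumably plugs into the retrieval chain this file already imports. The prompt text, the in-memory store, the stand-in documents, and the question are illustrative assumptions, not code from this commit; running it requires a local Ollama server with mistral:7b-instruct pulled.

# sketch.py -- hedged example, not part of the commit
from langchain_core.prompts import PromptTemplate
from langchain_core.vectorstores import InMemoryVectorStore
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import OllamaLLM

# Same construction as the commit; assumes `ollama pull mistral:7b-instruct`
# has been run and the Ollama server is listening on localhost:11434.
llm = OllamaLLM(model="mistral:7b-instruct")

# The embedder follows the truncated hunk header; the full model ID is
# assumed to be sentence-transformers/all-MiniLM-L6-v2.
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Stand-in corpus; app.py builds its store from the PDFs fetch_doc() finds.
store = InMemoryVectorStore.from_texts(
    ["Example passage one.", "Example passage two."],
    embedding=embedder,
)
retriever = store.as_retriever()

# Hypothetical prompt; the stuff-documents chain requires a {context} slot.
prompt = PromptTemplate.from_template(
    "Answer using only the context below.\n\n{context}\n\nQuestion: {input}"
)

combine_docs_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, combine_docs_chain)

result = rag_chain.invoke({"input": "What does passage one say?"})
print(result["answer"])

One practical difference worth noting: the removed HuggingFacePipeline path loaded the model into the Space's own CPU memory, while OllamaLLM delegates generation to the server, so sampling settings like the old max_new_tokens, temperature, and top_p now have to be configured on the Ollama side or passed as OllamaLLM parameters rather than in a transformers pipeline.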