khoatran94 committed
Commit 725770a · Parent: bbbdb3f
Files changed (1):
  1. app.py +12 -11
app.py CHANGED
@@ -7,7 +7,7 @@ import torch
 import gradio as gr
 from prepare import prepare
 
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
 from langchain_community.llms import HuggingFacePipeline
 from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
@@ -51,16 +51,17 @@ def read_pdf(file_path):
 @spaces.GPU
 def query_huggingface(text):
     print(zero.device)
-    load_dotenv()
-    api_token = os.getenv("API_TOKEN")
-    repo_id = "google/gemma-2-9b-it"
-    task = "text-generation"
-    chat_model = HuggingFaceEndpoint(
-        huggingfacehub_api_token=api_token,
-        repo_id=repo_id,
-        task=task
-    )
-    return chat_model.invoke(text)
+    pipe = pipeline(
+        "text-generation",
+        model="google/gemma-2-9b-it",
+        model_kwargs={"torch_dtype": torch.bfloat16},
+        device="cuda",  # replace with "mps" to run on a Mac device
+    )
+    messages = [
+        {"role": "user", "content": text},
+    ]
+    outputs = pipe(messages, max_new_tokens=256)
+    return outputs[0]["generated_text"][-1]["content"].strip()
 
 # Gradio Interface for PDF Processing
 def process_file(file, query):
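
Note on the new code path: the commit replaces the remote HuggingFaceEndpoint call with a local transformers text-generation pipeline, but the pipeline is constructed inside query_huggingface, so the 9B model is reloaded on every request. Below is a minimal sketch of the same call with the pipeline cached after first use; the _PIPE global and the lazy initialization are illustrative assumptions, not part of the commit, and init stays inside the @spaces.GPU call, where the GPU is guaranteed to be attached:

import spaces
import torch
from transformers import pipeline

_PIPE = None  # lazily initialized, reused across calls

@spaces.GPU
def query_huggingface(text):
    global _PIPE
    if _PIPE is None:
        # First call pays the model-load cost; later calls reuse the pipeline.
        _PIPE = pipeline(
            "text-generation",
            model="google/gemma-2-9b-it",
            model_kwargs={"torch_dtype": torch.bfloat16},
            device="cuda",
        )
    # Chat-style input: generated_text carries the whole conversation,
    # so the last message is the model's reply.
    messages = [{"role": "user", "content": text}]
    outputs = _PIPE(messages, max_new_tokens=256)
    return outputs[0]["generated_text"][-1]["content"].strip()

The output indexing mirrors the commit: when a text-generation pipeline is given a list of chat messages, generated_text returns the conversation with the model's reply appended as the last message.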