amiguel committed on
Commit
2db00ad
·
verified ·
1 Parent(s): ae4177c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -1,8 +1,8 @@
1
-
2
  import streamlit as st
3
  import torch
4
  import os
5
  import time
 
6
  from threading import Thread
7
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
8
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
@@ -34,7 +34,7 @@ if "messages" not in st.session_state or clear_chat:
34
  # --- Load Model + Tokenizer ---
35
  @st.cache_resource
36
  def load_model():
37
- model_id = "amiguel/GM_Qwen1.8B_Finetune" #"tiiuae/falcon-7b-instruct"
38
  tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
39
  model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto", token=HF_TOKEN)
40
  return tokenizer, model
@@ -45,10 +45,15 @@ tokenizer, model = load_model()
45
  def process_documents(files):
46
  documents = []
47
  for file in files:
48
- if file.name.endswith(".pdf"):
49
- loader = PyPDFLoader(file)
 
 
 
 
 
50
  else:
51
- loader = TextLoader(file)
52
  docs = loader.load()
53
  documents.extend(docs)
54
  return documents
 
 
1
  import streamlit as st
2
  import torch
3
  import os
4
  import time
5
+ import tempfile
6
  from threading import Thread
7
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
8
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
 
34
  # --- Load Model + Tokenizer ---
35
  @st.cache_resource
36
  def load_model():
37
+ model_id = "tiiuae/falcon-7b-instruct"
38
  tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
39
  model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto", token=HF_TOKEN)
40
  return tokenizer, model
 
45
  def process_documents(files):
46
  documents = []
47
  for file in files:
48
+ suffix = ".pdf" if file.name.endswith(".pdf") else ".txt"
49
+ with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
50
+ tmp_file.write(file.read())
51
+ tmp_file_path = tmp_file.name
52
+
53
+ if suffix == ".pdf":
54
+ loader = PyPDFLoader(tmp_file_path)
55
  else:
56
+ loader = TextLoader(tmp_file_path)
57
  docs = loader.load()
58
  documents.extend(docs)
59
  return documents