akash015 committed
Commit a39222a · verified · 1 Parent(s): 9d249d4

Update app.py

Files changed (1)
  1. app.py +4 -10
app.py CHANGED
@@ -15,7 +15,7 @@ import pdfkit
 from paddleocr import PaddleOCR
 import fitz
 import asyncio
-from langchain_nomic.embeddings import NomicEmbeddings
+# from langchain_nomic.embeddings import NomicEmbeddings
 
 # initialise LLM model
 llm_groq = ChatGroq(
@@ -26,13 +26,7 @@ llm_groq = ChatGroq(
 anonymizer = PresidioReversibleAnonymizer(analyzed_fields=['PERSON', 'EMAIL_ADDRESS', 'PHONE_NUMBER', 'IBAN_CODE', 'CREDIT_CARD', 'CRYPTO', 'IP_ADDRESS', 'LOCATION', 'DATE_TIME', 'NRP', 'MEDICAL_LICENSE', 'URL', 'US_BANK_NUMBER', 'US_DRIVER_LICENSE', 'US_ITIN', 'US_PASSPORT', 'US_SSN'], faker_seed=18)
 
 # initalise nomic embedding model
-embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
-
-def embed_text(text):
-    if len(text.split()) <= 50:
-        return embeddings.embed_query(text)
-    else:
-        return embeddings.embed_document(text)
+# embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
 
 def extract_text_from_pdf(file_path):
     pdf = PyPDF2.PdfReader(file_path)
@@ -154,7 +148,7 @@ async def on_chat_start():
     # without splitting into chunks
     # {
     # Create a Chroma vector store
-    # embeddings = OllamaEmbeddings(model="nomic-embed-text")
+    embeddings = OllamaEmbeddings(model="nomic-embed-text")
     docsearch = await cl.make_async(Chroma.from_texts)(
         [anonymized_text], embeddings, metadatas=[{"source": "0-pl"}]
     )
@@ -198,7 +192,7 @@ async def main(message: cl.Message):
     # Call the chain with user's message content
     res = await chain.ainvoke(message.content, callbacks=[cb])
     answer = anonymizer.deanonymize(
-        "ok"+res["answer"]
+        res["answer"]
     )
     text_elements = []
 
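In effect, the commit drops the Nomic embedding client and the unused embed_text helper, creates an Ollama-served embedding model inside on_chat_start instead, and removes the stray "ok" prefix that was prepended to the answer before de-anonymization. A minimal sketch of the resulting code path, assuming the app pulls OllamaEmbeddings and Chroma from langchain_community (the import lines are not visible in this diff) and that build_vectorstore is a hypothetical helper used here only for illustration:

from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer

# Reversible anonymizer as in the diff (field list trimmed here for brevity)
anonymizer = PresidioReversibleAnonymizer(
    analyzed_fields=["PERSON", "EMAIL_ADDRESS", "PHONE_NUMBER"],
    faker_seed=18,
)

def build_vectorstore(raw_text: str) -> Chroma:
    """Anonymize the extracted text, then index it with a local Ollama embedding model."""
    anonymized_text = anonymizer.anonymize(raw_text)
    # Requires a running Ollama server with the nomic-embed-text model pulled
    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    return Chroma.from_texts(
        [anonymized_text], embeddings, metadatas=[{"source": "0-pl"}]
    )

# After the chain responds, the answer is de-anonymized directly,
# without the "ok" prefix the previous version added:
# answer = anonymizer.deanonymize(res["answer"])

This mirrors the change in the diff: embeddings are no longer a module-level NomicEmbeddings instance but are built where the vector store is created, so the Chroma.from_texts call keeps the same arguments while the embedding backend switches to Ollama.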