KrishP-12 committed on
Commit
49ce73b
·
verified ·
1 Parent(s): 2f85788

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -0
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import gradio as gr
4
+ from PIL import Image
5
+ from pdf2image import convert_from_path
6
+ import pytesseract
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain_huggingface import HuggingFaceEmbeddings
9
+ from langchain_community.vectorstores import FAISS
10
+ from langchain.memory import ConversationBufferMemory
11
+ from langchain.prompts import PromptTemplate
12
+ from langchain.chains import RetrievalQA
13
+ from langchain_groq import ChatGroq
14
+
15
+
16
class ChatbotModel:
    """Question-answering chatbot over a single OCR'd document.

    An uploaded PDF or image is run through Tesseract OCR, split into
    overlapping text chunks, embedded with a sentence-transformers model and
    indexed in FAISS. Questions are then answered by a Groq-hosted LLM through
    a ``RetrievalQA`` "stuff" chain built over that index.
    """

    # File extensions that are routed to the single-image OCR path.
    _IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp')

    def __init__(self):
        """Create the embedding model, LLM client, memory and prompt.

        Raises:
            RuntimeError: If ``GROQ_API_KEY`` is not set in the environment.
        """
        # The key must be supplied by the environment (for example a
        # deployment secret); it must never be written into source control.
        # NOTE(review): a key was previously committed in this file and
        # should be treated as leaked and rotated.
        if not os.environ.get("GROQ_API_KEY"):
            raise RuntimeError(
                "GROQ_API_KEY environment variable is not set."
            )

        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True},
        )

        self.llm = ChatGroq(
            model='llama3-70b-8192',
            temperature=0.5,
            max_tokens=None,
            timeout=None,
            max_retries=2,
        )

        self.memory = ConversationBufferMemory(memory_key="history", input_key="question")

        # NOTE(review): this text is a placeholder. The prompt below declares
        # the variables "history", "context" and "question", so the real
        # template presumably needs matching {history}, {context} and
        # {question} placeholders -- confirm before deploying.
        self.template = """You are an intelligent assistant... (Rest of your prompt as is)"""

        self.QA_CHAIN_PROMPT = PromptTemplate(
            input_variables=["history", "context", "question"],
            template=self.template,
        )
        # Both are populated by process_file(); None means "no document yet".
        self.db1 = None
        self.qa_chain = None

    def ocr_image(self, image_path, language='eng+guj'):
        """Return the text Tesseract extracts from the image at *image_path*.

        Args:
            image_path: Path of the image file to read.
            language: Tesseract language spec passed as ``lang``.
        """
        # Use the image as a context manager so its file handle is released.
        with Image.open(image_path) as img:
            return pytesseract.image_to_string(img, lang=language)

    def ocr_pdf(self, pdf_path, language='eng+guj'):
        """Return the OCR text of every page of a PDF, joined by newlines.

        Args:
            pdf_path: Path of the PDF file to convert to page images.
            language: Tesseract language spec passed as ``lang``.
        """
        pages = convert_from_path(pdf_path)
        return "\n".join(
            pytesseract.image_to_string(page, lang=language) for page in pages
        )

    def process_file(self, uploaded_file):
        """OCR an uploaded file and build the retrieval chain over its text.

        Args:
            uploaded_file: Either a filesystem path (``str`` / ``os.PathLike``)
                or an object whose ``name`` attribute is such a path.

        Returns:
            A human-readable status message. On success ``self.db1`` and
            ``self.qa_chain`` are replaced; on any early return they are left
            untouched.
        """
        if uploaded_file is None:
            return "Please upload a file before processing."

        # The upload is read in place from its path rather than being copied
        # into a second temporary file that would never be deleted.
        if isinstance(uploaded_file, (str, os.PathLike)):
            file_path = os.fspath(uploaded_file)
        else:
            file_path = getattr(uploaded_file, "name", None)
        if not file_path:
            return "Please upload a file before processing."

        file_extension = os.path.splitext(file_path)[1].lower()
        if file_extension == '.pdf':
            raw_text = self.ocr_pdf(file_path, language='guj+eng')
        elif file_extension in self._IMAGE_EXTENSIONS:
            raw_text = self.ocr_image(file_path, language='guj+eng')
        else:
            return "Unsupported file format."

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        text_chunks = text_splitter.split_text(raw_text)
        if not text_chunks:
            # An index cannot be built from zero chunks.
            return "No text could be extracted from the file."

        # split_text() yields plain strings, so the index is built with
        # from_texts(); from_documents() expects Document objects.
        self.db1 = FAISS.from_texts(text_chunks, self.embeddings)
        self.qa_chain = RetrievalQA.from_chain_type(
            self.llm,
            retriever=self.db1.as_retriever(),
            chain_type='stuff',
            verbose=True,
            chain_type_kwargs={
                "verbose": True,
                "prompt": self.QA_CHAIN_PROMPT,
                "memory": self.memory,
            },
        )

        return "File processed successfully!"

    def get_response(self, user_input):
        """Answer *user_input* using the chain built by ``process_file``.

        Returns:
            The chain's ``"result"`` string, or a prompt to upload a file
            first when no chain has been built yet.
        """
        if self.qa_chain is None:
            return "Please upload and process a file before asking questions."
        # invoke() replaces the deprecated direct call on the chain object.
        response = self.qa_chain.invoke({"query": user_input})
        return response["result"]
91
+
92
+
93
# Single module-level model instance used by the callback functions below.
chatbot = ChatbotModel()
94
+
95
+
96
def upload_and_process(file):
    """Pass the uploaded *file* to the shared chatbot and return its status text."""
    status_message = chatbot.process_file(file)
    return status_message
98
+
99
+
100
def ask_question(question):
    """Forward *question* to the shared chatbot and return its answer text."""
    reply = chatbot.get_response(question)
    return reply
102
+
103
+
104
# Build the two-step UI: process a document first, then ask questions about it.
# NOTE(review): the original nesting was lost in this copy; the status and
# answer boxes are assumed to sit below their rows rather than inside them.
with gr.Blocks() as interface:
    gr.Markdown("# Educational Chatbot with Document Analysis")

    # Upload controls.
    with gr.Row():
        file_upload = gr.File(label="Upload PDF or Image")
        upload_btn = gr.Button("Process File")
    output = gr.Textbox(label="File Processing Status")

    # Question controls.
    with gr.Row():
        question_box = gr.Textbox(label="Ask a Question")
        ask_btn = gr.Button("Submit")
    answer = gr.Textbox(label="Answer")

    # Wire each button to its callback.
    upload_btn.click(fn=upload_and_process, inputs=file_upload, outputs=output)
    ask_btn.click(fn=ask_question, inputs=question_box, outputs=answer)

interface.launch()