Jacek Zadrożny
committed on
Commit
·
dedd8bb
1
Parent(s):
b9145a7
Aktualizacja pliku zależności i głównej aplikacji
Browse files- app.py +26 -4
- requirements.txt +4 -1
app.py
CHANGED
@@ -7,6 +7,8 @@ from langchain_openai import ChatOpenAI
|
|
7 |
from langchain_core.output_parsers import StrOutputParser
|
8 |
from pydantic import BaseModel, Field, validator
|
9 |
from pydantic import BaseModel, Field, field_validator
|
|
|
|
|
10 |
|
11 |
|
12 |
# %%
|
@@ -92,7 +94,27 @@ def prepare_questions(df):
|
|
92 |
|
93 |
|
94 |
# %%
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
"""Analizuje ogłoszenie o pracę przy użyciu LangChain i OpenAI."""
|
97 |
questions = prepare_questions(matryca_df)
|
98 |
prompt_template = PromptTemplate.from_template(
|
@@ -155,7 +177,7 @@ from io import BytesIO
|
|
155 |
from datetime import datetime
|
156 |
import tempfile
|
157 |
def create_report(result: pd.DataFrame) -> BytesIO:
|
158 |
-
doc = Document()
|
159 |
doc.add_heading('Raport analizy ogłoszenia o pracę', 0)
|
160 |
doc.add_paragraph(f'Data wygenerowania: {datetime.now().strftime("%d.%m.%Y %H:%M")}')
|
161 |
for _, row in result.iterrows():
|
@@ -185,8 +207,8 @@ def create_report(result: pd.DataFrame) -> BytesIO:
|
|
185 |
# %%
|
186 |
demo=gr.Interface(
|
187 |
fn=analyze_job_ad,
|
188 |
-
inputs=gr.TextArea(),
|
189 |
-
outputs=[gr.
|
190 |
title="KoREKtor"
|
191 |
).launch(inbrowser=True)
|
192 |
|
|
|
7 |
from langchain_core.output_parsers import StrOutputParser
|
8 |
from pydantic import BaseModel, Field, validator
|
9 |
from pydantic import BaseModel, Field, field_validator
|
10 |
+
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
|
11 |
+
import os
|
12 |
|
13 |
|
14 |
# %%
|
|
|
94 |
|
95 |
|
96 |
# %%
|
97 |
+
# Konwersja plików PDF i Word do tekstu
|
98 |
+
def doc_to_text(file):
    """Extract the plain text of a Word (.docx) or PDF document.

    Args:
        file: Path to the document (``str`` or ``os.PathLike``). An object
            that exposes the path through a ``name`` attribute, such as an
            uploaded temporary-file wrapper, is accepted as well.

    Returns:
        The ``page_content`` of every loaded page, each followed by a
        newline, or the sentinel string ``"error"`` when the file extension
        is neither ``.docx`` nor ``.pdf``.
    """
    # NOTE(review): depending on the UI library version, the upload may be
    # handed over as a file-wrapper object instead of a path string; in that
    # case the real path is taken from its ``name`` attribute.
    path = file if isinstance(file, (str, os.PathLike)) else file.name

    extension = os.path.splitext(path)[1].lower()
    if extension == ".docx":
        loader = Docx2txtLoader(path)
    elif extension == ".pdf":
        loader = PyPDFLoader(path)
    else:
        # The caller compares the result against this sentinel, so it is
        # returned rather than raising an exception.
        return "error"

    return "".join(page.page_content + "\n" for page in loader.load())
|
111 |
+
|
112 |
+
|
113 |
+
def analyze_job_ad(job_ad, file):
|
114 |
+
if file:
|
115 |
+
job_ad=doc_to_text(file)
|
116 |
+
if job_ad == "error":
|
117 |
+
return "{}", None
|
118 |
"""Analizuje ogłoszenie o pracę przy użyciu LangChain i OpenAI."""
|
119 |
questions = prepare_questions(matryca_df)
|
120 |
prompt_template = PromptTemplate.from_template(
|
|
|
177 |
from datetime import datetime
|
178 |
import tempfile
|
179 |
def create_report(result: pd.DataFrame) -> BytesIO:
|
180 |
+
doc = Document('template.docx')
|
181 |
doc.add_heading('Raport analizy ogłoszenia o pracę', 0)
|
182 |
doc.add_paragraph(f'Data wygenerowania: {datetime.now().strftime("%d.%m.%Y %H:%M")}')
|
183 |
for _, row in result.iterrows():
|
|
|
207 |
# %%
|
208 |
# Build the web UI: a text area plus an optional file upload go in, the
# analysis JSON and a Word-report download button come out. The interface is
# launched immediately and opened in the default browser.
demo = gr.Interface(
    title="KoREKtor",
    fn=analyze_job_ad,
    inputs=[gr.TextArea(), gr.File()],
    outputs=[
        gr.JSON(),
        gr.DownloadButton(label='Pobierz raport w formacie Word'),
    ],
).launch(inbrowser=True)
|
214 |
|
requirements.txt
CHANGED
@@ -4,4 +4,7 @@ langchain_core
|
|
4 |
langchain_openai
|
5 |
langchain
|
6 |
pydantic
|
7 |
-
openai
|
|
|
|
|
|
|
|
4 |
langchain_openai
|
5 |
langchain
|
6 |
pydantic
|
7 |
+
openai
|
8 |
+
langchain_community
|
9 |
+
os
|
10 |
+
docx
|