Rekruter / app.py
Jacek Zadrożny
Usunąłem share
ff37902
import gradio as gr
import pandas as pd
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from pydantic import BaseModel, Field, field_validator
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain.output_parsers import PydanticOutputParser
from docx import Document
from datetime import datetime
import os
import tempfile
# Model danych
class QuestionAnswer(BaseModel):
question_number: int = Field(..., description="Numer pytania")
answer: str = Field(..., description="Odpowiedź, tylko TAK lub NIE")
citation: str = Field(..., description="Fragment cytatu")
@field_validator("answer")
def validate_answer(cls, v):
if v not in {"TAK", "NIE"}:
raise ValueError("Odpowiedź musi być TAK lub NIE")
return v
class JobAdAnalysis(BaseModel):
answers: list[QuestionAnswer]
parser = PydanticOutputParser(pydantic_object=JobAdAnalysis)
# Wczytanie matrycy danych
matryca_df = pd.read_csv('matryca.csv', header=None,
names=['area', 'prompt', 'true', 'false', 'more', 'hint'])
question_to_area_map = {}
def prepare_questions(df):
global question_to_area_map
question_to_area_map = {}
questions_text = ""
for index, row in df.iterrows():
question_number = index + 1
questions_text += f"{question_number} {row['prompt']}\n"
question_to_area_map[question_number] = {
'area': row['area'],
'true': row['true'],
'false': row['false'],
'hint': row['hint'],
'more': row['more']
}
return questions_text
def doc_to_text(file):
extension = os.path.splitext(file.name)[1].lower()
if extension == ".docx":
loader = Docx2txtLoader(file.name)
elif extension == ".pdf":
loader = PyPDFLoader(file.name)
else:
return "error"
pages = loader.load()
return "\n".join(page.page_content for page in pages)
def create_report(result: pd.DataFrame) -> str:
doc = Document('template.docx')
doc.add_heading('Raport analizy ogłoszenia o pracę', 0)
doc.add_paragraph(f'Data wygenerowania: {datetime.now().strftime("%d.%m.%Y %H:%M")}')
for _, row in result.iterrows():
doc.add_heading(str(row['area']), 1)
doc.add_paragraph(str(row['citation']), style='Intense Quote')
for line in str(row['content']).split('\n'):
if line.strip():
doc.add_paragraph(line)
if pd.notna(row['more']):
for line in str(row['more']).split('\n'):
if line.strip():
doc.add_paragraph(line)
with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp:
doc.save(tmp.name)
return tmp.name # Zwracamy ścieżkę do pliku tymczasowego
def analyze_job_ad(job_ad, file):
if file:
job_ad = doc_to_text(file)
if job_ad == "error":
return None, None
questions = prepare_questions(matryca_df)
prompt_template = PromptTemplate.from_template(
"""Przeanalizuj poniższe ogłoszenie o pracę pod kątem dostępności dla osób z niepełnosprawnościami.
Ogłoszenie:
{job_ad}
Odpowiedz na następujące pytania:
{questions}
Format odpowiedzi powinien być w następującej strukturze JSON:
{{
"answers": [
{{
"question_number": 1,
"answer": "TAK/NIE",
"citation": "dokładny cytat z tekstu"
}}
]
}}
"""
)
model = ChatOpenAI(temperature=0, model="gpt-4o-mini")
chain = prompt_template | model | parser
response = chain.invoke({"job_ad": job_ad, "questions": questions})
output_df = pd.DataFrame(columns=['area', 'answer', 'citation', 'content', 'more'])
for i in range(16):
if response.answers[i].answer in {"TAK", "NIE"}:
new_row = {
'area': matryca_df.area[i],
'answer': response.answers[i].answer,
'citation': response.answers[i].citation,
'content': matryca_df.true[i] if response.answers[i].answer == 'TAK' else matryca_df.false[i],
'more': matryca_df.more[i]
}
output_df = pd.concat([output_df, pd.DataFrame([new_row])], ignore_index=True)
word_file_path = create_report(output_df)
json_output = output_df.to_dict(orient="records")
return json_output, word_file_path
# Interfejs Gradio
demo = gr.Interface(
fn=analyze_job_ad,
inputs=[gr.TextArea(label="Ogłoszenie (opcjonalnie)"), gr.File(label="Plik PDF lub DOCX")],
outputs=[gr.JSON(label="Wyniki analizy"), gr.File(label="Pobierz raport w formacie Word")],
title="KoREKtor – analiza ogłoszenia",
).launch()