Spaces:
Build error
Build error
import os | |
import shutil | |
from dedoc import DedocManager | |
from langchain.chat_models import ChatOpenAI | |
from langchain.prompts import PromptTemplate | |
from langchain_core.output_parsers import JsonOutputParser | |
from ResumeStructure import ResumeStructure | |
from fastapi import UploadFile | |
from prompt_template import template_format_instructions, template | |
from typing import List | |
# Directory where uploaded files are written before they are parsed.
# NOTE(review): nothing in this section creates the directory (the creation
# code below is commented out), so it presumably has to exist before any
# upload is processed - confirm against the deployment setup.
TEMP_DIR = "/temp_files"
# if not os.path.exists(TEMP_DIR):
#     os.makedirs(TEMP_DIR)
async def process_file_with_dedoc(file: UploadFile):
    """
    Process the file using Dedoc and return the output data.

    The file extension is validated before anything is written to disk, and
    the temporary copy in TEMP_DIR is removed even when saving or parsing
    raises.

    Args:
    - file: The UploadedFile object to be processed.
    Returns:
    - Output data if the file is processed successfully, None if the file has
      no usable name or its extension is not supported.
    """
    supported_formats = ['jpg', 'jpeg', 'png', 'docx', 'pdf', 'html', 'doc']
    print(f"Processing file '{file.filename}'...")

    # The filename comes from the client: keep only the last path component
    # (for both "/" and "\" separators) so the temporary copy can never be
    # written outside TEMP_DIR.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))

    # Extension without the leading dot, lowercased for the comparison.
    file_extension = os.path.splitext(safe_name)[1][1:].lower()

    # Reject unsupported files before touching the disk or building the
    # Dedoc manager, so nothing is left behind for them.
    if file_extension not in supported_formats:
        print(f"Cannot process file '{file.filename}'. Unsupported file format.")
        return None

    file_path = os.path.join(TEMP_DIR, safe_name)
    try:
        # Save the uploaded file to the temporary directory
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Process the file using Dedoc
        manager = DedocManager()
        output = manager.parse(file_path)
        return output.to_api_schema().model_dump()
    finally:
        # Remove the temporary file whether or not parsing succeeded; it may
        # not exist if opening it for writing failed.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass
async def extract_text_from_all_levels(data):
    """
    Gather the text of every subparagraph under data['content']['structure'].

    Args:
    - data: The JSON data; the structure is read from data['content']['structure'].
    Returns:
    - A string with the text of all nested subparagraphs, or an empty string
      when the structure has no 'subparagraphs' entry.
    """
    structure = data['content']['structure']

    # Nothing to walk: the structure carries no subparagraphs at all.
    if 'subparagraphs' not in structure:
        return ""

    return await extract_text_from_subparagraphs(structure['subparagraphs'])
async def extract_text_from_subparagraphs(subparagraphs):
    """
    Walk a list of subparagraphs depth-first and concatenate their text.

    Args:
    - subparagraphs: A list of subparagraph dicts; each has a 'text' entry and
      may have its own nested 'subparagraphs' list.
    Returns:
    - A string in which each subparagraph's text is followed by a newline and
      then by the text of its nested subparagraphs.
    """
    pieces = []
    for node in subparagraphs:
        pieces.append(node['text'] + "\n")
        # Children are emitted right after their parent.
        if 'subparagraphs' in node:
            pieces.append(await extract_text_from_subparagraphs(node['subparagraphs']))
    return "".join(pieces)
def generate_formatted_resume(resume, chat_llm):
    """
    Run the resume text through the formatting prompt and return the reply.

    Args:
    - resume: Text substituted for the "text" variable of the prompt template.
    - chat_llm: The chat model the prompt is piped into.
    Returns:
    - The `content` attribute of the result obtained by invoking the chain.
    """
    formatting_prompt = PromptTemplate(template=template, input_variables=["text"])
    pipeline = formatting_prompt | chat_llm
    return pipeline.invoke({"text": resume}).content
def generate_json_structured_resume(resume, chat_llm):
    """
    Run the resume text through the JSON-structuring prompt and parse the reply.

    Args:
    - resume: Text substituted for the "text" variable of the prompt template.
    - chat_llm: The chat model the prompt is piped into.
    Returns:
    - Whatever the chain (prompt, model, JSON output parser) yields when invoked.
    """
    json_parser = JsonOutputParser(pydantic_object=ResumeStructure)

    # The parser's format instructions are baked into the prompt up front, so
    # only "text" remains to be supplied at invocation time.
    structuring_prompt = PromptTemplate(
        template=template_format_instructions,
        input_variables=["text"],
        partial_variables={"format_instructions": json_parser.get_format_instructions()},
    )

    pipeline = structuring_prompt | chat_llm | json_parser
    return pipeline.invoke({"text": resume})
def delete_files_in_directory(directory):
    """
    Deletes all files located directly in the specified directory.

    Only entries for which os.path.isfile is true are removed; subdirectories
    and their contents are left in place. If the path is missing or is not a
    directory, a message is printed and nothing is deleted.

    Args:
        directory (str): The path to the directory containing files to be deleted.
    Returns:
        None
    """
    # isdir rather than exists: a path that exists but is a regular file
    # cannot be listed and would make os.listdir raise NotADirectoryError.
    if not os.path.isdir(directory):
        print(f"Directory '{directory}' does not exist.")
        return

    # Iterate over each entry and delete the ones that are files
    for entry_name in os.listdir(directory):
        file_path = os.path.join(directory, entry_name)
        if os.path.isfile(file_path):
            os.remove(file_path)
            print(f"Deleted file: {file_path}")