Spaces:
Build error
Build error
File size: 4,113 Bytes
71843ed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import os
import shutil
from dedoc import DedocManager
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from ResumeStructure import ResumeStructure
from fastapi import UploadFile
from prompt_template import template_format_instructions, template
from typing import List
# Directory in which uploaded files are written while they are being parsed.
# NOTE: nothing is created at import time; the directory-creation code below
# is intentionally left disabled.
TEMP_DIR = "/temp_files"
# if not os.path.exists(TEMP_DIR):
#     os.makedirs(TEMP_DIR)
async def process_file_with_dedoc(file: UploadFile):
    """
    Parse an uploaded file with Dedoc and return the parsed document as a dict.

    The file extension is validated first; only supported uploads are written
    to TEMP_DIR. The temporary copy is removed afterwards whether parsing
    succeeds or fails.

    Args:
    - file: The UploadFile object to be processed.

    Returns:
    - The Dedoc output (its API schema dumped to a dict) if the file is
      processed successfully, None if the file extension is not supported.

    Raises:
    - Any exception raised while saving or parsing the file is propagated
      after the temporary copy has been cleaned up.
    """
    supported_formats = ('jpg', 'jpeg', 'png', 'docx', 'pdf', 'html', 'doc')
    print(f"Processing file '{file.filename}'...")

    # The file name comes from the client: keep only its last path component
    # so that a name like "../../app/main.py" cannot escape TEMP_DIR.
    safe_name = os.path.basename(file.filename or "")

    # Extension without the leading dot, lower-cased for the comparison.
    file_extension = os.path.splitext(safe_name)[1][1:].lower()

    # Reject unsupported uploads before anything is written to disk, so a
    # rejected file never leaves a stray copy behind.
    if file_extension not in supported_formats:
        print(f"Cannot process file '{file.filename}'. Unsupported file format.")
        return None

    # The directory is not created anywhere else in this module.
    os.makedirs(TEMP_DIR, exist_ok=True)
    # NOTE(review): two concurrent uploads with the same name share this path.
    file_path = os.path.join(TEMP_DIR, safe_name)

    try:
        # Dedoc works on a path, so persist the upload first.
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        manager = DedocManager()
        output = manager.parse(file_path)
        return output.to_api_schema().model_dump()
    finally:
        # Always remove the temporary copy, even when parsing raised.
        if os.path.exists(file_path):
            os.remove(file_path)
async def extract_text_from_all_levels(data):
    """
    Collect the text of every subparagraph, at any depth, of a parsed document.

    Args:
    - data: The JSON data (dict) holding the document under
      data['content']['structure'].

    Returns:
    - A string with the text of all subparagraphs, or an empty string when
      the structure has no 'subparagraphs' key.
    """
    structure = data['content']['structure']

    # Nothing to walk: the document root has no nested paragraphs.
    if 'subparagraphs' not in structure:
        return ""

    return await extract_text_from_subparagraphs(structure['subparagraphs'])
async def extract_text_from_subparagraphs(subparagraphs):
    """
    Walk a list of subparagraphs depth-first and concatenate their text.

    Args:
    - subparagraphs: A list of dicts, each with a 'text' entry and optionally
      a nested 'subparagraphs' list.

    Returns:
    - A string in which every paragraph's text is followed by a newline,
      each parent immediately followed by the text of its descendants.
    """
    pieces = []
    for node in subparagraphs:
        pieces.append(node['text'] + "\n")
        if 'subparagraphs' in node:
            # Descend before moving on to the next sibling.
            nested_text = await extract_text_from_subparagraphs(node['subparagraphs'])
            pieces.append(nested_text)
    return "".join(pieces)
def generate_formatted_resume(resume, chat_llm):
    """
    Send the resume text through the formatting prompt and return the reply.

    Args:
    - resume: The resume text substituted for the prompt's "text" variable.
    - chat_llm: The chat model the prompt is piped into.

    Returns:
    - The `content` attribute of the model's response.
    """
    formatting_prompt = PromptTemplate(input_variables=["text"], template=template)
    pipeline = formatting_prompt | chat_llm
    return pipeline.invoke({"text": resume}).content
def generate_json_structured_resume(resume, chat_llm):
    """
    Ask the chat model for the resume as JSON shaped like ResumeStructure.

    Args:
    - resume: The resume text substituted for the prompt's "text" variable.
    - chat_llm: The chat model the prompt is piped into.

    Returns:
    - The model's reply after it has been parsed by the JSON output parser.
    """
    json_parser = JsonOutputParser(pydantic_object=ResumeStructure)
    # The parser's format instructions are baked into the prompt up front.
    format_instructions = json_parser.get_format_instructions()
    structured_prompt = PromptTemplate(
        input_variables=["text"],
        partial_variables={"format_instructions": format_instructions},
        template=template_format_instructions,
    )
    pipeline = structured_prompt | chat_llm | json_parser
    return pipeline.invoke({"text": resume})
def delete_files_in_directory(directory):
    """
    Remove every regular file directly inside a directory.

    Sub-directories and their contents are left alone. A message is printed
    for each file removed, or once if the directory is missing.

    Args:
        directory (str): The path to the directory containing files to be deleted.

    Returns:
        None
    """
    if os.path.exists(directory):
        for entry_name in os.listdir(directory):
            candidate = os.path.join(directory, entry_name)
            # Skip anything that is not a plain file (e.g. sub-directories).
            if not os.path.isfile(candidate):
                continue
            os.remove(candidate)
            print(f"Deleted file: {candidate}")
    else:
        print(f"Directory '{directory}' does not exist.")
|