hienbm committed on
Commit 55364c5 · verified · 1 Parent(s): 8639761

Update app.py

Files changed (1)
  1. app.py +84 -50
app.py CHANGED
@@ -1,64 +1,98 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
+import PyPDF2
+from langchain.prompts import PromptTemplate

-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+# Initialize the Hugging Face client with gemma2-9b-it
+client = InferenceClient("HuggingFaceH4/gemma2-9b-it")

+# Function to read text from a PDF file
+def pdf_to_text(pdf_path):
+    with open(pdf_path, 'rb') as file:
+        pdf_reader = PyPDF2.PdfReader(file)
+        text = ''
+        for page in range(len(pdf_reader.pages)):
+            text += pdf_reader.pages[page].extract_text()
+    return text

-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
+# Function to analyze CV content
+def analyze_cv(cv_text):
+    if not cv_text or not isinstance(cv_text, str):
+        raise ValueError("The CV text must be a non-empty string.")
+    prompt_template = PromptTemplate.from_template('''
+    You are an AI designed to extract structured information from unstructured text. Your task is to analyze the content of a candidate's resume or CV and extract the following details:

-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+    **CV**
+    {cv_text}

-    messages.append({"role": "user", "content": message})
+    **Information Extraction and Output Format**
+    For the given resume, extract and present the following details in the specified format:

-    response = ""
+    1. Candidate Information
+       - Full Name
+       - Contact Information (Phone, Email, Address, etc.)
+       - Date of Birth (if available)
+       - Habitat (if specified, e.g., location, region, or country of residence)
+
+    2. Education
+       - Degree Name (e.g., Bachelor's, Master's, Ph.D.)
+       - Field of Study (e.g., Computer Science, Business Administration)
+       - Institution Name
+       - Year(s) of Graduation
+
+    3. Professional Experience
+       - For each job extract:
+         - Job Title
+         - Company Name
+         - Duration (start and end dates, or years of experience)
+         - Summary of Key Responsibilities and Achievements
+
+    4. Skills
+       - List of Skills (include technical, soft, and industry-specific skills mentioned in the resume)
+
+    5. Certifications
+       - Certification Name
+       - Issuing Organization
+       - Year of Issuance
+
+    6. Language Proficiency
+       - Languages Mentioned (include proficiency levels if specified in the resume)

-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
+    Do not explain, comment or make up any more information that is not relative to the list of Information extraction. Respond in Vietnamese. Let's work this out in a step by step way to ensure the correct answer. [END].
+    ''')
+    prompt = prompt_template.format(cv_text=cv_text)
+    response = client.text_generation(prompt, max_new_tokens=2048, temperature=0.0)
+    return response

-        response += token
-        yield response
+# Chatbot with PDF and CV analysis
+def chatbot_with_pdf(pdf_file, user_message, history, system_message, max_tokens, temperature, top_p):
+    if pdf_file is not None:
+        pdf_text = pdf_to_text(pdf_file.name)
+        cv_analysis = analyze_cv(pdf_text)  # Call analyze_cv with the extracted PDF text
+        user_message = f"CV Analysis:\n{cv_analysis}\n\nUser Message:\n{user_message}"
+    response_gen = respond(
+        user_message, history, system_message, max_tokens, temperature, top_p
+    )
+    return list(response_gen)[-1], history + [(user_message, "")]

-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
+# Define Gradio interface
+interface = gr.Interface(
+    fn=chatbot_with_pdf,
+    inputs=[
+        gr.File(label="Upload a PDF File"),
+        gr.Textbox(label="Your Message"),
+        gr.State(label="Chat History"),
+        gr.Textbox(label="System Message", value="You are an AI assistant."),
+        gr.Slider(label="Max Tokens", minimum=1, maximum=1000, value=200),
+        gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.7, step=0.1),
+        gr.Slider(label="Top P", minimum=0.0, maximum=1.0, value=0.9, step=0.1),
+    ],
+    outputs=[
+        gr.Textbox(label="Response"),
+        gr.State(label="Chat History"),
     ],
+    title="Chatbot with CV Analysis and PDF Integration",
 )

-
-if __name__ == "__main__":
-    demo.launch()
+# Launch Gradio app
+interface.launch()
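
Note that the new chatbot_with_pdf still calls respond(), while this commit removes the respond function from app.py, so the updated file would raise a NameError at runtime. A minimal sketch of a compatible respond helper is shown below; it assumes the same client.chat_completion streaming pattern as the deleted version, reuses the module-level client from app.py, and adds a "history or []" guard as an assumption to cope with gr.State defaulting to None.

# Minimal sketch of the `respond` helper that chatbot_with_pdf still expects.
# Assumes the module-level `client` defined in app.py and the same streaming
# chat_completion pattern as the version removed by this commit; the
# `history or []` guard is an assumption, since gr.State defaults to None.
def respond(message, history, system_message, max_tokens, temperature, top_p):
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in (history or []):
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response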