reab5555 committed
Commit 523a4a0 · verified · 1 Parent(s): af13060

Upload 7 files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+knowledge/faiss_index_all_documents/index.faiss filter=lfs diff=lfs merge=lfs -text
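(This attribute line is what `git lfs track` writes; an equivalent CLI invocation, run from the repository root, would be `git lfs track "knowledge/faiss_index_all_documents/index.faiss"`.)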
ai_config.py ADDED
@@ -0,0 +1,41 @@
+from langchain_openai import ChatOpenAI
+from openai import OpenAI
+import tiktoken
+import os
+
+def n_of_questions():
+    n_of_questions = 25
+    return n_of_questions
+
+openai_api_key = os.environ.get("openai_api_key")
+
+model = "gpt-4o-mini"
+
+def load_model(openai_api_key):
+    return ChatOpenAI(
+        model_name=model,
+        openai_api_key=openai_api_key,
+        temperature=0.1,
+        top_p=0.85
+    )
+
+# Initialize the OpenAI client with the API key
+client = OpenAI(api_key=openai_api_key)
+
+
+def convert_text_to_speech(text, output_file):
+    try:
+        # Convert the final text to speech
+        response = client.audio.speech.create(model="tts-1", voice="alloy", input=text)
+
+        with open(output_file, 'wb') as f:
+            for chunk in response.iter_bytes():
+                f.write(chunk)
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        response = client.audio.speech.create(model="tts-1", voice="alloy", input='Here is my Report!')
+
+        with open(output_file, 'wb') as f:
+            for chunk in response.iter_bytes():
+                f.write(chunk)
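For reference, a minimal usage sketch of this module (not part of the commit; it assumes the openai_api_key environment variable is set before import, and the output file name is illustrative):

import os
os.environ.setdefault("openai_api_key", "sk-...")  # placeholder; set a real key before importing

from ai_config import load_model, convert_text_to_speech, n_of_questions

llm = load_model(os.environ["openai_api_key"])  # ChatOpenAI wrapper around gpt-4o-mini
print(n_of_questions())                         # -> 25
convert_text_to_speech("Hello, this is a test.", "hello.mp3")  # writes an MP3 via tts-1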
app.py ADDED
@@ -0,0 +1,153 @@
+import gradio as gr
+import tempfile
+import os
+from pathlib import Path
+from settings import (
+    respond,
+    generate_random_string,
+    reset_interview,
+    generate_interview_report,
+    generate_report_from_file,
+    interview_history,
+    question_count,
+    language,
+)
+from ai_config import convert_text_to_speech, n_of_questions
+from prompt_instructions import get_interview_initial_message
+
+# Global variables
+temp_mp3_files = []
+initial_audio_path = None
+
+
+# Initialize Gradio interface
+def create_app():
+    global initial_audio_path
+    initial_message = get_interview_initial_message()
+
+    # Generate and save the audio for the initial message in a temporary file
+    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_initial_audio:
+        initial_audio_path = temp_initial_audio.name
+        convert_text_to_speech(initial_message, initial_audio_path)
+        temp_mp3_files.append(initial_audio_path)
+
+    with gr.Blocks(title="Clinical Psychologist Interviewer 𝚿") as demo:
+        gr.Markdown(
+            """
+            # Clinical Psychologist Interviewer 𝚿
+            This chatbot conducts clinical interviews based on psychological knowledge.
+            Please note that this is a simulation and should not be used as a substitute for professional medical advice.
+            """
+        )
+
+        with gr.Tab("Interview"):
+            audio_output = gr.Audio(
+                label="Sarah",
+                scale=1,
+                value=initial_audio_path,
+                autoplay=True,
+                visible=False,
+                show_download_button=False,
+            )
+            chatbot = gr.Chatbot(value=[("", f"{initial_message}")], label=f"Clinical Interview 𝚿📋")
+            msg = gr.Textbox(label="Type your message here...")
+            send_button = gr.Button("Send")
+
+            pdf_output = gr.File(label="Download Report", visible=False)
+
+            def user(user_message, history):
+                if not isinstance(history, list):
+                    history = []
+                return "", history + [[user_message, None]]
+
+            def bot_response(chatbot, message):
+                global question_count, temp_mp3_files
+                question_count += 1
+
+                response, audio = respond(chatbot, message)
+
+                if isinstance(audio, str) and audio.endswith('.mp3'):
+                    temp_mp3_files.append(audio)
+
+                if question_count >= n_of_questions():
+                    conclusion_message = "Thank you for participating in this interview. We have reached the end of our session. I hope this conversation has been helpful. Take care!"
+                    response.append((message, conclusion_message))
+
+                    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
+                        audio_path = temp_audio.name
+                        convert_text_to_speech(conclusion_message, audio_path)
+                        audio = audio_path
+                        temp_mp3_files.append(audio_path)
+
+                    # Generate report automatically
+                    report_content, _ = generate_interview_report(interview_history, language)
+                    with tempfile.NamedTemporaryFile(mode='w', suffix=".txt", delete=False,
+                                                     encoding='utf-8') as temp_report:
+                        temp_report.write(report_content)
+                        temp_report_path = temp_report.name
+
+                    _, pdf_path = generate_report_from_file(temp_report_path, language)
+
+                    # Add report to the chat
+                    response.append(("", f"Interview Report:\n\n{report_content}"))
+
+                    # Clean up temporary files
+                    os.unlink(temp_report_path)
+
+                    # Clean up all MP3 files
+                    for mp3_file in temp_mp3_files:
+                        if os.path.exists(mp3_file):
+                            os.unlink(mp3_file)
+                    temp_mp3_files.clear()
+
+                    return response, audio, gr.File(visible=True, value=pdf_path), gr.Textbox(visible=False)
+
+                return response, audio, gr.File(visible=False), gr.Textbox(visible=True)
+
+            msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+                bot_response, [chatbot, msg], [chatbot, audio_output, pdf_output, msg]
+            )
+
+            send_button.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+                bot_response, [chatbot, msg], [chatbot, audio_output, pdf_output, msg]
+            )
+
+        with gr.Tab("Upload Document"):
+            file_input = gr.File(label="Upload a TXT, PDF, or DOCX file")
+            language_input = gr.Textbox(label="Preferred Language for Report",
+                                        placeholder="Enter language")
+            generate_button = gr.Button("Generate Report")
+            report_output = gr.Textbox(label="Generated Report", lines=100)
+            pdf_output = gr.File(label="Download Report", visible=True)
+
+            def generate_report_and_pdf(file, language):
+                report_content, pdf_path = generate_report_from_file(file, language)
+                return report_content, pdf_path, gr.File(visible=True)
+
+            generate_button.click(
+                generate_report_and_pdf,
+                inputs=[file_input, language_input],
+                outputs=[report_output, pdf_output, pdf_output]
+            )
+
+    return demo
+
+
+# Clean up function
+def cleanup():
+    global temp_mp3_files, initial_audio_path
+    for mp3_file in temp_mp3_files:
+        if os.path.exists(mp3_file):
+            os.unlink(mp3_file)
+    temp_mp3_files.clear()
+
+    if initial_audio_path and os.path.exists(initial_audio_path):
+        os.unlink(initial_audio_path)
+
+
+if __name__ == "__main__":
+    app = create_app()
+    try:
+        app.launch()
+    finally:
+        cleanup()
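To run this file locally, a minimal sketch (the "sk-..." literal is a placeholder; the key must be set before the project modules are imported, because ai_config reads it at import time):

import os
os.environ["openai_api_key"] = "sk-..."  # placeholder; use a real key

from app import create_app

create_app().launch()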
knowledge/faiss_index_all_documents/index.faiss ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12862dc2569f08087b731be2f75ff4a81e940f9573a742e16cb55b2c6e2f8e37
+size 5283885
knowledge/faiss_index_all_documents/index.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4e90c1c58172c666127f1d39aef3aaf8293e83069778e5363d3fb3242027f17
+size 2786587
knowledge_retrieval.py ADDED
@@ -0,0 +1,86 @@
+import random
+from langchain_community.vectorstores import FAISS
+from langchain_openai import OpenAIEmbeddings
+from langchain.chains import create_retrieval_chain
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain_core.prompts import ChatPromptTemplate
+from langchain.retrievers import EnsembleRetriever
+from ai_config import n_of_questions, openai_api_key
+from prompt_instructions import get_interview_prompt, get_report_prompt
+
+n_of_questions = n_of_questions()
+
+def setup_knowledge_retrieval(llm, language='english'):
+    embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
+
+    documents_faiss_index = FAISS.load_local("knowledge/faiss_index_all_documents", embedding_model,
+                                             allow_dangerous_deserialization=True)
+
+    documents_retriever = documents_faiss_index.as_retriever()
+
+    combined_retriever = EnsembleRetriever(
+        retrievers=[documents_retriever]
+    )
+
+    interview_prompt = ChatPromptTemplate.from_messages([
+        ("system", get_interview_prompt(language, n_of_questions)),
+        ("human", "{input}")
+    ])
+
+    report_prompt = ChatPromptTemplate.from_messages([
+        ("system", get_report_prompt(language)),
+        ("human", "Please provide a concise clinical report based on the interview.")
+    ])
+
+    interview_chain = create_stuff_documents_chain(llm, interview_prompt)
+    report_chain = create_stuff_documents_chain(llm, report_prompt)
+
+    interview_retrieval_chain = create_retrieval_chain(combined_retriever, interview_chain)
+    report_retrieval_chain = create_retrieval_chain(combined_retriever, report_chain)
+
+    return interview_retrieval_chain, report_retrieval_chain, combined_retriever
+
+
+def get_next_response(interview_chain, message, history, question_count):
+    combined_history = "\n".join(history)
+
+    # Check if the interview should end
+    if question_count >= n_of_questions:
+        return "Thank you for your responses. I will now prepare a report."
+
+    # Generate the next question
+    result = interview_chain.invoke({
+        "input": f"Based on the patient's last response: '{message}', and considering the full interview history, ask a specific, detailed question that hasn't been asked before and is relevant to the patient's situation.",
+        "history": combined_history,
+        "question_number": question_count + 1  # Increment question number here
+    })
+
+    next_question = result.get("answer", "Could you provide more details on that?")
+
+    # Update history with the new question and response
+    history.append(f"Q{question_count + 1}: {next_question}")
+    history.append(f"A{question_count + 1}: {message}")
+
+    return next_question
+
+
+def generate_report(report_chain, history, language):
+    combined_history = "\n".join(history)
+
+    result = report_chain.invoke({
+        "input": "Please provide a clinical report based on the interview.",
+        "history": combined_history,
+        "language": language
+    })
+
+    return result.get("answer", "Unable to generate report due to insufficient information.")
+
+
+def get_initial_question(interview_chain):
+    result = interview_chain.invoke({
+        "input": "What should be the first question in a clinical psychology interview?",
+        "history": "",
+        "question_number": 1
+    })
+    return result.get("answer", "Could you tell me a little bit about yourself and what brings you here today?")
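A minimal sketch of how these chains compose at runtime (assumes the FAISS index directory shipped in this commit is present and the API key is configured):

from ai_config import load_model, openai_api_key
from knowledge_retrieval import (
    setup_knowledge_retrieval,
    get_initial_question,
    generate_report,
)

llm = load_model(openai_api_key)
interview_chain, report_chain, _ = setup_knowledge_retrieval(llm, language="english")

print(get_initial_question(interview_chain))
# ...collect Q/A pairs into a history list during the interview, then:
print(generate_report(report_chain, ["Q1: ...", "A1: ..."], "english"))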
prompt_instructions.py ADDED
@@ -0,0 +1,119 @@
+from datetime import datetime
+current_datetime = datetime.now()
+current_date = current_datetime.strftime("%Y-%m-%d")
+
+def get_interview_initial_message():
+    return """Hello, I'm Sarah, an AI clinical psychologist, and I'll be conducting a clinical interview with you.
+
+Before we begin, I want to assure you that this is a safe and confidential space.
+
+Our session will involve a series of questions to help me understand your situation better.
+
+Feel free to share as much or as little as you're comfortable with. There are no right or wrong answers - I'm here to listen and learn about your experiences.
+
+To ensure I can communicate with you most effectively, could you please tell me which language you would prefer to use for this interview?"""
+
+def get_interview_prompt(language, n_of_questions):
+    return f"""You are a Psychologist or Psychiatrist conducting a clinical interview in {language}.
+Use the following context and interview history to guide your response.
+Keep your responses concise and to the point:
+
+Context from knowledge base: {{context}}
+
+Previous interview history:
+{{history}}
+
+Current question number: {{question_number}}
+
+Respond to the patient's input briefly and directly in {language}.
+Ask a specific, detailed question that hasn't been asked before, and keep it short.
+Do not repeat the same questions.
+Phrase each question in a way that takes the patient's personality into account.
+For example, if the person is more introverted or extraverted, word your questions accordingly.
+If the person seems very sensitive, for example, take that into consideration when asking your questions.
+If you perceive particularly special, unusual, or strange things in the answers that require deeper or more in-depth understanding, ask about them or direct your question to clarify the matter - this information may be beneficial and may hint at the patient's personality or traits.
+The first few questions are general questions about the patient that can give us an overall view.
+The 1st question is to ask for their name.
+The 2nd question is to ask for their age.
+The 3rd question is to ask where they live.
+The 4th question is to ask what they do for work.
+The 5th question is to ask about the nature of their relationship with their parents.
+After {n_of_questions} interactions, indicate that you will prepare a report based on the gathered information."""
+
+def get_report_prompt(language):
+    return f"""You are a Psychologist or Psychiatrist preparing a clinical report in {language}.
+Use the following context and interview history to create your report.
+Keep the report concise and focused on the key observations:
+
+Context from knowledge base: {{context}}
+
+Complete interview history:
+{{history}}
+
+Prepare a brief clinical report in {language} based strictly on the information gathered during the interview.
+Date to specify in the report: {current_date}
+- Use only the terms, criteria for diagnosis, and categories for clinical diagnosis or classifications
+that are present in the provided knowledge base. Do not introduce any external information or terminology.
+* In your diagnosis, you must be very careful. That is, you need to have enough evidence and information to rate or diagnose a patient.
+* Your diagnoses must be fact-based and grounded in what the speakers are saying.
+* Write technical, clinical, or professional terms only in the English language.
+* As a rule, in cases where there is little information about the patient through the conversation or through
+the things they say, the diagnosis will be more difficult and the ratings will be lower,
+because it is difficult to draw conclusions when our information about the patient is scarce.
+Be very selective and careful with the facts that you write or provide in the report.
+In such a case, this must also be mentioned and taken into consideration.
+* Do not provide any clinical diagnosis or conclusions in the report if the patient has not provided enough information.
+* Any diagnosis or interpretation requires the presentation of facts, foundations, and explanations.
+* You can also give examples or quotes.
+* There are two parts to the report - the main report and the additional report.
+* Structure the main report to include observed symptoms, potential diagnoses (if applicable), and any other
+relevant clinical observations, all within the framework of the given knowledge.
+
+First, write the main report; then, in addition to the main report, add the following sections as the additional report:
+- An overall clinical impression
+- Dominant personality characteristics
+- An assessment of their degree of psychological and mental health
+- Style of communication
+- What mainly preoccupies them - themes or topics that preoccupy them in particular
+- Possible personal weaknesses or triggers
+- How they are likely to react to stressful or emotionally charged situations or events
+- How they might deal with unexpected situations or events
+- How they might behave in a group vs alone
+- How they might behave in intimate relationships
+- How they will function in work environments, and whether they will be able to contribute and perform properly and in a stable manner over time
+- What the general experience of meeting such a person would be like
+- Other things or further assessments that can be examined from a psychological perspective, and the situations in which it is necessary to examine the person's reactions in order to get more indications for a diagnosis of their personality
+- The type of treatment that is recommended.
+
+Furthermore, include the following:
+
+Big Five Traits (ratings of 0-10):
+Extraversion: [rating]
+Agreeableness: [rating]
+Conscientiousness: [rating]
+Neuroticism: [rating]
+Openness: [rating]
+Big Five Traits explanation: [explanation]
+
+Personality Disorders or Styles (ratings of 0-4):
+Depressed Personality: [rating]
+Paranoid: [rating]
+Schizoid-Schizotypal: [rating]
+Antisocial-Psychopathic: [rating]
+Borderline-Dysregulated: [rating]
+Narcissistic: [rating]
+Anxious-Avoidant: [rating]
+Dependent-Victimized: [rating]
+Obsessional: [rating]
+Personality Disorders or Styles explanation: [explanation]
+
+Attachment Styles (ratings of 0-10):
+Secured Attachment: [rating]
+Anxious-Preoccupied: [rating]
+Dismissive-Avoidant: [rating]
+Fearful-Avoidant: [rating]
+Avoidance: [rating]
+Positive view toward the Self: [rating]
+Positive view toward Others: [rating]
+Attachment Styles explanation: [explanation]
+"""
settings.py ADDED
@@ -0,0 +1,233 @@
+import traceback
+from datetime import datetime
+from pathlib import Path
+import os
+import random
+import string
+import tempfile
+import re
+import io
+import PyPDF2
+import docx
+from reportlab.pdfgen import canvas
+from reportlab.lib.pagesizes import letter
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
+from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+from reportlab.lib.enums import TA_JUSTIFY
+from ai_config import n_of_questions, load_model, openai_api_key, convert_text_to_speech
+from knowledge_retrieval import setup_knowledge_retrieval, generate_report
+
+# Initialize settings
+n_of_questions = n_of_questions()
+current_datetime = datetime.now()
+human_readable_datetime = current_datetime.strftime("%B %d, %Y at %H:%M")
+current_date = current_datetime.strftime("%Y-%m-%d")
+
+# Initialize the model and retrieval chain
+try:
+    llm = load_model(openai_api_key)
+    interview_retrieval_chain, report_retrieval_chain, combined_retriever = setup_knowledge_retrieval(llm)
+    knowledge_base_connected = True
+    print("Successfully connected to the knowledge base.")
+except Exception as e:
+    print(f"Error initializing the model or retrieval chain: {str(e)}")
+    knowledge_base_connected = False
+    print("Falling back to basic mode without knowledge base.")
+
+question_count = 0
+interview_history = []
+last_audio_path = None  # Variable to store the path of the last audio file
+initial_audio_path = None  # Variable to store the path of the initial audio file
+language = None
+
+def generate_random_string(length=5):
+    return ''.join(random.choices(string.ascii_letters + string.digits, k=length))
+
+def respond(message, history):
+    global question_count, interview_history, combined_retriever, last_audio_path, initial_audio_path, language, interview_retrieval_chain, report_retrieval_chain
+
+    if not isinstance(history, list):
+        history = []
+    if not history or not history[-1]:
+        history.append(["", ""])
+
+    # Extract the actual message text
+    if isinstance(message, list):
+        message = message[-1][0] if message and isinstance(message[-1], list) else message[-1]
+
+    question_count += 1
+    interview_history.append(f"Q{question_count}: {message}")
+    history_str = "\n".join(interview_history)
+
+    try:
+        if knowledge_base_connected:
+            if question_count == 1:
+                # Capture the language from the first response
+                language = message.strip().lower()
+                # Reinitialize the interview chain with the new language
+                interview_retrieval_chain, report_retrieval_chain, combined_retriever = setup_knowledge_retrieval(
+                    llm, language)
+
+            if question_count < n_of_questions:
+                result = interview_retrieval_chain.invoke({
+                    "input": f"Based on the patient's statement: '{message}', what should be the next question?",
+                    "history": history_str,
+                    "question_number": question_count + 1,
+                    "language": language
+                })
+                question = result.get("answer", f"Can you tell me more about that? (in {language})")
+            else:
+                result = generate_report(report_retrieval_chain, interview_history, language)
+                question = result
+                speech_file_path = None  # Skip audio generation for the report
+
+            if question:
+                random_suffix = generate_random_string()
+                speech_file_path = Path(__file__).parent / f"question_{question_count}_{random_suffix}.mp3"
+                convert_text_to_speech(question, speech_file_path)
+                print(f"Question {question_count} saved as audio at {speech_file_path}")
+
+                # Remove the last audio file if it exists
+                if last_audio_path and os.path.exists(last_audio_path):
+                    os.remove(last_audio_path)
+                last_audio_path = speech_file_path
+            else:
+                speech_file_path = None  # Skip audio generation for the report
+
+        else:
+            # Fallback mode without knowledge base
+            question = f"Can you elaborate on that? (in {language})"
+            if question_count < n_of_questions:
+                speech_file_path = Path(__file__).parent / f"question_{question_count}.mp3"
+                convert_text_to_speech(question, speech_file_path)
+                print(f"Question {question_count} saved as audio at {speech_file_path}")
+
+                if last_audio_path and os.path.exists(last_audio_path):
+                    os.remove(last_audio_path)
+                last_audio_path = speech_file_path
+            else:
+                speech_file_path = None
+
+        history[-1][1] = f"{question}"
+
+        # Remove the initial question audio file after the first user response
+        if initial_audio_path and os.path.exists(initial_audio_path):
+            os.remove(initial_audio_path)
+            initial_audio_path = None
+
+        return history, str(speech_file_path) if speech_file_path else None
+
+    except Exception as e:
+        print(f"Error in retrieval chain: {str(e)}")
+        print(traceback.format_exc())
+        return history, None
+
+
+def reset_interview():
+    """Reset the interview state."""
+    global question_count, interview_history, last_audio_path, initial_audio_path
+    question_count = 0
+    interview_history = []
+    if last_audio_path and os.path.exists(last_audio_path):
+        os.remove(last_audio_path)
+    last_audio_path = None
+    initial_audio_path = None
+
+
+def read_file(file):
+    if file is None:
+        return "No file uploaded"
+
+    if isinstance(file, str):
+        with open(file, 'r', encoding='utf-8') as f:
+            return f.read()
+
+    if hasattr(file, 'name'):  # Check if it's a file-like object
+        if file.name.endswith('.txt'):
+            return file.content
+        elif file.name.endswith('.pdf'):
+            pdf_reader = PyPDF2.PdfReader(io.BytesIO(file.content))
+            return "\n".join(page.extract_text() for page in pdf_reader.pages)
+        elif file.name.endswith('.docx'):
+            doc = docx.Document(io.BytesIO(file.content))
+            return "\n".join(paragraph.text for paragraph in doc.paragraphs)
+        else:
+            return "Unsupported file format"
+
+    return "Unable to read file"
+
+def generate_report_from_file(file, language):
+    try:
+        file_content = read_file(file)
+        if file_content in ("No file uploaded", "Unsupported file format", "Unable to read file"):
+            return file_content, None
+
+        report_language = language.strip().lower() if language else "english"
+        print('preferred language:', report_language)
+        print(f"Generating report in language: {report_language}")  # For debugging
+
+        # Reinitialize the report chain with the new language
+        _, report_retrieval_chain, _ = setup_knowledge_retrieval(llm, report_language)
+
+        result = report_retrieval_chain.invoke({
+            "input": "Please provide a clinical report based on the following content:",
+            "history": file_content,
+            "language": report_language
+        })
+        report_content = result.get("answer", "Unable to generate report due to insufficient information.")
+        pdf_path = create_pdf(report_content)
+        return report_content, pdf_path
+    except Exception as e:
+        return f"An error occurred while processing the file: {str(e)}", None
+
+
+def generate_interview_report(interview_history, language):
+    try:
+        report_language = language.strip().lower() if language else "english"
+        print('preferred report language:', report_language)
+        _, report_retrieval_chain, _ = setup_knowledge_retrieval(llm, report_language)
+
+        result = report_retrieval_chain.invoke({
+            "input": "Please provide a clinical report based on the following interview:",
+            "history": "\n".join(interview_history),
+            "language": report_language
+        })
+        report_content = result.get("answer", "Unable to generate report due to insufficient information.")
+        pdf_path = create_pdf(report_content)
+        return report_content, pdf_path
+    except Exception as e:
+        return f"An error occurred while generating the report: {str(e)}", None
+
+def create_pdf(content):
+    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
+    doc = SimpleDocTemplate(temp_file.name, pagesize=letter)
+    styles = getSampleStyleSheet()
+
+    # Create a custom style for bold text
+    bold_style = ParagraphStyle('Bold', parent=styles['Normal'], fontName='Helvetica-Bold', fontSize=10)
+
+    # Create a custom style for normal text with justification
+    normal_style = ParagraphStyle('Normal', parent=styles['Normal'], alignment=TA_JUSTIFY)
+
+    flowables = []
+
+    for line in content.split('\n'):
+        # Use regex to find words surrounded by **
+        parts = re.split(r'(\*\*.*?\*\*)', line)
+        paragraph_parts = []
+
+        for part in parts:
+            if part.startswith('**') and part.endswith('**'):
+                # Bold text
+                bold_text = part.strip('**')
+                paragraph_parts.append(Paragraph(bold_text, bold_style))
+            else:
+                # Normal text
+                paragraph_parts.append(Paragraph(part, normal_style))
+
+        flowables.extend(paragraph_parts)
+        flowables.append(Spacer(1, 12))  # Add space between paragraphs
+
+    doc.build(flowables)
+    return temp_file.name
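Finally, a small sketch of the PDF helper in isolation (the sample report text is illustrative; note that importing settings also runs the module-level chain initialization, which falls back gracefully if the knowledge base is unavailable):

from settings import create_pdf

# Text wrapped in ** markers is rendered as bold paragraphs in the PDF.
path = create_pdf("**Clinical Report**\nPatient presented with mild anxiety.")
print("PDF written to:", path)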