Spaces:
Sleeping
Sleeping
Kevin Wu
committed on
Commit
·
6adea60
1
Parent(s):
95174f7
Initial
Browse files
app.py
CHANGED
|
@@ -113,7 +113,7 @@ def parse_xml_response(xml_string: str) -> pd.DataFrame:
|
|
| 113 |
print(f"Traceback: {traceback.format_exc()}")
|
| 114 |
return pd.DataFrame()
|
| 115 |
|
| 116 |
-
def get_response(
|
| 117 |
try:
|
| 118 |
thread = client.beta.threads.create(
|
| 119 |
messages=[
|
|
@@ -153,7 +153,7 @@ def process(file_content):
|
|
| 153 |
|
| 154 |
message_file = client.files.create(file=open(file_name, "rb"), purpose="assistants")
|
| 155 |
|
| 156 |
-
response = get_response(
|
| 157 |
df = parse_xml_response(response)
|
| 158 |
|
| 159 |
if df.empty:
|
|
@@ -216,9 +216,47 @@ def gradio_interface():
|
|
| 216 |
demo.queue()
|
| 217 |
demo.launch()
|
| 218 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
if __name__ == "__main__":
|
| 220 |
try:
|
| 221 |
gradio_interface()
|
|
|
|
| 222 |
except Exception as e:
|
| 223 |
print(f"Error launching Gradio interface: {str(e)}")
|
| 224 |
print(f"Traceback: {traceback.format_exc()}")
|
|
|
|
| 113 |
print(f"Traceback: {traceback.format_exc()}")
|
| 114 |
return pd.DataFrame()
|
| 115 |
|
| 116 |
+
def get_response(file_id, assistant_id):
|
| 117 |
try:
|
| 118 |
thread = client.beta.threads.create(
|
| 119 |
messages=[
|
|
|
|
| 153 |
|
| 154 |
message_file = client.files.create(file=open(file_name, "rb"), purpose="assistants")
|
| 155 |
|
| 156 |
+
response = get_response(message_file.id, demo.id)
|
| 157 |
df = parse_xml_response(response)
|
| 158 |
|
| 159 |
if df.empty:
|
|
|
|
| 216 |
demo.queue()
|
| 217 |
demo.launch()
|
| 218 |
|
| 219 |
+
def run_in_terminal():
    """Run the clinical-note extractor interactively from the terminal.

    Reads the path of a PDF from stdin, hands the file's bytes to
    ``process`` and reports the outcome on stdout:

    * a result starting with ``<p>`` is treated as an error message and is
      printed without its wrapping paragraph tags;
    * any other result is written to ``output_<timestamp>.html`` in the
      current working directory, and the first HTML table it contains is
      echoed as ``Category - Field: Value`` lines.

    Returns:
        None. A missing file and any exception raised while processing are
        reported on stdout instead of being propagated.
    """
    # Imported locally because the file-level import block is not visible
    # from this part of the file.
    from io import StringIO

    print("Clinical Note Information Extractor")
    print("This tool extracts key information from clinical notes in PDF format.")
    print("Enter the path to your PDF file:")
    file_path = input().strip()

    if not os.path.exists(file_path):
        print(f"Error: File not found at {file_path}")
        return

    try:
        with open(file_path, "rb") as file:
            file_content = file.read()

        result = process(file_content)

        if result.startswith("<p>"):
            # Error message: strip the opening tag, and the closing tag only
            # when it is really there, so an unterminated message is not
            # truncated by a blind slice.
            message = result[len("<p>"):]
            if message.endswith("</p>"):
                message = message[:-len("</p>")]
            print(message)
        else:
            # Save the HTML output to a file
            output_file = f"output_{time.time()}.html"
            with open(output_file, "w", encoding="utf-8") as f:
                f.write(result)
            print(f"Extraction completed. Results saved to {output_file}")

            # Also print a simplified version to the console.
            # pandas deprecated passing literal HTML to read_html (2.1+), so
            # the markup is wrapped in a file-like object.
            df = pd.read_html(StringIO(result))[0]
            print("\nExtracted Information:")
            for _, row in df.iterrows():
                print(f"{row['Category']} - {row['Field']}: {row['Value']}")

    except Exception as e:
        # Top-level boundary for the CLI: report the failure and return
        # rather than crash with an unhandled traceback.
        print(f"An error occurred while processing the file: {str(e)}")
        print(f"Traceback: {traceback.format_exc()}")
|
| 254 |
+
|
| 255 |
+
|
| 256 |
if __name__ == "__main__":
|
| 257 |
try:
|
| 258 |
gradio_interface()
|
| 259 |
+
# run_in_terminal()
|
| 260 |
except Exception as e:
|
| 261 |
print(f"Error launching Gradio interface: {str(e)}")
|
| 262 |
print(f"Traceback: {traceback.format_exc()}")
|