testing-groq

Sleeping

App Files Files Community

khurrameycon committed on Nov 27, 2024

Commit

e8ef368

verified ·

1 Parent(s): 1ed7eae

flask - from gradio to flask

Browse files

Files changed (1) hide show

app.py +55 -18

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import gradio as gr
 import os
 import torch
 from transformers import AutoProcessor, MllamaForConditionalGeneration, TextIteratorStreamer
@@ -8,6 +8,7 @@ import tempfile
 import requests
 from PyPDF2 import PdfReader
 from threading import Thread
 # Check if we're running in a Hugging Face Space and if SPACES_ZERO_GPU is enabled
 # IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
@@ -20,6 +21,8 @@ LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
 print(f"Using device: {device}")
 print(f"Low memory mode: {LOW_MEMORY}")
 # Get Hugging Face token from environment variables
 HF_TOKEN = os.environ.get('HF_TOKEN')
@@ -79,9 +82,11 @@ def extract_text_from_pdf(pdf_url):
 # raise HTTPException(status_code=400, detail=f"Error extracting text from PDF: {str(e)}")
 @spaces.GPU
-def predict_text(text, url = 'https://arinsight.co/2024_FA_AEC_1200_GR1_GR2.pdf'):
-    pdf_text = extract_text_from_pdf('https://arinsight.co/2024_FA_AEC_1200_GR1_GR2.pdf')
-    text_combined = text + "\n\nExtracted Text from PDF:\n" + pdf_text
     # Prepare the input messages
     messages = [{"role": "user", "content": [{"type": "text", "text": text_combined}]}]
@@ -100,7 +105,7 @@ def predict_text(text, url = 'https://arinsight.co/2024_FA_AEC_1200_GR1_GR2.pdf'
     streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
     generated_text = ""
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
@@ -116,18 +121,50 @@ def predict_text(text, url = 'https://arinsight.co/2024_FA_AEC_1200_GR1_GR2.pdf'
     return buffer
-# Define the Gradio interface
-interface = gr.Interface(
-    fn=predict_text,
-    inputs=[
-        # gr.Image(type="pil", label="Image Input"),  # Image input with label
-        gr.Textbox(label="Text Input")  # Textbox input with label
-    ],
-    outputs=gr.Textbox(label="Generated Response"),  # Output with a more descriptive label
-    title="Llama 3.2 11B Vision Instruct Demo",  # Title of the interface
-    description="This demo uses Meta's Llama 3.2 11B Vision model to generate responses based on an image and text input.",  # Short description
-    theme="compact"  # Using a compact theme for a cleaner look
 )
-# Launch the interface
-interface.launch(debug=True)

+# import gradio as gr
 import os
 import torch
 from transformers import AutoProcessor, MllamaForConditionalGeneration, TextIteratorStreamer
 import requests
 from PyPDF2 import PdfReader
 from threading import Thread
+from flask import Flask, request, jsonify
 # Check if we're running in a Hugging Face Space and if SPACES_ZERO_GPU is enabled
 # IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
 print(f"Using device: {device}")
 print(f"Low memory mode: {LOW_MEMORY}")
+app = Flask(__name__)
 # Get Hugging Face token from environment variables
 HF_TOKEN = os.environ.get('HF_TOKEN')
 # raise HTTPException(status_code=400, detail=f"Error extracting text from PDF: {str(e)}")
 @spaces.GPU
+def predict_text(text):
+    # pdf_text = extract_text_from_pdf('https://arinsight.co/2024_FA_AEC_1200_GR1_GR2.pdf')
+    text_combined = text # + "\n\nExtracted Text from PDF:\n" + pdf_text
     # Prepare the input messages
     messages = [{"role": "user", "content": [{"type": "text", "text": text_combined}]}]
     streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
+    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
     generated_text = ""
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     return buffer
+# Instruction text that extract_info prepends to the extracted PDF text before
+# passing the combined prompt to predict_text. The adjacent string literals
+# below are concatenated into a single string.
+PROMPT = (
+    "Extract the following information from the provided text ONLY "
+    "Course Code, Course Name, Credit, Delivery method, Course description, and Topical outline and do not add anything else except the information available in this text. "
 )
+@app.route("/", methods=["GET"])
+def home():
+    """Return a JSON welcome message that points clients at /extract."""
+    welcome = {"message": "Welcome to the PDF Extraction API. Use the /extract endpoint to extract information."}
+    return jsonify(welcome)
+@app.route("/favicon.ico")
+def favicon():
+    """Answer favicon requests with an empty body and HTTP status 204."""
+    empty_body, no_content = "", 204
+    return empty_body, no_content
+@app.route("/extract", methods=["POST"])
+def extract_info():
+    """Extract course information from the PDF whose URL is given in the JSON body.
+
+    Expects a JSON object containing a "url" key. Returns
+    ``{"extracted_info": ...}`` on success, a JSON error with status 400 when
+    the body is missing, is not a JSON object, or has no usable "url", and a
+    JSON error with status 500 when extraction or generation raises.
+    """
+    # silent=True returns None instead of raising on a missing/invalid JSON
+    # body or a non-JSON content type, so the client always receives the JSON
+    # 400 below rather than Flask's default error response.
+    data = request.get_json(silent=True)
+    # Require a JSON object: a bare string or list body would otherwise reach
+    # data["url"] (outside the try) and fail with an unhandled exception.
+    if not isinstance(data, dict) or "url" not in data:
+        return jsonify({"error": "Please provide a PDF URL in the request body."}), 400
+    pdf_url = data["url"]
+    if not isinstance(pdf_url, str) or not pdf_url.strip():
+        return jsonify({"error": "Please provide a PDF URL in the request body."}), 400
+    # NOTE(review): pdf_url is caller-controlled and is presumably fetched
+    # server-side by extract_text_from_pdf -- consider restricting schemes and
+    # hosts to avoid SSRF; confirm against that helper.
+    try:
+        pdf_text = extract_text_from_pdf(pdf_url)
+        prompt = f"{PROMPT}\n\n{pdf_text}"
+        response = predict_text(prompt)
+        return jsonify({"extracted_info": response})
+    except Exception as e:
+        # Top-level request boundary: report any failure as a JSON 500.
+        return jsonify({"error": str(e)}), 500
+# Start the Flask server only when this file is executed directly (not when
+# imported), listening on all interfaces (0.0.0.0) at port 7860.
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860)
+# # Define the Gradio interface
+# interface = gr.Interface(
+#     fn=predict_text,
+#     inputs=[
+#         # gr.Image(type="pil", label="Image Input"),  # Image input with label
+#         gr.Textbox(label="Text Input")  # Textbox input with label
+#     ],
+#     outputs=gr.Textbox(label="Generated Response"),  # Output with a more descriptive label
+#     title="Llama 3.2 11B Vision Instruct Demo",  # Title of the interface
+#     description="This demo uses Meta's Llama 3.2 11B Vision model to generate responses based on an image and text input.",  # Short description
+#     theme="compact"  # Using a compact theme for a cleaner look
+# )
+# # Launch the interface
+# interface.launch(debug=True)