mgbam committed
Commit 4fe5752 · Parent: 4520b6b

Add application file
Dockerfile ADDED
@@ -0,0 +1,18 @@
+ # Use an official Python runtime as a parent image
+ FROM python:3.9-slim
+
+ # Set working directory
+ WORKDIR /app
+
+ # Copy requirements and install dependencies
+ COPY requirements.txt .
+ RUN pip install --upgrade pip && pip install -r requirements.txt
+
+ # Copy app code
+ COPY . .
+
+ # Expose the port FastAPI uses
+ EXPOSE 8000
+
+ # Run the FastAPI app (to also serve the Gradio demo, mount it onto the FastAPI app as sketched below, or run it as a separate process)
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
README.md CHANGED
@@ -1,14 +1 @@
- ---
- title: Materials Ai App
- emoji: 🏃
- colorFrom: purple
- colorTo: green
- sdk: gradio
- sdk_version: 5.23.1
- app_file: app.py
- pinned: false
- license: mit
- short_description: Materials science web application that leverages domain‐spec
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Project overview and instructions
app/__init__.py ADDED
File without changes
app/main.py ADDED
@@ -0,0 +1,46 @@
+ # app/main.py
+ import uvicorn
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+
+ from app import models, openai_integration
+
+ app = FastAPI(title="Materials AI Extraction API")
+
+ # Pydantic models for request/response bodies
+ class ExtractionRequest(BaseModel):
+     text: str
+
+ class QueryRequest(BaseModel):
+     query: str
+
+ class SummarizeRequest(BaseModel):
+     text: str
+
+ @app.post("/extract")
+ async def extract_data(request: ExtractionRequest):
+     try:
+         # Use the domain-specific model (e.g. MatSciBERT or BatteryBERT) for token classification
+         extracted = models.extract_entities(request.text)
+         return {"entities": extracted}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.post("/query")
+ async def query_data(request: QueryRequest):
+     try:
+         # Answer the provided query using the domain models
+         answer = models.answer_question(request.query)
+         return {"answer": answer}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.post("/summarize")
+ async def summarize(request: SummarizeRequest):
+     try:
+         summary = openai_integration.generate_summary(request.text)
+         return {"summary": summary}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=8000)
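Once the server is running, the endpoints can be smoke-tested with a short requests snippet (a sketch; the sample sentences are illustrative):

import requests

r = requests.post("http://localhost:8000/extract",
                  json={"text": "LiFePO4 cathodes retain capacity at high C-rates."})
print(r.json())  # {"entities": [...]}

r = requests.post("http://localhost:8000/summarize",
                  json={"text": "Paste a long abstract here..."})
print(r.json())  # {"summary": "..."}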
app/models.py ADDED
@@ -0,0 +1,30 @@
+ # app/models.py
+ # Model loading and inference functions
+ import torch
+ from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+
+ # Load a domain-specific model (example: MatSciBERT for materials text).
+ # Note: the base checkpoint has no fine-tuned NER head; point this at a
+ # token-classification fine-tune for meaningful labels.
+ MATS_BERT_MODEL = "m3rg-iitd/matscibert"  # adjust model name as needed
+ tokenizer = AutoTokenizer.from_pretrained(MATS_BERT_MODEL)
+ model = AutoModelForTokenClassification.from_pretrained(MATS_BERT_MODEL)
+
+ # Create a pipeline for token classification (NER)
+ ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
+
+ def extract_entities(text: str):
+     """
+     Process text with a domain-specific BERT model and extract named entities.
+     """
+     results = ner_pipeline(text)
+     # Format the output as a list of {entity, word, score} dicts;
+     # cast the numpy score to float so the response is JSON-serializable
+     entities = [{"entity": r["entity_group"], "word": r["word"], "score": float(r["score"])} for r in results]
+     return entities
+
+ def answer_question(query: str):
+     """
+     For demonstration, this uses a trivial approach.
+     In practice, combine a retrieval step with a Q&A model (see the sketch below).
+     """
+     # For example purposes, we simulate an answer by echoing the query.
+     # Replace this with your domain-specific Q&A logic.
+     return f"Simulated answer for query: '{query}'"
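As a concrete starting point for the retrieval-plus-Q&A approach mentioned in the docstring, a minimal sketch using the transformers question-answering pipeline (the checkpoint and context passage are illustrative, not part of this repo):

from transformers import pipeline

# Extractive Q&A over a fixed context; in practice, feed in retrieved passages
qa = pipeline("question-answering", model="deepset/roberta-base-squad2")
result = qa(question="What cathode material is used?",
            context="The cell pairs a LiFePO4 cathode with a graphite anode.")
print(result["answer"])  # -> "LiFePO4"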
app/openai_integration.py ADDED
@@ -0,0 +1,18 @@
+ # app/openai_integration.py
+ import os
+
+ from openai import OpenAI
+
+ # The client reads the OpenAI API key from the OPENAI_AI_KEY-style env var;
+ # openai>=1.0 removed the legacy openai.ChatCompletion interface
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+ def generate_summary(text: str) -> str:
+     """
+     Generate a summary of the given text using GPT-4.
+     """
+     response = client.chat.completions.create(
+         model="gpt-4",
+         messages=[{"role": "user", "content": f"Please summarize the following text:\n\n{text}"}],
+         temperature=0.3,
+     )
+     return response.choices[0].message.content.strip()
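With OPENAI_API_KEY exported, the helper can be exercised directly (the sample text is illustrative):

from app.openai_integration import generate_summary

print(generate_summary(
    "Solid-state electrolytes promise higher energy density but still face "
    "interfacial resistance at the electrode boundary."
))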
app/utils.py ADDED
@@ -0,0 +1 @@
+ # Utility functions, e.g., PDF/text parsing
gradio_app.py ADDED
@@ -0,0 +1,37 @@
+ # gradio_app.py
+ import os
+
+ import gradio as gr
+ import requests
+
+ # URL of the backend API (if hosted separately; otherwise the local endpoint is used)
+ API_URL = os.getenv("API_URL", "http://localhost:8000")
+
+ def extract_interface(text):
+     response = requests.post(f"{API_URL}/extract", json={"text": text})
+     if response.ok:
+         return response.json()["entities"]
+     else:
+         return {"error": response.text}
+
+ def summarize_interface(text):
+     response = requests.post(f"{API_URL}/summarize", json={"text": text})
+     if response.ok:
+         return response.json()["summary"]
+     else:
+         # Return a string so it renders in the Textbox output
+         return f"Error: {response.text}"
+
+ with gr.Blocks(title="Materials AI Extraction Demo") as demo:
+     gr.Markdown("## Materials Science AI Extraction")
+     with gr.Tabs():
+         with gr.TabItem("Extract Entities"):
+             input_text = gr.Textbox(label="Enter Materials Science Text", lines=5)
+             output_entities = gr.JSON(label="Extracted Entities")
+             extract_btn = gr.Button("Extract")
+             extract_btn.click(fn=extract_interface, inputs=input_text, outputs=output_entities)
+         with gr.TabItem("Summarize Text"):
+             summary_input = gr.Textbox(label="Enter Text to Summarize", lines=5)
+             summary_output = gr.Textbox(label="Summary")
+             summarize_btn = gr.Button("Summarize")
+             summarize_btn.click(fn=summarize_interface, inputs=summary_input, outputs=summary_output)
+
+ # Guard the launch so the module can be imported (e.g., to mount the demo onto FastAPI)
+ if __name__ == "__main__":
+     demo.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ fastapi
+ uvicorn[standard]
+ transformers
+ torch
+ openai
+ gradio
+ requests
+ pydantic
run_backend.cmd ADDED
@@ -0,0 +1,4 @@
+ @echo off
+ REM Optionally activate your virtual environment here, e.g., call venv\Scripts\activate
+ REM Run as a module from the repo root so "from app import ..." resolves
+ python -m app.main
+ pause
run_gradio.cmd ADDED
@@ -0,0 +1,4 @@
+ @echo off
+ REM Optionally activate your virtual environment here, e.g., call venv\Scripts\activate
+ python gradio_app.py
+ pause