awacke1 committed on
Commit
6e50ac3
·
verified ·
1 Parent(s): f3ccadf

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from datasets import load_dataset
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
4
+ import torch
5
+
6
def load_orca_dataset():
    """Fetch the Orca AgentInstruct dataset from the Hugging Face Hub.

    An info banner is shown in the Streamlit page before the download
    starts, because the call may take a while.

    Returns:
        The object produced by ``load_dataset`` for
        ``microsoft/orca-agentinstruct-1M-v1``.
    """
    st.info("Loading dataset... This may take a while.")
    orca = load_dataset("microsoft/orca-agentinstruct-1M-v1")
    return orca
9
+
10
@st.cache_resource
def load_model_and_tokenizer(model_name):
    """Load a sequence-classification model and its tokenizer by name.

    The result is cached with ``st.cache_resource`` so that Streamlit reruns
    with the same ``model_name`` reuse the already-loaded objects.
    ``st.cache_resource`` (rather than ``st.cache_data``) is used because the
    return value is a model/tokenizer pair: it should be shared as-is instead
    of being serialised and copied on every cache hit.

    Args:
        model_name: Hugging Face model identifier passed to
            ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model
16
+
17
def evaluate_model(ds, tokenizer, model, max_samples):
    """Run a text-classification pipeline over the first examples of ``ds``.

    Args:
        ds: Iterable of examples; each one is indexed with ``["text"]``.
            NOTE(review): assumes every example exposes a "text" field --
            confirm against the schema of the dataset being evaluated.
        tokenizer: Tokenizer handed to the pipeline.
        model: Sequence-classification model handed to the pipeline.
        max_samples: Upper bound on the number of examples to classify.

    Returns:
        A list of dicts with keys ``"input"``, ``"label"`` and ``"score"``,
        one per classified example, in iteration order.
    """
    st.info("Evaluating the model...")
    # Use the first GPU when one is available, otherwise run on CPU.
    device = 0 if torch.cuda.is_available() else -1
    classifier = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        device=device,
    )

    results = []
    for i, example in enumerate(ds):
        if i >= max_samples:
            break
        input_text = example["text"]
        # Truncate to the model's maximum length so that long inputs are
        # classified instead of failing inside the model.
        result = classifier(input_text, truncation=True)[0]
        results.append(
            {
                "input": input_text,
                "label": result["label"],
                "score": result["score"],
            }
        )
    return results
29
+
30
def main():
    """Render the Streamlit page: dataset browser plus model evaluator.

    Flow:
      1. A sidebar button loads the dataset and stores it in
         ``st.session_state["dataset"]`` so it survives reruns.
      2. Once a dataset is present, its info and a shuffled sample are shown.
      3. A second button loads the named model, evaluates it on a shuffled
         subset and stores the results in ``st.session_state["results"]``.
      4. Stored results are displayed and offered as a CSV download.
    """
    # Local import: the CSV export below needs pandas, which the module-level
    # imports do not bring into scope.
    import pandas as pd

    st.title("Orca Dataset Browser and Model Evaluator")

    st.sidebar.header("Configuration")
    load_dataset_btn = st.sidebar.button("Load Dataset")

    if load_dataset_btn:
        dataset = load_orca_dataset()
        st.session_state["dataset"] = dataset

    if "dataset" in st.session_state:
        dataset = st.session_state["dataset"]

        st.subheader("Dataset Explorer")
        # NOTE(review): assumes the loaded dataset has a "train" split --
        # confirm against the dataset card.
        st.write(dataset["train"].info)

        sample_size = st.slider("Number of Samples to Display", min_value=1, max_value=20, value=5)
        # Shuffle once with a fixed seed and reuse it for both the preview
        # and the evaluation subset.
        shuffled = dataset["train"].shuffle(seed=42)
        st.write(shuffled.select(range(sample_size)))

        st.subheader("Model Evaluator")
        model_name = st.text_input("Enter Hugging Face Model Name", value="distilbert-base-uncased-finetuned-sst-2-english")
        max_samples = st.number_input("Number of Samples to Evaluate", min_value=1, max_value=100, value=10)

        if st.button("Load Model and Evaluate"):
            tokenizer, model = load_model_and_tokenizer(model_name)
            # Keep the results in session state: a button is only True for
            # the single rerun after the click, so without this the results
            # would vanish as soon as the download button triggers a rerun.
            st.session_state["results"] = evaluate_model(
                shuffled.select(range(max_samples)),
                tokenizer,
                model,
                max_samples,
            )

        if "results" in st.session_state:
            results = st.session_state["results"]

            st.subheader("Evaluation Results")
            st.write(results)

            st.download_button(
                label="Download Results as CSV",
                data=pd.DataFrame(results).to_csv(index=False),
                file_name="evaluation_results.csv",
                mime="text/csv",
            )
67
+
68
# Script entry point: build the page only when this file is run directly.
if __name__ == "__main__":
    main()