manaviel85370 committed · Commit 56abaa1 · 1 Parent(s): 5f8d317
refactor infos
app.py CHANGED
@@ -5,7 +5,6 @@ st.set_page_config(
     page_title="Hello",
     page_icon="π",
 )
-st.info(f"Speicherauslastung vor imports: {psutil.virtual_memory().percent}%. Keys in Cache: {[k for k in st.session_state]}")
 
 st.write("# Willkommen zum Event-Daten-Extraktions-Tool! π")
 st.write("""
pages/5_Playground.py CHANGED
@@ -1,13 +1,6 @@
-
 import logging
-import os
-import sys
-import gc
-import psutil
 import streamlit as st
 import pandas as pd
-st.info(f"Speicherauslastung vor imports: {psutil.virtual_memory().percent}%. Keys in Cache: {[k for k in st.session_state]}")
-
 
 from src.configuration.config import SessionStateConfig
 from src.nlp.playground.textsummarization import SumySummarizer
@@ -76,9 +69,6 @@ def clear_st_cache():
 db = init_connection()
 data = init_data()
 
-st.info(f"Speicherauslastung: {psutil.virtual_memory().percent}%. Keys in Cache: {[k for k in st.session_state]}")
-
-
 with st.expander("Large Language Models"):
     with st.form("Settings LLM"):
         count = st.number_input("Wie viele Veranstaltungen sollen gestest werden?", step=1)
@@ -165,7 +155,6 @@ with st.expander("Titel Extraktion"):
 if submit_title_extr:
     init_session_state("title_extractor", TitleExtractor())
     title_extractor = st.session_state.title_extractor
-    st.info(f"Speicherauslastung: {psutil.virtual_memory().percent}%. Keys in Cache: {[k for k in st.session_state]}")
 
     for event in data:
         text = normalize_data(event["data"])
@@ -191,7 +180,6 @@ with st.expander("Textsummarization"):
 if submit_textsummarization:
     init_session_state(SessionStateConfig.SUMY_SUMMARIZER, SumySummarizer())
     sumy_summarizer = st.session_state[SessionStateConfig.SUMY_SUMMARIZER]
-    st.info(f"Speicherauslastung: {psutil.virtual_memory().percent}%. Keys in Cache: {[k for k in st.session_state]}")
     for event in data:
         try:
             md = normalize_data(event["data"])
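Note on the caching pattern used in 5_Playground.py: the page calls an init_session_state(key, value) helper before reading models back out of st.session_state. The helper itself is not part of this diff, so the following is only a minimal sketch, assuming it implements the usual Streamlit put-if-absent idiom:

import streamlit as st

def init_session_state(key, default):
    # Put-if-absent: keep expensive objects (extractors, summarizers) alive
    # across reruns instead of rebuilding them on every interaction.
    if key not in st.session_state:
        st.session_state[key] = default
    return st.session_state[key]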
src/nlp/playground/llm.py CHANGED
@@ -15,7 +15,6 @@ class QwenLlmHandler:
             token=os.getenv("INFERENCE_API_TOKEN"),
         )
         st.info("Using LLM Qwen/Qwen2.5-Coder-32B-Instruct via inference API")
-        st.info(f"Speicherauslastung: {psutil.virtual_memory().percent}%. Keys in Cache: {[k for k in st.session_state]}")
 
 
 
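For context on the QwenLlmHandler hunk above: the constructor call is truncated in this hunk, so the exact client class is not visible. A minimal sketch of reaching Qwen/Qwen2.5-Coder-32B-Instruct over the Hugging Face Inference API with the same token variable, assuming huggingface_hub's InferenceClient, could look like this:

import os
from huggingface_hub import InferenceClient

# Assumption: the handler wraps an Inference API client; InferenceClient is
# one concrete way to do that with the token read from INFERENCE_API_TOKEN.
client = InferenceClient(
    model="Qwen/Qwen2.5-Coder-32B-Instruct",
    token=os.getenv("INFERENCE_API_TOKEN"),
)

response = client.chat_completion(
    messages=[{"role": "user", "content": "Nenne Titel und Datum der Veranstaltung im folgenden Text: ..."}],
    max_tokens=256,
)
print(response.choices[0].message.content)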
src/nlp/playground/ner.py CHANGED
@@ -9,8 +9,7 @@ LABELS = ["eventTitle", "eventLocation", "date", "time", "street", "city"]
 class GlinerHandler:
     def __init__(self, model_name="urchade/gliner_multi-v2.1"):
         self.model = GLiNER.from_pretrained(model_name)
-        st.info("
-        st.info(f"Speicherauslastung vor imports: {psutil.virtual_memory().percent}%. Keys in Cache: {[k for k in st.session_state]}")
+        st.info("Using NER Model Gliner")
 
     def extract_entities(self, text, labels=None, threshold=0.3):
         if labels is None:
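The GlinerHandler hunk above wraps GLiNER; a minimal standalone sketch with the same model and label set (the example text is made up) is:

from gliner import GLiNER

# Same model and labels as in src/nlp/playground/ner.py.
labels = ["eventTitle", "eventLocation", "date", "time", "street", "city"]
model = GLiNER.from_pretrained("urchade/gliner_multi-v2.1")

# predict_entities is GLiNER's standard inference call; extract_entities in
# the handler presumably delegates to something like this.
entities = model.predict_entities(
    "Jazzkonzert im Stadtpark am 12.07. um 19:00 Uhr, Parkstrasse 5, Hannover",
    labels,
    threshold=0.3,
)
for ent in entities:
    print(f'{ent["label"]}: {ent["text"]}')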
src/nlp/playground/pipelines/description_extractor.py CHANGED
@@ -17,7 +17,6 @@ class DescriptionExtractor:
         if SessionStateConfig.SUMY_SUMMARIZER not in st.session_state:
             st.session_state[SessionStateConfig.SUMY_SUMMARIZER] = SumySummarizer()
         sumy_summary = st.session_state[SessionStateConfig.SUMY_SUMMARIZER].summarize(text)
-        st.info("Loaded Sumy Summarizer Model")
         st.info(f"{psutil.virtual_memory()}")
         description = []
         for element in md_analyzer:
src/nlp/playground/textclassification.py CHANGED
@@ -138,9 +138,7 @@ class ZeroShotClassifier:
         self.classifier = pipeline(
             task="zero-shot-classification",
             model="Sahajtomar/German_Zeroshot")
-        st.info("
-        st.info(
-            f"Speicherauslastung vor imports: {psutil.virtual_memory().percent}%. Keys in Cache: {[k for k in st.session_state]}")
+        st.info("Using ZeroShotClassification with Model Sahajtomar/German_Zeroshot")
 
     def classify(self, text, mode: ClassifierMode):
         predictions = self.classifier(text, mode.labels, hypothesis_template=mode.hypothesis_template)
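The ZeroShotClassifier hunk builds a standard transformers zero-shot pipeline; ClassifierMode (labels and hypothesis template) is not shown in this diff, so the values below are illustrative only:

from transformers import pipeline

classifier = pipeline(
    task="zero-shot-classification",
    model="Sahajtomar/German_Zeroshot",
)

# Illustrative labels and German hypothesis template; the real ones come
# from ClassifierMode in the surrounding module.
result = classifier(
    "Das Konzert findet am Samstag im Stadtpark statt.",
    candidate_labels=["Konzert", "Theater", "Lesung", "Sport"],
    hypothesis_template="In diesem Text geht es um {}.",
)
print(result["labels"][0], round(result["scores"][0], 3))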
src/nlp/playground/textsummarization.py CHANGED
@@ -26,9 +26,7 @@ class SumySummarizer:
         stemmer = Stemmer(self.LANGUAGE)
 
         summarizer = Summarizer(stemmer)
-        st.info("
-        st.info(
-            f"Speicherauslastung vor imports: {psutil.virtual_memory().percent}%. Keys in Cache: {[k for k in st.session_state]}")
+        st.info("Using Textsummarization Model Sumy Summarizer")
         summarizer.stop_words = get_stop_words(self.LANGUAGE)
 
         summary = summarizer(parser.document, self.SENTENCES_COUNT)
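For reference, the SumySummarizer hunk follows Sumy's usual flow: build a stemmer, set stop words, then call summarizer(parser.document, n). Which concrete summarizer class and language constants the wrapper uses are not visible here, so this sketch assumes LSA and German:

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.utils import get_stop_words

LANGUAGE = "german"      # assumption: self.LANGUAGE is not shown in the hunk
SENTENCES_COUNT = 3      # assumption: self.SENTENCES_COUNT is not shown either

text = "..."  # event description to summarize
parser = PlaintextParser.from_string(text, Tokenizer(LANGUAGE))

summarizer = LsaSummarizer(Stemmer(LANGUAGE))
summarizer.stop_words = get_stop_words(LANGUAGE)

for sentence in summarizer(parser.document, SENTENCES_COUNT):
    print(sentence)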