Create main.py
main.py
ADDED
import string
from io import BytesIO

import nltk
import pandas as pd
import streamlit as st
from fuzzywuzzy import fuzz, process
from nltk.corpus import stopwords

nltk.download('stopwords')


# Helper functions
def clean_text(text):
    """Lowercase the text and strip punctuation (used for exact matching)."""
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    return text


def clean_text_fuzzy(text):
    """Lowercase the text, strip punctuation and English stopwords (used for fuzzy matching)."""
    stop_words = set(stopwords.words('english'))
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    words = [word for word in text.split() if word not in stop_words]
    return " ".join(words)


def process_matching(keywords, article, fuzzy, mode):
    """Count how often each keyword occurs in the article, exactly or fuzzily."""
    keywords = [clean_text(k) for k in keywords]

    words_exact = clean_text(article).split()
    words_fuzzy = clean_text_fuzzy(article).split()

    results = {}
    for keyword in keywords:
        # Compare each keyword only against n-grams of its own word count,
        # so shorter keywords are not double-counted inside longer n-grams.
        n = len(keyword.split())
        results.setdefault(keyword, 0)

        if fuzzy:
            n_grams = [" ".join(words_fuzzy[i:i + n]) for i in range(len(words_fuzzy) - n + 1)]
            matches = process.extract(keyword, n_grams, scorer=fuzz.partial_ratio, limit=None)
            results[keyword] += sum(1 for _, score in matches if score > 90)
        else:
            n_grams = [" ".join(words_exact[i:i + n]) for i in range(len(words_exact) - n + 1)]
            results[keyword] += n_grams.count(keyword)

    if mode == "filter":
        # Filter mode keeps only the keywords that appear at least once.
        results = {k: v for k, v in results.items() if v > 0}

    return results


# Streamlit app
st.title("Keyword Matcher")

# Mode selection
mode = st.radio("Select Mode:", ["Keyword Frequency", "Keyword Filter"], horizontal=True)
mode = "frequency" if mode == "Keyword Frequency" else "filter"

# Keyword input: comma-separated text, or the first column of an uploaded file
st.subheader("Keywords")
keywords_input = st.text_area("Enter keywords (comma separated):")
uploaded_file = st.file_uploader("Or upload a CSV/Excel file with keywords (first column):", type=["csv", "xlsx"])

keywords = []
if uploaded_file:
    if uploaded_file.name.endswith(".csv"):
        df = pd.read_csv(uploaded_file)
    else:
        df = pd.read_excel(uploaded_file)
    if not df.empty:
        keywords = df.iloc[:, 0].dropna().astype(str).tolist()
else:
    keywords = [k.strip() for k in keywords_input.split(",") if k.strip()]

# Article input
st.subheader("Article")
article = st.text_area("Paste the article text here:")

# Fuzzy matching toggle
fuzzy = st.checkbox("Enable Fuzzy Matching")

# Process button
if st.button("Process"):
    if not keywords:
        st.error("Please provide keywords.")
    elif not article:
        st.error("Please provide an article.")
    else:
        results = process_matching(keywords, article, fuzzy, mode)

        st.subheader("Results")
        for keyword, count in results.items():
            st.write(f"{keyword}: {count}")

        # Offer the results as an Excel download. The sheet is written to an
        # in-memory buffer because DataFrame.to_excel() returns None, not bytes.
        # (A "save" checkbox inside this button branch would reset the button
        # state on rerun, so the download button is shown directly.)
        df_results = pd.DataFrame(list(results.items()), columns=["Keyword", "Count"])
        buffer = BytesIO()
        df_results.to_excel(buffer, index=False, engine='openpyxl')
        st.download_button(
            label="Download Results as Excel",
            data=buffer.getvalue(),
            file_name="results.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
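
For a quick look at the fuzzy n-gram scoring that process_matching relies on, outside of Streamlit, here is a minimal sketch (assuming fuzzywuzzy is installed; the sample sentence and keyword are illustrative only and not part of the Space):

from fuzzywuzzy import fuzz, process

article = "machine learning models learn patterns from data"
words = article.split()
# Sliding-window bigrams, mirroring the n-gram construction in process_matching.
bigrams = [" ".join(words[i:i + 2]) for i in range(len(words) - 1)]

# Score every bigram against the keyword and count those above the 90 threshold.
matches = process.extract("machine learning", bigrams, scorer=fuzz.partial_ratio, limit=None)
print(sum(1 for _, score in matches if score > 90))  # should print 1: only the "machine learning" bigram clears 90

To try the app itself locally, run streamlit run main.py with streamlit, pandas, fuzzywuzzy, nltk, and openpyxl installed.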