Spaces:

Diezu
/

Correct_spelling_mistakes_app

Sleeping

App Files Files Community

Diezu committed on Jan 16

Commit

828db21

verified ·

1 Parent(s): c45f7b9

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -121

app.py CHANGED Viewed

@@ -1,123 +1,50 @@
 import streamlit as st
-from transformers import pipeline
-import difflib
-# Thư viện tra cứu nghĩa
-# Giả sử bạn có một hàm hoặc cơ sở dữ liệu trả về nghĩa
-def get_word_meaning(word):
-    # Thay bằng hàm thực tế tra cứu nghĩa từ
-    dictionary = {
-        "sai": "Không đúng, lệch lạc so với thực tế.",
-        "đúng": "Phù hợp với sự thật hoặc tiêu chuẩn.",
-    }
-    return dictionary.get(word, "Không tìm thấy nghĩa.")
-# Cấu hình ứng dụng
-MAX_LENGTH = 512
-st.set_page_config(
-    page_title="Demo Correct Spelling Mistakes",
-    page_icon="🤖",
-    layout="centered",
-    initial_sidebar_state="auto"
-)
-# CSS tùy chỉnh
-custom_css = """
-<style>
-    body {
-        background-color: #f4f4f4;
-        font-family: 'Arial', sans-serif;
-    }
-    .main {
-        background-color: #ffffff;
-        padding: 20px;
-        border-radius: 10px;
-        box-shadow: 0 4px 10px rgba(0, 0, 0, 0.1);
-        max-width: 800px;
-        margin: 0 auto;
-    }
-    h1 {
-        text-align: center;
-        color: #4a90e2;
-    }
-    textarea {
-        font-family: 'Courier New', Courier, monospace;
-        font-size: 14px;
-        color: #333;
-    }
-    .stButton button {
-        background-color: #4a90e2;
-        color: white;
-        border: none;
-        border-radius: 5px;
-        padding: 10px 20px;
-        font-size: 16px;
-        cursor: pointer;
-    }
-    .stButton button:hover {
-        background-color: #357ABD;
-    }
-    .markdown-text-container {
-        margin-top: 20px;
-    }
-    .highlight {
-        color: #d9534f;
-        font-weight: bold;
-    }
-    .meaning {
-        color: #5cb85c;
-        font-style: italic;
-    }
-</style>
-"""
-st.markdown(custom_css, unsafe_allow_html=True)
-st.title("Correct Spelling Mistakes App")
-# Load mô hình
-model_checkpoint = "Diezu/bat_pho_bo"  # Thay đổi checkpoint phù hợp
-correct_spelling = pipeline("text2text-generation", model=model_checkpoint)
-# Nhập liệu từ người dùng
-context = st.text_area("Input text", placeholder="Nhập văn bản có lỗi chính tả...")
-# Xử lý nút bấm
-if st.button("Get Result"):
-    if context.strip():
-        try:
-            # Sử dụng pipeline để sửa lỗi chính tả
-            result = correct_spelling(context, max_length=MAX_LENGTH)
-            corrected_text = result[0]['generated_text'] if result else "No output generated."
-            # So sánh và làm nổi bật sự khác biệt
-            def highlight_differences_with_meaning(original, corrected):
-                highlighted_text = []
-                matcher = difflib.SequenceMatcher(None, original, corrected)
-                for tag, i1, i2, j1, j2 in matcher.get_opcodes():
-                    if tag == 'replace':  # Nếu đoạn văn bị thay thế
-                        word = corrected[j1:j2]
-                        meaning = get_word_meaning(word)
-                        highlighted_text.append(
-                            f"<span class='highlight'>{word}</span> <span class='meaning'>({meaning})</span>"
-                        )
-                    elif tag == 'insert':  # Nếu đoạn mới được thêm
-                        word = corrected[j1:j2]
-                        meaning = get_word_meaning(word)
-                        highlighted_text.append(
-                            f"<span class='highlight'>{word}</span> <span class='meaning'>({meaning})</span>"
-                        )
-                    elif tag == 'delete':  # Nếu đoạn bị xóa
-                        highlighted_text.append(f"<span class='highlight'>{original[i1:i2]}</span>")
-                    else:  # Nếu đoạn không thay đổi
-                        highlighted_text.append(corrected[j1:j2])
-                return "".join(highlighted_text)
-            # Làm nổi bật các thay đổi với ngữ nghĩa
-            highlighted_text = highlight_differences_with_meaning(context, corrected_text)
-            # Hiển thị kết quả
-            st.markdown(f"### Corrected Text (with highlighted changes):\n\n{highlighted_text}", unsafe_allow_html=True)
-        except Exception as e:
-            st.error(f"An error occurred: {e}")
     else:
-        st.warning("Please input some text to process!")

 import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSequenceClassification  # Swap in the classes that match your own model
+import torch
+# Load the tokenizer and the classification model for the checkpoint; st.cache_resource is used so the pair is built once and reused across Streamlit reruns (replace the checkpoint with your own spelling-error model)
+@st.cache_resource
+def load_model():
+    tokenizer = AutoTokenizer.from_pretrained("Diezu/bat_pho_bo")  # Replace with your own model name. NOTE(review): the removed revision ran this same checkpoint through a text2text-generation pipeline; confirm it actually ships a sequence-classification head
+    model = AutoModelForSequenceClassification.from_pretrained("Diezu/bat_pho_bo")
+    return tokenizer, model
+# Spelling-error detection: splits the text on whitespace, classifies each word on its own with the given tokenizer/model, and returns the list of words predicted as class 1
+def detect_errors(text, tokenizer, model):
+    errors = []
+    words = text.split()
+    for word in words:
+        inputs = tokenizer(word, return_tensors="pt", padding=True, truncation=True)
+        with torch.no_grad():
+            outputs = model(**inputs)
+        # Turn the logits into class probabilities and pick the top class (class 1 is assumed to mean "misspelled word"; confirm against the model's label mapping)
+        probabilities = torch.softmax(outputs.logits, dim=-1)
+        predicted_class = torch.argmax(probabilities, dim=-1).item()
+        if predicted_class == 1:  # The word was classified as misspelled
+            errors.append(word)
+    return errors
+# Streamlit app: page title, model loading, text input, and the button handler that reports flagged words
+st.title("Công cụ phát hiện lỗi chính tả")
+# Load the tokenizer and model through load_model()
+tokenizer, model = load_model()
+# Text input area for the user's text
+input_text = st.text_area("Nhập văn bản của bạn tại đây:", height=200)
+if st.button("Phát hiện lỗi"):
+    if input_text.strip():
+        # Run detection and show the flagged words in bold, or a success message when none are flagged
+        errors = detect_errors(input_text, tokenizer, model)
+        if errors:
+            st.subheader("Các từ phát hiện lỗi:")
+            st.write(", ".join(f"**{word}**" for word in errors))
+        else:
+            st.success("Không phát hiện lỗi nào trong văn bản.")
     else:
+        st.warning("Vui lòng nhập văn bản để kiểm tra.")