# NOTE: web-page residue captured together with this file (not program code):
# "egumasa's picture" / "push" / "eca7f4a"
import re
# import spacy_streamlit
# from collections import Counter
import spacy
# from spacy.tokens import Doc
# from spacy_streamlit import visualize_spans
import streamlit as st
from utils.utility import delete_overlapping_span, cleanup_justify
from utils.visualize import visualize_spans
# nlp = spacy.load(
# "packages/en_engagement_RoBERTa-0.0.2/en_engagement_RoBERTa/en_engagement_RoBERTa-0.0.2"
# )
# Load from local storage
# MODEL_LIST = ['en_engagement_RoBERTa-ME-AtoE.tar.gz']
# model = st.selectbox('Select model', MODEL_LIST, index=0)
# nlp = spacy.load("packages/" + model)
# Load from huggingface
# sm = spacy.load('en_core_web_sm', disable=['ner'])
# Page-level Streamlit settings: browser-tab title, full-width layout, and a
# sidebar that is expanded on first load.
# NOTE(review): the tab title says "beta ver 0.3" while the st.header() call
# further down this file says "beta ver 0.2" — confirm which one is current.
_PAGE_SETTINGS = {
    "page_title": "ENGAGEMENT analyzer (beta ver 0.3)",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
@st.cache_resource
def load_model():
    """Load and return the spaCy pipeline named ``en_engagement_LSTM``.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the loaded pipeline instead of loading it again on every script rerun.
    """
    # Alternative pipeline names kept from earlier revisions:
    #   "en_engagement_RoBERTa_context_flz"
    #   "en_engagement_spl_RoBERTa_base_attention"
    return spacy.load("en_engagement_LSTM")
# Pipeline instance used by the rest of the script.
nlp = load_model()

# Text analysed before any form submission; `doc` is reassigned further down
# once the user submits the form.
_WELCOME_TEXT = "Welcome! Probably this is one of the few attempts to teach a machine how to read the discourse...! Although it is not perfect, you should be able to get a good place to start for your stance-taking analyses. The result will be presented here."
doc = nlp(_WELCOME_TEXT)
# TPL_ENT = """
# <mark class="entity" style="background: {bg}; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
# {text}
# <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">{label}</span>
# </mark>
# """
# HTML templates for span rendering. TPL_SPAN, TPL_SPAN_SLICE and
# TPL_SPAN_START are passed to visualize_spans() further down through
# displacy_options["template"] under the keys "span", "slice" and "start".
# The {placeholders} are presumably filled in by the renderer — confirm
# against utils.visualize.

# Outer wrapper markup. Not referenced elsewhere in the visible part of this file.
TPL_SPANS = """
<div class="spans" style="line-height: 4.5;">
{text}
{span_slices}
{span_starts}
</div>
"""
# Markup for one piece of text that carries span decorations ("span" template).
TPL_SPAN = """
<span style="font-weight: bold; display: inline-block; line-height: 3; padding-bottom: 12px;position: relative;">
{text}
{span_slices}
{span_starts}
</span>
"""
# Coloured 4px bar positioned with {top_offset} ("slice" template).
TPL_SPAN_SLICE = """
<span style="background: {bg}; top: {top_offset}px; display: inline-block; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
</span>
"""
# Coloured bar with rounded left corners plus a small {label} badge ("start" template).
TPL_SPAN_START = """
<span style="background: {bg}; top: {top_offset}px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
<span style="background: {bg}; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
{label}{kb_link}
</span>
</span>
"""
# TPL_SPAN_START_RTL = """
# <span style="background: {bg}; top: {top_offset}px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
# <span style="background: {bg}; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
# {label}{kb_link}
# </span>
# </span>
# """
# Sample passage; the same text also appears as the second entry of TEXT_LIST.
# NOTE(review): not referenced elsewhere in the visible part of this file.
DEFAULT_TEXT = """Tickner said regardless of the result, the royal commission was a waste of money and he would proceed with a separate inquiry into the issue headed by Justice Jane Matthews. His attack came as the Aboriginal women involved in the case demanded a female minister examine the religious beliefs they claim are inherent in their fight against a bridge to the island near Goolwa in South Australia."""
# Example passages offered in the "Option 1" pull-down (st.selectbox) of the
# input form further down. One entry per triple-quoted string; one entry spans
# three physical lines.
TEXT_LIST = [
"""To a significant extent, individuals can be considered responsible for the rise of Hitler to power on the 31st of January, 1933. Hitler himself, the charismatic leader of the Nazi Party, as well as creator of Nazi policy, played a key role in his own rise to power. However, other individuals in government, such as Hindenburg and von Papen were influential in Hitler’s rise. To a small extent, other factors also enabled Hitler to rise to power such as the Depression and the weakness of the political system. Nevertheless to a significant extent, individuals can be held responsible for the rise of Adolf Hitler to power.""",
"""Tickner said regardless of the result, the royal commission was a waste of money and he would proceed with a separate inquiry into the issue headed by Justice Jane Matthews. His attack came as the Aboriginal women involved in the case demanded a female minister examine the religious beliefs they claim are inherent in their fight against a bridge to the island near Goolwa in South Australia.""",
"""Certainly, the argumentation is not without some faults. For example, the statement that “linking homosexuality to witches fulfills the same purpose” is not supported by references to the readings. It is not clear who was linking homosexuality to witches and in what context. Nevertheless, overall and in line with the general tendencies reported in the previous section, the author employs various contracting and expanding engagement resources successfully. However, a large part of the successful use of engagement resources seems to be related to how the author structures these strategies throughout the text, namely in a wave-like fashion: from acknowledging the opinions of others, to countering them by offering one’s own interpretation, to supporting it by acknowledging other sources.""",
"""As the centuries passed, accounts of witchcraft became more and more specific; details of witches’ ceremonies and oaths became more concrete and whatever the condemned humans confessed to was treated as fact. As discussants correctly pointed out, Bernardino of Siena, Martin Le Franc, and the anonymous author of the Errores Gazariorum all have an even more aggressive campaign against witches than did the authors of our previous readings. By depicting their rituals and customs, they look to paint the most grotesque picture of witches possible. Their frenzied accusations, were some of the main catalysts of the subsequent witch hunts.""",
"""The post labeled “Witchcraft as a Problem in Society” clearly explains the contribution that each text makes to the witch hunts. While two of the authors focused on describing, in full detail, the shocking and disturbing practices that witches partook of, the others tried to prove that the witch threat was real. These last texts sought to explain witchcraft so as to convince readers that witches actually existed. As all posts reiterate, the devil is definitely at the source of witchcraft.""",
"""The third part temporarily puts aside mediation analysis and shifts the discussion to moderation analysis. In Chapter 7, I show how a multiple regression model can be made more flexible by allowing one variable’s effect to depend linearly on another variable in the model. The resulting moderated multiple regression model allows an investigator to ascertain the extent to which X’s influence on outcome variable Y is contingent on or interacts with a moderator variable W.""",
"""For instance, research has shown that people have a tendency to justify close others’ unethical actions to protect them (Gino and Galinsky 2012). Research has also shown that parents who feel close to their children often adopt strict curfew practices (Elder et al., 1995). (EC-33)""",
"""Fitzpatrick and Pagani (2013) found that engagement skills in classroom behaviour at kindergarten were related with better math scores and academic success. (LC-0525-EN)""",
"""The COAG Reform Council (2013) indicated that when compared to other students, Australian Year 4 students who attended one year of ECEC services or programs gained 11 points higher in reading (LC-0471-MA). Preliminary evidence suggests that teaching children from low-income families using humanoid robots increases motivation, sense of community, and self-expression... (EC-64). These findings suggest that visual perception takes up only a small fraction of fixation durations. Specifically, Verdelhan (2010) proposes a two-country, one-good model in which each country has an exogenously specified i.i.d. consumption growth process. Waters & Baur (2003) suggest that children or adolescents who are overweight or obese suffer from social and psychological issues. (LC-0460-EN)""",
"""According to the Australian Bureau of Statistics (2008), the percentage of obese or overweight adults is a staggering 60%.
According to George et al. (2011), in the UK immigration has improved the academic performance of the native children.
According to UNICEF (2011) a child that is breastfed within the first hour of life is fourteen times less likely to die from diarrhoea or pneumonia.""",
"""As far as I am concerned, I do think globalization is good chance for China’s developing. From my point of view, I prefer to think that advantages of globalization outweighs disadvantages. """,
"""As we know, China has made great progress for these years. I think it is the result of globalization. We all know China is a fast-developing country. We can seethe great progress that China has made. """,
"""His idea was that an important ninth century bishop called John Anglicus may indeed have given birth to a child in full view of everyone on the streets of Rome, but that this bishop was not and never had been the pope. Of course, there is no evidence whatever for this, as Leibnitz himself well knew.""",
"""On the whole, however, when evaluating meanings metaphorically, the Chinese EFL learners hedge and qualify their statements subjectively, tempering the certainty and authority of their assertions rather than using the resources of interpersonal metaphor to reinforce and substantiate their arguments. These tendencies reveal a key area for pedagogical intervention. Namely, instruction could focus on the value of construing metaphors objectively to obscure the author as the source of the evaluation. Similarly, raising students’ awareness of the space of negotiation and the value of offering assertions on a cline of certainty (e.g., IT IS EVIDENT) rather than through exclusive declarations of shared knowledge (e.g., AS WE ALL KNOW) is critical for academic writing refinement. Instructional interventions such as these are key areas for further investigation.""",
"""Of the defendants involved in Utah Pie Company’s case only one seems to have emerged as exceptionally successful. However this success was not a factor of overwhelming market power, as can be seen by the dominant position of Mrs. Smith’s during this time, which had maintained a 39-45 percent market share over the corresponding period.""",
"""Because of the evidence presented by Tremblay and Tremblay, it would appear that mergers in the brewing industry would have been procompetitive because of economies of scale. However, allowing a firm to acquire more than 20% of the market in Wisconsin would give it too much power to charge higher prices, even if the merger would help lower total average costs.""",
"""Taken in whole, the economic evidence for grocery retailers in the decades after the Von’s decision suggests that increased concentration is pro-competitive and good for consumers, running contrary to the fears proposed by the Court.""",
"""The remedies that Justice Lewis Powell prescribed did not gain the desired effect, and I feel that they were not very effective in promoting competition. (Elan, S86)""",
"""There is the possibility for abuse if the producer sets different maximum prices for different retailers, allowing some to reap higher profits.""",
"""Such a program, with appropriate limits, would provide a balanced structure that would ensure quality patient care.""",
"""A recent survey of physician satisfaction by Harvard Medical School found that physician autonomy and the ability to provide high-quality care, not income, are most strongly associated with changes in job satisfaction . Thus, it seems reasonable to assume that health care providers would take advantage of the greater bargaining power to improve the quality of care. (Ken, S78-79)""",
"""It appears, then, that maximum price fixing does the greatest harm when set below a competitive level [evidentialize]. In Case 4 it could potentially do harm to small retailers trying to enter the market [suggest], but does so for the benefit of consumers and the producer. Based purely on the models, it appears that, at the very least, maximum prices deserve a Rule of Reason approach to evaluate their cost and benefits.""",
"""It could be seen that for this 68% of the respondents, Tampines was characteristically a location that provided for them all their basic needs. It can be seen from chart [11] that many people quoted accessibility and proximity to home, and even shopping as one of the ideal factors that drew them there. Accessibility is quite a key factor because it is evident that the regional centre was built on the basis of good infrastructure. In comparison, 32% of the respondents felt that the conventional downtown was still a major attraction, even though the regional centre had gained quite a vast amount of popularity and did to large extent have an air of modernity.""",
]
# Deliberately not wrapped in a Streamlit cache: with default settings
# st.cache_resource keeps every distinct input for the life of the process,
# and the work done here is a couple of cheap regex passes.
def preprocess(text: str) -> str:
    r"""Collapse whitespace in *text* while keeping blank-line paragraph breaks.

    Every run of whitespace (spaces, tabs, single newlines, ...) becomes one
    space. Each ``"\n\n"`` is treated as a paragraph break and survives in
    the result as ``" \n\n"``; the space in front of the break is what this
    function has always produced and is kept for compatibility.

    Args:
        text: Raw text, e.g. pasted by the user.

    Returns:
        The normalised text.
    """
    # Splitting on the break itself avoids routing it through a placeholder
    # string, which would be turned into a paragraph break if the same
    # characters ever occurred in the input.
    *paragraphs, tail = text.split("\n\n")
    # Appending " " before collapsing guarantees exactly one space before
    # each break, merging with any whitespace already at the paragraph end.
    normalized = [re.sub(r"\s+", " ", part + " ") for part in paragraphs]
    normalized.append(re.sub(r"\s+", " ", tail))
    return "\n\n".join(normalized)
# Deliberately not cached: the function works purely by mutating its
# argument, so a cache hit would silently skip the deletion.
def delete_span(span_sc):
    """Remove, in place, every span that covers more than one sentence.

    Args:
        span_sc: A mutable, integer-indexed collection of spans that supports
            ``del span_sc[i]``; each item must expose a ``sents`` iterable.
            Presumably a spaCy span group such as ``doc.spans["sc"]`` —
            confirm against the caller.

    Returns:
        None. ``span_sc`` is modified in place.
    """
    # Zero-based positions of the spans to drop.
    multi_sentence = [
        position
        for position, spn in enumerate(span_sc)
        if len(list(spn.sents)) > 1
    ]
    # Delete from the back so earlier deletions cannot shift the positions
    # that are still waiting to be removed.
    for position in reversed(multi_sentence):
        del span_sc[position]
# st.markdown('''
# <style>
# .sidebar .sidebar-content {{
# width: 300px;
# }}
# </style>
# ''',
# unsafe_allow_html=True)
# Sidebar reference table listing the engagement categories and their
# definitions. The Concur row previously read "preposition"; the intended
# word, used in the other rows, is "proposition".
with st.sidebar:
    st.markdown("""
## Engagement moves analyzed in this tool (adapted from Martin & White, 2005).
| Engagement moves | Description |
| ------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `Deny` (Contract -> disclaim) | An utterance which invokes a contrary position but which at the same time rejects it directly. The contrary position is hence given very little dialogic space. |
| `Counter` (Contract -> disclaim) | An utterance which expresses the present proposition as replacing and thus 'countering' another proposition which would have been expected. |
| `Concur` (Contract -> proclaim) | An utterance which shows writers' expectation/assumption that the putative readers will agree with the proposition and/or to have the same knowledge. |
| `Pronounce` (Contract -> proclaim) | An utterance which expresses a strong level of writer commitment through the author's explicit emphasis and interpolation, thereby closing down the dialogic space. |
| `Endorse` (Contract -> proclaim) | An utterance which refers to external sources as warrantable, undeniable, and/or reliable. It expresses the writer’s alignment with and endorsement of an attributed proposition. As such, the dialogic space is somewhat narrowed. |
| `Entertain` (Expand) | An utterance which indicates author's position but as only one possibility amongst others, thereby opening up dialogic space. |
| `Attribute` (Expand) | An utterance which signifies dialogic space as the writer attributes the proposition to an external source. |
| `Monogloss` | An utterance which does not employ any value of engagement. Such an utterance ignores the dialogic potential in an utterance. |
""")
# For a more complete description of the category, visit [the annotation guideline](https://egumasa.github.io/engagement-annotation-project/3_Categories/)!!
# Sidebar credits: author, funding acknowledgements and thanks.
# ("Anthropology" was previously misspelled in the displayed text.)
st.sidebar.markdown("""
Engagement Analyzer is developed by [Masaki Eguchi](https://masakieguchi.weebly.com).
### Acknowledgements:
The development of this tool has been supported by the following grants:
- The TIRF Doctoral Dissertation Grant 2022 sponsored by the International Research Foundation for English Language Education (TIRF)
- The NFMLTA-MLJ Doctoral Dissertation Writing Support Grant 2022 sponsored by the National Federation of Modern Language Teachers Associations (NFMLTA)
- Duolingo English Test Doctoral Dissertation Award, 2022
- The Graduate Student Research Award sponsored by the Department of Linguistics, University of Oregon
I would also like to thank:
- Aaron Miller (Linguistics, University of Oregon) for corpus annotation
- Ryan Walker (Linguistics/Anthropology, University of Oregon) for corpus annotation
- Dr. Kristopher Kyle (Associate Professor in Linguistics, University of Oregon)
""")
# Licence badge and notice; raw HTML, hence unsafe_allow_html=True.
cc = '<a rel="license" href="http://creativecommons.org/licenses/by-nc/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by-nc/4.0/88x31.png" /></a><br />This work is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-nc/4.0/">Creative Commons Attribution-NonCommercial 4.0 International License</a>.'
st.sidebar.markdown(cc, unsafe_allow_html=True)
# Main-page introduction: title, one-paragraph summary, a collapsible
# explanation of the framework, and a model/benchmark notice.
_intro_summary = "Engagement Analyzer is a free tool that analyzes English texts for rhetorical strategies under the Engagement system framework (Martin & White, 2005). Martin and White (2005) propose two basic stance-taking strategies: expansion and contraction, which are in turn divided into finer-grained rhetorical strategies. The current tool allows you to analyze texts for a total of nine rhetorical strategies. The definitions of each category label can be found from the side bar"
_framework_notes = """
According to Martin & White (2005), Engagement is about how the writer of a text takes stances on a topic of discussion by `Expanding (= open)` or `Contracting (= close)` the discourse for alternative viewpoints.
**Expansion strategy** = Discourse moves which open-up the dialogic space; the speaker/writer actively makes allowances for dialogically alternative positions and voices. (e.g., `ENTERTAIN`, `ATTRIBUTE`)
**Contraction strategy** = Discourse moves which close down dialogic space; the speaker/writer acts to challenge, fend off or restrict other alternative positions and voices (e.g., `DENY`, `COUNTER`, `PRONOUNCE`, `ENDORSE`).
"""
_model_notice = """Updated on Jan.11th, 2023\n
The current version was trained on 2,519 sentences and tested on 443 sentences. It achieved the following benchmark:
- Macro F1 = .75
- Macro Precision = .78
- Macro Recall = .74
I expect that the model's performance improves as the annotated dataset gets larger.
"""

st.header("Engagement Analyzer (beta ver 0.2)")
st.write(_intro_summary)

_details = st.expander("See more explanation")
_details.markdown(_framework_notes)

# NOTE(review): indentation was lost in the copy this was reviewed from; the
# notice is assumed to sit outside the expander — confirm.
st.info(_model_notice)
# Input form: the user either picks an example passage or types their own
# text, chooses which of the two to analyse, and submits.
_MODE_EXAMPLE = "Option 1: Pull-down choice"
_MODE_OWN_TEXT = "Option 2: My own text"

with st.form("my_form"):
    st.subheader("Option 1: selecting example text from list")
    # Streamlit discourages empty widget labels, so each widget gets a real
    # label that is hidden; "hidden" keeps the same spacing as label="".
    text = st.selectbox(
        "Example text",
        TEXT_LIST,
        label_visibility="hidden",
    )
    st.subheader("Option 2: analyze your own text")
    input_text = st.text_area(
        label="Your own text",
        value="I would strongly encourage you to put your texts here to analyze it for stance-taking expressions.",
        height=120,
        label_visibility="hidden",
    )
    st.text(
        "The text from the pull-down list and in the textbox cannot be analyzed at the same time. Please select the mode."
    )
    textmode = st.radio(
        label="Choose the mode.",
        options=[_MODE_EXAMPLE, _MODE_OWN_TEXT],
        index=1,
    )
    submitted = st.form_submit_button("Submit")
    # NOTE(review): indentation was lost in the copy this was reviewed from;
    # the submit handling is assumed to live inside the form — confirm.
    if submitted:
        # Only the source of the text differs between the two modes; the
        # analysis step itself is identical.
        if textmode == _MODE_OWN_TEXT:
            text = input_text
        with st.spinner("Analysis in progress..."):
            doc = nlp(preprocess(text))
        # st.markdown("> " + text)
## Dependency parsing
# if textmode == 'My own text':
# text = input_text
# doc = nlp(preprocess(text))
# #st.markdown("> " + input_text)
# else:
# doc = nlp(preprocess(text))
# #st.markdown("> " + text)
# st.header("Text", "text")
# st.write(text)
# delete_span(doc.spans['sc'])
# Highlight colour per span label, handed to the renderer below.
# The dict literal used to list "CITATION" twice ("#b2b2b2", then "#F8C471");
# Python keeps only the last value for a repeated key, so the grey entry never
# took effect. Only the effective value is kept, at the key's original position.
_SPAN_COLORS = {
    "ENTERTAIN": "#82b74b",
    "DENY": "#c94c4c",
    "COUNTER": "#eea29a",
    "PRONOUNCE": "#92a8d1",
    "ENDORSE": "#034f84",
    "CITATION": "#F8C471",
    # "MONOGLOSS": "#3e4444",
    "ATTRIBUTE": "#f7786b",
    "ATTRIBUTION": "#f7786b",
    "PROCLAIM": "#92a8d1",
    "SOURCES": "#F7DC6F",
    "JUSTIFYING": "#2ECC71",
    "ENDOPHORIC": "#FAD7A0",
}

# Post-process the "sc" span group with the project helpers from
# utils.utility, then render the document with the custom templates.
cleanup_justify(doc, doc.spans["sc"])
delete_overlapping_span(doc.spans["sc"])
visualize_spans(
    doc,
    spans_key="sc",
    displacy_options={
        "template": {
            "span": TPL_SPAN,
            "slice": TPL_SPAN_SLICE,
            "start": TPL_SPAN_START,
        },
        "colors": _SPAN_COLORS,
    },
    simple=False,
    show_diversity=True,
    show_confidence=False,
)
# Closing sections of the page: works referenced by the tool, followed by the
# papers users are asked to cite.
_bibliography_md = """
* Chang, P., & Schleppegrell, M. (2011). Taking an effective authorial stance in academic writing: Making the linguistic resources explicit for L2 writers in the social sciences. _Journal of English for Academic Purposes, 10_ (3), 140–151. https://doi.org/10.1016/j.jeap.2011.05.005
* Martin, J. R., & White, P. R. R. (2005). _The language of evaluation: Appraisal in English._ Palgrave Macmillan.
* Ryshina-Pankova, M. (2014). Exploring academic argumentation in course-related blogs through ENGAGEMENT. In G. Thompson & L. Alba-Juez (Eds.), _Pragmatics & Beyond New Series (Vol. 242, pp. 281–302)_. John Benjamins Publishing Company. https://doi.org/10.1075/pbns.242.14rys
* Wu, S. M. (2007). The use of engagement resources in high- and low-rated undergraduate geography essays. _Journal of English for Academic Purposes, 6_ (3), 254–271. https://doi.org/10.1016/j.jeap.2007.09.006
"""
_citation_md = """* Eguchi, M., & Kyle, K. (2023). Span Identification of Epistemic Stance-Taking in Academic Written English. Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023), 429–442. https://aclanthology.org/2023.bea-1.35
* Eguchi, M., & Kyle, K. (2024). Building custom NLP tools to annotate discourse-functional features for second language writing research: A tutorial. *Research Methods in Applied Linguistics, 3*(3), 100153. https://doi.org/10.1016/j.rmal.2024.100153
"""

st.subheader("Bibliography")
st.markdown(_bibliography_md)

st.subheader("Please cite the following papers:")
st.markdown(_citation_md)