Spaces:
Runtime error
Runtime error
File size: 2,776 Bytes
68a8c29 6570b48 6390590 5a1315d 3689459 39897d9 a0fdec6 abed01c 1392687 39897d9 1392687 39897d9 e3d850e ff7c666 67daf03 cde8835 e3ca56d 726336c cde8835 e3d850e 21247cf 606d796 a397155 0fef655 d6aa39c 0fef655 947dc2d 7e88eb4 a8b6772 0fef655 2f590b1 16fc4ca dedac74 8329262 b65ecd9 878ffe0 163a18d 1392687 431f228 878ffe0 1392687 ca54e5d 2c50de3 293724f 7e88eb4 a8b6772 07e3fb8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from urllib.parse import quote_plus

import langcodes
import streamlit as st
# Language lookup page: read a language code or language name from a text box,
# resolve it to a BCP-47 tag with the langcodes library, and print what the
# library reports about that tag.
# https://huggingface.co/blog/streamlit-spaces
langtext = st.text_input(
    "language lookup using https://github.com/rspeer/langcodes, see also https://r12a.github.io/app-subtags/",
    "english",
)

# Step 1: tell the user whether the raw input is already a valid tag.
st.write("Checking whether the tag is valid. That is, the language, script, territory, and variants (if present) are all tags that have meanings assigned by IANA.")
if langcodes.tag_is_valid(langtext):
    st.write(f"...True! '{langtext}' parses meaningfully as a language tag according to IANA.")
else:
    st.write(f"...False! '{langtext}' doesn't parse meaningfully as a language tag according to IANA, some of its subcomponents may be invalid or it might be a natural language description.")

# Step 2: try to interpret the input directly as a language tag.
try:
    lang = langcodes.Language.get(langtext)
    direct_name = lang.display_name()
    # A tag can parse syntactically yet name no known language; in that case
    # the display name contains "unknown", so fall through to the name search.
    if "unknown" in direct_name.lower():
        st.write(f"Attempting to lookup the code directly gives us '{direct_name}', attempting to search for it as a natural language string.")
        lang = None
except langcodes.LanguageTagError:
    st.write("Could not lookup code directly, attempting to search for it as a natural language string.")
    lang = None

# Step 3: fall back to searching for the input as a language *name*.
if lang is None:
    try:
        lang = langcodes.find(langtext)
    except LookupError:
        # `lang` is still None here, so the report in step 4 is skipped.
        st.write("Unable to look up language code.")
        st.write("Try also: https://r12a.github.io/app-subtags/")
        # URL-encode the free-form user text so spaces, '&', '#', etc. do not
        # break the generated search link.
        st.write(f"Try also: https://glottolog.org/glottolog?search={quote_plus(langtext)}")
    else:
        st.write(f"natural language search found the following BCP-47 tag: {lang}")

# Step 4: report everything langcodes knows about the resolved tag.
if lang is not None:
    st.write(f"Best-match BCP-47 tag for '{langtext}', according to the langcodes library: {lang}")
    st.write(f"Breakdown of tag components: {lang.describe()}")
    st.write(f"Display name for {lang}: {lang.display_name()}")
    st.write(f"Autonym for {lang}: {lang.autonym()}")

    # langcodes documents that to_alpha3() raises LookupError when no
    # three-letter code is known; report that instead of crashing the page.
    try:
        b_variant = lang.to_alpha3(variant="B")
        t_variant = lang.to_alpha3(variant="T")
    except LookupError:
        st.write(f"No 'alpha3' code is known for {lang}.")
    else:
        st.write(f"ISO 639-3 'alpha3' code, 'terminology' variant (deprecated): {t_variant}")
        st.write(f"ISO 639-3 'alpha3' code, 'bibliographic' variant (deprecated): {b_variant}")
        st.write(f"If it exists, the Ethnologue entry would be at https://iso639-3.sil.org/code/{t_variant}")

    broader_tags = lang.broader_tags()
    st.write(f"Broader tags for this language, if any: {broader_tags}")
    st.write(f"Correct, standardized, BCP-47 tag for {langtext}, according to the langcodes library: {langcodes.standardize_tag(lang)}")
    st.write(f"Try also: https://r12a.github.io/app-subtags/?lookup={lang}")
    st.write(f"https://glottolog.org/glottolog?search={lang} may be of interest")
|