AIdeaText committed on
Commit
0707975
verified
1 Parent(s): 8275cdb

Update modules/text_analysis/semantic_analysis.py

Browse files
modules/text_analysis/semantic_analysis.py CHANGED
@@ -1,12 +1,67 @@
 
1
  import streamlit as st
2
  import spacy
3
  import networkx as nx
4
  import matplotlib.pyplot as plt
5
- from collections import Counter
6
  from sklearn.feature_extraction.text import TfidfVectorizer
7
  from sklearn.metrics.pairwise import cosine_similarity
8
 
9
- # ... (mantén las definiciones de POS_COLORS, POS_TRANSLATIONS, y ENTITY_LABELS como están)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  def identify_key_concepts(doc):
12
  word_freq = Counter([token.lemma_.lower() for token in doc if token.pos_ in ['NOUN', 'VERB'] and not token.is_stop])
 
1
+ #semantic_analysis.py
2
  import streamlit as st
3
  import spacy
4
  import networkx as nx
5
  import matplotlib.pyplot as plt
6
+ from collections import Counter, defaultdict
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
  from sklearn.metrics.pairwise import cosine_similarity
9
 
10
+ # Define colors for grammatical categories
11
+ POS_COLORS = {
12
+ 'ADJ': '#FFA07A', 'ADP': '#98FB98', 'ADV': '#87CEFA', 'AUX': '#DDA0DD',
13
+ 'CCONJ': '#F0E68C', 'DET': '#FFB6C1', 'INTJ': '#FF6347', 'NOUN': '#90EE90',
14
+ 'NUM': '#FAFAD2', 'PART': '#D3D3D3', 'PRON': '#FFA500', 'PROPN': '#20B2AA',
15
+ 'SCONJ': '#DEB887', 'SYM': '#7B68EE', 'VERB': '#FF69B4', 'X': '#A9A9A9',
16
+ }
17
+
18
+ POS_TRANSLATIONS = {
19
+ 'es': {
20
+ 'ADJ': 'Adjetivo', 'ADP': 'Preposición', 'ADV': 'Adverbio', 'AUX': 'Auxiliar',
21
+ 'CCONJ': 'Conjunción Coordinante', 'DET': 'Determinante', 'INTJ': 'Interjección',
22
+ 'NOUN': 'Sustantivo', 'NUM': 'Número', 'PART': 'Partícula', 'PRON': 'Pronombre',
23
+ 'PROPN': 'Nombre Propio', 'SCONJ': 'Conjunción Subordinante', 'SYM': 'Símbolo',
24
+ 'VERB': 'Verbo', 'X': 'Otro',
25
+ },
26
+ 'en': {
27
+ 'ADJ': 'Adjective', 'ADP': 'Preposition', 'ADV': 'Adverb', 'AUX': 'Auxiliary',
28
+ 'CCONJ': 'Coordinating Conjunction', 'DET': 'Determiner', 'INTJ': 'Interjection',
29
+ 'NOUN': 'Noun', 'NUM': 'Number', 'PART': 'Particle', 'PRON': 'Pronoun',
30
+ 'PROPN': 'Proper Noun', 'SCONJ': 'Subordinating Conjunction', 'SYM': 'Symbol',
31
+ 'VERB': 'Verb', 'X': 'Other',
32
+ },
33
+ 'fr': {
34
+ 'ADJ': 'Adjectif', 'ADP': 'Préposition', 'ADV': 'Adverbe', 'AUX': 'Auxiliaire',
35
+ 'CCONJ': 'Conjonction de Coordination', 'DET': 'Déterminant', 'INTJ': 'Interjection',
36
+ 'NOUN': 'Nom', 'NUM': 'Nombre', 'PART': 'Particule', 'PRON': 'Pronom',
37
+ 'PROPN': 'Nom Propre', 'SCONJ': 'Conjonction de Subordination', 'SYM': 'Symbole',
38
+ 'VERB': 'Verbe', 'X': 'Autre',
39
+ }
40
+ }
41
+
42
+ ENTITY_LABELS = {
43
+ 'es': {
44
+ "Personas": "lightblue",
45
+ "Lugares": "lightcoral",
46
+ "Inventos": "lightgreen",
47
+ "Fechas": "lightyellow",
48
+ "Conceptos": "lightpink"
49
+ },
50
+ 'en': {
51
+ "People": "lightblue",
52
+ "Places": "lightcoral",
53
+ "Inventions": "lightgreen",
54
+ "Dates": "lightyellow",
55
+ "Concepts": "lightpink"
56
+ },
57
+ 'fr': {
58
+ "Personnes": "lightblue",
59
+ "Lieux": "lightcoral",
60
+ "Inventions": "lightgreen",
61
+ "Dates": "lightyellow",
62
+ "Concepts": "lightpink"
63
+ }
64
+ }
65
 
66
  def identify_key_concepts(doc):
67
  word_freq = Counter([token.lemma_.lower() for token in doc if token.pos_ in ['NOUN', 'VERB'] and not token.is_stop])