Update themes-ui.py
themes-ui.py +751 -523
themes-ui.py
CHANGED
@@ -1,545 +1,773 @@
 import sys
-
-import google.protobuf.json_format
-import google.api_core.exceptions
-
-from google.ai import generativelanguage as glm
-from google.generativeai import string_utils
-
-__all__ = [
-    "AsyncGenerateContentResponse",
-    "BlockedPromptException",
-    "StopCandidateException",
-    "IncompleteIterationError",
-    "BrokenResponseError",
-    "GenerationConfigDict",
-    "GenerationConfigType",
-    "GenerationConfig",
-    "GenerateContentResponse",
-]
-
-if sys.version_info < (3, 10):
-
-    def aiter(obj):
-        return obj.__aiter__()
-
-    async def anext(obj, default=None):
-        try:
-            return await obj.__anext__()
-        except StopAsyncIteration:
-            if default is not None:
-                return default
-            else:
-                raise
-
-
-class BlockedPromptException(Exception):
-    pass
-
-
-class StopCandidateException(Exception):
-    pass
-
-
-class IncompleteIterationError(Exception):
-    pass
-
-
-class BrokenResponseError(Exception):
-    pass
-
-
-class GenerationConfigDict(TypedDict):
-    # TODO(markdaoust): Python 3.11+ use `NotRequired`, ref: https://peps.python.org/pep-0655/
-    candidate_count: int
-    stop_sequences: Iterable[str]
-    max_output_tokens: int
-    temperature: float
-
-
-@dataclasses.dataclass
-class GenerationConfig:
-    """A simple dataclass used to configure the generation parameters of `GenerativeModel.generate_content`.
-
-    Attributes:
-        candidate_count:
-            Number of generated responses to return.
-        stop_sequences:
-            The set of character sequences (up
-            to 5) that will stop output generation. If
-            specified, the API will stop at the first
-            appearance of a stop sequence. The stop sequence
-            will not be included as part of the response.
-        max_output_tokens:
-            The maximum number of tokens to include in a
-            candidate.
-
-            If unset, this will default to output_token_limit specified
-            in the model's specification.
-        temperature:
-            Controls the randomness of the output. Note: The
-            default value varies by model, see the `Model.temperature`
-            attribute of the `Model` returned the `genai.get_model`
-            function.
-
-            Values can range from [0.0,1.0], inclusive. A value closer
-            to 1.0 will produce responses that are more varied and
-            creative, while a value closer to 0.0 will typically result
-            in more straightforward responses from the model.
-        top_p:
-            Optional. The maximum cumulative probability of tokens to
-            consider when sampling.
-
-            The model uses combined Top-k and nucleus sampling.
-
-            Tokens are sorted based on their assigned probabilities so
-            that only the most likely tokens are considered. Top-k
-            sampling directly limits the maximum number of tokens to
-            consider, while Nucleus sampling limits number of tokens
-            based on the cumulative probability.
-
-            Note: The default value varies by model, see the
-            `Model.top_p` attribute of the `Model` returned the
-            `genai.get_model` function.
-
-        top_k (int):
-            Optional. The maximum number of tokens to consider when
-            sampling.
-
-            The model uses combined Top-k and nucleus sampling.
-
-            Top-k sampling considers the set of `top_k` most probable
-            tokens. Defaults to 40.
-
-            Note: The default value varies by model, see the
-            `Model.top_k` attribute of the `Model` returned the
-            `genai.get_model` function.
-    """
-
-    candidate_count: int | None = None
-    stop_sequences: Iterable[str] | None = None
-    max_output_tokens: int | None = None
-    temperature: float | None = None
-    top_p: float | None = None
-    top_k: int | None = None
-
-
-GenerationConfigType = Union[glm.GenerationConfig, GenerationConfigDict, GenerationConfig]
-
-
-def to_generation_config_dict(generation_config: GenerationConfigType):
-    if generation_config is None:
-        return {}
-    elif isinstance(generation_config, glm.GenerationConfig):
-        return type(generation_config).to_dict(generation_config)  # pytype: disable=attribute-error
-    elif isinstance(generation_config, GenerationConfig):
-        generation_config = dataclasses.asdict(generation_config)
-        return {key: value for key, value in generation_config.items() if value is not None}
-    elif hasattr(generation_config, "keys"):
-        return dict(generation_config)
-    else:
-        raise TypeError(
-            "Did not understand `generation_config`, expected a `dict` or"
-            f" `GenerationConfig`\nGot type: {type(generation_config)}\nValue:"
-            f" {generation_config}"
-        )
-
-
-def _join_citation_metadatas(
-    citation_metadatas: Iterable[glm.CitationMetadata],
-):
-    citation_metadatas = list(citation_metadatas)
-    return citation_metadatas[-1]

-    for rating in safety_ratings_list:
-        ratings[rating.category] = rating.probability
-        blocked[rating.category].append(rating.blocked)
-
-    blocked = {category: any(blocked) for category, blocked in blocked.items()}
-
-    safety_list = []
-    for (category, probability), blocked in zip(ratings.items(), blocked.values()):
-        safety_list.append(
-            glm.SafetyRating(category=category, probability=probability, blocked=blocked)
-        )

-    else:
-        role = ""
-
-        parts.extend(content.parts)

-    index = candidates[0].index  # These should all be the same.
-
-    return glm.Candidate(
-        index=index,
-        content=_join_contents([c.content for c in candidates]),
-        finish_reason=candidates[-1].finish_reason,
-        safety_ratings=_join_safety_ratings_lists([c.safety_ratings for c in candidates]),
-        citation_metadata=_join_citation_metadatas([c.citation_metadata for c in candidates]),
-    )

-    for candidate in candidate_list:
-        candidates[candidate.index].append(candidate)
-
-    new_candidates = []
-    for index, candidate_parts in sorted(candidates.items()):
-        new_candidates.append(_join_candidates(candidate_parts))
-
-    return new_candidates
-
-
-def _join_prompt_feedbacks(
-    prompt_feedbacks: Iterable[glm.GenerateContentResponse.PromptFeedback],
-):
-    # Always return the first prompt feedback.
-    return next(iter(prompt_feedbacks))
-
-
-def _join_chunks(chunks: Iterable[glm.GenerateContentResponse]):
-    return glm.GenerateContentResponse(
-        candidates=_join_candidate_lists(c.candidates for c in chunks),
-        prompt_feedback=_join_prompt_feedbacks(c.prompt_feedback for c in chunks),
-    )
-
-
-class BaseGenerateContentResponse:
-    def __init__(
-        self,
-        done: bool,
-        iterator: (
-            None
-            | Iterable[glm.GenerateContentResponse]
-            | AsyncIterable[glm.GenerateContentResponse]
-        ),
-        result: glm.GenerateContentResponse,
-        chunks: Iterable[glm.GenerateContentResponse] | None = None,
-    ):
-        self._done = done
-        self._iterator = iterator
-        self._result = result
-        if chunks is None:
-            self._chunks = [result]
-        else:
-            self._chunks = list(chunks)
-        if result.prompt_feedback.block_reason:
-            self._error = BlockedPromptException(result)
-        else:
-            self._error = None
-
-    @property
-    def candidates(self):
-        """The list of candidate responses.
-
-        Raises:
-            IncompleteIterationError: With `stream=True` if iteration over the stream was not completed.
-        """
-        if not self._done:
-            raise IncompleteIterationError(_INCOMPLETE_ITERATION_MESSAGE)
-        return self._result.candidates
-
-    @property
-    def parts(self):
-        """A quick accessor equivalent to `self.candidates[0].parts`
-
-        Raises:
-            ValueError: If the candidate list does not contain exactly one candidate.
-        """
-        candidates = self.candidates
-        if not candidates:
-            raise ValueError(
-                "The `response.parts` quick accessor only works for a single candidate, "
-                "but none were returned. Check the `response.prompt_feedback` to see if the prompt was blocked."
-            )
-        if len(candidates) > 1:
-            raise ValueError(
-                "The `response.parts` quick accessor only works with a "
-                "single candidate. With multiple candidates use "
-                "result.candidates[index].text"
-            )
-        parts = candidates[0].content.parts
-        return parts
-
-    @property
-    def text(self):
-        """A quick accessor equivalent to `self.candidates[0].parts[0].text`
-
-        Raises:
-            ValueError: If the candidate list or parts list does not contain exactly one entry.
-        """
-        parts = self.parts
-        if not parts:
-            raise ValueError(
-                "The `response.text` quick accessor only works when the response contains a valid "
-                "`Part`, but none was returned. Check the `candidate.safety_ratings` to see if the "
-                "response was blocked."
-            )
-
-        return parts[0].text
-
-    @property
-    def prompt_feedback(self):
-        return self._result.prompt_feedback
-
-    def __str__(self) -> str:
-        if self._done:
-            _iterator = "None"
-        else:
-            _iterator = f"<{self._iterator.__class__.__name__}>"
-
-        _result = f"glm.GenerateContentResponse({type(self._result).to_dict(self._result)})"
-
-        if self._error:
-            _error = f",\nerror=<{self._error.__class__.__name__}> {self._error}"
-        else:
-            _error = ""
-
-        return (
-            textwrap.dedent(
-                f"""\
-                response:
-                {type(self).__name__}(
-                    done={self._done},
-                    iterator={_iterator},
-                    result={_result},
-                )"""
-            )
-            + _error
-        )
-
-    __repr__ = __str__
-
-
-@contextlib.contextmanager
-def rewrite_stream_error():
     try:

-    for chunk in response:
-        print(chunk.text)
-    ```
-
-    `GenerateContentResponse.prompt_feedback` is available immediately but
-    `GenerateContentResponse.candidates`, and all the attributes derived from them (`.text`, `.parts`),
-    are only available after the iteration is complete.
     """
+import streamlit as st
+import requests
+import pandas as pd
+import time
+import json
+import re
 import sys
+import subprocess
+from datetime import datetime, date, timedelta
+from urllib.parse import urlencode
+from typing import Dict, List, Optional
+import google.generativeai as genai
+import plotly.express as px
+
+
+# Handle required package installations and imports
+def install_package(package_name: str) -> None:
+    try:
+        subprocess.check_call(
+            [sys.executable, "-m", "pip", "install", package_name],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL
+        )
+    except subprocess.CalledProcessError as e:
+        st.error(f"Failed to install {package_name}: {str(e)}")
+        sys.exit(1)
+
+try:
+    import google.generativeai as genai
+except ImportError:
+    install_package("google-generativeai")
+    import google.generativeai as genai
+
+# Import Plotly
+try:
+    import plotly.express as px
+except ImportError:
+    install_package("plotly")
+    import plotly.express as px
+
+# Set up the application title and layout
+st.set_page_config(
+    page_title="Steam App Reviews - Themes Analysis",
+    page_icon="🎮",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+
+# Custom CSS to improve UI
+st.markdown("""
+<style>
+    .main-header {
+        font-size: 2.5rem !important;
+        color: #1e88e5;
+    }
+    .theme-card {
+        background-color: #f5f5f5;
+        border-radius: 10px;
+        padding: 1.5rem;
+        margin-bottom: 1rem;
+        border-left: 5px solid #1e88e5;
+    }
+    .theme-title {
+        font-size: 1.2rem;
+        font-weight: bold;
+        color: #1e88e5;
+    }
+    .theme-desc {
+        color: #424242;
+        margin: 0.5rem 0;
+    }
+    .theme-count {
+        font-size: 0.9rem;
+        color: #616161;
+    }
+    .sentiment-positive {
+        background-color: #D5EAD8;
+        color: #2E8B57;
+        padding: 3px 8px;
+        border-radius: 10px;
+    }
+    .sentiment-negative {
+        background-color: #FFE4E1;
+        color: #CD5C5C;
+        padding: 3px 8px;
+        border-radius: 10px;
+    }
+    .sentiment-mixed {
+        background-color: #FFF8DC;
+        color: #DAA520;
+        padding: 3px 8px;
+        border-radius: 10px;
+    }
+    .app-info {
+        background-color: #f0f8ff;
+        border-radius: 10px;
+        padding: 1rem;
+        margin-bottom: 1rem;
+    }
+</style>
+""", unsafe_allow_html=True)
+
+# Title and description
+st.markdown('<h1 class="main-header">🎮 Steam App Reviews - Themes Analysis</h1>', unsafe_allow_html=True)
+st.markdown("""
+This tool analyzes user reviews for Steam games to identify common themes, sentiments, and feedback patterns.
+Upload your Google Gemini API key, enter a Steam App ID, select a date range, and get valuable insights from user reviews.
+""")
+
+# Initialize session state variables
+if 'reviews_data' not in st.session_state:
+    st.session_state['reviews_data'] = None
+if 'themes_df' not in st.session_state:
+    st.session_state['themes_df'] = None
+if 'app_info' not in st.session_state:
+    st.session_state['app_info'] = None
+
+# Sidebar inputs for user interaction
+st.sidebar.header("User Input Parameters")
+
+# User input for Google Gemini API key
+api_key_input = st.sidebar.text_input(
+    "Enter your Google Gemini API Key:",
+    type="password",
+    help="Your API key will be used to access the Google Gemini API for theme extraction.",
+)
+
+# Initialize Google Gemini client
+gemini_client = None
+if api_key_input:
+    try:
+        genai.configure(api_key=api_key_input)
+        model = genai.GenerativeModel(model_name='gemini-1.5-pro')
+        gemini_client = model
+        st.sidebar.success("Gemini API connection established!")
+    except Exception as e:
+        st.sidebar.error(f"Error initializing Gemini API: {str(e)}")
+else:
+    st.sidebar.warning("Please enter your Google Gemini API Key to proceed.")
+
+# User input for App ID
+appid = st.sidebar.text_input(
+    "Enter the Steam App ID:",
+    value="1782120",
+    help="Find the App ID in the URL of the game's Steam page."
+)
+
+# Validate App ID
+def is_valid_app_id(app_id: str) -> bool:
+    if not app_id or not app_id.isdigit():
+        return False
+    return True
+
+if not is_valid_app_id(appid):
+    st.sidebar.error("Please enter a valid Steam App ID (numeric only).")
+
+# Date input for selecting a range
+st.sidebar.write("Select the date range for reviews:")
+start_date = st.sidebar.date_input(
+    "Start Date",
+    value=datetime.today() - timedelta(days=7)
+)
+end_date = st.sidebar.date_input(
+    "End Date",
+    value=datetime.today()
+)
+
+# Validate date range
+if start_date and end_date:
+    today = date.today()
+
+    # Check if end date is in the future
+    if end_date > today:
+        st.sidebar.error("Error: End date cannot be in the future.")
+        st.stop()
+
+    # Check if start date is after end date
+    if start_date > end_date:
+        st.sidebar.error("Error: Start date must be before end date.")
+        st.stop()
+
+    # Check if date range is too large
+    date_range = (end_date - start_date).days
+    if date_range > 365:
+        st.sidebar.warning("Warning: Large date ranges may result in incomplete data due to Steam API limitations.")
+    elif date_range < 0:
+        st.sidebar.error("Error: Invalid date range selected.")
+        st.stop()
+
+# Maximum reviews to fetch
+max_reviews = st.sidebar.slider(
+    "Maximum reviews to fetch:",
+    min_value=50,
+    max_value=500,
+    value=200,
+    step=50,
+    help="Higher values may take longer to process."
+)
+
+# Language filter
+language_filter = st.sidebar.multiselect(
+    "Filter by languages:",
+    options=["english", "spanish", "french", "german", "italian", "russian", "all"],
+    default=["english"],
+    help="Select 'all' to include all languages or choose specific languages."
+)
+
+# Advanced options
+advanced_options = st.sidebar.expander("Advanced Analysis Options")
+with advanced_options:
+    include_sentiment = st.checkbox(
+        "Include sentiment analysis",
+        value=True,
+        help="Analyze the sentiment of each review and theme."
+    )
+
+    cluster_similar_themes = st.checkbox(
+        "Cluster similar themes",
+        value=True,
+        help="Group themes that are semantically similar."
+    )
+
+    min_mention_threshold = st.slider(
+        "Minimum reviews per theme:",
+        min_value=1,
+        max_value=10,
+        value=2,
+        help="Only show themes mentioned in at least this many reviews."
+    )
+
+# Function to fetch app information
+@st.cache_data(ttl=3600, show_spinner=False)
+def get_app_info(app_id: str) -> Optional[Dict]:
+    """
+    Fetches information about a Steam game using its App ID.
+    """
+    try:
+        url = f"https://store.steampowered.com/api/appdetails?appids={app_id}"
+        response = requests.get(url, timeout=10)
+        response.raise_for_status()
+        data = response.json()
+
+        if data.get(app_id, {}).get('success', False):
+            app_data = data[app_id]['data']
+            return {
+                'name': app_data.get('name', 'Unknown Game'),
+                'header_image': app_data.get('header_image', ''),
+                'release_date': app_data.get('release_date', {}).get('date', 'Unknown'),
+                'developers': app_data.get('developers', ['Unknown']),
+                'publishers': app_data.get('publishers', ['Unknown'])
+            }
+        return None
+    except Exception as e:
+        st.sidebar.error(f"Error fetching app info: {str(e)}")
+        return None
+
+# Function to fetch reviews
+@st.cache_data(ttl=1800, show_spinner=False)
+def fetch_reviews(app_id: str, start_timestamp: int, end_timestamp: int,
+                  max_reviews: int = 1000, language_filter: List[str] = ["english"]) -> Optional[List]:
+    """
+    Fetches Steam reviews for the specified app within the given date range.
+    Implements batch processing and caching for efficient handling of large volumes.
+    """
+    # Define the base API URL
+    base_url = f"https://store.steampowered.com/appreviews/{app_id}?json=1"
+
+    # Normalize language filter and handle 'all' case
+    normalized_language_filter = [lang.lower() for lang in language_filter]
+    use_all_languages = "all" in normalized_language_filter
+
+    # Calculate day range dynamically based on start and end timestamps
+    day_range = min(365, (end_timestamp - start_timestamp) // 86400 + 1)
+
+    # Define initial API parameters with optimized batch size
+    params = {
+        "filter": "updated",  # Use 'updated' to get all reviews in date range
+        "language": "all" if use_all_languages else ",".join(normalized_language_filter),
+        "day_range": str(day_range),
+        "review_type": "all",
+        "purchase_type": "all",
+        "num_per_page": "100",  # Maximum allowed by Steam API
+        "cursor": "*",
+        "filter_offtopic_activity": 0,
+        "start_date": start_timestamp,
+        "end_date": end_timestamp
+    }
+
+    # Initialize cache for review batches
+    if 'review_cache' not in st.session_state:
+        st.session_state.review_cache = {}
+    cache_key = f"{app_id}_{start_timestamp}_{end_timestamp}_{language_filter}"
+
+    # Check cache first
+    if cache_key in st.session_state.review_cache:
+        cached_reviews = st.session_state.review_cache[cache_key]
+        if len(cached_reviews) >= max_reviews:
+            return cached_reviews[:max_reviews]
+
+    reviews_list = []
+    request_count = 0
+    max_requests = 100  # Increased limit for larger datasets
+    retry_attempts = 3  # Number of retry attempts for failed requests
+    batch_size = 100  # Size of each batch
+
+    progress_bar = st.progress(0)
+    status_text = st.empty()
+
+    # Create a container for batch progress
+    batch_container = st.empty()
+
+    while True:
+        # URL encode the cursor parameter
+        params_encoded = params.copy()
+        params_encoded["cursor"] = params["cursor"].replace("+", "%2B")
+
+        # Construct the full URL with parameters
+        url = base_url + "&" + urlencode(params_encoded)
+
+        try:
+            for attempt in range(retry_attempts):
+                response = requests.get(url, timeout=15)
+                response.raise_for_status()
+                data = response.json()
+
+                # Check if we have any reviews
+                reviews = data.get('reviews')
+                if not reviews:
+                    status_text.warning("No reviews found for the specified date range and filters.")
+                    return []
+
+                # Process reviews
+                for review in reviews:
+                    timestamp = review.get("timestamp_created", 0)
+                    review_language = review.get("language", "").lower()
+
+                    # Validate timestamp is within range
+                    is_in_timerange = start_timestamp <= timestamp <= end_timestamp
+
+                    # Check language filter
+                    is_valid_language = "all" in language_filter or review_language in [lang.lower() for lang in language_filter]
+
+                    if is_in_timerange and is_valid_language:
+                        reviews_list.append(review)
+
+                # Update progress
+                progress = min(len(reviews_list) / max_reviews * 100, 100)
+                progress_bar.progress(int(progress))
+                status_text.text(f"Fetched {len(reviews_list)} reviews...")
+
+                # Check if we've reached max reviews or earlier timestamp
+                if len(reviews_list) >= max_reviews:
+                    break
+                if any(r.get("timestamp_created", 0) < start_timestamp for r in reviews):
+                    break
+
+                # Update cursor for next batch
+                new_cursor = data.get("cursor")
+                if new_cursor is None or params["cursor"] == new_cursor:
+                    break
+                params["cursor"] = new_cursor
+
+                # Handle rate limiting
+                if 'X-Rate-Limit-Remaining' in response.headers:
+                    remaining_calls = int(response.headers['X-Rate-Limit-Remaining'])
+                    time.sleep(0.5 if remaining_calls < 10 else 0.2)
+                else:
+                    time.sleep(0.2)
+
+                # Update batch progress
+                batch_container.text(f"Processing batch {request_count + 1} of {max_requests} (max)")
+
+                # Check request limits
+                request_count += 1
+                if request_count >= max_requests:
+                    status_text.warning("Reached maximum number of requests. Some reviews may not be fetched.")
+                    break
+
+                break  # Success - exit retry loop
+
+        except requests.exceptions.RequestException as e:
+            status_text.error(f"Steam API Error: {str(e)}")
+            if attempt < retry_attempts - 1:
+                time.sleep(1)  # Wait before retrying
+                continue
+            return None
+
+    # Clean up progress indicators
+    progress_bar.empty()
+    status_text.empty()
+    batch_container.empty()
+
+    # Cache and return results
+    st.session_state.review_cache[cache_key] = reviews_list
+    return reviews_list
+
+# Function to extract themes using Google Gemini 1.5 Pro
+def extract_themes(df: pd.DataFrame,
+                   include_sentiment: bool = True,
+                   cluster_similar_themes: bool = True,
+                   min_mention_threshold: int = 2) -> Optional[pd.DataFrame]:
+    """
+    Uses Google Gemini 1.5 Pro to identify the most common themes in reviews.
+    Implements batched processing and caching for large datasets.
+    """
+    if len(df) == 0:
+        st.error("No reviews to analyze.")
+        return None
+
+    # Get counts of positive and negative reviews (if available)
+    positive_count = 0
+    negative_count = 0
+
+    if "Recommended" in df.columns:
+        positive_count = df["Recommended"].sum()
+        negative_count = len(df) - positive_count
+
+    # Initialize theme cache
+    if 'theme_cache' not in st.session_state:
+        st.session_state.theme_cache = {}
+
+    # Calculate cache key based on review content hash
+    cache_key = hash(tuple(sorted(df['Review'].values)))
+
+    # Check cache first
+    if cache_key in st.session_state.theme_cache:
+        return st.session_state.theme_cache[cache_key]
+
+    # Process reviews in batches to handle large datasets
+    batch_size = 200  # Optimal batch size for Gemini API
+    total_batches = (len(df) + batch_size - 1) // batch_size
+
+    all_themes = []
+    progress_bar = st.progress(0)
+    batch_status = st.empty()
+
+    for batch_idx in range(total_batches):
+        start_idx = batch_idx * batch_size
+        end_idx = min(start_idx + batch_size, len(df))
+        df_batch = df.iloc[start_idx:end_idx]
+
+        # Combine reviews into a single string with IDs
+        reviews_text = "\n\n".join([
+            f"Review ID: {row['Review ID']}\nReview Text: {row['Review']}"
+            for _, row in df.iterrows()
+        ])
+
+        # Prepare the prompt
+        sentiment_instruction = "For each theme, analyze the sentiment (Positive, Negative, or Mixed)." if include_sentiment else ""
+        clustering_instruction = "Cluster similar themes together." if cluster_similar_themes else ""
+
+        # Fix the JSON template structure
+        sentiment_field = '"Sentiment": "Positive/Negative/Mixed",' if include_sentiment else ""
+
+        prompt = f"""
+        Analyze these {len(df)} user reviews for a game with {positive_count} positive and {negative_count} negative reviews.
+
+        Identify significant themes. {clustering_instruction}
+        For each theme:
+        1. Provide a concise, specific name
+        2. Write a detailed description summarizing user feedback
+        3. List the Review IDs where the theme is mentioned
+        4. {sentiment_instruction}
+
+        Only include themes mentioned in at least {min_mention_threshold} different reviews.
+
+        Provide the output as a JSON array with the following structure:
+        [
+            {{
+                "Theme": "theme_name",
+                "Description": "detailed_description",
+                "Review IDs": ["id1", "id2", ...],
+                {sentiment_field}
+            }},
+            ...
+        ]
+
+        Reviews:
+        {reviews_text}
+        """
+
+        # Call Google Gemini 1.5 Pro
+        try:
+            with st.spinner("Analyzing themes with Google Gemini 1.5 Pro..."):
+                response = model.generate_content(prompt)
+
+            # Extract text from the response
+            if hasattr(response, 'text'):
+                response_text = response.text
+            elif hasattr(response, 'parts') and response.parts:
+                response_text = response.parts[0].text
+            else:
+                response_text = str(response)
+
+            # Clean and parse the response text
+            # First try to extract JSON from code blocks
+            json_pattern = r'```(?:json)?(.*?)```'
+            json_matches = re.findall(json_pattern, response_text, re.DOTALL)
+
+            if json_matches:
+                # Use the first JSON block found
+                json_str = json_matches[0].strip()
+            else:
+                # If no code blocks, try to use the entire response as JSON
+                # Remove any markdown formatting or extra whitespace
+                json_str = response_text.strip()
+
+            # Parse the JSON output
+            themes_data = json.loads(json_str)
+
+            # Convert to DataFrame and add count column
+            themes_df = pd.DataFrame(themes_data)
+            themes_df["Count"] = themes_df["Review IDs"].apply(len)
+
+            # Sort themes by count (descending)
+            themes_df = themes_df.sort_values("Count", ascending=False).reset_index(drop=True)
+
+            return themes_df
+
+        except Exception as e:
+            st.error(f"Error extracting themes: {str(e)}")
+            st.error("Response from Gemini API:")
+            if 'response' in locals():
+                try:
+                    if hasattr(response, 'text'):
+                        error_text = response.text
+                    elif hasattr(response, 'parts') and response.parts:
+                        error_text = response.parts[0].text
+                    else:
+                        error_text = str(response)
+                    st.error(error_text)
+                except Exception as e:
+                    st.error(f"Error displaying response: {str(e)}")
+            return None
+
+# Function to create visualizations
+def create_visualizations(themes_df: pd.DataFrame, reviews_df: pd.DataFrame):
+    """
+    Creates visualizations for the theme analysis.
+    """
+    col1, col2 = st.columns(2)
+
+    # Theme distribution chart
+    with col1:
+        theme_counts = themes_df[["Theme", "Count"]]
+        fig = px.bar(
+            theme_counts,
+            x="Count", y="Theme", orientation="h",
+            title="Theme Distribution",
+        )
+        fig.update_layout(height=400)
+        st.plotly_chart(fig, use_container_width=True)
+
+    # Sentiment analysis chart (if available)
+    with col2:
+        if "Sentiment" in themes_df.columns:
+            sentiment_counts = themes_df["Sentiment"].value_counts().reset_index()
+            sentiment_counts.columns = ["Sentiment", "Count"]
+            fig = px.pie(
+                sentiment_counts,
+                values="Count", names="Sentiment",
+                title="Theme Sentiment Distribution",
+                color="Sentiment",
+                color_discrete_map={"Positive": "#2E8B57", "Negative": "#CD5C5C", "Mixed": "#DAA520"},
+            )
+            fig.update_layout(height=400)
+            st.plotly_chart(fig, use_container_width=True)
+
+    # Review timeline (if timestamp available)
+    if "Timestamp" in reviews_df.columns:
+        # Convert timestamp to datetime
+        reviews_df["Date"] = pd.to_datetime(reviews_df["Timestamp"], unit='s')
+
+        # Group by date and count
+        reviews_by_date = reviews_df.groupby(reviews_df["Date"].dt.date).size().reset_index()
+        reviews_by_date.columns = ["Date", "Count"]
+
+        # Create timeline chart
+        fig = px.line(
+            reviews_by_date,
+            x="Date", y="Count",
+            title="Reviews Timeline",
+            markers=True
+        )
+        st.plotly_chart(fig, use_container_width=True)
+
+# Validate inputs before processing
+if start_date > end_date:
+    st.error("Error: End date must fall after start date.")
+elif not api_key_input:
+    st.info("Please input your Google Gemini API Key to proceed.")
+elif not is_valid_app_id(appid):
+    st.error("Please enter a valid Steam App ID.")
+else:
+    # Fetch app info
+    if st.session_state['app_info'] is None or st.session_state.get('current_appid') != appid:
+        st.session_state['app_info'] = get_app_info(appid)
+        st.session_state['current_appid'] = appid
+
+    # Display app info if available
+    if st.session_state['app_info']:
+        app_info = st.session_state['app_info']
+        col1, col2 = st.columns([1, 3])
+
+        with col1:
+            st.image(app_info['header_image'], width=200)
+
+        with col2:
+            st.markdown(f"""
+            <div class='app-info'>
+                <h2>{app_info['name']}</h2>
+                <p><strong>Release Date:</strong> {app_info['release_date']}</p>
+                <p><strong>Developers:</strong> {', '.join(app_info['developers'])}</p>
+                <p><strong>Publishers:</strong> {', '.join(app_info['publishers'])}</p>
+            </div>
+            """, unsafe_allow_html=True)
+
+    # Fetch reviews button
+    if st.button("Fetch and Analyze Reviews", type="primary"):
+        # Convert dates to timestamps
+        start_timestamp = int(time.mktime(start_date.timetuple()))
+        end_timestamp = int(time.mktime((end_date + timedelta(days=1)).timetuple())) - 1  # Include the entire end date
+
+        # Fetch the reviews
+        with st.spinner("Fetching reviews from Steam..."):
+            reviews_data = fetch_reviews(
+                appid,
+                start_timestamp,
+                end_timestamp,
+                max_reviews=max_reviews,
+                language_filter=language_filter
+            )
+            st.session_state['reviews_data'] = reviews_data
+
+        # Check if reviews were fetched
+        if reviews_data:
+            st.success(f"Fetched {len(reviews_data)} reviews from App ID {appid}.")
+
+            # Create a DataFrame from the review data
+            df = pd.DataFrame(
+                [
+                    {
+                        "Review ID": str(review.get("recommendationid")),
+                        "Author SteamID": review.get("author", {}).get("steamid"),
+                        "Language": review.get("language"),
+                        "Review": review.get("review"),
+                        "Recommended": review.get("voted_up", False),
+                        "Votes Helpful": review.get("votes_up", 0),
+                        "Timestamp": review.get("timestamp_created", 0),
+                        "Posted On": datetime.fromtimestamp(
+                            review.get("timestamp_created", 0)
+                        ).strftime("%Y-%m-%d %H:%M:%S"),
+                    }
+                    for review in reviews_data
+                ]
+            )
+
+            # Extract themes using Google Gemini 1.5 Pro
+            themes_df = extract_themes(
+                df,
+                include_sentiment=include_sentiment,
+                cluster_similar_themes=cluster_similar_themes,
+                min_mention_threshold=min_mention_threshold
+            )
+            st.session_state['themes_df'] = themes_df
+
+            if themes_df is not None:
+                # Show summary statistics
+                col1, col2, col3, col4 = st.columns(4)
+                with col1:
+                    st.metric("Total Reviews", len(df))
+                with col2:
+                    positive_count = df["Recommended"].sum()
+                    positive_percent = (positive_count / len(df)) * 100 if len(df) > 0 else 0
+                    st.metric("Positive Reviews", f"{positive_count} ({positive_percent:.1f}%)")
+                with col3:
+                    negative_count = len(df) - positive_count
+                    negative_percent = (negative_count / len(df)) * 100 if len(df) > 0 else 0
+                    st.metric("Negative Reviews", f"{negative_count} ({negative_percent:.1f}%)")
+                with col4:
+                    st.metric("Themes Identified", len(themes_df))
+
+                # Create visualizations
+                create_visualizations(themes_df, df)
+
+                # Show themes analysis
+                st.markdown("## 📊 Theme Analysis")
+                st.dataframe(themes_df)
+
+                # Display detailed theme information
+                st.markdown("## 🔍 Detailed Theme Analysis")
+
+                for index, row in themes_df.iterrows():
+                    theme = row["Theme"]
+                    description = row["Description"]
+                    review_ids = row["Review IDs"]
+                    count = row["Count"]
+                    sentiment = row.get("Sentiment", "Not analyzed")
+
+                    # Create a sentiment badge with appropriate styling
+                    sentiment_class = ""
+                    if sentiment == "Positive":
+                        sentiment_class = "sentiment-positive"
+                    elif sentiment == "Negative":
+                        sentiment_class = "sentiment-negative"
+                    elif sentiment == "Mixed":
+                        sentiment_class = "sentiment-mixed"
+
+                    # Display theme card with enhanced formatting
+                    sentiment_html = f'<span class="{sentiment_class}">{sentiment}</span>' if sentiment != "Not analyzed" else ""
+
+                    st.markdown(f"""
+                    <div class="theme-card">
+                        <div class="theme-title">{theme} {sentiment_html}</div>
+                        <p class="theme-desc">{description}</p>
+                        <div class="theme-count">Mentioned in {count} reviews</div>
+                    </div>
+                    """, unsafe_allow_html=True)
+
+                    with st.expander(f"View reviews mentioning '{theme}'"):
+                        # Get the reviews that mention the theme
+                        try:
+                            reviews_with_theme = df[df["Review ID"].isin(review_ids)][["Review ID", "Review", "Posted On", "Recommended"]]
+                            st.dataframe(reviews_with_theme, use_container_width=True)
+                        except Exception as e:
+                            st.error(f"Error displaying reviews for theme '{theme}': {str(e)}")
+
+                # Export options
+                st.markdown("## 📥 Export Results")
+                col1, col2 = st.columns(2)
+
+                with col1:
+                    # Export reviews as CSV
+                    reviews_csv = df.to_csv(index=False).encode('utf-8')
+                    st.download_button(
+                        label="Download Reviews CSV",
+                        data=reviews_csv,
+                        file_name=f"steam_reviews_{appid}_{start_date}_to_{end_date}.csv",
+                        mime="text/csv"
+                    )
+
+                with col2:
+                    # Export themes as CSV
+                    themes_csv = themes_df.to_csv(index=False).encode('utf-8')
+                    st.download_button(
+                        label="Download Themes Analysis CSV",
+                        data=themes_csv,
+                        file_name=f"steam_themes_{appid}_{start_date}_to_{end_date}.csv",
+                        mime="text/csv"
+                    )
+            else:
+                st.warning("Failed to extract themes. Please try again or adjust parameters.")
+        else:
+            st.warning("No reviews found for the specified date range and filters.")
+
+# Display the raw reviews data if available
+if st.session_state['reviews_data'] is not None:
+    with st.expander("View Raw Reviews Data"):
+        reviews_df = pd.DataFrame(
+            [
+                {
+                    "Review ID": str(review.get("recommendationid")),
+                    "Author SteamID": review.get("author", {}).get("steamid"),
+                    "Language": review.get("language"),
+                    "Review": review.get("review"),
+                    "Recommended": review.get("voted_up", False),
+                    "Votes Helpful": review.get("votes_up", 0),
+                    "Posted On": datetime.fromtimestamp(
+                        review.get("timestamp_created", 0)
+                    ).strftime("%Y-%m-%d %H:%M:%S"),
+                }
+                for review in st.session_state['reviews_data']
+            ]
+        )
+        st.dataframe(reviews_df, use_container_width=True)