noumanjavaid committed on
Commit
3cd24d7
·
verified ·
1 Parent(s): 1f717b7

Update themes-ui.py

Files changed (1)
themes-ui.py +751 -523
themes-ui.py CHANGED
@@ -1,545 +1,773 @@
1
- from __future__ import annotations
2
-
3
- import collections
4
- import contextlib
5
  import sys
6
- from collections.abc import Iterable, AsyncIterable
7
- import dataclasses
8
- import itertools
9
- import textwrap
10
- from typing import TypedDict, Union
11
-
12
- import google.protobuf.json_format
13
- import google.api_core.exceptions
14
-
15
- from google.ai import generativelanguage as glm
16
- from google.generativeai import string_utils
17
-
18
- __all__ = [
19
- "AsyncGenerateContentResponse",
20
- "BlockedPromptException",
21
- "StopCandidateException",
22
- "IncompleteIterationError",
23
- "BrokenResponseError",
24
- "GenerationConfigDict",
25
- "GenerationConfigType",
26
- "GenerationConfig",
27
- "GenerateContentResponse",
28
- ]
29
-
30
- if sys.version_info < (3, 10):
31
-
32
- def aiter(obj):
33
- return obj.__aiter__()
34
-
35
- async def anext(obj, default=None):
36
- try:
37
- return await obj.__anext__()
38
- except StopAsyncIteration:
39
- if default is not None:
40
- return default
41
- else:
42
- raise
43
-
44
-
45
- class BlockedPromptException(Exception):
46
- pass
47
-
48
-
49
- class StopCandidateException(Exception):
50
- pass
51
-
52
-
53
- class IncompleteIterationError(Exception):
54
- pass
55
-
56
-
57
- class BrokenResponseError(Exception):
58
- pass
59
-
60
-
61
- class GenerationConfigDict(TypedDict):
62
- # TODO(markdaoust): Python 3.11+ use `NotRequired`, ref: https://peps.python.org/pep-0655/
63
- candidate_count: int
64
- stop_sequences: Iterable[str]
65
- max_output_tokens: int
66
- temperature: float
67
-
68
-
69
- @dataclasses.dataclass
70
- class GenerationConfig:
71
- """A simple dataclass used to configure the generation parameters of `GenerativeModel.generate_content`.
72
-
73
- Attributes:
74
- candidate_count:
75
- Number of generated responses to return.
76
- stop_sequences:
77
- The set of character sequences (up
78
- to 5) that will stop output generation. If
79
- specified, the API will stop at the first
80
- appearance of a stop sequence. The stop sequence
81
- will not be included as part of the response.
82
- max_output_tokens:
83
- The maximum number of tokens to include in a
84
- candidate.
85
-
86
- If unset, this will default to output_token_limit specified
87
- in the model's specification.
88
- temperature:
89
- Controls the randomness of the output. Note: The
90
-
91
- default value varies by model, see the `Model.temperature`
92
- attribute of the `Model` returned the `genai.get_model`
93
- function.
94
-
95
- Values can range from [0.0,1.0], inclusive. A value closer
96
- to 1.0 will produce responses that are more varied and
97
- creative, while a value closer to 0.0 will typically result
98
- in more straightforward responses from the model.
99
- top_p:
100
- Optional. The maximum cumulative probability of tokens to
101
- consider when sampling.
102
-
103
- The model uses combined Top-k and nucleus sampling.
104
-
105
- Tokens are sorted based on their assigned probabilities so
106
- that only the most likely tokens are considered. Top-k
107
- sampling directly limits the maximum number of tokens to
108
- consider, while Nucleus sampling limits number of tokens
109
- based on the cumulative probability.
110
-
111
- Note: The default value varies by model, see the
112
- `Model.top_p` attribute of the `Model` returned the
113
- `genai.get_model` function.
114
-
115
- top_k (int):
116
- Optional. The maximum number of tokens to consider when
117
- sampling.
118
-
119
- The model uses combined Top-k and nucleus sampling.
120
-
121
- Top-k sampling considers the set of `top_k` most probable
122
- tokens. Defaults to 40.
123
-
124
- Note: The default value varies by model, see the
125
- `Model.top_k` attribute of the `Model` returned the
126
- `genai.get_model` function.
127
- """
128
-
129
- candidate_count: int | None = None
130
- stop_sequences: Iterable[str] | None = None
131
- max_output_tokens: int | None = None
132
- temperature: float | None = None
133
- top_p: float | None = None
134
- top_k: int | None = None
135
-
136
-
137
- GenerationConfigType = Union[glm.GenerationConfig, GenerationConfigDict, GenerationConfig]
138
-
139
-
140
- def to_generation_config_dict(generation_config: GenerationConfigType):
141
- if generation_config is None:
142
- return {}
143
- elif isinstance(generation_config, glm.GenerationConfig):
144
- return type(generation_config).to_dict(generation_config) # pytype: disable=attribute-error
145
- elif isinstance(generation_config, GenerationConfig):
146
- generation_config = dataclasses.asdict(generation_config)
147
- return {key: value for key, value in generation_config.items() if value is not None}
148
- elif hasattr(generation_config, "keys"):
149
- return dict(generation_config)
150
- else:
151
- raise TypeError(
152
- "Did not understand `generation_config`, expected a `dict` or"
153
- f" `GenerationConfig`\nGot type: {type(generation_config)}\nValue:"
154
- f" {generation_config}"
155
- )
156
-
157
-
158
- def _join_citation_metadatas(
159
- citation_metadatas: Iterable[glm.CitationMetadata],
160
- ):
161
- citation_metadatas = list(citation_metadatas)
162
- return citation_metadatas[-1]
163
 
164
 
165
- def _join_safety_ratings_lists(
166
- safety_ratings_lists: Iterable[list[glm.SafetyRating]],
167
- ):
168
- ratings = {}
169
- blocked = collections.defaultdict(list)
170
-
171
- for safety_ratings_list in safety_ratings_lists:
172
- for rating in safety_ratings_list:
173
- ratings[rating.category] = rating.probability
174
- blocked[rating.category].append(rating.blocked)
175
-
176
- blocked = {category: any(blocked) for category, blocked in blocked.items()}
177
-
178
- safety_list = []
179
- for (category, probability), blocked in zip(ratings.items(), blocked.values()):
180
- safety_list.append(
181
- glm.SafetyRating(category=category, probability=probability, blocked=blocked)
182
  )
183
 
184
- return safety_list
185
 
186
 
187
- def _join_contents(contents: Iterable[glm.Content]):
188
- contents = tuple(contents)
189
- roles = [c.role for c in contents if c.role]
190
- if roles:
191
- role = roles[0]
192
- else:
193
- role = ""
194
 
195
- parts = []
196
- for content in contents:
197
- parts.extend(content.parts)
198
 
199
- merged_parts = [parts.pop(0)]
200
- for part in parts:
201
- if not merged_parts[-1].text:
202
- merged_parts.append(part)
203
- continue
204
 
205
- if not part.text:
206
- merged_parts.append(part)
207
- continue
208
 
209
- merged_part = glm.Part(merged_parts[-1])
210
- merged_part.text += part.text
211
- merged_parts[-1] = merged_part
212
 
213
- return glm.Content(
214
- role=role,
215
- parts=merged_parts,
216
  )
217
-
218
-
219
- def _join_candidates(candidates: Iterable[glm.Candidate]):
220
- candidates = tuple(candidates)
221
-
222
- index = candidates[0].index # These should all be the same.
223
-
224
- return glm.Candidate(
225
- index=index,
226
- content=_join_contents([c.content for c in candidates]),
227
- finish_reason=candidates[-1].finish_reason,
228
- safety_ratings=_join_safety_ratings_lists([c.safety_ratings for c in candidates]),
229
- citation_metadata=_join_citation_metadatas([c.citation_metadata for c in candidates]),
230
  )
231
-
232
-
233
- def _join_candidate_lists(candidate_lists: Iterable[list[glm.Candidate]]):
234
- # Assuming that is a candidate ends, it is no longer returned in the list of
235
- # candidates and that's why candidates have an index
236
- candidates = collections.defaultdict(list)
237
- for candidate_list in candidate_lists:
238
- for candidate in candidate_list:
239
- candidates[candidate.index].append(candidate)
240
-
241
- new_candidates = []
242
- for index, candidate_parts in sorted(candidates.items()):
243
- new_candidates.append(_join_candidates(candidate_parts))
244
-
245
- return new_candidates
246
-
247
-
248
- def _join_prompt_feedbacks(
249
- prompt_feedbacks: Iterable[glm.GenerateContentResponse.PromptFeedback],
250
- ):
251
- # Always return the first prompt feedback.
252
- return next(iter(prompt_feedbacks))
253
-
254
-
255
- def _join_chunks(chunks: Iterable[glm.GenerateContentResponse]):
256
- return glm.GenerateContentResponse(
257
- candidates=_join_candidate_lists(c.candidates for c in chunks),
258
- prompt_feedback=_join_prompt_feedbacks(c.prompt_feedback for c in chunks),
259
  )
260
 
261
-
262
- _INCOMPLETE_ITERATION_MESSAGE = """\
263
- Please let the response complete iteration before accessing the final accumulated
264
- attributes (or call `response.resolve()`)"""
265
-
266
-
267
- class BaseGenerateContentResponse:
268
- def __init__(
269
- self,
270
- done: bool,
271
- iterator: (
272
- None
273
- | Iterable[glm.GenerateContentResponse]
274
- | AsyncIterable[glm.GenerateContentResponse]
275
- ),
276
- result: glm.GenerateContentResponse,
277
- chunks: Iterable[glm.GenerateContentResponse] | None = None,
278
- ):
279
- self._done = done
280
- self._iterator = iterator
281
- self._result = result
282
- if chunks is None:
283
- self._chunks = [result]
284
- else:
285
- self._chunks = list(chunks)
286
- if result.prompt_feedback.block_reason:
287
- self._error = BlockedPromptException(result)
288
- else:
289
- self._error = None
290
-
291
- @property
292
- def candidates(self):
293
- """The list of candidate responses.
294
-
295
- Raises:
296
- IncompleteIterationError: With `stream=True` if iteration over the stream was not completed.
297
- """
298
- if not self._done:
299
- raise IncompleteIterationError(_INCOMPLETE_ITERATION_MESSAGE)
300
- return self._result.candidates
301
-
302
- @property
303
- def parts(self):
304
- """A quick accessor equivalent to `self.candidates[0].parts`
305
-
306
- Raises:
307
- ValueError: If the candidate list does not contain exactly one candidate.
308
- """
309
- candidates = self.candidates
310
- if not candidates:
311
- raise ValueError(
312
- "The `response.parts` quick accessor only works for a single candidate, "
313
- "but none were returned. Check the `response.prompt_feedback` to see if the prompt was blocked."
314
- )
315
- if len(candidates) > 1:
316
- raise ValueError(
317
- "The `response.parts` quick accessor only works with a "
318
- "single candidate. With multiple candidates use "
319
- "result.candidates[index].text"
320
- )
321
- parts = candidates[0].content.parts
322
- return parts
323
-
324
- @property
325
- def text(self):
326
- """A quick accessor equivalent to `self.candidates[0].parts[0].text`
327
-
328
- Raises:
329
- ValueError: If the candidate list or parts list does not contain exactly one entry.
330
- """
331
- parts = self.parts
332
- if not parts:
333
- raise ValueError(
334
- "The `response.text` quick accessor only works when the response contains a valid "
335
- "`Part`, but none was returned. Check the `candidate.safety_ratings` to see if the "
336
- "response was blocked."
337
- )
338
-
339
- return parts[0].text
340
-
341
- @property
342
- def prompt_feedback(self):
343
- return self._result.prompt_feedback
344
-
345
- def __str__(self) -> str:
346
- if self._done:
347
- _iterator = "None"
348
- else:
349
- _iterator = f"<{self._iterator.__class__.__name__}>"
350
-
351
- _result = f"glm.GenerateContentResponse({type(self._result).to_dict(self._result)})"
352
-
353
- if self._error:
354
- _error = f",\nerror=<{self._error.__class__.__name__}> {self._error}"
355
- else:
356
- _error = ""
357
-
358
- return (
359
- textwrap.dedent(
360
- f"""\
361
- response:
362
- {type(self).__name__}(
363
- done={self._done},
364
- iterator={_iterator},
365
- result={_result},
366
- )"""
367
- )
368
- + _error
369
- )
370
-
371
- __repr__ = __str__
372
-
373
-
374
- @contextlib.contextmanager
375
- def rewrite_stream_error():
376
  try:
377
- yield
378
- except (google.protobuf.json_format.ParseError, AttributeError) as e:
379
- raise google.api_core.exceptions.BadRequest(
380
- "Unknown error trying to retrieve streaming response. "
381
- "Please retry with `stream=False` for more details."
382
- )
383
-
384
-
385
- GENERATE_CONTENT_RESPONSE_DOC = """Instances of this class manage the response of the `generate_content` method.
386
-
387
- These are returned by `GenerativeModel.generate_content` and `ChatSession.send_message`.
388
- This object is based on the low level `glm.GenerateContentResponse` class which just has `prompt_feedback`
389
- and `candidates` attributes. This class adds several quick accessors for common use cases.
390
-
391
- The same object type is returned for both `stream=True/False`.
392
-
393
- ### Streaming
394
-
395
- When you pass `stream=True` to `GenerativeModel.generate_content` or `ChatSession.send_message`,
396
- iterate over this object to receive chunks of the response:
397
-
398
- ```
399
- response = model.generate_content(..., stream=True):
400
- for chunk in response:
401
- print(chunk.text)
402
- ```
403
-
404
- `GenerateContentResponse.prompt_feedback` is available immediately but
405
- `GenerateContentResponse.candidates`, and all the attributes derived from them (`.text`, `.parts`),
406
- are only available after the iteration is complete.
407
  """
408
-
409
- ASYNC_GENERATE_CONTENT_RESPONSE_DOC = (
410
- """This is the async version of `genai.GenerateContentResponse`."""
411
- )
412
-
413
-
414
- @string_utils.set_doc(GENERATE_CONTENT_RESPONSE_DOC)
415
- class GenerateContentResponse(BaseGenerateContentResponse):
416
- @classmethod
417
- def from_iterator(cls, iterator: Iterable[glm.GenerateContentResponse]):
418
- iterator = iter(iterator)
419
- with rewrite_stream_error():
420
- response = next(iterator)
421
-
422
- return cls(
423
- done=False,
424
- iterator=iterator,
425
- result=response,
426
- )
427
-
428
- @classmethod
429
- def from_response(cls, response: glm.GenerateContentResponse):
430
- return cls(
431
- done=True,
432
- iterator=None,
433
- result=response,
434
- )
435
-
436
- def __iter__(self):
437
- # This is not thread safe.
438
- if self._done:
439
- for chunk in self._chunks:
440
- yield GenerateContentResponse.from_response(chunk)
441
- return
442
-
443
- # Always have the next chunk available.
444
- if len(self._chunks) == 0:
445
- self._chunks.append(next(self._iterator))
446
-
447
- for n in itertools.count():
448
- if self._error:
449
- raise self._error
450
-
451
- if n >= len(self._chunks) - 1:
452
- # Look ahead for a new item, so that you know the stream is done
453
- # when you yield the last item.
454
- if self._done:
455
- return
456
-
457
- try:
458
- item = next(self._iterator)
459
- except StopIteration:
460
- self._done = True
461
- except Exception as e:
462
- self._error = e
463
- self._done = True
464
  else:
465
- self._chunks.append(item)
466
- self._result = _join_chunks([self._result, item])
467
-
468
- item = self._chunks[n]
469
-
470
- item = GenerateContentResponse.from_response(item)
471
- yield item
472
-
473
- def resolve(self):
474
- if self._done:
475
- return
476
-
477
- for _ in self:
478
- pass
479
-
480
-
481
- @string_utils.set_doc(ASYNC_GENERATE_CONTENT_RESPONSE_DOC)
482
- class AsyncGenerateContentResponse(BaseGenerateContentResponse):
483
- @classmethod
484
- async def from_aiterator(cls, iterator: AsyncIterable[glm.GenerateContentResponse]):
485
- iterator = aiter(iterator) # type: ignore
486
- with rewrite_stream_error():
487
- response = await anext(iterator) # type: ignore
488
-
489
- return cls(
490
- done=False,
491
- iterator=iterator,
492
- result=response,
493
  )
494
-
495
- @classmethod
496
- def from_response(cls, response: glm.GenerateContentResponse):
497
- return cls(
498
- done=True,
499
- iterator=None,
500
- result=response,
501
  )
502
 
503
- async def __aiter__(self):
504
- # This is not thread safe.
505
- if self._done:
506
- for chunk in self._chunks:
507
- yield GenerateContentResponse.from_response(chunk)
508
- return
509
-
510
- # Always have the next chunk available.
511
- if len(self._chunks) == 0:
512
- self._chunks.append(await anext(self._iterator)) # type: ignore
513
-
514
- for n in itertools.count():
515
- if self._error:
516
- raise self._error
517
-
518
- if n >= len(self._chunks) - 1:
519
- # Look ahead for a new item, so that you know the stream is done
520
- # when you yield the last item.
521
- if self._done:
522
- return
523
-
524
- try:
525
- item = await anext(self._iterator) # type: ignore
526
- except StopAsyncIteration:
527
- self._done = True
528
- except Exception as e:
529
- self._error = e
530
- self._done = True
531
- else:
532
- self._chunks.append(item)
533
- self._result = _join_chunks([self._result, item])
534
-
535
- item = self._chunks[n]
536
-
537
- item = GenerateContentResponse.from_response(item)
538
- yield item
539
-
540
- async def resolve(self):
541
- if self._done:
542
- return
543
-
544
- async for _ in self:
545
- pass
1
+ import streamlit as st
2
+ import requests
3
+ import pandas as pd
4
+ import time
5
+ import json
6
+ import re
7
  import sys
8
+ import subprocess
9
+ from datetime import datetime, date, timedelta
10
+ from urllib.parse import urlencode
11
+ from typing import Dict, List, Optional
12
+ import google.generativeai as genai
13
+ import plotly.express as px
14
 
15
 
16
+ # Handle required package installations and imports
17
+ def install_package(package_name: str) -> None:
18
+ try:
19
+ subprocess.check_call(
20
+ [sys.executable, "-m", "pip", "install", package_name],
21
+ stdout=subprocess.DEVNULL,
22
+ stderr=subprocess.DEVNULL
23
  )
24
+ except subprocess.CalledProcessError as e:
25
+ st.error(f"Failed to install {package_name}: {str(e)}")
26
+ sys.exit(1)
27
+ try:
28
+ import google.generativeai as genai
29
+ except ImportError:
30
+ install_package("google-generativeai")
31
+ import google.generativeai as genai
32
+
33
+ # Import Plotly
34
+ try:
35
+ import plotly.express as px
36
+ except ImportError:
37
+ install_package("plotly")
38
+ import plotly.express as px
39
+ # Set up the application title and layout
40
+ st.set_page_config(
41
+ page_title="Steam App Reviews - Themes Analysis",
42
+ page_icon="🎮",
43
+ layout="wide",
44
+ initial_sidebar_state="expanded"
45
+ )
46
 
47
+ # Custom CSS to improve UI
48
+ st.markdown("""
49
+ <style>
50
+ .main-header {
51
+ font-size: 2.5rem !important;
52
+ color: #1e88e5;
53
+ }
54
+ .theme-card {
55
+ background-color: #f5f5f5;
56
+ border-radius: 10px;
57
+ padding: 1.5rem;
58
+ margin-bottom: 1rem;
59
+ border-left: 5px solid #1e88e5;
60
+ }
61
+ .theme-title {
62
+ font-size: 1.2rem;
63
+ font-weight: bold;
64
+ color: #1e88e5;
65
+ }
66
+ .theme-desc {
67
+ color: #424242;
68
+ margin: 0.5rem 0;
69
+ }
70
+ .theme-count {
71
+ font-size: 0.9rem;
72
+ color: #616161;
73
+ }
74
+ .sentiment-positive {
75
+ background-color: #D5EAD8;
76
+ color: #2E8B57;
77
+ padding: 3px 8px;
78
+ border-radius: 10px;
79
+ }
80
+ .sentiment-negative {
81
+ background-color: #FFE4E1;
82
+ color: #CD5C5C;
83
+ padding: 3px 8px;
84
+ border-radius: 10px;
85
+ }
86
+ .sentiment-mixed {
87
+ background-color: #FFF8DC;
88
+ color: #DAA520;
89
+ padding: 3px 8px;
90
+ border-radius: 10px;
91
+ }
92
+ .app-info {
93
+ background-color: #f0f8ff;
94
+ border-radius: 10px;
95
+ padding: 1rem;
96
+ margin-bottom: 1rem;
97
+ }
98
+ </style>
99
+ """, unsafe_allow_html=True)
100
+
101
+ # Title and description
102
+ st.markdown('<h1 class="main-header">🎮 Steam App Reviews - Themes Analysis</h1>', unsafe_allow_html=True)
103
+ st.markdown("""
104
+ This tool analyzes user reviews for Steam games to identify common themes, sentiments, and feedback patterns.
105
+ Enter your Google Gemini API key and a Steam App ID, select a date range, and get insights from user reviews.
106
+ """)
107
+
108
+ # Initialize session state variables
109
+ if 'reviews_data' not in st.session_state:
110
+ st.session_state['reviews_data'] = None
111
+ if 'themes_df' not in st.session_state:
112
+ st.session_state['themes_df'] = None
113
+ if 'app_info' not in st.session_state:
114
+ st.session_state['app_info'] = None
115
+
116
+ # Sidebar inputs for user interaction
117
+ st.sidebar.header("User Input Parameters")
118
+
119
+ # User input for Google Gemini API key
120
+ api_key_input = st.sidebar.text_input(
121
+ "Enter your Google Gemini API Key:",
122
+ type="password",
123
+ help="Your API key will be used to access the Google Gemini API for theme extraction.",
124
+ )
125
 
126
+ # Initialize Google Gemini client
127
+ gemini_client = None
128
+ if api_key_input:
129
+ try:
130
+ genai.configure(api_key=api_key_input)
131
+ model = genai.GenerativeModel(model_name='gemini-1.5-pro')
132
+ gemini_client = model
133
+ st.sidebar.success("Gemini API connection established!")
134
+ except Exception as e:
135
+ st.sidebar.error(f"Error initializing Gemini API: {str(e)}")
136
+ else:
137
+ st.sidebar.warning("Please enter your Google Gemini API Key to proceed.")
138
+
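+ # Illustrative sketch (not executed by the app): the two google-generativeai
+ # calls this app relies on are genai.configure() and
+ # GenerativeModel.generate_content(), e.g.:
+ #
+ #     genai.configure(api_key="YOUR_KEY")
+ #     reply = genai.GenerativeModel("gemini-1.5-pro").generate_content("ping")
+ #     print(reply.text)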
139
+ # User input for App ID
140
+ appid = st.sidebar.text_input(
141
+ "Enter the Steam App ID:",
142
+ value="1782120",
143
+ help="Find the App ID in the URL of the game's Steam page."
144
+ )
145
 
146
+ # Validate App ID
147
+ def is_valid_app_id(app_id: str) -> bool:
148
+ if not app_id or not app_id.isdigit():
149
+ return False
150
+ return True
151
 
152
+ if not is_valid_app_id(appid):
153
+ st.sidebar.error("Please enter a valid Steam App ID (numeric only).")
 
154
 
155
+ # Date input for selecting a range
156
+ st.sidebar.write("Select the date range for reviews:")
157
+ start_date = st.sidebar.date_input(
158
+ "Start Date",
159
+ value=datetime.today() - timedelta(days=7)
160
+ )
161
+ end_date = st.sidebar.date_input(
162
+ "End Date",
163
+ value=datetime.today()
164
+ )
165
 
166
+ # Validate date range
167
+ if start_date and end_date:
168
+ today = date.today()
169
+
170
+ # Check if end date is in the future
171
+ if end_date > today:
172
+ st.sidebar.error("Error: End date cannot be in the future.")
173
+ st.stop()
174
+
175
+ # Check if start date is after end date
176
+ if start_date > end_date:
177
+ st.sidebar.error("Error: Start date must be before end date.")
178
+ st.stop()
179
+
180
+ # Check if date range is too large
181
+ date_range = (end_date - start_date).days
182
+ if date_range > 365:
183
+ st.sidebar.warning("Warning: Large date ranges may result in incomplete data due to Steam API limitations.")
184
+ elif date_range < 0:
185
+ st.sidebar.error("Error: Invalid date range selected.")
186
+ st.stop()
187
+
188
+ # Maximum reviews to fetch
189
+ max_reviews = st.sidebar.slider(
190
+ "Maximum reviews to fetch:",
191
+ min_value=50,
192
+ max_value=500,
193
+ value=200,
194
+ step=50,
195
+ help="Higher values may take longer to process."
196
+ )
197
 
198
+ # Language filter
199
+ language_filter = st.sidebar.multiselect(
200
+ "Filter by languages:",
201
+ options=["english", "spanish", "french", "german", "italian", "russian", "all"],
202
+ default=["english"],
203
+ help="Select 'all' to include all languages or choose specific languages."
204
+ )
205
 
206
+ # Advanced options
207
+ advanced_options = st.sidebar.expander("Advanced Analysis Options")
208
+ with advanced_options:
209
+ include_sentiment = st.checkbox(
210
+ "Include sentiment analysis",
211
+ value=True,
212
+ help="Analyze the sentiment of each review and theme."
213
  )
214
+
215
+ cluster_similar_themes = st.checkbox(
216
+ "Cluster similar themes",
217
+ value=True,
218
+ help="Group themes that are semantically similar."
219
  )
220
+
221
+ min_mention_threshold = st.slider(
222
+ "Minimum reviews per theme:",
223
+ min_value=1,
224
+ max_value=10,
225
+ value=2,
226
+ help="Only show themes mentioned in at least this many reviews."
227
  )
228
 
229
+ # Function to fetch app information
230
+ @st.cache_data(ttl=3600, show_spinner=False)
231
+ def get_app_info(app_id: str) -> Optional[Dict]:
232
+ """
233
+ Fetches information about a Steam game using its App ID.
234
+ """
235
  try:
236
+ url = f"https://store.steampowered.com/api/appdetails?appids={app_id}"
237
+ response = requests.get(url, timeout=10)
238
+ response.raise_for_status()
239
+ data = response.json()
240
+
241
+ if data.get(app_id, {}).get('success', False):
242
+ app_data = data[app_id]['data']
243
+ return {
244
+ 'name': app_data.get('name', 'Unknown Game'),
245
+ 'header_image': app_data.get('header_image', ''),
246
+ 'release_date': app_data.get('release_date', {}).get('date', 'Unknown'),
247
+ 'developers': app_data.get('developers', ['Unknown']),
248
+ 'publishers': app_data.get('publishers', ['Unknown'])
249
+ }
250
+ return None
251
+ except Exception as e:
252
+ st.sidebar.error(f"Error fetching app info: {str(e)}")
253
+ return None
254
+
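+ # Illustrative usage of get_app_info() outside Streamlit (App ID 1782120 is
+ # just the sidebar's default example):
+ #
+ #     info = get_app_info("1782120")
+ #     if info:
+ #         print(info["name"], "-", info["release_date"])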
255
+ # Function to fetch reviews
256
+ @st.cache_data(ttl=1800, show_spinner=False)
257
+ def fetch_reviews(app_id: str, start_timestamp: int, end_timestamp: int,
258
+ max_reviews: int = 1000, language_filter: List[str] = ["english"]) -> Optional[List]:
259
  """
260
+ Fetches Steam reviews for the specified app within the given date range.
261
+ Implements batch processing and caching for efficient handling of large volumes.
262
+ """
263
+ # Define the base API URL
264
+ base_url = f"https://store.steampowered.com/appreviews/{app_id}?json=1"
265
+
266
+ # Normalize language filter and handle 'all' case
267
+ normalized_language_filter = [lang.lower() for lang in language_filter]
268
+ use_all_languages = "all" in normalized_language_filter
269
+
270
+ # Calculate day range dynamically based on start and end timestamps
271
+ day_range = min(365, (end_timestamp - start_timestamp) // 86400 + 1)
272
+
273
+ # Define initial API parameters with optimized batch size
274
+ params = {
275
+ "filter": "updated", # Use 'updated' to get all reviews in date range
276
+ "language": "all" if use_all_languages else ",".join(normalized_language_filter),
277
+ "day_range": str(day_range),
278
+ "review_type": "all",
279
+ "purchase_type": "all",
280
+ "num_per_page": "100", # Maximum allowed by Steam API
281
+ "cursor": "*",
282
+ "filter_offtopic_activity": 0,
283
+ "start_date": start_timestamp,
284
+ "end_date": end_timestamp
285
+ }
286
+
287
+ # Initialize cache for review batches
288
+ if 'review_cache' not in st.session_state:
289
+ st.session_state.review_cache = {}
290
+ cache_key = f"{app_id}_{start_timestamp}_{end_timestamp}_{language_filter}"
291
+
292
+ # Check cache first
293
+ if cache_key in st.session_state.review_cache:
294
+ cached_reviews = st.session_state.review_cache[cache_key]
295
+ if len(cached_reviews) >= max_reviews:
296
+ return cached_reviews[:max_reviews]
297
+
298
+ reviews_list = []
299
+ request_count = 0
300
+ max_requests = 100 # Increased limit for larger datasets
301
+ retry_attempts = 3 # Number of retry attempts for failed requests
302
+ batch_size = 100 # Size of each batch
303
+
304
+ progress_bar = st.progress(0)
305
+ status_text = st.empty()
306
+
307
+ # Create a container for batch progress
308
+ batch_container = st.empty()
309
+
310
+ while True:
311
+ # urlencode() percent-encodes parameter values (including any '+' in the
+ # cursor) itself, so the raw params dict can be passed straight through;
+ # pre-replacing '+' with '%2B' here would double-encode the cursor.
+ url = base_url + "&" + urlencode(params)
317
+
318
+ try:
319
+ for attempt in range(retry_attempts):
320
+ response = requests.get(url, timeout=15)
321
+ response.raise_for_status()
322
+ data = response.json()
323
+
324
+ # Check if we have any reviews
325
+ reviews = data.get('reviews')
326
+ if not reviews:
+ if not reviews_list:
+ status_text.warning("No reviews found for the specified date range and filters.")
+ return reviews_list  # Return whatever has already been collected
329
+
330
+ # Process reviews
331
+ for review in reviews:
332
+ timestamp = review.get("timestamp_created", 0)
333
+ review_language = review.get("language", "").lower()
334
+
335
+ # Validate timestamp is within range
336
+ is_in_timerange = start_timestamp <= timestamp <= end_timestamp
337
+
338
+ # Check language filter
339
+ is_valid_language = use_all_languages or review_language in normalized_language_filter
340
+
341
+ if is_in_timerange and is_valid_language:
342
+ reviews_list.append(review)
343
+
344
+ # Update progress
345
+ progress = min(len(reviews_list) / max_reviews * 100, 100)
346
+ progress_bar.progress(int(progress))
347
+ status_text.text(f"Fetched {len(reviews_list)} reviews...")
348
+
349
+ # Check if we've reached max reviews or earlier timestamp
350
+ if len(reviews_list) >= max_reviews:
351
+ break
352
+ if any(r.get("timestamp_created", 0) < start_timestamp for r in reviews):
353
+ break
354
+
355
+ # Update cursor for next batch
356
+ new_cursor = data.get("cursor")
357
+ if new_cursor is None or params["cursor"] == new_cursor:
358
+ break
359
+ params["cursor"] = new_cursor
360
+
361
+ # Handle rate limiting
362
+ if 'X-Rate-Limit-Remaining' in response.headers:
363
+ remaining_calls = int(response.headers['X-Rate-Limit-Remaining'])
364
+ time.sleep(0.5 if remaining_calls < 10 else 0.2)
365
  else:
366
+ time.sleep(0.2)
367
+
368
+ # Update batch progress
369
+ batch_container.text(f"Processing batch {request_count + 1} of {max_requests} (max)")
370
+
371
+ # Check request limits
372
+ request_count += 1
373
+ if request_count >= max_requests:
374
+ status_text.warning("Reached maximum number of requests. Some reviews may not be fetched.")
375
+ break
376
+
377
+ break # Success - exit retry loop
378
+
379
+ except requests.exceptions.RequestException as e:
380
+ status_text.error(f"Steam API Error: {str(e)}")
381
+ if attempt < retry_attempts - 1:
382
+ time.sleep(1) # Wait before retrying
383
+ continue
384
+ return None
+
+ # Exit the pagination loop once enough reviews have been collected
+ if len(reviews_list) >= max_reviews:
+ break
386
+ # Clean up progress indicators
387
+ progress_bar.empty()
388
+ status_text.empty()
389
+ batch_container.empty()
390
+
391
+ # Cache and return results
392
+ st.session_state.review_cache[cache_key] = reviews_list
393
+ return reviews_list[:max_reviews]
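+ # The loop above follows Steam's cursor-based pagination: each response
+ # returns a `cursor` token that is passed back to fetch the next page, and
+ # a repeated or missing cursor means the stream is exhausted. A stripped-down
+ # sketch of just that contract:
+ #
+ #     cursor = "*"
+ #     while True:
+ #         resp = requests.get(
+ #             f"https://store.steampowered.com/appreviews/{app_id}",
+ #             params={"json": 1, "cursor": cursor, "num_per_page": 100},
+ #             timeout=15,
+ #         ).json()
+ #         if not resp.get("reviews") or resp.get("cursor") == cursor:
+ #             break
+ #         cursor = resp["cursor"]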
394
+ # Function to extract themes using Google Gemini 1.5 Pro
395
+ def extract_themes(df: pd.DataFrame,
396
+ include_sentiment: bool = True,
397
+ cluster_similar_themes: bool = True,
398
+ min_mention_threshold: int = 2) -> Optional[pd.DataFrame]:
399
+ """
400
+ Uses Google Gemini 1.5 Pro to identify the most common themes in reviews.
401
+ Caches results so that identical review sets are not re-analyzed.
402
+ """
403
+ if len(df) == 0:
404
+ st.error("No reviews to analyze.")
405
+ return None
406
+
407
+ # Get counts of positive and negative reviews (if available)
408
+ positive_count = 0
409
+ negative_count = 0
410
+
411
+ if "Recommended" in df.columns:
412
+ positive_count = df["Recommended"].sum()
413
+ negative_count = len(df) - positive_count
414
+
415
+ # Initialize theme cache
416
+ if 'theme_cache' not in st.session_state:
417
+ st.session_state.theme_cache = {}
418
+
419
+ # Calculate cache key based on review content hash
420
+ cache_key = hash(tuple(sorted(df['Review'].values)))
421
+
422
+ # Check cache first
423
+ if cache_key in st.session_state.theme_cache:
424
+ return st.session_state.theme_cache[cache_key]
425
+
426
+ # Gemini 1.5 Pro's long context window comfortably holds a few hundred
+ # reviews, so the whole review set is sent in a single call rather than
+ # being split into batches.
438
+
439
+ # Combine reviews into a single string with IDs
440
+ reviews_text = "\n\n".join([
441
+ f"Review ID: {row['Review ID']}\nReview Text: {row['Review']}"
442
+ for _, row in df.iterrows()
443
+ ])
444
+
445
+ # Prepare the prompt
446
+ sentiment_instruction = "For each theme, analyze the sentiment (Positive, Negative, or Mixed)." if include_sentiment else ""
447
+ clustering_instruction = "Cluster similar themes together." if cluster_similar_themes else ""
448
+
449
+ # Fix the JSON template structure
450
+ sentiment_field = '"Sentiment": "Positive/Negative/Mixed",' if include_sentiment else ""
451
+
452
+ prompt = f"""
453
+ Analyze these {len(df)} user reviews for a game with {positive_count} positive and {negative_count} negative reviews.
454
+
455
+ Identify significant themes. {clustering_instruction}
456
+ For each theme:
457
+ 1. Provide a concise, specific name
458
+ 2. Write a detailed description summarizing user feedback
459
+ 3. List the Review IDs where the theme is mentioned
460
+ 4. {sentiment_instruction}
461
+
462
+ Only include themes mentioned in at least {min_mention_threshold} different reviews.
463
+
464
+ Provide the output as a JSON array with the following structure:
465
+ [
466
+ {{
467
+ "Theme": "theme_name",
468
+ "Description": "detailed_description",
469
+ "Review IDs": ["id1", "id2", ...],
470
+ {sentiment_field}
471
+ }},
472
+ ...
473
+ ]
474
+
475
+ Reviews:
476
+ {reviews_text}
477
+ """
478
+
479
+ # Call Google Gemini 1.5 Pro
480
+ try:
481
+ with st.spinner("Analyzing themes with Google Gemini 1.5 Pro..."):
482
+ response = model.generate_content(prompt)
483
+
484
+ # Extract text from the response
485
+ if hasattr(response, 'text'):
486
+ response_text = response.text
487
+ elif hasattr(response, 'parts') and response.parts:
488
+ response_text = response.parts[0].text
489
+ else:
490
+ response_text = str(response)
491
+
492
+ # Clean and parse the response text
493
+ # First try to extract JSON from code blocks
494
+ json_pattern = r'```(?:json)?(.*?)```'
495
+ json_matches = re.findall(json_pattern, response_text, re.DOTALL)
496
+
497
+ if json_matches:
498
+ # Use the first JSON block found
499
+ json_str = json_matches[0].strip()
500
+ else:
501
+ # If no code blocks, try to use the entire response as JSON
502
+ # Remove any markdown formatting or extra whitespace
503
+ json_str = response_text.strip()
504
+
505
+ # Parse the JSON output
506
+ themes_data = json.loads(json_str)
507
+
508
+ # Convert to DataFrame and add count column
509
+ themes_df = pd.DataFrame(themes_data)
510
+ themes_df["Count"] = themes_df["Review IDs"].apply(len)
511
+
512
+ # Sort themes by count (descending)
513
+ themes_df = themes_df.sort_values("Count", ascending=False).reset_index(drop=True)
514
+
515
+ # Cache the result so identical review sets are not re-analyzed
+ st.session_state.theme_cache[cache_key] = themes_df
+ return themes_df
516
+
517
+ except Exception as e:
518
+ st.error(f"Error extracting themes: {str(e)}")
519
+ st.error("Response from Gemini API:")
520
+ if 'response' in locals():
521
+ try:
522
+ if hasattr(response, 'text'):
523
+ error_text = response.text
524
+ elif hasattr(response, 'parts') and response.parts:
525
+ error_text = response.parts[0].text
526
+ else:
527
+ error_text = str(response)
528
+ st.error(error_text)
529
+ except Exception as display_error:  # Avoid shadowing the outer exception
+ st.error(f"Error displaying response: {str(display_error)}")
531
+ return None
532
+
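+ # The fence-stripping above exists because Gemini often wraps JSON answers
+ # in Markdown code fences. A minimal sketch of the same parsing idea:
+ #
+ #     raw = '```json\n[{"Theme": "Performance"}]\n```'
+ #     m = re.findall(r'```(?:json)?(.*?)```', raw, re.DOTALL)
+ #     data = json.loads(m[0].strip() if m else raw.strip())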
533
+ # Function to create visualizations
534
+ def create_visualizations(themes_df: pd.DataFrame, reviews_df: pd.DataFrame):
535
+ """
536
+ Creates visualizations for the theme analysis.
537
+ """
538
+ col1, col2 = st.columns(2)
539
+
540
+ # Theme distribution chart
541
+ with col1:
542
+ theme_counts = themes_df[["Theme", "Count"]]
543
+ fig = px.bar(
544
+ theme_counts,
545
+ x="Count", y="Theme", orientation="h",
546
+ title="Theme Distribution",
547
+
548
  )
549
+ fig.update_layout(height=400)
550
+ st.plotly_chart(fig, use_container_width=True)
551
+
552
+ # Sentiment analysis chart (if available)
553
+ with col2:
554
+ if "Sentiment" in themes_df.columns:
555
+ sentiment_counts = themes_df["Sentiment"].value_counts().reset_index()
556
+ sentiment_counts.columns = ["Sentiment", "Count"]
557
+ fig = px.pie(
558
+ sentiment_counts,
559
+ values="Count", names="Sentiment",
560
+ title="Theme Sentiment Distribution",
561
+ color="Sentiment",
562
+ color_discrete_map={"Positive": "#2E8B57", "Negative": "#CD5C5C", "Mixed": "#DAA520"},
563
+ )
564
+ fig.update_layout(height=400)
565
+ st.plotly_chart(fig, use_container_width=True)
566
+
567
+ # Review timeline (if timestamp available)
568
+ if "Timestamp" in reviews_df.columns:
569
+ # Convert timestamp to datetime
570
+ reviews_df["Date"] = pd.to_datetime(reviews_df["Timestamp"], unit='s')
571
+
572
+ # Group by date and count
573
+ reviews_by_date = reviews_df.groupby(reviews_df["Date"].dt.date).size().reset_index()
574
+ reviews_by_date.columns = ["Date", "Count"]
575
+
576
+ # Create timeline chart
577
+ fig = px.line(
578
+ reviews_by_date,
579
+ x="Date", y="Count",
580
+ title="Reviews Timeline",
581
+ markers=True
582
  )
583
+ st.plotly_chart(fig, use_container_width=True)
584
+
585
+ # Validate inputs before processing
586
+ if start_date > end_date:
587
+ st.error("Error: End date must fall after start date.")
588
+ elif not api_key_input:
589
+ st.info("Please input your Google Gemini API Key to proceed.")
590
+ elif not is_valid_app_id(appid):
591
+ st.error("Please enter a valid Steam App ID.")
592
+ else:
593
+ # Fetch app info
594
+ if st.session_state['app_info'] is None or st.session_state.get('current_appid') != appid:
595
+ st.session_state['app_info'] = get_app_info(appid)
596
+ st.session_state['current_appid'] = appid
597
+
598
+ # Display app info if available
599
+ if st.session_state['app_info']:
600
+ app_info = st.session_state['app_info']
601
+ col1, col2 = st.columns([1, 3])
602
+
603
+ with col1:
604
+ st.image(app_info['header_image'], width=200)
605
+
606
+ with col2:
607
+ st.markdown(f"""
608
+ <div class='app-info'>
609
+ <h2>{app_info['name']}</h2>
610
+ <p><strong>Release Date:</strong> {app_info['release_date']}</p>
611
+ <p><strong>Developers:</strong> {', '.join(app_info['developers'])}</p>
612
+ <p><strong>Publishers:</strong> {', '.join(app_info['publishers'])}</p>
613
+ </div>
614
+ """, unsafe_allow_html=True)
615
+
616
+ # Fetch reviews button
617
+ if st.button("Fetch and Analyze Reviews", type="primary"):
618
+ # Convert dates to timestamps
619
+ start_timestamp = int(time.mktime(start_date.timetuple()))
620
+ end_timestamp = int(time.mktime((end_date + timedelta(days=1)).timetuple())) - 1 # Include the entire end date
621
+
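+ # Worked example of the timestamp math above: for an end_date of 2024-01-31,
+ # mktime(2024-02-01 00:00:00) - 1 gives 2024-01-31 23:59:59 local time, so
+ # reviews posted late on the end date still fall inside the range.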
622
+ # Fetch the reviews
623
+ with st.spinner("Fetching reviews from Steam..."):
624
+ reviews_data = fetch_reviews(
625
+ appid,
626
+ start_timestamp,
627
+ end_timestamp,
628
+ max_reviews=max_reviews,
629
+ language_filter=language_filter
630
+ )
631
+ st.session_state['reviews_data'] = reviews_data
632
+
633
+ # Check if reviews were fetched
634
+ if reviews_data:
635
+ st.success(f"Fetched {len(reviews_data)} reviews from App ID {appid}.")
636
+
637
+ # Create a DataFrame from the review data
638
+ df = pd.DataFrame(
639
+ [
640
+ {
641
+ "Review ID": str(review.get("recommendationid")),
642
+ "Author SteamID": review.get("author", {}).get("steamid"),
643
+ "Language": review.get("language"),
644
+ "Review": review.get("review"),
645
+ "Recommended": review.get("voted_up", False),
646
+ "Votes Helpful": review.get("votes_up", 0),
647
+ "Timestamp": review.get("timestamp_created", 0),
648
+ "Posted On": datetime.fromtimestamp(
649
+ review.get("timestamp_created", 0)
650
+ ).strftime("%Y-%m-%d %H:%M:%S"),
651
+ }
652
+ for review in reviews_data
653
+ ]
654
+ )
655
 
656
+ # Extract themes using Google Gemini 1.5 Pro
657
+ themes_df = extract_themes(
658
+ df,
659
+ include_sentiment=include_sentiment,
660
+ cluster_similar_themes=cluster_similar_themes,
661
+ min_mention_threshold=min_mention_threshold
662
+ )
663
+ st.session_state['themes_df'] = themes_df
664
+
665
+ if themes_df is not None:
666
+ # Show summary statistics
667
+ col1, col2, col3, col4 = st.columns(4)
668
+ with col1:
669
+ st.metric("Total Reviews", len(df))
670
+ with col2:
671
+ positive_count = df["Recommended"].sum()
672
+ positive_percent = (positive_count / len(df)) * 100 if len(df) > 0 else 0
673
+ st.metric("Positive Reviews", f"{positive_count} ({positive_percent:.1f}%)")
674
+ with col3:
675
+ negative_count = len(df) - positive_count
676
+ negative_percent = (negative_count / len(df)) * 100 if len(df) > 0 else 0
677
+ st.metric("Negative Reviews", f"{negative_count} ({negative_percent:.1f}%)")
678
+ with col4:
679
+ st.metric("Themes Identified", len(themes_df))
680
+
681
+ # Create visualizations
682
+ create_visualizations(themes_df, df)
683
+
684
+ # Show themes analysis
685
+ st.markdown("## 📊 Theme Analysis")
686
+ st.dataframe(themes_df)
687
+
688
+ # Display detailed theme information
689
+ st.markdown("## 🔍 Detailed Theme Analysis")
690
+
691
+ for index, row in themes_df.iterrows():
692
+ theme = row["Theme"]
693
+ description = row["Description"]
694
+ review_ids = row["Review IDs"]
695
+ count = row["Count"]
696
+ sentiment = row.get("Sentiment", "Not analyzed")
697
+
698
+ # Create a sentiment badge with appropriate styling
699
+ sentiment_class = ""
700
+ if sentiment == "Positive":
701
+ sentiment_class = "sentiment-positive"
702
+ elif sentiment == "Negative":
703
+ sentiment_class = "sentiment-negative"
704
+ elif sentiment == "Mixed":
705
+ sentiment_class = "sentiment-mixed"
706
+
707
+ # Display theme card with enhanced formatting
708
+ sentiment_html = f'<span class="{sentiment_class}">{sentiment}</span>' if sentiment != "Not analyzed" else ""
709
+
710
+ st.markdown(f"""
711
+ <div class="theme-card">
712
+ <div class="theme-title">{theme} {sentiment_html}</div>
713
+ <p class="theme-desc">{description}</p>
714
+ <div class="theme-count">Mentioned in {count} reviews</div>
715
+ </div>
716
+ """, unsafe_allow_html=True)
717
+
718
+ with st.expander(f"View reviews mentioning '{theme}'"):
719
+ # Get the reviews that mention the theme
720
+ try:
721
+ reviews_with_theme = df[df["Review ID"].isin(review_ids)][["Review ID", "Review", "Posted On", "Recommended"]]
722
+ st.dataframe(reviews_with_theme, use_container_width=True)
723
+ except Exception as e:
724
+ st.error(f"Error displaying reviews for theme '{theme}': {str(e)}")
725
+
726
+ # Export options
727
+ st.markdown("## 📥 Export Results")
728
+ col1, col2 = st.columns(2)
729
+
730
+ with col1:
731
+ # Export reviews as CSV
732
+ reviews_csv = df.to_csv(index=False).encode('utf-8')
733
+ st.download_button(
734
+ label="Download Reviews CSV",
735
+ data=reviews_csv,
736
+ file_name=f"steam_reviews_{appid}_{start_date}_to_{end_date}.csv",
737
+ mime="text/csv"
738
+ )
739
+
740
+ with col2:
741
+ # Export themes as CSV
742
+ themes_csv = themes_df.to_csv(index=False).encode('utf-8')
743
+ st.download_button(
744
+ label="Download Themes Analysis CSV",
745
+ data=themes_csv,
746
+ file_name=f"steam_themes_{appid}_{start_date}_to_{end_date}.csv",
747
+ mime="text/csv"
748
+ )
749
+ else:
750
+ st.warning("Failed to extract themes. Please try again or adjust parameters.")
751
+ else:
752
+ st.warning("No reviews found for the specified date range and filters.")
753
+
754
+ # Display the raw reviews data if available
755
+ if st.session_state['reviews_data'] is not None:
756
+ with st.expander("View Raw Reviews Data"):
757
+ reviews_df = pd.DataFrame(
758
+ [
759
+ {
760
+ "Review ID": str(review.get("recommendationid")),
761
+ "Author SteamID": review.get("author", {}).get("steamid"),
762
+ "Language": review.get("language"),
763
+ "Review": review.get("review"),
764
+ "Recommended": review.get("voted_up", False),
765
+ "Votes Helpful": review.get("votes_up", 0),
766
+ "Posted On": datetime.fromtimestamp(
767
+ review.get("timestamp_created", 0)
768
+ ).strftime("%Y-%m-%d %H:%M:%S"),
769
+ }
770
+ for review in st.session_state['reviews_data']
771
+ ]
772
+ )
773
+ st.dataframe(reviews_df, use_container_width=True)
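+ # To run this dashboard locally (standard Streamlit invocation):
+ #
+ #     streamlit run themes-ui.py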