hydraadra112 committed on
Commit
55c4810
·
0 Parent(s):

Created streamlit platform for SciDocuParse

Browse files
Files changed (7) hide show
  1. .gitattributes +37 -0
  2. .gitignore +174 -0
  3. README.md +14 -0
  4. SciDocuParse.py +60 -0
  5. playground.ipynb +159 -0
  6. requirements.txt +4 -0
  7. utils.py +64 -0
.gitattributes ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/logoJPG.jpg filter=lfs diff=lfs merge=lfs -text
37
+ assets/logo_trans.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: SciDocuParse
3
+ emoji: 🏆
4
+ colorFrom: yellow
5
+ colorTo: yellow
6
+ sdk: streamlit
7
+ sdk_version: 1.43.2
8
+ app_file: SciDocuParse.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: A scientific document graph interpreter & analyzer
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
SciDocuParse.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from utils import get_api_key, get_response
3
+
4
def main():
    """Render the SciDocuParse Streamlit page and run the LLM analysis on demand.

    The page collects a BibTeX citation and a free-form query. When the
    button is pressed, both are sent (prefixed with a fixed persona
    instruction) to ``get_response`` and the returned thought process and
    answer are stored in ``st.session_state`` and displayed.
    """
    st.header('Welcome to SciDocuParse! 🧑‍🔬📚')
    st.write('A scientific document parser, particularly specializing in graph analysis 📊 and data interpretation 🔍.')

    # Streamlit re-executes the script on every widget interaction. Seed the
    # keys only when they are missing so the last analysis stays on screen
    # instead of being wiped by each rerun.
    if "thought_process" not in st.session_state:
        st.session_state["thought_process"] = ""
    if "response" not in st.session_state:
        st.session_state["response"] = ""

    default_citation = "\n".join([
        "@article{wang2020automated,",
        "  title={Automated diabetic retinopathy grading and lesion detection based on the modified R-FCN object-detection algorithm},",
        "  author={Wang, Jialiang and Luo, Jianxu and Liu, Bin and Feng, Rui and Lu, Lina and Zou, Haidong},",
        "  journal={IET Computer Vision},",
        "  volume={14},",
        "  number={1},",
        "  pages={1--8},",
        "  year={2020},",
        "  publisher={Wiley Online Library}",
        "}",
    ])

    # NOTE(review): the original nesting was lost in extraction; the citation
    # input is assumed to live in the sidebar — confirm against the repo.
    with st.sidebar:
        st.header('SciDocuParse Sidebar 🔧')
        st.caption('A tool to help you analyze scientific papers and documents efficiently! 📝')

        paper = st.text_area('Paste scientific document citation here 🧑‍🏫',
                             default_citation,
                             height=350,
                             help='Paste your document citation in BiBtex format.')

        if not paper:
            st.error('Provide a citation first! ⚠️')

    user_prompt = st.text_area("Enter your query for analysis 🔍:",
                               "Summarize this document and highlight key findings in graphs 📈")

    persona = (
        "You are a master Scientific Graph Analyzer skilled in interpreting graphs across all fields. "
        "Analyze trends (linear/exponential growth, correlations, outliers) and statistical patterns (mean, variance). "
        "Summarize key findings in plain language. Explain data about causality, anomalies, or data limitations. "
        "Prioritize clarity: ensure outputs are accessible to technical and non-technical audiences. "
        "Combine technical precision with intuitive communication to deliver accurate, user-friendly interpretations."
    )

    # Separate the three parts so the persona, the citation and the query do
    # not run into each other as a single unbroken sentence.
    full_prompt = "\n\n".join([persona, paper, user_prompt])

    # Only spend tokens when a citation is actually present; the error shown
    # next to the citation field already tells the user what is missing.
    if st.button('Analyze with LLM 🚀') and paper:
        with st.spinner('Processing your document...'):
            api_key = get_api_key()
            thought_process, response = get_response(full_prompt, api_key)

        st.session_state["thought_process"] = thought_process
        st.session_state["response"] = response

    # Always render from session state: the locals above exist only on the
    # run in which the button was pressed.
    stored_thoughts = st.session_state["thought_process"]
    stored_response = st.session_state["response"]

    if stored_thoughts:
        with st.expander('Show thought process 💭'):
            st.caption(stored_thoughts)

    # Show the answer even when the model returned no thought process.
    if stored_response:
        st.subheader('RESPONSE 📝')
        st.write(stored_response)

    st.caption('SciDocuParse is made by John Manuel Carado')
    st.caption('Intelligent Systems course in WVSU - CICT, Midterm Requirement')
59
+ if __name__ == '__main__':
60
+ main()
playground.ipynb ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "import re\n",
11
+ "from dotenv import load_dotenv\n",
12
+ "from typing import Tuple\n",
13
+ "from groq import Groq"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": 2,
19
+ "metadata": {},
20
+ "outputs": [],
21
+ "source": [
22
+ "load_dotenv()\n",
23
+ "api_key = os.getenv('key')\n",
24
+ "\n",
25
+ "prompt = 'How to get your first paper published?'"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 7,
31
+ "metadata": {},
32
+ "outputs": [],
33
+ "source": [
34
+ "client = Groq(api_key=api_key)\n",
35
+ "completion = client.chat.completions.create(\n",
36
+ " model=\"deepseek-r1-distill-llama-70b\",\n",
37
+ " messages=[\n",
38
+ " {\n",
39
+ " 'role': 'user',\n",
40
+ " 'content': prompt\n",
41
+ " }\n",
42
+ " ],\n",
43
+ " temperature=0.6,\n",
44
+ " max_completion_tokens=4096,\n",
45
+ " top_p=0.95,\n",
46
+ " stream=True,\n",
47
+ " stop=None,\n",
48
+ ")\n",
49
+ "\n",
50
+ "chunks = []\n",
51
+ "for chunk in completion:\n",
52
+ " current_chunk = chunk.choices[0].delta.content or \"\"\n",
53
+ " chunks.append(current_chunk)"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 8,
59
+ "metadata": {},
60
+ "outputs": [],
61
+ "source": [
62
+ "full_response = \"\".join(chunks)"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": 9,
68
+ "metadata": {},
69
+ "outputs": [
70
+ {
71
+ "data": {
72
+ "text/plain": [
73
+ "\"<think>\\nOkay, so I want to get my first paper published, but I'm not really sure where to start. I've heard that publishing research is a big deal in academia, but the process seems pretty intimidating. Let me try to break this down step by step.\\n\\nFirst, I think I need to have some research done. I remember my professor mentioning that I should start by identifying a gap in the literature. But wait, how do I even find that gap? I guess I need to read a lot of papers in my field. Maybe I can start by looking at some recent studies and see where there's something missing or where more research is needed. But I'm not exactly sure how to efficiently find these gaps. Do I just read everything and hope I spot something? That seems time-consuming.\\n\\nOnce I have an idea, I need to design a study. I'm a bit confused about the methodology part. Should I go for an experimental approach or maybe a review? I think it depends on what I'm researching. If I'm testing a hypothesis, an experiment makes sense, but if I'm synthesizing existing information, a review might be better. But I'm not sure which one is more likely to get accepted, especially as a first-time author.\\n\\nAfter designing the study, I need to conduct the research. This part might take a while. I'm worried about collecting enough data and ensuring it's reliable. What if my results aren't significant? Does that mean my paper won't get published? I guess even negative results can be valuable, but I'm not certain how journals view them.\\n\\nNext, I need to write the paper. I'm a bit overwhelmed by the structure: title, abstract, introduction, methods, results, discussion, conclusion, references. Each section has its own requirements. The abstract is supposed to summarize everything, but I'm not sure how to make it concise yet. The introduction needs to set up the problem, but I'm not confident in my ability to clearly state the research gap. 
I've heard that the discussion section is where I interpret the results, but I'm worried about overstepping and making unsupported claims.\\n\\nChoosing the right journal is another hurdle. There are so many journals out there, and each has different scopes and impact factors. How do I pick one that's a good fit? I don't want to aim too high and get rejected, but I also don't want to aim too low. Maybe I should look at where similar studies have been published. But how do I assess the impact factor? Is it just about the number, or are there other factors?\\n\\nOnce the paper is written, I need to format it according to the journal's guidelines. This includes things like citation style, font, margins, etc. I'm a bit nervous about missing some formatting detail and having the paper rejected because of that. Maybe I should double-check the guidelines multiple times or use a template.\\n\\nThen comes the submission process. I think most journals use online systems, so I'll need to create an account and upload my manuscript. I'm a bit confused about what to include besides the manuscript—like a cover letter. What should I write in the cover letter? Do I just state the title and that it's original work, or is there more to it?\\n\\nAfter submission, the waiting game begins. I've heard that peer review can take months, which is a long time. What happens if the reviewers reject my paper? I guess I can revise and resubmit, but I'm not sure how to handle negative feedback. It might be discouraging, especially if I'm really invested in the work.\\n\\nIf it gets accepted, I'll have to deal with the production process—proofreading, layouts, etc. I'm not sure how involved that is. Do I need to check every single detail, or is it handled by the journal? I hope they catch any mistakes, but I know I should still review it carefully.\\n\\nPromotion is something I hadn't thought about. After publication, I should probably share it on social media and maybe my institution's platform. 
But I'm not very active on these platforms, so I'm not sure how effective that will be. Maybe I should set up a few profiles just for this purpose.\\n\\nEthical considerations are important too. I need to make sure I didn't plagiarize anything, even accidentally. Using proper citations is crucial, but I'm sometimes unsure if I'm citing correctly. Also, if I used any data that's sensitive, I need to anonymize it properly. I should probably run my paper through a plagiarism checker before submission.\\n\\nStaying persistent is the last step. I know publishing can involve multiple rejections, but I'm not sure how to handle that mentally. It might be tough to keep going after a rejection, but I guess every successful researcher has been through that.\\n\\nOverall, I think the key steps are: identify a research gap, design the study, conduct the research, write the paper, choose the right journal, format correctly, submit, handle peer review, and then promote the work. But each step has its own challenges, and I'm not entirely confident in my ability to navigate them all. Maybe I should seek guidance from my professor or a mentor to help me through the process. They might have tips or can review my work before I submit it. I should also start early and give myself plenty of time, especially since peer review can take a while. It's a lot to take in, but breaking it down into manageable steps makes it feel more achievable.\\n</think>\\n\\nPublishing your first research paper is a significant academic milestone. Here's a structured approach to guide you through the process, addressing each step with clarity and confidence:\\n\\n### 1. Identify a Research Gap\\n- **Literature Review:** Begin by reading recent studies in your field. Use academic databases like PubMed, Google Scholar, or Web of Science to find relevant papers.\\n- **Spotting Gaps:** Look for areas where the current research is lacking or inconclusive. 
This could be a understudied population, a novel application of a method, or unanswered questions.\\n\\n### 2. Design Your Study\\n- **Methodology:** Choose between experimental, observational, or review studies based on your research question. Experimental designs test hypotheses, while reviews synthesize existing data.\\n- **Feasibility:** Ensure your study is feasible with available resources and time.\\n\\n### 3. Conduct the Research\\n- **Data Collection:** Collect data meticulously, ensuring reliability. Consider using pilot studies to test your methods.\\n- **Ethical Considerations:** Obtain necessary approvals (e.g., IRB) and ensure confidentiality and informed consent.\\n\\n### 4. Write the Paper\\n- **Structure:** Organize your paper into sections: Title, Abstract, Introduction, Methods, Results, Discussion, Conclusion, References.\\n - **Abstract:** Summarize your study succinctly, highlighting objectives, methods, results, and conclusions.\\n - **Introduction:** Clearly state the research gap and objectives.\\n - **Discussion:** Interpret results, relate them to existing literature, and discuss implications.\\n\\n### 5. Choose the Right Journal\\n- **Journal Selection:** Consider journals where similar studies have been published. Check the scope, impact factor, and audience.\\n- **Target Appropriately:** Balance between high-impact and realistic targets. Consider open-access options for broader reach.\\n\\n### 6. Format and Submit\\n- **Formatting:** Adhere strictly to the journal’s guidelines, including citation style (e.g., APA, MLA).\\n- **Cover Letter:** Briefly introduce your manuscript, state its originality, and mention any conflicts of interest.\\n\\n### 7. Peer Review Process\\n- **Submission:** Use the journal’s online system. Be prepared for a wait, as peer review can take months.\\n- **Revisions:** If revisions are requested, address them thoroughly. Use feedback to improve your manuscript.\\n\\n### 8. 
Post-Acceptance\\n- **Production:** Review proofs carefully for errors. Ensure all figures and tables are correctly placed.\\n- **Promotion:** Share your paper on social media, academic platforms, and through your institution. Consider creating profiles for this purpose.\\n\\n### 9. Ethical and Legal Considerations\\n- **Plagiarism:** Use plagiarism checkers like Turnitin. Ensure all citations are correctly formatted.\\n- **Data Protection:** Anonymize sensitive data and obtain necessary consents.\\n\\n### 10. Persistence and Learning\\n- **Handle Rejections:** View rejections as part of the process. Use feedback to improve future submissions.\\n- **Stay Persistent:** Keep submitting and learning from each experience.\\n\\n### Additional Tips\\n- **Seek Mentorship:** Consult with professors or mentors for guidance and reviews.\\n- **Start Early:** Allow ample time for each step, especially peer review.\\n- **Collaborate:** Work with colleagues or peers to gain different perspectives and support.\\n\\nBy following these steps and maintaining persistence, you can successfully navigate the publication process and contribute valuable research to your field.\""
74
+ ]
75
+ },
76
+ "execution_count": 9,
77
+ "metadata": {},
78
+ "output_type": "execute_result"
79
+ }
80
+ ],
81
+ "source": [
82
+ "full_response"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": 10,
88
+ "metadata": {},
89
+ "outputs": [],
90
+ "source": [
91
+ "thought_process_match = re.search(r\"<think>\\s*(.*?)\\s*</think>\", full_response, re.DOTALL)\n",
92
+ "thought_process = thought_process_match.group(1) if thought_process_match else \"\"\n",
93
+ "\n",
94
+ "actual_response = re.sub(r\"<think>.*?</think>\\s*\", \"\", full_response, flags=re.DOTALL)"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": 12,
100
+ "metadata": {},
101
+ "outputs": [
102
+ {
103
+ "data": {
104
+ "text/plain": [
105
+ "\"Okay, so I want to get my first paper published, but I'm not really sure where to start. I've heard that publishing research is a big deal in academia, but the process seems pretty intimidating. Let me try to break this down step by step.\\n\\nFirst, I think I need to have some research done. I remember my professor mentioning that I should start by identifying a gap in the literature. But wait, how do I even find that gap? I guess I need to read a lot of papers in my field. Maybe I can start by looking at some recent studies and see where there's something missing or where more research is needed. But I'm not exactly sure how to efficiently find these gaps. Do I just read everything and hope I spot something? That seems time-consuming.\\n\\nOnce I have an idea, I need to design a study. I'm a bit confused about the methodology part. Should I go for an experimental approach or maybe a review? I think it depends on what I'm researching. If I'm testing a hypothesis, an experiment makes sense, but if I'm synthesizing existing information, a review might be better. But I'm not sure which one is more likely to get accepted, especially as a first-time author.\\n\\nAfter designing the study, I need to conduct the research. This part might take a while. I'm worried about collecting enough data and ensuring it's reliable. What if my results aren't significant? Does that mean my paper won't get published? I guess even negative results can be valuable, but I'm not certain how journals view them.\\n\\nNext, I need to write the paper. I'm a bit overwhelmed by the structure: title, abstract, introduction, methods, results, discussion, conclusion, references. Each section has its own requirements. The abstract is supposed to summarize everything, but I'm not sure how to make it concise yet. The introduction needs to set up the problem, but I'm not confident in my ability to clearly state the research gap. 
I've heard that the discussion section is where I interpret the results, but I'm worried about overstepping and making unsupported claims.\\n\\nChoosing the right journal is another hurdle. There are so many journals out there, and each has different scopes and impact factors. How do I pick one that's a good fit? I don't want to aim too high and get rejected, but I also don't want to aim too low. Maybe I should look at where similar studies have been published. But how do I assess the impact factor? Is it just about the number, or are there other factors?\\n\\nOnce the paper is written, I need to format it according to the journal's guidelines. This includes things like citation style, font, margins, etc. I'm a bit nervous about missing some formatting detail and having the paper rejected because of that. Maybe I should double-check the guidelines multiple times or use a template.\\n\\nThen comes the submission process. I think most journals use online systems, so I'll need to create an account and upload my manuscript. I'm a bit confused about what to include besides the manuscript—like a cover letter. What should I write in the cover letter? Do I just state the title and that it's original work, or is there more to it?\\n\\nAfter submission, the waiting game begins. I've heard that peer review can take months, which is a long time. What happens if the reviewers reject my paper? I guess I can revise and resubmit, but I'm not sure how to handle negative feedback. It might be discouraging, especially if I'm really invested in the work.\\n\\nIf it gets accepted, I'll have to deal with the production process—proofreading, layouts, etc. I'm not sure how involved that is. Do I need to check every single detail, or is it handled by the journal? I hope they catch any mistakes, but I know I should still review it carefully.\\n\\nPromotion is something I hadn't thought about. After publication, I should probably share it on social media and maybe my institution's platform. 
But I'm not very active on these platforms, so I'm not sure how effective that will be. Maybe I should set up a few profiles just for this purpose.\\n\\nEthical considerations are important too. I need to make sure I didn't plagiarize anything, even accidentally. Using proper citations is crucial, but I'm sometimes unsure if I'm citing correctly. Also, if I used any data that's sensitive, I need to anonymize it properly. I should probably run my paper through a plagiarism checker before submission.\\n\\nStaying persistent is the last step. I know publishing can involve multiple rejections, but I'm not sure how to handle that mentally. It might be tough to keep going after a rejection, but I guess every successful researcher has been through that.\\n\\nOverall, I think the key steps are: identify a research gap, design the study, conduct the research, write the paper, choose the right journal, format correctly, submit, handle peer review, and then promote the work. But each step has its own challenges, and I'm not entirely confident in my ability to navigate them all. Maybe I should seek guidance from my professor or a mentor to help me through the process. They might have tips or can review my work before I submit it. I should also start early and give myself plenty of time, especially since peer review can take a while. It's a lot to take in, but breaking it down into manageable steps makes it feel more achievable.\""
106
+ ]
107
+ },
108
+ "execution_count": 12,
109
+ "metadata": {},
110
+ "output_type": "execute_result"
111
+ }
112
+ ],
113
+ "source": [
114
+ "thought_process"
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": 11,
120
+ "metadata": {},
121
+ "outputs": [
122
+ {
123
+ "data": {
124
+ "text/plain": [
125
+ "\"Publishing your first research paper is a significant academic milestone. Here's a structured approach to guide you through the process, addressing each step with clarity and confidence:\\n\\n### 1. Identify a Research Gap\\n- **Literature Review:** Begin by reading recent studies in your field. Use academic databases like PubMed, Google Scholar, or Web of Science to find relevant papers.\\n- **Spotting Gaps:** Look for areas where the current research is lacking or inconclusive. This could be a understudied population, a novel application of a method, or unanswered questions.\\n\\n### 2. Design Your Study\\n- **Methodology:** Choose between experimental, observational, or review studies based on your research question. Experimental designs test hypotheses, while reviews synthesize existing data.\\n- **Feasibility:** Ensure your study is feasible with available resources and time.\\n\\n### 3. Conduct the Research\\n- **Data Collection:** Collect data meticulously, ensuring reliability. Consider using pilot studies to test your methods.\\n- **Ethical Considerations:** Obtain necessary approvals (e.g., IRB) and ensure confidentiality and informed consent.\\n\\n### 4. Write the Paper\\n- **Structure:** Organize your paper into sections: Title, Abstract, Introduction, Methods, Results, Discussion, Conclusion, References.\\n - **Abstract:** Summarize your study succinctly, highlighting objectives, methods, results, and conclusions.\\n - **Introduction:** Clearly state the research gap and objectives.\\n - **Discussion:** Interpret results, relate them to existing literature, and discuss implications.\\n\\n### 5. Choose the Right Journal\\n- **Journal Selection:** Consider journals where similar studies have been published. Check the scope, impact factor, and audience.\\n- **Target Appropriately:** Balance between high-impact and realistic targets. Consider open-access options for broader reach.\\n\\n### 6. 
Format and Submit\\n- **Formatting:** Adhere strictly to the journal’s guidelines, including citation style (e.g., APA, MLA).\\n- **Cover Letter:** Briefly introduce your manuscript, state its originality, and mention any conflicts of interest.\\n\\n### 7. Peer Review Process\\n- **Submission:** Use the journal’s online system. Be prepared for a wait, as peer review can take months.\\n- **Revisions:** If revisions are requested, address them thoroughly. Use feedback to improve your manuscript.\\n\\n### 8. Post-Acceptance\\n- **Production:** Review proofs carefully for errors. Ensure all figures and tables are correctly placed.\\n- **Promotion:** Share your paper on social media, academic platforms, and through your institution. Consider creating profiles for this purpose.\\n\\n### 9. Ethical and Legal Considerations\\n- **Plagiarism:** Use plagiarism checkers like Turnitin. Ensure all citations are correctly formatted.\\n- **Data Protection:** Anonymize sensitive data and obtain necessary consents.\\n\\n### 10. Persistence and Learning\\n- **Handle Rejections:** View rejections as part of the process. Use feedback to improve future submissions.\\n- **Stay Persistent:** Keep submitting and learning from each experience.\\n\\n### Additional Tips\\n- **Seek Mentorship:** Consult with professors or mentors for guidance and reviews.\\n- **Start Early:** Allow ample time for each step, especially peer review.\\n- **Collaborate:** Work with colleagues or peers to gain different perspectives and support.\\n\\nBy following these steps and maintaining persistence, you can successfully navigate the publication process and contribute valuable research to your field.\""
126
+ ]
127
+ },
128
+ "execution_count": 11,
129
+ "metadata": {},
130
+ "output_type": "execute_result"
131
+ }
132
+ ],
133
+ "source": [
134
+ "actual_response"
135
+ ]
136
+ }
137
+ ],
138
+ "metadata": {
139
+ "kernelspec": {
140
+ "display_name": ".venv",
141
+ "language": "python",
142
+ "name": "python3"
143
+ },
144
+ "language_info": {
145
+ "codemirror_mode": {
146
+ "name": "ipython",
147
+ "version": 3
148
+ },
149
+ "file_extension": ".py",
150
+ "mimetype": "text/x-python",
151
+ "name": "python",
152
+ "nbconvert_exporter": "python",
153
+ "pygments_lexer": "ipython3",
154
+ "version": "3.11.1"
155
+ }
156
+ },
157
+ "nbformat": 4,
158
+ "nbformat_minor": 2
159
+ }
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit==1.42.0
2
+ python-dotenv==1.0.1
3
+ groq==0.20.0
4
+ pymupdf==1.25.4
utils.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ from dotenv import load_dotenv
4
+ from typing import Tuple
5
+ from groq import Groq
6
+
7
def get_api_key() -> str:
    """
    Read the LLM api key from the environment.

    ``load_dotenv()`` is called first, then the ``key`` environment
    variable is looked up.

    Returns:
        str: The api key used to reach the LLM. This is whatever
            ``os.getenv('key')`` yields, so it is ``None`` when the
            variable is not set.
    """
    load_dotenv()
    return os.getenv('key')
18
+
19
+
20
def _split_thought_process(full_response: str) -> Tuple[str, str]:
    """Split raw model output into (thought process, final answer)."""
    match = re.search(r"<think>\s*(.*?)\s*</think>", full_response, re.DOTALL)
    if match:
        answer = re.sub(r"<think>.*?</think>\s*", "", full_response, flags=re.DOTALL)
        return match.group(1), answer

    # An opening tag without a closing one means the output stopped while the
    # model was still reasoning (e.g. the token limit was hit). Treat the
    # tail as thought process instead of leaking the raw tag into the answer.
    before, tag, after = full_response.partition("<think>")
    if tag:
        return after.strip(), before.strip()

    return "", full_response


def get_response(prompt: str, api_key) -> Tuple[str, str]:
    """
    Gets the response of the LLM on the provided prompt.

    The prompt is sent as a single user message to the
    ``deepseek-r1-distill-llama-70b`` model through the Groq client. The
    reply is streamed, joined into one string and then split on its
    ``<think>...</think>`` section.

    Args:
        prompt (str): The prompt to be plugged in
        api_key: The api key handed to the ``Groq`` client

    Returns:
        tuple:
            str: Includes the thinking part of the LLM, showing its thought
                process. Empty when the reply has no ``<think>`` section.
            str: The actual answer to your prompt, with the ``<think>``
                section removed.

    """
    client = Groq(api_key=api_key)
    completion = client.chat.completions.create(
        model="deepseek-r1-distill-llama-70b",
        messages=[
            {
                'role': 'user',
                'content': prompt
            }
        ],
        temperature=0.6,
        max_completion_tokens=4096,
        top_p=0.95,
        stream=True,
        stop=None,
    )

    # Skip chunks that carry no choices so indexing cannot fail, and map a
    # missing delta content to "" so the join below is safe.
    full_response = "".join(
        chunk.choices[0].delta.content or ""
        for chunk in completion
        if chunk.choices
    )

    return _split_thought_process(full_response)