mgbam committed on
Commit
5141f31
·
verified ·
1 Parent(s): efa51c3

Create config.py

Browse files
Files changed (1) hide show
  1. config.py +138 -0
config.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# config.py
import os


class ResearchConfig:
    """Static configuration constants, grouped as class attributes.

    Nothing here is instance state: every setting is a class-level constant
    and is meant to be read as ``ResearchConfig.NAME``. The class body runs
    once at import time, so ``DEEPSEEK_API_KEY`` reflects the environment as
    it was when this module was first imported.
    """

    # Environment & API configuration
    # ``os.environ.get`` returns None when the variable is unset, so callers
    # must handle a missing key themselves.
    DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
    # Relative path; presumably the on-disk location of the Chroma vector
    # store -- confirm against the code that opens it.
    CHROMA_PATH = "chroma_db"

    # Document processing settings
    # NOTE(review): units of CHUNK_SIZE / CHUNK_OVERLAP (characters vs tokens)
    # are decided by whichever text splitter consumes them -- confirm there.
    CHUNK_SIZE = 512
    CHUNK_OVERLAP = 64
    MAX_CONCURRENT_REQUESTS = 5
    EMBEDDING_DIMENSIONS = 1536

    # Mapping of documents to research topics
    # Keys are full document titles/descriptions; values are short topic labels.
    DOCUMENT_MAP = {
        "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
            "CV-Transformer Hybrid Architecture",
        "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing":
            "Transformer Architecture Analysis",
        "Latest Trends in Machine Learning Methods Using Quantum Computing":
            "Quantum ML Frontiers",
    }

    # Template for detailed analysis using Markdown and LaTeX formatting
    # The only replacement field is ``{context}``; the text contains no other
    # braces, so ``ANALYSIS_TEMPLATE.format(context=...)`` is safe.
    ANALYSIS_TEMPLATE = (
        "Let's think step by step. Synthesize a comprehensive technical report based on the following documents. "
        "Focus on identifying the key innovations, empirical results, and potential limitations. Explicitly state any assumptions made during your analysis. "
        "The report MUST be valid Markdown, and all mathematical notation MUST be correctly formatted LaTeX (e.g., `E=mc^2`).\n\n"
        "Documents:\n{context}\n\n"
        "Respond with the following structure:\n"
        "# Technical Analysis Report\n\n"
        "1. **Key Technical Contributions:** (Bullet points highlighting the main innovations)\n"
        "2. **Novel Methodologies:** (Detailed explanation of the new methods used)\n"
        "3. **Empirical Results:** (Quantitative results with specific metrics, e.g., accuracy, precision, recall, F1-score. Include confidence intervals where appropriate.)\n"
        "4. **Potential Applications:** (Real-world applications of the technology)\n"
        "5. **Limitations and Future Directions:** (Current limitations and suggestions for future research)\n\n"
        "Format: Markdown with LaTeX mathematical notation where applicable."
    )

    # Domain-specific fallback analyses and prompts
    # Canned Markdown reports keyed by lower-case domain name. The text is kept
    # at column 0 inside the triple-quoted strings on purpose: indenting it
    # would put leading spaces into the runtime value and make Markdown
    # renderers treat the headings as code blocks.
    DOMAIN_FALLBACKS = {
        "biomedical research": """
# Biomedical Research Analysis
## Key Contributions
- Integration of clinical trial design with digital biomarkers.
- Multi-omics data used for precise patient stratification.
## Methodologies
- Machine learning for precision medicine.
- Federated learning for multi-center trials.
## Empirical Results
- Significant improvements in patient outcomes.
## Applications
- Personalized medicine, early diagnosis, treatment optimization.
""",
        "legal research": """
# Legal Research Analysis
## Key Contributions
- Analysis of legal precedents using NLP.
- Advanced case law retrieval and summarization.
## Methodologies
- Automated legal reasoning with transformer models.
- Sentiment analysis on judicial opinions.
## Empirical Results
- Improved accuracy in predicting case outcomes.
## Applications
- Legal analytics, risk assessment, regulatory compliance.
""",
        "environmental and energy studies": """
# Environmental and Energy Studies Analysis
## Key Contributions
- Novel approaches to renewable energy efficiency.
- Integration of policy analysis with technical metrics.
## Methodologies
- Simulation models for climate impact.
- Data fusion from sensor networks and satellite imagery.
## Empirical Results
- Enhanced performance in energy forecasting.
## Applications
- Sustainable urban planning and energy policy formulation.
""",
        "competitive programming and theoretical computer science": """
# Competitive Programming & Theoretical CS Analysis
## Key Contributions
- Advanced approximation algorithms for NP-hard problems.
- Use of parameterized complexity and fixed-parameter tractability.
## Methodologies
- Branch-and-bound combined with dynamic programming.
- Quantum-inspired algorithms for optimization.
## Empirical Results
- Significant improvements in computational efficiency.
## Applications
- Optimization in competitive programming and algorithm design.
""",
        "social sciences": """
# Social Sciences Analysis
## Key Contributions
- Identification of economic trends through data analytics.
- Integration of sociological data with computational models.
## Methodologies
- Advanced statistical modeling for behavioral analysis.
- Machine learning for trend forecasting.
## Empirical Results
- High correlation with traditional survey methods.
## Applications
- Policy design, urban studies, social impact analysis.
""",
    }

    # Extra per-domain guidance text; uses the same keys as DOMAIN_FALLBACKS.
    DOMAIN_PROMPTS = {
        "biomedical research": """
Consider clinical trial design, patient outcomes, and recent biomedical breakthroughs. For example, discuss how a new drug might impact patient survival rates or how a new diagnostic technique might improve early detection of a disease. Discuss specific clinical studies if available.
""",
        "legal research": """
Emphasize legal precedents, case law, and nuanced statutory interpretations. For example, when analyzing a case, identify the key holdings, explain the legal reasoning behind the decision, and compare it to other relevant cases. If a statute is involved, discuss how the court interpreted the statute and whether there are any ambiguities or conflicts with other laws.
""",
        "environmental and energy studies": """
Highlight renewable energy technologies, efficiency metrics, and policy implications. Provide specific data points on energy consumption and environmental impact. For instance, compare the energy efficiency of solar panels from different manufacturers, or discuss the impact of a specific environmental regulation on air quality.
""",
        "competitive programming and theoretical computer science": """
Focus on algorithmic complexity, innovative proofs, and computational techniques. For example, analyze the time and space complexity of a new algorithm, or explain the key steps in a mathematical proof. Include pseudocode or code snippets where appropriate.
""",
        "social sciences": """
Concentrate on economic trends, sociological data, and correlations impacting public policy. For example, analyze the impact of a new social program on poverty rates, or discuss the relationship between education levels and income inequality. Cite specific studies and statistical data to support your claims.
""",
    }

    # Ensemble model settings
    # Keyed by model name. NOTE(review): "temp" is presumably the sampling
    # temperature -- confirm against the code that builds the API request.
    ENSEMBLE_MODELS = {
        "deepseek-chat": {"max_tokens": 2000, "temp": 0.7},
        "deepseek-coder": {"max_tokens": 2500, "temp": 0.5},
    }

    # CLIP model settings for image embeddings
    CLIP_SETTINGS = {
        "model": "openai/clip-vit-large-patch14",
        "image_db": "image_vectors",
    }