# Spaces: Running
# config.py
import os
class ResearchConfig:
    """Namespace of class-level configuration constants.

    Groups the API key lookup, vector-store path, document chunking
    parameters, prompt templates, per-domain fallback texts and model
    settings.  Every value is a plain class attribute, so it is read as
    ``ResearchConfig.NAME`` without creating an instance.

    The multi-line Markdown/prompt strings below are written flush-left on
    purpose: any leading whitespace inside a triple-quoted literal becomes
    part of the string, and indented ``#`` lines would no longer be
    Markdown headings.
    """

    # Environment & API configuration
    # Looked up once, when the class body executes; ``None`` if the
    # environment variable is not set.
    DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
    CHROMA_PATH = "chroma_db"

    # Document processing settings
    CHUNK_SIZE = 512
    CHUNK_OVERLAP = 64
    MAX_CONCURRENT_REQUESTS = 5
    EMBEDDING_DIMENSIONS = 1536

    # Mapping of documents to research topics
    DOCUMENT_MAP = {
        "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
            "CV-Transformer Hybrid Architecture",
        "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing":
            "Transformer Architecture Analysis",
        "Latest Trends in Machine Learning Methods Using Quantum Computing":
            "Quantum ML Frontiers",
    }

    # Template for detailed analysis using Markdown and LaTeX formatting.
    # Contains exactly one replacement field, ``{context}``, to be filled
    # with the document text (e.g. via ``str.format``).
    # NOTE(review): the inline example `E=mc^2` is wrapped in Markdown
    # backticks rather than LaTeX math delimiters - confirm this is the
    # intended hint before changing the prompt text.
    ANALYSIS_TEMPLATE = (
        "Let's think step by step. Synthesize a comprehensive technical report based on the following documents. "
        "Focus on identifying the key innovations, empirical results, and potential limitations. Explicitly state any assumptions made during your analysis. "
        "The report MUST be valid Markdown, and all mathematical notation MUST be correctly formatted LaTeX (e.g., `E=mc^2`).\n\n"
        "Documents:\n{context}\n\n"
        "Respond with the following structure:\n"
        "# Technical Analysis Report\n\n"
        "1. **Key Technical Contributions:** (Bullet points highlighting the main innovations)\n"
        "2. **Novel Methodologies:** (Detailed explanation of the new methods used)\n"
        "3. **Empirical Results:** (Quantitative results with specific metrics, e.g., accuracy, precision, recall, F1-score. Include confidence intervals where appropriate.)\n"
        "4. **Potential Applications:** (Real-world applications of the technology)\n"
        "5. **Limitations and Future Directions:** (Current limitations and suggestions for future research)\n\n"
        "Format: Markdown with LaTeX mathematical notation where applicable."
    )

    # Domain-specific fallback analyses and prompts.
    # DOMAIN_FALLBACKS and DOMAIN_PROMPTS share the same five domain keys.
    DOMAIN_FALLBACKS = {
        "biomedical research": """
# Biomedical Research Analysis
## Key Contributions
- Integration of clinical trial design with digital biomarkers.
- Multi-omics data used for precise patient stratification.
## Methodologies
- Machine learning for precision medicine.
- Federated learning for multi-center trials.
## Empirical Results
- Significant improvements in patient outcomes.
## Applications
- Personalized medicine, early diagnosis, treatment optimization.
""",
        "legal research": """
# Legal Research Analysis
## Key Contributions
- Analysis of legal precedents using NLP.
- Advanced case law retrieval and summarization.
## Methodologies
- Automated legal reasoning with transformer models.
- Sentiment analysis on judicial opinions.
## Empirical Results
- Improved accuracy in predicting case outcomes.
## Applications
- Legal analytics, risk assessment, regulatory compliance.
""",
        "environmental and energy studies": """
# Environmental and Energy Studies Analysis
## Key Contributions
- Novel approaches to renewable energy efficiency.
- Integration of policy analysis with technical metrics.
## Methodologies
- Simulation models for climate impact.
- Data fusion from sensor networks and satellite imagery.
## Empirical Results
- Enhanced performance in energy forecasting.
## Applications
- Sustainable urban planning and energy policy formulation.
""",
        "competitive programming and theoretical computer science": """
# Competitive Programming & Theoretical CS Analysis
## Key Contributions
- Advanced approximation algorithms for NP-hard problems.
- Use of parameterized complexity and fixed-parameter tractability.
## Methodologies
- Branch-and-bound combined with dynamic programming.
- Quantum-inspired algorithms for optimization.
## Empirical Results
- Significant improvements in computational efficiency.
## Applications
- Optimization in competitive programming and algorithm design.
""",
        "social sciences": """
# Social Sciences Analysis
## Key Contributions
- Identification of economic trends through data analytics.
- Integration of sociological data with computational models.
## Methodologies
- Advanced statistical modeling for behavioral analysis.
- Machine learning for trend forecasting.
## Empirical Results
- High correlation with traditional survey methods.
## Applications
- Policy design, urban studies, social impact analysis.
""",
    }

    DOMAIN_PROMPTS = {
        "biomedical research": """
Consider clinical trial design, patient outcomes, and recent biomedical breakthroughs. For example, discuss how a new drug might impact patient survival rates or how a new diagnostic technique might improve early detection of a disease. Discuss specific clinical studies if available.
""",
        "legal research": """
Emphasize legal precedents, case law, and nuanced statutory interpretations. For example, when analyzing a case, identify the key holdings, explain the legal reasoning behind the decision, and compare it to other relevant cases. If a statute is involved, discuss how the court interpreted the statute and whether there are any ambiguities or conflicts with other laws.
""",
        "environmental and energy studies": """
Highlight renewable energy technologies, efficiency metrics, and policy implications. Provide specific data points on energy consumption and environmental impact. For instance, compare the energy efficiency of solar panels from different manufacturers, or discuss the impact of a specific environmental regulation on air quality.
""",
        "competitive programming and theoretical computer science": """
Focus on algorithmic complexity, innovative proofs, and computational techniques. For example, analyze the time and space complexity of a new algorithm, or explain the key steps in a mathematical proof. Include pseudocode or code snippets where appropriate.
""",
        "social sciences": """
Concentrate on economic trends, sociological data, and correlations impacting public policy. For example, analyze the impact of a new social program on poverty rates, or discuss the relationship between education levels and income inequality. Cite specific studies and statistical data to support your claims.
""",
    }

    # Ensemble model settings, keyed by model name.
    # NOTE(review): "temp" is presumably the sampling temperature - confirm
    # against the code that consumes these settings.
    ENSEMBLE_MODELS = {
        "deepseek-chat": {"max_tokens": 2000, "temp": 0.7},
        "deepseek-coder": {"max_tokens": 2500, "temp": 0.5},
    }

    # CLIP model settings for image embeddings
    CLIP_SETTINGS = {
        "model": "openai/clip-vit-large-patch14",
        "image_db": "image_vectors",
    }