"""Streamlit personal-portfolio app: header, summary, and icon-menu sections."""

import streamlit as st
from PIL import Image
from streamlit_lottie import st_lottie
import json
import os
import glob
from streamlit_option_menu import option_menu
from projects import display_projects

# Use the full browser width for the portfolio layout.
st.set_page_config(layout="wide")

# Load the external stylesheet and append a minimalist grey page background.
with open("style.css") as f:
    css_content = f.read()
css_content += '''
body {
    background-color: #f0f2f6;
}
'''
# BUG FIX: the original called ''.format(css_content), which always produces
# an empty string, so the stylesheet was never injected. Wrap the CSS in a
# <style> tag so Streamlit actually applies it.
st.markdown(f"<style>{css_content}</style>", unsafe_allow_html=True)


def load_lottiefile(filepath: str):
    """Load and return a Lottie animation (parsed JSON) from *filepath*."""
    with open(filepath, "r") as file:
        return json.load(file)


def display_header():
    """Render the page title and the header Lottie animation."""
    st.write('''
# Nihar Palem
#####
''')
    # Lottie animation displayed directly under the name.
    lottie_animation = load_lottiefile("bio.json")
    st_lottie(lottie_animation, height=300, key="header_animation")


def display_summary():
    """Render the introductory summary shown right below the header."""
    # st.markdown('## Summary', unsafe_allow_html=True)
    st.markdown("""
    Hello! I'm **Sai Nihar Reddy Palem**, an Applied AI Engineer, Data Scientist, and AI Researcher based in San Jose, California. Originally from Hyderabad, India, I've embarked on a transformative journey from Electrical Engineering to becoming a passionate AI researcher exploring the frontiers of artificial intelligence. My path began with a solid engineering foundation, evolved through diverse industry experiences across data engineering and analytics, and reached new heights with my **Master's degree in Applied Data Science** from San Jose State University. Over the past 2+ years, I've immersed myself in the cutting-edge world of multimodal AI, Large Language Model fine-tuning, and multi-agent architectures, consistently pushing the boundaries between theoretical research and practical implementation. Currently, I'm working with Google on bettering their multimodal capabilities, contributing to the advancement of state-of-the-art AI systems.
    What drives me is the exciting challenge of systematically translating breakthrough research papers into production-ready solutions that create real-world impact. From achieving 12% performance improvements through advanced LLM optimization techniques to actively participating and learning from technical events like hackathons where I've built innovative multi-agent systems in just 5 hours (most recent), my journey reflects a deep commitment to both research excellence and practical innovation. I've contributed to open-source projects that have garnered 10,000+ community interactions, developed comprehensive evaluation frameworks for state-of-the-art models like Gemini 3.0, and created applications that democratize AI technology for businesses and individuals alike.

    **What You'll Find in This Portfolio**: Education, Work Experience, Projects, Skills, Research Notes, Social Media, Open Source Applications, Awards
    """)


def display_education():
    """Render the Education section."""
    st.markdown('## Education')
    st.write("""
    - **Masters In Data Analytics**, *San Jose State University*, USA (2023-2024)
        - Courses: Data Mining, Deep Learning, Big Data Technologies, Data Visualization, Machine Learning, Database Management Systems
        - Achievements:
            - A Grade in Deep Learning
    - **Bachelor of Technology (B.Tech) in Electrical and Electronics Engineering (EEE)**, *Sreenidhi Institute of Science and Technology (SNIST)*, Hyderabad (2015-2019)
        - Activities:
            - Member of the Robotics Club; built line follower and theft-alert detection bots.
            - Member of the college cricket team; Runner up in City-level tournament
    """)


def display_work_experience():
    """Render the Work Experience section."""
    st.markdown('## Work Experience')
    st.write("""
    **Turing, San Jose, CA, USA** March 2024 - Present
    - **Data Scientist & Applied AI Engineer**
        - Collaborated with AI engineers, product teams, researchers, and Google DeepMind team to integrate LLM evaluation systems into production workflows using PyTorch and distributed computing
        - Engineered comprehensive evaluation benchmarks for Gemini 3.0 by analyzing reasoning loss patterns and image loss patterns in state-of-the-art Vision-Language Models (VLMs) including o3 and Gemini 2.5 Pro, developing custom datasets across multiple domains (mathematics, finance, chemistry, biology) spanning educational levels from high-school through PhD with statistical validation methods
        - Implemented advanced LLM fine-tuning strategies for Qwen model including Parameter-Efficient Fine-Tuning (PEFT) with LoRA and 2-stage whole model training on multi-GPU clusters, achieving 12% performance improvement across 15+ categories
        - Developed "auto hinter" system to improve LLM reasoning, guiding models towards correct answers based on question complexity, resulting in 8% performance increment on PhD-level questions
        - Built "auto rater" system to assess responses from leading models like Gemini 2.5 Pro and o3 custom builds, scoring across four key dimensions: completeness, coherence, clarity, correctness, style and formatting

    **San Jose State University, San Jose, CA, USA** August 2024 - December 2024
    - **Teaching Assistant**
        - Mentored 80+ graduate students on data modeling projects, providing feedback on technical documentation
        - Reviewed and debugged student data pipelines, offering solutions for data analysis and ML model challenges
        - Improved student performance, with 75% of mentored students achieving an 'A' grade
        - Conducted weekly office hours to assist students with complex data science concepts and project implementations

    **Bharat Electronics Limited, Hyderabad, India** February 2021 - March 2022
    - **Data Analyst**
        - Optimized SQL queries for sales and payroll databases using indexes and CTEs, reducing execution times by 40%
        - Developed and maintained 20+ Tableau dashboards, reducing production costs by 15% and improving sales forecasts by 10%
        - Implemented automated billing checks using SQL procedures, reducing financial discrepancies by 30%
        - Optimized ETL pipelines with staging tables and data quality checks, increasing ingestion efficiency by 25%

    **Technical Writer** 2023-Present
    - Embarked on a new journey in 2023 as a technical writer, sharing insights and developments in data science and data engineering with a growing audience.
    - Authored numerous articles that explore complex topics in an accessible and informative manner, focusing on AI, data science, machine learning and data engineering.
    - This new habit aims to educate and inspire, bridging the gap between technical expertise and practical application in the modern data landscape.
    - Find my work on [Medium](https://medium.com/@nihar-palem) and [Substack](https://niharpalem.substack.com/publish/posts).
    """)


def display_skills():
    """Render the Skills section as a set of themed tabs."""
    st.title('Skills')

    # Define tab titles
    tab_titles = [
        "Programming & Core",
        "AI & ML",
        "Data Engineering",
        "Data Architecture",
        "Visualization",
        "Specialized Systems",
        "Multimodal AI",
        "LLM & Advanced AI"
    ]

    # Create tabs
    tabs = st.tabs(tab_titles)

    # Programming & Core Technologies
    with tabs[0]:
        st.subheader("Programming & Core Technologies")
        st.markdown("""
        - **Programming Languages**:
            - Python (Advanced)
            - SQL (Advanced)
            - Shell Scripting
        - **Database Systems**:
            - Relational: MySQL, PostgreSQL
            - NoSQL: MongoDB
            - Data Warehouses: Snowflake, Redshift
            - Vector Databases: FAISS, Pinecone
        - **Development Tools**:
            - Version Control: Git, GitHub
            - Containerization: Docker
            - Orchestration: Kubernetes (Basic)
            - IDE: VS Code, PyCharm
            - Microsoft Office Suite
        - **Frameworks & Libraries**:
            - LangChain
            - Hugging Face (Transformers, Diffusers)
            - Scikit-Learn, Pandas, NumPy
            - Apache Spark
        """)

    # AI & Machine Learning
    with tabs[1]:
        st.subheader("AI & Machine Learning")
        st.markdown("""
        - **Machine Learning Frameworks**:
            - PyTorch (Advanced, PyTorch Distributed, DDP)
            - TensorFlow
            - Scikit-Learn
            - XGBoost, Random Forest, AdaBoost
        - **Deep Learning**:
            - Vision Transformers (ViT)
            - Vision Language Models
            - Large Language Models
            - Sentence Transformers
            - Diffusion Models
            - ResNet Architectures
            - Neural Networks
            - BiLSTM
        - **Distributed Training**:
            - Multi-GPU Clusters (16+ GPUs)
            - PyTorch DDP (Distributed Data Parallel)
            - DeepSpeed
            - Megatron
            - CUDA Acceleration
            - FlashAttention
        - **Computer Vision**:
            - MediaPipe
            - OpenCV
            - Image Processing Pipelines
            - Satellite Imagery Analysis
        - **Model Optimization**:
            - Model Compression (Quantization, Distillation)
            - Performance Optimization
            - CUDA Programming
            - Parallel Processing
        """)

    # Data Engineering & Cloud
    with tabs[2]:
        st.subheader("Data Engineering & Cloud")
        st.markdown("""
        - **Cloud Platforms**:
            - AWS (Certified - Lambda, S3, Glue, EC2, Redshift)
            - Google Cloud Platform (GCP)
            - Cloud Architecture Design
        - **Big Data Technologies**:
            - Apache Spark (PySpark)
            - Apache Airflow
            - BigQuery
            - Hadoop Ecosystem
        - **Data Pipeline Tools**:
            - ETL/ELT Pipeline Design
            - Workflow Orchestration
            - Concurrent Processing
            - Real-time Data Streaming
            - ThreadPoolExecutor Optimization
        - **Infrastructure**:
            - CI/CD Pipelines (GitHub Actions)
            - Infrastructure as Code
            - Kubernetes Basics
            - Production Monitoring
            - Distributed Training Clusters
        """)

    # Data Architecture & Analytics
    with tabs[3]:
        st.subheader("Data Architecture & Analytics")
        st.markdown("""
        - **Data Modeling**:
            - OLAP/OLTP Systems
            - Star/Snowflake Schema
            - Data Normalization
            - Database Optimization
        - **Analytics Techniques**:
            - Streaming Analytics
            - Batch Processing
            - Time Series Analysis
            - Statistical Analysis
            - A/B Testing
            - Hypothesis Testing
        - **Data Processing**:
            - Pandas, NumPy
            - Data Wrangling
            - Feature Engineering
            - Data Quality Assurance
            - Data Quality Management
        - **Performance Optimization**:
            - Query Optimization
            - Indexing Strategies
            - Caching Mechanisms
            - SQL Performance Tuning
        """)

    # Visualization & Deployment
    with tabs[4]:
        st.subheader("Visualization & Tools")
        st.markdown("""
        - **Business Intelligence**:
            - Tableau
            - Power BI
            - Dashboard Design
            - KPI Monitoring
        - **Technical Visualization**:
            - Plotly
            - Seaborn
            - Matplotlib
            - Interactive Charts
        - **Deployment & Interface**:
            - Streamlit
            - Web Development
            - Hugging Face Spaces
        - **Collaboration Tools**:
            - JIRA
            - Notion
            - Git Workflow
            - Agile Methodologies
        """)

    # Specialized Systems
    with tabs[5]:
        st.subheader("Specialized Systems")
        st.markdown("""
        - **Recommender Systems**:
            - Hybrid Filtering Techniques
            - Content-Based Filtering
            - Collaborative Filtering
            - Matrix Factorization (SVD)
        - **Ensemble Methods**:
            - Multi-model Consensus Systems
            - Classifier Combinations
            - Voting Systems
            - Stacking Implementations
        - **Performance Optimization**:
            - CUDA Acceleration
            - Parallel Processing
            - Resource Management
            - Scalability Design
        - **Custom Solutions**:
            - Natural Language Processing
            - Computer Vision Systems
            - Time Series Forecasting
            - Anomaly Detection
            - Real-time Web Scraping
            - Automated Data Quality Checks
        """)

    # Multimodal AI
    with tabs[6]:
        st.subheader("Multimodal AI")
        st.markdown("""
        - **Vision-Language Models**:
            - Qwen-VL
            - Gemini Multimodal
            - Vision-Language Understanding
            - Cross-modal Fine-tuning
            - Multimodal Evaluation
        - **Visual AI**:
            - Visual Question Answering (VQA)
            - Vision Transformers (ViT)
            - Stable Diffusion XL
            - Generative AI (Vision)
            - Image-Text Alignment
        - **Multi-Agent Systems**:
            - Multi-Agent Multimodal Workflows
            - Strategic Agent Architecture
            - Visual Agent Integration
            - QA Agent Implementation
        - **Evaluation & Testing**:
            - Multimodal Benchmarking
            - Cross-modal Bias Detection
            - Performance Optimization
            - Adversarial Testing
            - Statistical Validation Methods
        """)

    # LLM & Advanced AI
    with tabs[7]:
        st.subheader("LLM & Advanced AI")
        st.markdown("""
        - **Large Language Models**:
            - Fine-tuning (PEFT, LoRA, QLoRA)
            - 2-Stage Training
            - VLLM/LMMs
            - Qwen, LLaMA (Llama-3.1-8B), GPT Integration
        - **Advanced Techniques**:
            - Prompt Engineering (Advanced, Context Injection)
            - RAG (Retrieval-Augmented Generation)
            - LLM Evaluation Benchmarking
            - LLM-as-judge
            - Auto Hinter Systems
        - **Production AI Systems**:
            - Multi-Agent Systems
            - API Integration
            - Performance Optimization
            - Tenstorrent Hardware Utilization
            - MLOps
        - **Specialized Applications**:
            - Semantic Job Matching
            - Resume Generation
            - Marketing Campaign Automation
            - Infrastructure Change Detection
            - Exercise Pose Correction
        - **AI Testing & Validation**:
            - Unit/Integration Testing for AI
            - Offline Evaluation Frameworks
            - Model Validation
            - ROC Curve Analysis
            - RMSE Validation
            - Bias Mitigation
        """)


def display_articles():
    """Display articles from HTML files in the articles directory"""
    st.markdown('## Articles')

    # Define the articles directory path
    articles_dir = "articles"  # You can change this path as needed

    # Check if articles directory exists
    if not os.path.exists(articles_dir):
        st.warning(f"Articles directory '{articles_dir}' not found. Please create the directory and add your HTML files.")
        st.info("Create an 'articles' folder in your project directory and add your HTML files there.")
        return

    # Get all HTML files from the articles directory
    html_files = glob.glob(os.path.join(articles_dir, "*.html"))

    if not html_files:
        st.info("No HTML articles found in the articles directory. Add some .html files to get started!")
        return

    # Sort files by name for consistent ordering
    html_files.sort()

    st.markdown("Click on any article below to view:")
    st.markdown("")  # Add some space

    # Display each article as a clean clickable card
    for i, html_file in enumerate(html_files):
        # Extract filename without path and extension, format it nicely
        file_name = os.path.splitext(os.path.basename(html_file))[0]
        display_name = file_name.replace('_', ' ').replace('-', ' ').title()

        # Get file size; fall back to a placeholder when the file is unreadable.
        try:
            file_size = os.path.getsize(html_file)
            size_kb = round(file_size / 1024, 1)
            size_text = f"{size_kb} KB"
        except OSError:
            size_text = "- KB"

        # Create a clean card-like button
        article_clicked = st.button(
            f"📄 {display_name} ({size_text})",
            key=f"article_{i}",
            use_container_width=True
        )

        if article_clicked:
            try:
                with open(html_file, 'r', encoding='utf-8') as file:
                    html_content = file.read()

                # Display article content inline
                st.markdown("---")
                st.markdown(f"### {display_name}")

                # Add download option
                col1, col2 = st.columns([1, 4])
                with col1:
                    st.download_button(
                        label="⬇️ Download",
                        data=html_content,
                        file_name=os.path.basename(html_file),
                        mime="text/html",
                        key=f"download_clicked_{i}"
                    )
                with col2:
                    if st.button("❌ Close Article", key=f"close_{i}"):
                        st.rerun()

                # Display the HTML content
                st.components.v1.html(html_content, height=600, scrolling=True)
            except Exception as e:
                st.error(f"Error loading article: {str(e)}")

        # Add small spacing between articles
        st.markdown("")


def display_apps():
    """Render links to deployed standalone apps."""
    st.markdown('## Apps')
    st.markdown("""
    - [CNN arch](https://cnn-arch.streamlit.app/)
    """)
    st.markdown("""
    - [TuNNe](https://huggingface.co/spaces/Niharmahesh/TuNNe)
    """)


def display_certifications():
    """Render the Certifications section as expandable cards."""
    st.markdown('## Certifications')
    certifications = [
        {"title": "Python for Data Science and Machine Learning Bootcamp", "issuer": "Udemy", "date": "2023", "skills": "Python, Data Science, Machine Learning"},
        {"title": "HackerRank SQL (Basic)", "issuer": "HackerRank", "date": "2023", "skills": "SQL, Database Management"},
        {"title": "AWS Cloud Practitioner", "issuer": "Udemy", "date": "2023", "skills": "Cloud Computing, AWS Services"},
        {"title": "AWS Certified Cloud Practitioner", "issuer": "Amazon Web Services", "date": "2023", "skills": "Cloud Architecture, AWS Best Practices"}
    ]
    for cert in certifications:
        with st.expander(cert["title"]):
            st.write(f"**Issuer:** {cert['issuer']}")
            st.write(f"**Date:** {cert['date']}")
            st.write(f"**Skills:** {cert['skills']}")


def display_social_media():
    """Render social-media and contact links."""
    st.markdown('## Social Media')
    st.markdown("""
    - [LinkedIn](https://www.linkedin.com/in/sai-nihar-1b955a183/)
    - [GitHub](https://github.com/niharpalem)
    - [Medium](https://medium.com/@nihar-palem)
    - [Twitter](https://twitter.com/niharpalem_2497)
    - [Email](mailto:sainiharreddy.palem@sjsu.edu)
    """)


# Icon menu: maps each emoji button to the section renderer it triggers.
menu_items_with_icons = {
    "🎓": display_education,
    "💼": display_work_experience,
    "📁": display_projects,
    "🛠️": display_skills,
    "📝": display_articles,  # New articles section
    "🌐": display_social_media,
    "🏆": display_certifications,
    "📱": display_apps
}


def main():
    """App entry point: header, summary, then the icon-driven section menu."""
    # Initialize session state for selected function
    if 'selected_function' not in st.session_state:
        st.session_state.selected_function = None  # Default to None to not display any section initially

    # Display the header with your name and Lottie animation first
    display_header()

    # Display the summary section immediately after the header
    display_summary()

    # Create a row of buttons for each icon in the menu
    cols = st.columns(len(menu_items_with_icons))
    for col, (icon, func) in zip(cols, menu_items_with_icons.items()):
        if col.button(icon):
            # Update the session state to the selected function
            st.session_state.selected_function = func

    # If a function has been selected, call it
    if st.session_state.selected_function is not None:
        st.session_state.selected_function()


if __name__ == "__main__":
    main()