Ploomber AI Editor | electricity-prediction-b199

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import plotly.express as px
from datetime import datetime, timedelta

# Set page config
st.set_page_config(page_title="Electricity Consumption Predictor", layout="wide")

# Title
st.title("⚡ Electricity Consumption Prediction")

# Initialize or load historical data (kept in session state so the synthetic
# series is generated once per session, not on every Streamlit rerun)
if 'historical_data' not in st.session_state:
    # Generate sample historical data
    dates = pd.date_range(start='2020-01-01', end='2023-12-31', freq='D')
    np.random.seed(42)

    # Create synthetic consumption data with seasonal patterns:
    # a yearly sine wave (amplitude 100) plus Gaussian noise around 500
    consumption = np.sin(np.arange(len(dates)) * 2 * np.pi / 365) * 100 + \
                 np.random.normal(500, 50, len(dates))

    st.session_state.historical_data = pd.DataFrame({
        'date': dates,
        'consumption': consumption
    })

# Sidebar inputs
st.sidebar.header("Input Parameters")

# Date selection
# The picker is bounded by the historical range. Streamlit rejects a default
# value outside [min_value, max_value], so today's date is clamped into that
# range instead of being passed through unconditionally.
earliest_date = st.session_state.historical_data.date.min().date()
latest_date = st.session_state.historical_data.date.max().date()
default_date = min(max(datetime.now().date(), earliest_date), latest_date)

selected_date = st.sidebar.date_input(
    "Select date",
    value=default_date,
    min_value=earliest_date,
    max_value=latest_date
)

# Number of days for prediction
prediction_days = st.sidebar.slider(
    "Number of days to predict",
    min_value=1,
    max_value=90,
    value=30
)

def prepare_features(df):
    """Return a copy of ``df`` extended with calendar features.

    The input must have a datetime-like ``date`` column. Four columns are
    derived from it and appended in this order: ``month``, ``day``,
    ``day_of_week`` (Monday is 0) and ``day_of_year``. The input frame
    itself is left unmodified.
    """
    calendar = df.date.dt
    return df.assign(
        month=calendar.month,
        day=calendar.day,
        day_of_week=calendar.dayofweek,
        day_of_year=calendar.dayofyear,
    )

def train_model():
    """Fit a random forest on the session's historical data.

    Reads ``st.session_state.historical_data``, derives the calendar
    features, holds out 20% of the rows as a test split and fits a
    100-tree ``RandomForestRegressor`` on the rest. Both the split and the
    forest use ``random_state=42``.

    Returns:
        A ``(model, metrics)`` tuple, where ``metrics`` is a dict with the
        keys ``train_r2``, ``test_r2``, ``train_rmse`` and ``test_rmse``.
    """
    feature_columns = ['month', 'day', 'day_of_week', 'day_of_year']
    prepared = prepare_features(st.session_state.historical_data)

    # Hold out a fifth of the rows for evaluation
    X_train, X_test, y_train, y_test = train_test_split(
        prepared[feature_columns],
        prepared['consumption'],
        test_size=0.2,
        random_state=42,
    )

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Score each split once, then assemble the metrics in the same key order
    # the display code expects (R² entries first, RMSE entries after).
    targets = {'train': y_train, 'test': y_test}
    predicted = {
        'train': model.predict(X_train),
        'test': model.predict(X_test),
    }

    metrics = {}
    for split_name, actual in targets.items():
        metrics[f'{split_name}_r2'] = r2_score(actual, predicted[split_name])
    for split_name, actual in targets.items():
        metrics[f'{split_name}_rmse'] = np.sqrt(
            mean_squared_error(actual, predicted[split_name])
        )

    return model, metrics

def generate_future_dates(start_date, days):
    """Build the feature frame for the ``days`` days after ``start_date``.

    ``start_date`` itself is excluded: the range is generated with one extra
    period and its first entry is dropped. The resulting dates are passed
    through ``prepare_features`` before being returned.
    """
    span_including_start = pd.date_range(start=start_date, periods=days + 1)
    upcoming = span_including_start[1:]
    return prepare_features(pd.DataFrame({'date': upcoming}))

# Train model and make predictions
# Everything below runs only on the rerun triggered by the sidebar button;
# otherwise a hint is shown instead.
if st.sidebar.button("Generate Prediction"):
    with st.spinner("Training model and generating predictions..."):
        # Train model
        model, metrics = train_model()

        # Generate future dates and make predictions
        feature_columns = ['month', 'day', 'day_of_week', 'day_of_year']
        future_data = generate_future_dates(selected_date, prediction_days)
        predictions = model.predict(future_data[feature_columns])

        # Create prediction DataFrame
        prediction_df = pd.DataFrame({
            'date': future_data.date,
            'predicted_consumption': predictions
        })

        # Display metrics
        st.header("Model Performance Metrics")
        col1, col2, col3, col4 = st.columns(4)
        col1.metric("Training R²", f"{metrics['train_r2']:.3f}")
        col2.metric("Test R²", f"{metrics['test_r2']:.3f}")
        col3.metric("Training RMSE", f"{metrics['train_rmse']:.2f}")
        col4.metric("Test RMSE", f"{metrics['test_rmse']:.2f}")

        # Plot historical data and predictions
        st.header("Consumption Prediction")

        # Prepare data for plotting: history from 90 days before the
        # selected date onwards.
        # st.date_input returns a datetime.date, and ordering comparisons
        # between a datetime64 column and a plain date raise TypeError on
        # pandas >= 2.0, so the cutoff is converted to a Timestamp first.
        history_start = pd.Timestamp(selected_date) - timedelta(days=90)
        historical_plot_data = st.session_state.historical_data[
            st.session_state.historical_data.date >= history_start
        ]

        # Start from an empty figure and add one trace per series
        fig = px.line()

        # Add historical data
        fig.add_scatter(
            x=historical_plot_data.date,
            y=historical_plot_data.consumption,
            name="Historical",
            line=dict(color="blue")
        )

        # Add predictions
        fig.add_scatter(
            x=prediction_df.date,
            y=prediction_df.predicted_consumption,
            name="Predicted",
            line=dict(color="red", dash="dash")
        )

        fig.update_layout(
            title="Electricity Consumption: Historical vs Predicted",
            xaxis_title="Date",
            yaxis_title="Consumption (kWh)",
            hovermode="x unified"
        )

        st.plotly_chart(fig, use_container_width=True)

        # Display prediction table
        st.header("Detailed Predictions")
        st.dataframe(prediction_df.style.format({
            'predicted_consumption': '{:.2f}'
        }))

        # Download predictions
        csv = prediction_df.to_csv(index=False)
        st.download_button(
            label="Download Predictions",
            data=csv,
            file_name="electricity_predictions.csv",
            mime="text/csv"
        )
else:
    st.info("👈 Adjust the parameters in the sidebar and click 'Generate Prediction' to start")

# Static "how it works" notes, rendered at the bottom of the page on every
# run regardless of whether a prediction was generated.
EXPLANATORY_NOTES = """
---
### How it works:
1. The model uses historical electricity consumption data to learn patterns
2. Features used for prediction include:
   - Month of the year
   - Day of the month
   - Day of the week
   - Day of the year
3. The prediction is generated using a Random Forest model
4. The model takes into account seasonal patterns and trends

### Notes:
- Historical data is synthetic and used for demonstration
- For real applications, you should use actual historical consumption data
- The model can be improved by adding more features like weather data, holidays, etc.
"""

st.markdown(EXPLANATORY_NOTES)
Loading code editor...
Click Save & Run to preview your app
Terminal