Ploomber AI Editor | csv-analyzer-727e

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import plotly.express as px
from st_aggrid import AgGrid, GridOptionsBuilder
import io

# Streamlit app: upload a CSV, edit it in an AgGrid table, then explore it
# through a few plotly charts and summary tables. The working DataFrame lives
# in st.session_state.df so that grid edits survive Streamlit's script reruns.
st.set_page_config(layout="wide")
st.title("CSV Editor & Analyzer")

# File upload
uploaded_file = st.file_uploader("Upload your CSV file", type=['csv'])

if uploaded_file is not None:
    # Identify the upload by name and size so that choosing a different file
    # replaces the cached frame, while reruns caused by widget interaction
    # keep the user's edits. (A different file with the same name and size
    # is treated as the same upload.)
    upload_id = (uploaded_file.name, uploaded_file.size)
    is_new_upload = (
        'df' not in st.session_state
        or 'df_source' not in st.session_state
        or st.session_state['df_source'] != upload_id
    )

    if is_new_upload:
        # Parse only when the upload changes instead of on every rerun, and
        # report unreadable files to the user rather than raising.
        try:
            st.session_state.df = pd.read_csv(uploaded_file)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            st.error(f"Could not read the CSV file: {exc}")
            st.stop()
        st.session_state['df_source'] = upload_id

    # Create tabs for different functionalities
    tab1, tab2, tab3 = st.tabs(["Data Editor", "Visualizations", "Analysis"])

    with tab1:
        st.header("Data Editor")

        # Configure grid options: every column editable, plus range selection
        # and the side bar.
        # NOTE(review): `sorteable` is kept exactly as the original spelled it;
        # some st_aggrid releases may expect `sortable` instead - confirm
        # against the installed version.
        gb = GridOptionsBuilder.from_dataframe(st.session_state.df)
        gb.configure_default_column(editable=True, sorteable=True, filterable=True)
        gb.configure_grid_options(enableRangeSelection=True)
        gb.configure_side_bar()

        # Create the AgGrid
        grid_response = AgGrid(
            st.session_state.df,
            gridOptions=gb.build(),
            height=400,
            enable_enterprise_modules=True,
            update_mode='MODEL_CHANGED'
        )

        # Store the grid's data back so the other tabs see the edits
        st.session_state.df = grid_response['data']

        # Render the download button directly. Nesting it inside
        # `if st.button(...)` made it appear for a single rerun only, because
        # st.button is True just for the run triggered by its click.
        st.download_button(
            label="Download CSV",
            data=st.session_state.df.to_csv(index=False),
            file_name="edited_data.csv",
            mime="text/csv"
        )

    # Alias taken after the grid update so the tabs below use the edited data
    df = st.session_state.df

    with tab2:
        st.header("Visualizations")

        # 'number' selects every numeric dtype, not only float64 / int64
        numeric_cols = list(df.select_dtypes(include='number').columns)

        if numeric_cols:
            # Scatter plot
            st.subheader("Scatter Plot")
            x_col = st.selectbox("Select X axis", numeric_cols, key='scatter_x')
            # Default Y to the second numeric column when there is one, so the
            # initial chart is not a column plotted against itself.
            y_col = st.selectbox(
                "Select Y axis",
                numeric_cols,
                index=min(1, len(numeric_cols) - 1),
                key='scatter_y'
            )

            fig = px.scatter(df, x=x_col, y=y_col)
            st.plotly_chart(fig, use_container_width=True)

            # Histogram
            st.subheader("Histogram")
            hist_col = st.selectbox("Select column for histogram", numeric_cols, key='hist')
            fig = px.histogram(df, x=hist_col)
            st.plotly_chart(fig, use_container_width=True)

            # Box plot
            st.subheader("Box Plot")
            box_col = st.selectbox("Select column for box plot", numeric_cols, key='box')
            fig = px.box(df, y=box_col)
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("No numeric columns available for visualization.")

    with tab3:
        st.header("Data Analysis")

        # Basic statistics
        st.subheader("Basic Statistics")
        st.write(df.describe())

        # Missing values analysis: build the null mask once and derive both
        # the count and the percentage from it.
        st.subheader("Missing Values")
        null_mask = df.isnull()
        missing_data = pd.DataFrame({
            'Column': df.columns,
            'Missing Values': null_mask.sum().values,
            'Percentage': (null_mask.mean().values * 100).round(2)
        })
        st.write(missing_data)

        # Correlation matrix
        st.subheader("Correlation Matrix")
        numeric_df = df.select_dtypes(include='number')
        if not numeric_df.empty:
            fig = px.imshow(numeric_df.corr(),
                          labels=dict(color="Correlation"),
                          color_continuous_scale='RdBu_r')
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("No numeric data available for a correlation matrix.")

        # Column info; dtypes are stringified so the table holds plain text
        # rather than dtype objects.
        st.subheader("Column Information")
        col_info = pd.DataFrame({
            'Column': df.columns,
            'Type': df.dtypes.astype(str).values,
            'Unique Values': df.nunique().values
        })
        st.write(col_info)

else:
    # With no file selected, drop any frame left from an earlier upload so the
    # next upload always starts from the file's own contents.
    for stale_key in ('df', 'df_source'):
        if stale_key in st.session_state:
            del st.session_state[stale_key]
    st.info("Please upload a CSV file to begin analysis.")
Loading code editor...
Click Save & Run to preview your app
Terminal