# Stray Streamlit-editor banner text (not Python code) — kept as a comment:
# To upload files, please first save the app
import streamlit as st
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import io
import docx2txt
from PyPDF2 import PdfReader
import os
import json
from nltk.stem import WordNetLemmatizer
# Code written and model developed by Swayam Singh
# Under the supervision of Mr. Sital Sharma
# Download necessary NLTK data
@st.cache_resource
def download_nltk_data():
    """Fetch the NLTK corpora used by this app (tokenizer models,
    stopword list, WordNet). Cached so it runs once per server process."""
    # quiet=True keeps the download progress log out of the app output.
    nltk.download('punkt', quiet=True)
    # NLTK >= 3.8.2 splits the punkt models: word_tokenize needs
    # 'punkt_tab' there, so fetch both for old/new compatibility.
    nltk.download('punkt_tab', quiet=True)
    nltk.download('stopwords', quiet=True)
    nltk.download('wordnet', quiet=True)

download_nltk_data()
# Set page configuration (must run before any other Streamlit call)
st.set_page_config(
    page_title="Answer Evaluation System",
    page_icon="📝",
    layout="wide"
)

# Display attribution in the sidebar
st.sidebar.markdown("""
### Created By
**Developer:** Swayam Singh
**Supervisor:** Mr. Sital Sharma
""")
# Custom CSS: header sizes, the "card" container, and the colour classes
# (score-high/medium/low, missing-keywords) referenced by the HTML snippets
# rendered in main().
st.markdown("""
<style>
.main-header {
font-size: 2.5rem;
color: #1E88E5;
text-align: center;
margin-bottom: 2rem;
}
.sub-header {
font-size: 1.5rem;
color: #0D47A1;
margin-top: 1.5rem;
margin-bottom: 1rem;
}
.card {
padding: 1.5rem;
border-radius: 0.5rem;
background-color: #f8f9fa;
margin-bottom: 1rem;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.score-high {
color: #2E7D32;
font-weight: bold;
}
.score-medium {
color: #F57F17;
font-weight: bold;
}
.score-low {
color: #C62828;
font-weight: bold;
}
.highlight {
background-color: #FFECB3;
padding: 0.2rem;
border-radius: 0.2rem;
}
.missing-keywords {
color: #C62828;
margin-top: 0.5rem;
}
</style>
""", unsafe_allow_html=True)
# Helper functions for text processing
def preprocess_text(text):
    """Normalize raw text: lowercase, strip punctuation and digits,
    and collapse all whitespace runs to single spaces."""
    cleaned = text.lower()
    # Replace non-word characters, then digit runs, with spaces so the
    # surrounding words remain separated rather than fused together.
    for pattern in (r'[^\w\s]', r'\d+'):
        cleaned = re.sub(pattern, ' ', cleaned)
    # Squeeze repeated whitespace and trim the ends.
    return re.sub(r'\s+', ' ', cleaned).strip()
def remove_stopwords(text):
    """Return *text* with English stopwords dropped, rejoined by spaces."""
    stop_words = set(stopwords.words('english'))
    kept = [token for token in word_tokenize(text)
            if token.lower() not in stop_words]
    return ' '.join(kept)
def extract_keywords(text, n=10):
    """Extract up to *n* important keywords (uni- and bi-grams) from *text*.

    Keywords are ranked by TF-IDF score over the stopword-free text.
    Falls back to the first *n* tokens when vectorization fails
    (e.g. blank or stopword-only input).
    """
    # Normalize and drop stopwords so scoring focuses on content words.
    text = preprocess_text(text)
    text_no_stopwords = remove_stopwords(text)
    # Create TF-IDF vectorizer over single words and two-word phrases.
    vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=n)
    try:
        tfidf_matrix = vectorizer.fit_transform([text_no_stopwords])
    except ValueError:
        # fit_transform raises ValueError on an empty vocabulary (very
        # short text); fall back to simple word tokenization. The former
        # bare `except:` also hid unrelated bugs — now only this expected
        # failure is handled.
        return text_no_stopwords.split()[:n]
    # Pair each term with its TF-IDF score.
    feature_names = vectorizer.get_feature_names_out()
    tfidf_scores = tfidf_matrix.toarray()[0]
    term_scores = dict(zip(feature_names, tfidf_scores))
    # Sort by score, highest first, and return the top n terms.
    sorted_terms = sorted(term_scores.items(), key=lambda item: item[1], reverse=True)
    return [term for term, _ in sorted_terms[:n]]
def compute_cosine_similarity(text1, text2):
    """Return the TF-IDF cosine similarity of two texts as a float in [0, 1]."""
    # Preprocess texts so the comparison ignores case/punctuation/digits.
    text1 = preprocess_text(text1)
    text2 = preprocess_text(text2)
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([text1, text2])
    except ValueError:
        # Empty vocabulary (both texts blank or stopwords only): treat as
        # no overlap. Previously a bare `except:` masked every error here.
        return 0.0
    # Compute cosine similarity between the two document vectors; cast the
    # numpy scalar to a plain float for callers that serialize the value.
    return float(cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0])
def find_missing_keywords(model_answer, student_answer):
    """Return model-answer keywords that do not appear in the student answer.

    extract_keywords() may yield two-word phrases (ngram_range=(1, 2)), so
    a keyword counts as present only when every word of it occurs in the
    student's (stopword-free) answer.
    """
    # Preprocess texts the same way extract_keywords does.
    model_answer = preprocess_text(model_answer)
    student_answer = preprocess_text(student_answer)
    # Remove stopwords from the student answer before membership testing.
    student_no_stop = remove_stopwords(student_answer)
    # Extract model answer keywords (may include bi-grams).
    model_keywords = extract_keywords(model_answer, n=15)
    student_words = set(student_no_stop.split())
    # Bug fix: a bi-gram like "machine learning" can never equal a member of
    # a single-word set, so multi-word keywords were always flagged missing.
    # Check the keyword's constituent words individually instead.
    missing = [
        keyword for keyword in model_keywords
        if not all(word in student_words for word in keyword.split())
    ]
    return missing
def read_file_content(uploaded_file):
    """Return the text content of an uploaded TXT, DOCX or PDF file.

    The file type is decided by the filename extension (case-insensitive).
    Unsupported extensions yield the sentinel string
    "Unsupported file format".
    """
    file_extension = uploaded_file.name.split('.')[-1].lower()
    if file_extension == 'txt':
        # Read text file (assumes UTF-8 encoding).
        content = uploaded_file.getvalue().decode('utf-8')
    elif file_extension == 'docx':
        # Read docx file via docx2txt.
        content = docx2txt.process(uploaded_file)
    elif file_extension == 'pdf':
        # Read pdf file page by page. extract_text() may return None for
        # image-only/empty pages, which previously raised TypeError on
        # concatenation — guard with `or ""`.
        pdf_reader = PdfReader(uploaded_file)
        content = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    else:
        content = "Unsupported file format"
    return content
# Main application
def main():
    """Main Streamlit application.

    Renders three tabs — question-paper upload, student answer submission,
    and evaluation/reporting. All data lives in st.session_state:
    questions ({q_number: {"question", "marks"}}), model_answers
    ({q_number: str}), student_answers (list of dicts), and results
    ({answer index: evaluation dict}).
    """
    st.markdown("<h1 class='main-header'>Theoretical Answer Evaluation System</h1>", unsafe_allow_html=True)
    st.markdown("<p style='text-align: center;'>Developed by Swayam Singh under the supervision of Mr. Sital Sharma</p>", unsafe_allow_html=True)
    # Initialize session state for storing data (survives Streamlit reruns)
    if 'questions' not in st.session_state:
        st.session_state.questions = {}
    if 'model_answers' not in st.session_state:
        st.session_state.model_answers = {}
    if 'student_answers' not in st.session_state:
        st.session_state.student_answers = []
    if 'results' not in st.session_state:
        st.session_state.results = {}
    # Create tabs
    tab1, tab2, tab3 = st.tabs(["Upload Question Paper", "Submit Answers", "Evaluation Results"])
    # Tab 1: Upload Question Paper
    with tab1:
        st.markdown("<div class='card'>", unsafe_allow_html=True)
        st.markdown("<h2 class='sub-header'>Upload Question Paper</h2>", unsafe_allow_html=True)
        uploaded_file = st.file_uploader("Upload Question Paper (TXT, DOCX, PDF)", type=['txt', 'docx', 'pdf'])
        if uploaded_file is not None:
            # Read file content and show a preview so the user can verify it
            content = read_file_content(uploaded_file)
            st.markdown("### Question Paper Content")
            st.text_area("Content Preview", content, height=200)
            # Parse questions: the user transcribes each question manually
            st.markdown("### Extract Questions")
            st.info("Please enter each question with its model answer below. You can add multiple questions.")
            # Add question and model answer
            col1, col2 = st.columns(2)
            with col1:
                q_number = st.number_input("Question Number", min_value=1, step=1)
            with col2:
                q_marks = st.number_input("Max Marks", min_value=1, step=1, value=10)
            question = st.text_area(f"Question {q_number}", key=f"q_{q_number}")
            model_answer = st.text_area(f"Model Answer for Question {q_number}", key=f"ma_{q_number}")
            if st.button("Add Question"):
                if question and model_answer:
                    # Save question and model answer keyed by question number
                    st.session_state.questions[q_number] = {
                        "question": question,
                        "marks": q_marks
                    }
                    st.session_state.model_answers[q_number] = model_answer
                    st.success(f"Question {q_number} added successfully!")
                else:
                    st.error("Please provide both question and model answer.")
        # Display added questions
        if st.session_state.questions:
            st.markdown("### Added Questions")
            for q_num, q_data in st.session_state.questions.items():
                with st.expander(f"Question {q_num} ({q_data['marks']} marks)"):
                    st.write("**Question:**", q_data["question"])
                    st.write("**Model Answer:**", st.session_state.model_answers[q_num])
        st.markdown("</div>", unsafe_allow_html=True)
    # Tab 2: Submit Answers
    with tab2:
        st.markdown("<div class='card'>", unsafe_allow_html=True)
        st.markdown("<h2 class='sub-header'>Submit Student Answers</h2>", unsafe_allow_html=True)
        if not st.session_state.questions:
            st.warning("Please upload a question paper and add questions first.")
        else:
            st.info("Enter student information and answers for evaluation.")
            # Student information
            col1, col2 = st.columns(2)
            with col1:
                student_name = st.text_input("Student Name")
            with col2:
                student_id = st.text_input("Student ID")
            # Answer submission
            st.markdown("### Student Answers")
            q_num = st.selectbox("Select Question Number", list(st.session_state.questions.keys()))
            if q_num:
                st.write("**Question:**", st.session_state.questions[q_num]["question"])
                student_answer = st.text_area("Student Answer", key=f"sa_{q_num}")
                if st.button("Submit Answer"):
                    if student_answer:
                        # Snapshot everything needed for later evaluation so
                        # the record is self-contained.
                        answer_data = {
                            "student_name": student_name,
                            "student_id": student_id,
                            "question_number": q_num,
                            "question": st.session_state.questions[q_num]["question"],
                            "model_answer": st.session_state.model_answers[q_num],
                            "student_answer": student_answer,
                            "max_marks": st.session_state.questions[q_num]["marks"]
                        }
                        st.session_state.student_answers.append(answer_data)
                        st.success("Answer submitted successfully!")
                    else:
                        st.error("Please provide an answer.")
            # Display submitted answers
            if st.session_state.student_answers:
                st.markdown("### Submitted Answers")
                for i, answer in enumerate(st.session_state.student_answers):
                    with st.expander(f"{answer['student_name']} - Q{answer['question_number']}"):
                        st.write("**Student Name:**", answer["student_name"])
                        st.write("**Student ID:**", answer["student_id"])
                        st.write("**Question:**", answer["question"])
                        st.write("**Answer:**", answer["student_answer"])
        st.markdown("</div>", unsafe_allow_html=True)
    # Tab 3: Evaluation Results
    with tab3:
        st.markdown("<div class='card'>", unsafe_allow_html=True)
        st.markdown("<h2 class='sub-header'>Evaluation Results</h2>", unsafe_allow_html=True)
        if not st.session_state.student_answers:
            st.warning("No answers have been submitted for evaluation yet.")
        else:
            if st.button("Evaluate All Answers"):
                with st.spinner("Evaluating answers..."):
                    # Evaluate each answer: similarity drives the score, and
                    # missing keywords apply a capped penalty.
                    for i, answer in enumerate(st.session_state.student_answers):
                        # Compute cosine similarity
                        similarity = compute_cosine_similarity(
                            answer["model_answer"],
                            answer["student_answer"]
                        )
                        # Find missing keywords
                        missing_keywords = find_missing_keywords(
                            answer["model_answer"],
                            answer["student_answer"]
                        )
                        # Calculate score based on similarity and missing keywords
                        max_marks = answer["max_marks"]
                        keyword_penalty = min(0.05 * len(missing_keywords), 0.5)  # Max 50% penalty for missing keywords
                        raw_score = similarity * (1 - keyword_penalty)
                        final_score = round(raw_score * max_marks, 1)
                        # Generate feedback from similarity thresholds
                        if similarity >= 0.8:
                            feedback = "Excellent answer! Covers most key points with proper explanation."
                            score_class = "score-high"
                        elif similarity >= 0.6:
                            feedback = "Good answer but could be more comprehensive."
                            score_class = "score-medium"
                        else:
                            feedback = "Answer needs improvement. Missing several key concepts."
                            score_class = "score-low"
                        # Add detailed feedback about missing keywords
                        if missing_keywords:
                            keyword_feedback = "Consider including these important concepts: " + ", ".join(missing_keywords)
                        else:
                            keyword_feedback = "All important keywords are present."
                        # Save results keyed by answer index
                        st.session_state.results[i] = {
                            "similarity": similarity,
                            "missing_keywords": missing_keywords,
                            "score": final_score,
                            "max_score": max_marks,
                            "feedback": feedback,
                            "keyword_feedback": keyword_feedback,
                            "score_class": score_class,
                            "percentage": round((final_score / max_marks) * 100, 1)
                        }
                st.success("Evaluation completed!")
            # Display results (persisted across reruns in session state)
            if st.session_state.results:
                st.markdown("### Evaluation Summary")
                # Create dataframe for summary
                results_data = []
                for i, answer in enumerate(st.session_state.student_answers):
                    if i in st.session_state.results:
                        results_data.append({
                            "Student Name": answer["student_name"],
                            "Student ID": answer["student_id"],
                            "Question": f"Q{answer['question_number']}",
                            "Score": f"{st.session_state.results[i]['score']}/{st.session_state.results[i]['max_score']}",
                            "Percentage": f"{st.session_state.results[i]['percentage']}%",
                            "Similarity": f"{st.session_state.results[i]['similarity']:.2f}"
                        })
                df = pd.DataFrame(results_data)
                st.dataframe(df, use_container_width=True)
                # Visualize results
                st.markdown("### Visualization")
                fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
                # Pie chart of score distribution
                categories = {"Excellent (>80%)": 0, "Good (60-80%)": 0, "Needs Improvement (<60%)": 0}
                for i in st.session_state.results:
                    percentage = st.session_state.results[i]['percentage']
                    if percentage > 80:
                        categories["Excellent (>80%)"] += 1
                    elif percentage >= 60:
                        categories["Good (60-80%)"] += 1
                    else:
                        categories["Needs Improvement (<60%)"] += 1
                colors = ['#4CAF50', '#FF9800', '#F44336']
                ax1.pie(
                    categories.values(),
                    labels=categories.keys(),
                    autopct='%1.1f%%',
                    colors=colors
                )
                ax1.set_title('Score Distribution')
                # Bar chart of similarity scores
                names = [st.session_state.student_answers[i]["student_name"] for i in st.session_state.results]
                similarities = [st.session_state.results[i]["similarity"] for i in st.session_state.results]
                ax2.bar(names, similarities, color='#2196F3')
                ax2.set_title('Cosine Similarity Scores')
                ax2.set_ylim(0, 1)
                ax2.set_ylabel('Similarity')
                ax2.set_xlabel('Student')
                plt.xticks(rotation=45, ha='right')
                plt.tight_layout()
                st.pyplot(fig)
                # Detailed individual results
                st.markdown("### Detailed Results")
                for i, answer in enumerate(st.session_state.student_answers):
                    if i in st.session_state.results:
                        with st.expander(f"{answer['student_name']} - Q{answer['question_number']}"):
                            result = st.session_state.results[i]
                            col1, col2, col3 = st.columns(3)
                            with col1:
                                st.metric("Score", f"{result['score']}/{result['max_score']}")
                            with col2:
                                st.metric("Percentage", f"{result['percentage']}%")
                            with col3:
                                st.metric("Similarity", f"{result['similarity']:.2f}")
                            st.markdown(f"<p><strong>Feedback:</strong> <span class='{result['score_class']}'>{result['feedback']}</span></p>", unsafe_allow_html=True)
                            if result['missing_keywords']:
                                st.markdown("<p class='missing-keywords'><strong>Missing Keywords:</strong> " +
                                            ", ".join(result['missing_keywords']) + "</p>", unsafe_allow_html=True)
                            else:
                                st.markdown("<p><strong>All important keywords are present.</strong></p>", unsafe_allow_html=True)
                            # Side by side comparison
                            st.markdown("#### Answer Comparison")
                            col1, col2 = st.columns(2)
                            with col1:
                                st.markdown("##### Model Answer")
                                st.text_area("", answer["model_answer"], height=200, key=f"ma_view_{i}")
                            with col2:
                                st.markdown("##### Student Answer")
                                st.text_area("", answer["student_answer"], height=200, key=f"sa_view_{i}")
                # Export results
                st.markdown("### Export Results")
                export_data = {
                    "summary": results_data,
                    "detailed_results": [
                        {
                            "student_name": st.session_state.student_answers[i]["student_name"],
                            "student_id": st.session_state.student_answers[i]["student_id"],
                            "question_number": st.session_state.student_answers[i]["question_number"],
                            "similarity": st.session_state.results[i]["similarity"],
                            "missing_keywords": st.session_state.results[i]["missing_keywords"],
                            "score": st.session_state.results[i]["score"],
                            "max_score": st.session_state.results[i]["max_score"],
                            "percentage": st.session_state.results[i]["percentage"],
                            "feedback": st.session_state.results[i]["feedback"],
                        }
                        for i in st.session_state.results
                    ]
                }
                json_data = json.dumps(export_data, indent=4)
                st.download_button(
                    label="Download Results (JSON)",
                    data=json_data,
                    file_name="evaluation_results.json",
                    mime="application/json"
                )
                # Create Excel report with Summary and Detailed Results sheets
                excel_buffer = io.BytesIO()
                with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
                    # Summary sheet
                    df.to_excel(writer, sheet_name='Summary', index=False)
                    # Detailed results sheet
                    detailed_data = []
                    for i in st.session_state.results:
                        answer = st.session_state.student_answers[i]
                        result = st.session_state.results[i]
                        detailed_data.append({
                            "Student Name": answer["student_name"],
                            "Student ID": answer["student_id"],
                            "Question Number": answer["question_number"],
                            "Cosine Similarity": result["similarity"],
                            "Missing Keywords": ", ".join(result["missing_keywords"]) if result["missing_keywords"] else "None",
                            "Score": result["score"],
                            "Max Score": result["max_score"],
                            "Percentage": result["percentage"],
                            "Feedback": result["feedback"]
                        })
                    pd.DataFrame(detailed_data).to_excel(writer, sheet_name='Detailed Results', index=False)
                    # Format the Excel file: bold white-on-blue header row
                    workbook = writer.book
                    worksheet = writer.sheets['Summary']
                    header_format = workbook.add_format({'bold': True, 'bg_color': '#4285F4', 'color': 'white'})
                    for col_num, value in enumerate(df.columns.values):
                        worksheet.write(0, col_num, value, header_format)
                excel_buffer.seek(0)
                st.download_button(
                    label="Download Results (Excel)",
                    data=excel_buffer,
                    file_name="evaluation_results.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )
        st.markdown("</div>", unsafe_allow_html=True)
# Script entry point: run the Streamlit app.
if __name__ == "__main__":
    main()
# Stray assistant/chat text (not Python code) — kept as a comment:
# Hi! I can help you with any questions about Streamlit and Python. What would you like to know?