# Stray Streamlit-editor banner text (not Python code) — kept as a comment:
# To upload files, please first save the app
import streamlit as st
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import io
import docx2txt
from PyPDF2 import PdfReader
import os
import json
from nltk.stem import WordNetLemmatizer
# Code written and model developed by Swayam Singh
# Under the supervision of Mr. Sital Sharma
# Download necessary NLTK data
@st.cache_resource
def download_nltk_data():
    """Fetch the NLTK corpora used by this app (tokenizer models,
    stopword list, WordNet). Cached so it runs once per server process."""
    # quiet=True keeps the download progress log out of the app output.
    nltk.download('punkt', quiet=True)
    # NLTK >= 3.8.2 splits the punkt models: word_tokenize needs
    # 'punkt_tab' there, so fetch both for old/new compatibility.
    nltk.download('punkt_tab', quiet=True)
    nltk.download('stopwords', quiet=True)
    nltk.download('wordnet', quiet=True)

download_nltk_data()
# Set page configuration (must run before any other Streamlit call)
st.set_page_config(
    page_title="Answer Evaluation System",
    page_icon="📝",
    layout="wide"
)

# Display attribution in the sidebar
st.sidebar.markdown("""
### Created By
**Developer:** Swayam Singh
**Supervisor:** Mr. Sital Sharma
""")
# Custom CSS: header sizes, the "card" container, and the colour classes
# (score-high/medium/low, missing-keywords) referenced by the HTML snippets
# rendered in main().
st.markdown("""
<style>
.main-header {
font-size: 2.5rem;
color: #1E88E5;
text-align: center;
margin-bottom: 2rem;
}
.sub-header {
font-size: 1.5rem;
color: #0D47A1;
margin-top: 1.5rem;
margin-bottom: 1rem;
}
.card {
padding: 1.5rem;
border-radius: 0.5rem;
background-color: #f8f9fa;
margin-bottom: 1rem;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.score-high {
color: #2E7D32;
font-weight: bold;
}
.score-medium {
color: #F57F17;
font-weight: bold;
}
.score-low {
color: #C62828;
font-weight: bold;
}
.highlight {
background-color: #FFECB3;
padding: 0.2rem;
border-radius: 0.2rem;
}
.missing-keywords {
color: #C62828;
margin-top: 0.5rem;
}
</style>
""", unsafe_allow_html=True)
# Helper functions for text processing
def preprocess_text(text):
    """Normalize raw text: lowercase, strip punctuation and digits,
    and collapse all whitespace runs to single spaces."""
    cleaned = text.lower()
    # Replace non-word characters, then digit runs, with spaces so the
    # surrounding words remain separated rather than fused together.
    for pattern in (r'[^\w\s]', r'\d+'):
        cleaned = re.sub(pattern, ' ', cleaned)
    # Squeeze repeated whitespace and trim the ends.
    return re.sub(r'\s+', ' ', cleaned).strip()
def remove_stopwords(text):
    """Return *text* with English stopwords dropped, rejoined by spaces."""
    stop_words = set(stopwords.words('english'))
    kept = [token for token in word_tokenize(text)
            if token.lower() not in stop_words]
    return ' '.join(kept)
def extract_keywords(text, n=10):
    """Extract up to *n* important keywords (uni- and bi-grams) from *text*.

    Keywords are ranked by TF-IDF score over the stopword-free text.
    Falls back to the first *n* tokens when vectorization fails
    (e.g. blank or stopword-only input).
    """
    # Normalize and drop stopwords so scoring focuses on content words.
    text = preprocess_text(text)
    text_no_stopwords = remove_stopwords(text)
    # Create TF-IDF vectorizer over single words and two-word phrases.
    vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=n)
    try:
        tfidf_matrix = vectorizer.fit_transform([text_no_stopwords])
    except ValueError:
        # fit_transform raises ValueError on an empty vocabulary (very
        # short text); fall back to simple word tokenization. The former
        # bare `except:` also hid unrelated bugs — now only this expected
        # failure is handled.
        return text_no_stopwords.split()[:n]
    # Pair each term with its TF-IDF score.
    feature_names = vectorizer.get_feature_names_out()
    tfidf_scores = tfidf_matrix.toarray()[0]
    term_scores = dict(zip(feature_names, tfidf_scores))
    # Sort by score, highest first, and return the top n terms.
    sorted_terms = sorted(term_scores.items(), key=lambda item: item[1], reverse=True)
    return [term for term, _ in sorted_terms[:n]]
def compute_cosine_similarity(text1, text2):
    """Return the TF-IDF cosine similarity of two texts as a float in [0, 1]."""
    # Preprocess texts so the comparison ignores case/punctuation/digits.
    text1 = preprocess_text(text1)
    text2 = preprocess_text(text2)
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([text1, text2])
    except ValueError:
        # Empty vocabulary (both texts blank or stopwords only): treat as
        # no overlap. Previously a bare `except:` masked every error here.
        return 0.0
    # Compute cosine similarity between the two document vectors; cast the
    # numpy scalar to a plain float for callers that serialize the value.
    return float(cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0])
def find_missing_keywords(model_answer, student_answer):
    """Return model-answer keywords that do not appear in the student answer.

    extract_keywords() may yield two-word phrases (ngram_range=(1, 2)), so
    a keyword counts as present only when every word of it occurs in the
    student's (stopword-free) answer.
    """
    # Preprocess texts the same way extract_keywords does.
    model_answer = preprocess_text(model_answer)
    student_answer = preprocess_text(student_answer)
    # Remove stopwords from the student answer before membership testing.
    student_no_stop = remove_stopwords(student_answer)
    # Extract model answer keywords (may include bi-grams).
    model_keywords = extract_keywords(model_answer, n=15)
    student_words = set(student_no_stop.split())
    # Bug fix: a bi-gram like "machine learning" can never equal a member of
    # a single-word set, so multi-word keywords were always flagged missing.
    # Check the keyword's constituent words individually instead.
    missing = [
        keyword for keyword in model_keywords
        if not all(word in student_words for word in keyword.split())
    ]
    return missing
def read_file_content(uploaded_file):
    """Return the text content of an uploaded TXT, DOCX or PDF file.

    The file type is decided by the filename extension (case-insensitive).
    Unsupported extensions yield the sentinel string
    "Unsupported file format".
    """
    file_extension = uploaded_file.name.split('.')[-1].lower()
    if file_extension == 'txt':
        # Read text file (assumes UTF-8 encoding).
        content = uploaded_file.getvalue().decode('utf-8')
    elif file_extension == 'docx':
        # Read docx file via docx2txt.
        content = docx2txt.process(uploaded_file)
    elif file_extension == 'pdf':
        # Read pdf file page by page. extract_text() may return None for
        # image-only/empty pages, which previously raised TypeError on
        # concatenation — guard with `or ""`.
        pdf_reader = PdfReader(uploaded_file)
        content = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    else:
        content = "Unsupported file format"
    return content
# Main application
def main():
    """Main Streamlit application.

    Renders three tabs — question-paper upload, student answer submission,
    and evaluation/reporting. All data lives in st.session_state:
    questions ({q_number: {"question", "marks"}}), model_answers
    ({q_number: str}), student_answers (list of dicts), and results
    ({answer index: evaluation dict}).
    """
    st.markdown("<h1 class='main-header'>Theoretical Answer Evaluation System</h1>", unsafe_allow_html=True)
    st.markdown("<p style='text-align: center;'>Developed by Swayam Singh under the supervision of Mr. Sital Sharma</p>", unsafe_allow_html=True)
    # Initialize session state for storing data (survives Streamlit reruns)
    if 'questions' not in st.session_state:
        st.session_state.questions = {}
    if 'model_answers' not in st.session_state:
        st.session_state.model_answers = {}
    if 'student_answers' not in st.session_state:
        st.session_state.student_answers = []
    if 'results' not in st.session_state:
        st.session_state.results = {}
    # Create tabs
    tab1, tab2, tab3 = st.tabs(["Upload Question Paper", "Submit Answers", "Evaluation Results"])
    # Tab 1: Upload Question Paper
    with tab1:
        st.markdown("<div class='card'>", unsafe_allow_html=True)
        st.markdown("<h2 class='sub-header'>Upload Question Paper</h2>", unsafe_allow_html=True)
        uploaded_file = st.file_uploader("Upload Question Paper (TXT, DOCX, PDF)", type=['txt', 'docx', 'pdf'])
        if uploaded_file is not None:
            # Read file content and show a preview so the user can verify it
            content = read_file_content(uploaded_file)
            st.markdown("### Question Paper Content")
            st.text_area("Content Preview", content, height=200)
            # Parse questions: the user transcribes each question manually
            st.markdown("### Extract Questions")
            st.info("Please enter each question with its model answer below. You can add multiple questions.")
            # Add question and model answer
            col1, col2 = st.columns(2)
            with col1:
                q_number = st.number_input("Question Number", min_value=1, step=1)
            with col2:
                q_marks = st.number_input("Max Marks", min_value=1, step=1, value=10)
            question = st.text_area(f"Question {q_number}", key=f"q_{q_number}")
            model_answer = st.text_area(f"Model Answer for Question {q_number}", key=f"ma_{q_number}")
            if st.button("Add Question"):
                if question and model_answer:
                    # Save question and model answer keyed by question number
                    st.session_state.questions[q_number] = {
                        "question": question,
                        "marks": q_marks
                    }
                    st.session_state.model_answers[q_number] = model_answer
                    st.success(f"Question {q_number} added successfully!")
                else:
                    st.error("Please provide both question and model answer.")
        # Display added questions
        if st.session_state.questions:
            st.markdown("### Added Questions")
            for q_num, q_data in st.session_state.questions.items():
                with st.expander(f"Question {q_num} ({q_data['marks']} marks)"):
                    st.write("**Question:**", q_data["question"])
                    st.write("**Model Answer:**", st.session_state.model_answers[q_num])
        st.markdown("</div>", unsafe_allow_html=True)
    # Tab 2: Submit Answers
    with tab2:
        st.markdown("<div class='card'>", unsafe_allow_html=True)
        st.markdown("<h2 class='sub-header'>Submit Student Answers</h2>", unsafe_allow_html=True)
        if not st.session_state.questions:
            st.warning("Please upload a question paper and add questions first.")
        else:
            st.info("Enter student information and answers for evaluation.")
            # Student information
            col1, col2 = st.columns(2)
            with col1:
                student_name = st.text_input("Student Name")
            with col2:
                student_id = st.text_input("Student ID")
            # Answer submission
            st.markdown("### Student Answers")
            q_num = st.selectbox("Select Question Number", list(st.session_state.questions.keys()))
            if q_num:
                st.write("**Question:**", st.session_state.questions[q_num]["question"])
                student_answer = st.text_area("Student Answer", key=f"sa_{q_num}")
                if st.button("Submit Answer"):
                    if student_answer:
                        # Snapshot everything needed for later evaluation so
                        # the record is self-contained.
                        answer_data = {
                            "student_name": student_name,
                            "student_id": student_id,
                            "question_number": q_num,
                            "question": st.session_state.questions[q_num]["question"],
                            "model_answer": st.session_state.model_answers[q_num],
                            "student_answer": student_answer,
                            "max_marks": st.session_state.questions[q_num]["marks"]
                        }
                        st.session_state.student_answers.append(answer_data)
                        st.success("Answer submitted successfully!")
                    else:
                        st.error("Please provide an answer.")
            # Display submitted answers
            if st.session_state.student_answers:
                st.markdown("### Submitted Answers")
                for i, answer in enumerate(st.session_state.student_answers):
                    with st.expander(f"{answer['student_name']} - Q{answer['question_number']}"):
                        st.write("**Student Name:**", answer["student_name"])
                        st.write("**Student ID:**", answer["student_id"])
                        st.write("**Question:**", answer["question"])
                        st.write("**Answer:**", answer["student_answer"])
        st.markdown("</div>", unsafe_allow_html=True)
    # Tab 3: Evaluation Results
    with tab3:
        st.markdown("<div class='card'>", unsafe_allow_html=True)
        st.markdown("<h2 class='sub-header'>Evaluation Results</h2>", unsafe_allow_html=True)
        if not st.session_state.student_answers:
            st.warning("No answers have been submitted for evaluation yet.")
        else:
            if st.button("Evaluate All Answers"):
                with st.spinner("Evaluating answers..."):
                    # Evaluate each answer: similarity drives the score, and
                    # missing keywords apply a capped penalty.
                    for i, answer in enumerate(st.session_state.student_answers):
                        # Compute cosine similarity
                        similarity = compute_cosine_similarity(
                            answer["model_answer"],
                            answer["student_answer"]
                        )
                        # Find missing keywords
                        missing_keywords = find_missing_keywords(
                            answer["model_answer"],
                            answer["student_answer"]
                        )
                        # Calculate score based on similarity and missing keywords
                        max_marks = answer["max_marks"]
                        keyword_penalty = min(0.05 * len(missing_keywords), 0.5)  # Max 50% penalty for missing keywords
                        raw_score = similarity * (1 - keyword_penalty)
                        final_score = round(raw_score * max_marks, 1)
                        # Generate feedback from similarity thresholds
                        if similarity >= 0.8:
                            feedback = "Excellent answer! Covers most key points with proper explanation."
                            score_class = "score-high"
                        elif similarity >= 0.6:
                            feedback = "Good answer but could be more comprehensive."
                            score_class = "score-medium"
                        else:
                            feedback = "Answer needs improvement. Missing several key concepts."
                            score_class = "score-low"
                        # Add detailed feedback about missing keywords
                        if missing_keywords:
                            keyword_feedback = "Consider including these important concepts: " + ", ".join(missing_keywords)
                        else:
                            keyword_feedback = "All important keywords are present."
                        # Save results keyed by answer index
                        st.session_state.results[i] = {
                            "similarity": similarity,
                            "missing_keywords": missing_keywords,
                            "score": final_score,
                            "max_score": max_marks,
                            "feedback": feedback,
                            "keyword_feedback": keyword_feedback,
                            "score_class": score_class,
                            "percentage": round((final_score / max_marks) * 100, 1)
                        }
                st.success("Evaluation completed!")
            # Display results (persisted across reruns in session state)
            if st.session_state.results:
                st.markdown("### Evaluation Summary")
                # Create dataframe for summary
                results_data = []
                for i, answer in enumerate(st.session_state.student_answers):
                    if i in st.session_state.results:
                        results_data.append({
                            "Student Name": answer["student_name"],
                            "Student ID": answer["student_id"],
                            "Question": f"Q{answer['question_number']}",
                            "Score": f"{st.session_state.results[i]['score']}/{st.session_state.results[i]['max_score']}",
                            "Percentage": f"{st.session_state.results[i]['percentage']}%",
                            "Similarity": f"{st.session_state.results[i]['similarity']:.2f}"
                        })
                df = pd.DataFrame(results_data)
                st.dataframe(df, use_container_width=True)
                # Visualize results
                st.markdown("### Visualization")
                fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
                # Pie chart of score distribution
                categories = {"Excellent (>80%)": 0, "Good (60-80%)": 0, "Needs Improvement (<60%)": 0}
                for i in st.session_state.results:
                    percentage = st.session_state.results[i]['percentage']
                    if percentage > 80:
                        categories["Excellent (>80%)"] += 1
                    elif percentage >= 60:
                        categories["Good (60-80%)"] += 1
                    else:
                        categories["Needs Improvement (<60%)"] += 1
                colors = ['#4CAF50', '#FF9800', '#F44336']
                ax1.pie(
                    categories.values(),
                    labels=categories.keys(),
                    autopct='%1.1f%%',
                    colors=colors
                )
                ax1.set_title('Score Distribution')
                # Bar chart of similarity scores
                names = [st.session_state.student_answers[i]["student_name"] for i in st.session_state.results]
                similarities = [st.session_state.results[i]["similarity"] for i in st.session_state.results]
                ax2.bar(names, similarities, color='#2196F3')
                ax2.set_title('Cosine Similarity Scores')
                ax2.set_ylim(0, 1)
                ax2.set_ylabel('Similarity')
                ax2.set_xlabel('Student')
                plt.xticks(rotation=45, ha='right')
                plt.tight_layout()
                st.pyplot(fig)
                # Detailed individual results
                st.markdown("### Detailed Results")
                for i, answer in enumerate(st.session_state.student_answers):
                    if i in st.session_state.results:
                        with st.expander(f"{answer['student_name']} - Q{answer['question_number']}"):
                            result = st.session_state.results[i]
                            col1, col2, col3 = st.columns(3)
                            with col1:
                                st.metric("Score", f"{result['score']}/{result['max_score']}")
                            with col2:
                                st.metric("Percentage", f"{result['percentage']}%")
                            with col3:
                                st.metric("Similarity", f"{result['similarity']:.2f}")
                            st.markdown(f"<p><strong>Feedback:</strong> <span class='{result['score_class']}'>{result['feedback']}</span></p>", unsafe_allow_html=True)
                            if result['missing_keywords']:
                                st.markdown("<p class='missing-keywords'><strong>Missing Keywords:</strong> " +
                                            ", ".join(result['missing_keywords']) + "</p>", unsafe_allow_html=True)
                            else:
                                st.markdown("<p><strong>All important keywords are present.</strong></p>", unsafe_allow_html=True)
                            # Side by side comparison
                            st.markdown("#### Answer Comparison")
                            col1, col2 = st.columns(2)
                            with col1:
                                st.markdown("##### Model Answer")
                                st.text_area("", answer["model_answer"], height=200, key=f"ma_view_{i}")
                            with col2:
                                st.markdown("##### Student Answer")
                                st.text_area("", answer["student_answer"], height=200, key=f"sa_view_{i}")
                # Export results
                st.markdown("### Export Results")
                export_data = {
                    "summary": results_data,
                    "detailed_results": [
                        {
                            "student_name": st.session_state.student_answers[i]["student_name"],
                            "student_id": st.session_state.student_answers[i]["student_id"],
                            "question_number": st.session_state.student_answers[i]["question_number"],
                            "similarity": st.session_state.results[i]["similarity"],
                            "missing_keywords": st.session_state.results[i]["missing_keywords"],
                            "score": st.session_state.results[i]["score"],
                            "max_score": st.session_state.results[i]["max_score"],
                            "percentage": st.session_state.results[i]["percentage"],
                            "feedback": st.session_state.results[i]["feedback"],
                        }
                        for i in st.session_state.results
                    ]
                }
                json_data = json.dumps(export_data, indent=4)
                st.download_button(
                    label="Download Results (JSON)",
                    data=json_data,
                    file_name="evaluation_results.json",
                    mime="application/json"
                )
                # Create Excel report with Summary and Detailed Results sheets
                excel_buffer = io.BytesIO()
                with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
                    # Summary sheet
                    df.to_excel(writer, sheet_name='Summary', index=False)
                    # Detailed results sheet
                    detailed_data = []
                    for i in st.session_state.results:
                        answer = st.session_state.student_answers[i]
                        result = st.session_state.results[i]
                        detailed_data.append({
                            "Student Name": answer["student_name"],
                            "Student ID": answer["student_id"],
                            "Question Number": answer["question_number"],
                            "Cosine Similarity": result["similarity"],
                            "Missing Keywords": ", ".join(result["missing_keywords"]) if result["missing_keywords"] else "None",
                            "Score": result["score"],
                            "Max Score": result["max_score"],
                            "Percentage": result["percentage"],
                            "Feedback": result["feedback"]
                        })
                    pd.DataFrame(detailed_data).to_excel(writer, sheet_name='Detailed Results', index=False)
                    # Format the Excel file: bold white-on-blue header row
                    workbook = writer.book
                    worksheet = writer.sheets['Summary']
                    header_format = workbook.add_format({'bold': True, 'bg_color': '#4285F4', 'color': 'white'})
                    for col_num, value in enumerate(df.columns.values):
                        worksheet.write(0, col_num, value, header_format)
                excel_buffer.seek(0)
                st.download_button(
                    label="Download Results (Excel)",
                    data=excel_buffer,
                    file_name="evaluation_results.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )
        st.markdown("</div>", unsafe_allow_html=True)
# Script entry point: run the Streamlit app.
if __name__ == "__main__":
    main()
# Stray assistant/chat text (not Python code) — kept as a comment:
# Hi! I can help you with any questions about Streamlit and Python. What would you like to know?