# Harmful contents detection app.
# Note: to upload files, please first save the app.
import streamlit as st
import pandas as pd
from profanity_check import predict, predict_prob
import nltk
from nltk.tokenize import sent_tokenize
# Ensure the NLTK sentence-tokenizer data ("punkt") is available before
# sent_tokenize is first called; download it once if it is missing.
# (Fixes flattened indentation that made the try/except a syntax error.)
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    # One-time download on first run of the app.
    nltk.download('punkt')
# Page header and the main text-entry widget.
st.title("Content Safety Detector")
st.write("This app helps detect potentially harmful or inappropriate content in text.")

# Free-form text area; `text_input` is empty until the user types something.
text_input = st.text_area(
    "Enter the text you want to analyze:",
    height=200,
    placeholder="Type or paste your text here...",
)
# --- Main analysis: runs whenever the user has entered text ---
# (Fixes flattened indentation that made this whole section a syntax error.)
if text_input:
    st.subheader("Analysis Results")

    # Split the input into sentences, dropping whitespace-only ones up front.
    sentences = [s for s in sent_tokenize(text_input) if s.strip()]

    results = []
    if sentences:
        # Batch the model calls: one predict/predict_prob pass over all
        # sentences instead of two model calls per sentence.
        probabilities = predict_prob(sentences)
        labels = predict(sentences)
        for sentence, probability, is_harmful in zip(sentences, probabilities, labels):
            results.append({
                'Sentence': sentence,
                'Harmful Content Probability': f"{probability:.2%}",
                'Is Harmful': '⚠️ Yes' if is_harmful else '✅ No'
            })

    # Tabular view of the per-sentence results (may be empty).
    df = pd.DataFrame(results)

    # Overall statistics.
    total_sentences = len(results)
    harmful_sentences = sum(1 for r in results if '⚠️ Yes' in r['Is Harmful'])

    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Sentences", total_sentences)
    with col2:
        st.metric("Potentially Harmful Sentences", harmful_sentences)

    if harmful_sentences > 0:
        st.warning(
            f"⚠️ This text contains {harmful_sentences} potentially harmful sentences. "
            "Please review the content carefully."
        )
    else:
        st.success("✅ No harmful content detected in the text.")

    # Per-sentence detail table.
    st.subheader("Detailed Analysis")
    st.dataframe(
        df,
        column_config={
            "Sentence": st.column_config.TextColumn("Sentence", width="large"),
            "Harmful Content Probability": st.column_config.TextColumn("Probability", width="medium"),
            "Is Harmful": st.column_config.TextColumn("Status", width="small"),
        },
        hide_index=True,
    )

    st.caption("""
Note: This tool uses machine learning to detect potentially harmful content.
It may not be 100% accurate and should be used as a guidance rather than a definitive assessment.
Please use your judgment when reviewing the results.
""")
# --- Sidebar: static information about the app ---
# (Fixes flattened indentation that made the `with` body a syntax error.)
with st.sidebar:
    st.title("About")
    st.info("""
This content detector helps identify potentially harmful or inappropriate content in text.
It analyzes the text sentence by sentence and provides:
- Overall statistics
- Probability scores
- Sentence-level analysis
Use this tool responsibly and remember that machine learning models
may not catch all harmful content or may flag safe content incorrectly.
""")
# (Removed stray chat-assistant text that was not part of the app source.)