# Create a UI for fake news detection and sentiment analysis.
# To upload files, please first save the app.
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from textblob import TextBlob
import nltk
from nltk.corpus import stopwords
import pickle
import re
# Download required NLTK data
@st.cache_resource
def download_nltk_data():
    """Download the NLTK data packages this app relies on.

    Decorated with ``st.cache_resource`` so the downloads are not repeated
    on every Streamlit rerun of the script.
    """
    nltk.download('stopwords')
    nltk.download('punkt')
    # Recent NLTK releases load the sentence tokenizer from 'punkt_tab'
    # instead of the pickled 'punkt' models; fetch both so that
    # nltk.sent_tokenize works regardless of the installed NLTK version.
    nltk.download('punkt_tab')


download_nltk_data()
# Function to clean text
def clean_text(text: str) -> str:
    """Normalize *text* for the classifier.

    The text is lower-cased and every character that is neither an ASCII
    letter nor whitespace (digits, punctuation, accented letters, ...) is
    removed. Whitespace is left untouched, so runs of spaces may remain
    where characters were stripped.

    Args:
        text: Raw input text.

    Returns:
        The lower-cased text containing only ASCII letters and whitespace.
    """
    lowered = text.lower()
    return re.sub(r'[^a-zA-Z\s]', '', lowered)
# Function to generate fake model for demo purposes
@st.cache_resource
def create_fake_model():
    """Build a placeholder TF-IDF + logistic-regression classifier.

    The model is fitted on four hard-coded sentences, so its output is only
    a stand-in for a real fake-news detector. Label ``1`` marks a "fake"
    sample and ``0`` a "real" one.

    Returns:
        tuple: ``(vectorizer, classifier)`` - the fitted ``TfidfVectorizer``
        and the ``LogisticRegression`` trained on its output.
    """
    vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
    classifier = LogisticRegression()

    # Tiny demo corpus; fake_labels[i] is the label of fake_texts[i].
    fake_texts = [
        "This is a fake news article",
        "This is a real news article",
        "Breaking news: something happened",
        "You won't believe what happened next",
    ]
    fake_labels = [1, 0, 0, 1]

    # Fit the vectorizer and the classifier on the demo corpus.
    X = vectorizer.fit_transform(fake_texts)
    classifier.fit(X, fake_labels)
    return vectorizer, classifier


# Initialize the fake model
vectorizer, classifier = create_fake_model()
def analyze_sentiment(text):
    """Score the sentiment of *text* with TextBlob.

    Args:
        text: The text to analyze.

    Returns:
        tuple: ``(polarity, subjectivity)`` where polarity lies in
        [-1, 1] (negative to positive) and subjectivity in [0, 1]
        (objective to subjective).
    """
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity, sentiment.subjectivity
# Page header and input area.
st.title("News Analyzer")
st.write("Enter a news article to analyze its authenticity and sentiment")

# Text input
news_text = st.text_area("Enter the news text here:", height=200)

# Whitespace-only input carries nothing to analyze, so treat it as empty.
if news_text.strip():
    st.write("---")

    # Analyze the text
    if st.button("Analyze"):
        # The classifier sees the cleaned text; sentiment and statistics
        # use the text exactly as entered.
        cleaned_text = clean_text(news_text)

        # Fake news detection: prediction[0] is P(label 0 = real),
        # prediction[1] is P(label 1 = fake).
        X = vectorizer.transform([cleaned_text])
        prediction = classifier.predict_proba(X)[0]

        # Sentiment analysis
        polarity, subjectivity = analyze_sentiment(news_text)

        # Display results side by side.
        col1, col2 = st.columns(2)

        with col1:
            st.subheader("Authenticity Analysis")
            st.write("Probability of being fake news:", f"{prediction[1]:.2%}")
            st.write("Probability of being real news:", f"{prediction[0]:.2%}")

            # Visual indicator
            if prediction[1] > 0.5:
                st.error("⚠️ This might be fake news!")
            else:
                st.success("✅ This appears to be legitimate news")

        with col2:
            st.subheader("Sentiment Analysis")
            st.write("Polarity:", f"{polarity:.2f}")
            st.write("Subjectivity:", f"{subjectivity:.2f}")

            # Sentiment interpretation
            if polarity > 0:
                sentiment = "Positive"
                color = "green"
            elif polarity < 0:
                sentiment = "Negative"
                color = "red"
            else:
                sentiment = "Neutral"
                color = "gray"

            # Both interpolated values are fixed literals chosen above, so
            # enabling raw HTML here does not expose user-supplied markup.
            st.markdown(
                f"Overall sentiment: <span style='color: {color}'>{sentiment}</span>",
                unsafe_allow_html=True,
            )

            # Subjectivity interpretation
            if subjectivity > 0.5:
                st.write("This text appears to be more subjective/opinionated")
            else:
                st.write("This text appears to be more objective/factual")

        # Display text statistics
        st.write("---")
        st.subheader("Text Statistics")

        words = len(news_text.split())
        sentences = len(nltk.sent_tokenize(news_text))

        stats_col1, stats_col2, stats_col3 = st.columns(3)
        stats_col1.metric("Word Count", words)
        stats_col2.metric("Sentence Count", sentences)
        # Guard against division by zero when no sentence was detected.
        stats_col3.metric(
            "Average Words per Sentence",
            round(words / sentences, 1) if sentences > 0 else 0,
        )
# Add explanation. The blank lines inside the Markdown literal matter:
# without one before "**Note**" the note is rendered as a continuation of
# the last bullet instead of as its own paragraph.
st.write("---")
st.write("""
### How it works

- **Fake News Detection**: The system uses machine learning to analyze the text patterns and predict if the content might be fake news.
- **Sentiment Analysis**: The text is analyzed for its emotional tone (positive/negative) and subjectivity (objective/subjective).
- **Text Statistics**: Basic metrics about the text structure are provided.

**Note**: This is a demonstration model and should not be used as the sole source for determining news authenticity.
""")
# Hi! I can help you with any questions about Streamlit and Python. What would you like to know?