create a webapp for fake news detection and sentiment analysis
import streamlit as st
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
import pandas as pd
from textblob import TextBlob
import pickle
from pathlib import Path
# Download required NLTK data (quiet=True keeps the console output clean)
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)  # required by word_tokenize on newer NLTK versions
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)
def preprocess_text(text):
    # Tokenize
    tokens = word_tokenize(text.lower())
    # Remove stopwords and lemmatize
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(token) for token in tokens if token.isalnum() and token not in stop_words]
    return ' '.join(tokens)
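
# Illustrative example (an assumption for this sketch, not part of the app itself):
# with NLTK's standard English stopword list,
# preprocess_text("The aliens ARE landing in cities!") yields roughly
# "alien landing city": lowercased, with punctuation and stopwords dropped
# and plurals lemmatized to their base form.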
def train_fake_news_model():
    # Create some sample training data
    data = {
        'text': [
            'BREAKING: Aliens have landed in New York City!',
            'Scientists discover new vaccine that cures all diseases',
            'Local restaurant introduces new menu items',
            'Weather forecast predicts rain tomorrow',
            'Government hiding secret underground cities',
            'Celebrity spotted walking dog in park'
        ],
        'label': [1, 1, 0, 0, 1, 0]  # 1 for fake, 0 for real
    }
    df = pd.DataFrame(data)
    # Preprocess the texts
    X = [preprocess_text(text) for text in df['text']]
    y = df['label']
    # Create and train the model
    vectorizer = TfidfVectorizer()
    X_vectorized = vectorizer.fit_transform(X)
    model = MultinomialNB()
    model.fit(X_vectorized, y)
    return vectorizer, model
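
# In a real application the model would be trained on a labeled corpus rather
# than six hand-written headlines, with a held-out test set to measure
# accuracy. A minimal sketch, assuming a hypothetical CSV with 'text' and
# 'label' columns:
#
#   from sklearn.model_selection import train_test_split
#   df = pd.read_csv('news_dataset.csv')
#   X = vectorizer.fit_transform(df['text'].map(preprocess_text))
#   X_train, X_test, y_train, y_test = train_test_split(X, df['label'], test_size=0.2, random_state=42)
#   model.fit(X_train, y_train)
#   print(f"Held-out accuracy: {model.score(X_test, y_test):.2f}")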
def analyze_sentiment(text):
    analysis = TextBlob(text)
    # Get polarity (-1 to 1) and convert to percentage
    sentiment_score = analysis.sentiment.polarity
    sentiment_percentage = (sentiment_score + 1) * 50
    if sentiment_score > 0:
        sentiment = "Positive"
    elif sentiment_score < 0:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
    return sentiment, sentiment_percentage
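
# The (polarity + 1) * 50 rescaling maps TextBlob's polarity range [-1, 1]
# onto a 0-100 scale: -1.0 becomes 0%, 0.0 becomes 50% (neutral), and 1.0
# becomes 100%. For instance, a clearly positive phrase such as "great news"
# has positive polarity and therefore lands above 50%.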
def main():
    st.title("News Analyzer")
    st.write("Analyze news for potential fake content and sentiment")
    # Create and save model if it doesn't exist
    model_path = Path("fake_news_model.pkl")
    vectorizer_path = Path("vectorizer.pkl")
    if not model_path.exists() or not vectorizer_path.exists():
        vectorizer, model = train_fake_news_model()
        with open(model_path, 'wb') as f:
            pickle.dump(model, f)
        with open(vectorizer_path, 'wb') as f:
            pickle.dump(vectorizer, f)
    else:
        with open(model_path, 'rb') as f:
            model = pickle.load(f)
        with open(vectorizer_path, 'rb') as f:
            vectorizer = pickle.load(f)
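    # Note: Streamlit re-runs this whole script on every interaction, so the
    # pickle files above act as a simple on-disk cache. An alternative sketch
    # (not part of the original code) keeps the trained objects in memory:
    #
    #   @st.cache_resource
    #   def get_model():
    #       return train_fake_news_model()
    #
    #   vectorizer, model = get_model()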
    # Text input
    text_input = st.text_area("Enter the news text to analyze:", height=150)
    if st.button("Analyze"):
        if text_input.strip() == "":
            st.warning("Please enter some text to analyze.")
        else:
            st.write("---")
            # Fake news detection
            processed_text = preprocess_text(text_input)
            vectorized_text = vectorizer.transform([processed_text])
            prediction = model.predict(vectorized_text)[0]
            # Sentiment analysis
            sentiment, sentiment_score = analyze_sentiment(text_input)
            # Display results
            col1, col2 = st.columns(2)
            with col1:
                st.subheader("Authenticity Analysis")
                if prediction == 1:
                    st.error("⚠️ This text might be fake news!")
                    st.write("The content shows patterns similar to unreliable news sources.")
                else:
                    st.success("✅ This text appears to be legitimate.")
                    st.write("The content shows patterns similar to reliable news sources.")
            with col2:
                st.subheader("Sentiment Analysis")
                st.write(f"Sentiment: {sentiment}")
                st.progress(sentiment_score / 100)
                st.write(f"Sentiment Score: {sentiment_score:.1f}%")
    st.write("---")
    st.write("Note: This is a simplified demo model. For real applications, a more comprehensive dataset and advanced models would be needed.")

if __name__ == "__main__":
    main()
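
To run the app, install the dependencies and start Streamlit from a terminal (the file name app.py is just an example; use whatever name you saved the script under):

pip install streamlit nltk scikit-learn pandas textblob
streamlit run app.py

The first launch downloads the NLTK data and trains the tiny demo model, so it may take a few extra seconds.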