create a webapp for fake news detection and sentiment analysis
import streamlit as st
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
import pandas as pd
from textblob import TextBlob
import pickle
from pathlib import Path
# Download required NLTK data (quiet=True keeps the console output clean)
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)  # required by word_tokenize on newer NLTK versions
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)
def preprocess_text(text):
    # Tokenize
    tokens = word_tokenize(text.lower())
    # Remove stopwords and lemmatize
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(token) for token in tokens if token.isalnum() and token not in stop_words]
    return ' '.join(tokens)
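
# Illustrative example (an assumption for this sketch, not part of the app itself):
# with NLTK's standard English stopword list,
# preprocess_text("The aliens ARE landing in cities!") yields roughly
# "alien landing city": lowercased, with punctuation and stopwords dropped
# and plurals lemmatized to their base form.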
def train_fake_news_model():
    # Create some sample training data
    data = {
        'text': [
            'BREAKING: Aliens have landed in New York City!',
            'Scientists discover new vaccine that cures all diseases',
            'Local restaurant introduces new menu items',
            'Weather forecast predicts rain tomorrow',
            'Government hiding secret underground cities',
            'Celebrity spotted walking dog in park'
        ],
        'label': [1, 1, 0, 0, 1, 0]  # 1 for fake, 0 for real
    }
    df = pd.DataFrame(data)
    # Preprocess the texts
    X = [preprocess_text(text) for text in df['text']]
    y = df['label']
    # Create and train the model
    vectorizer = TfidfVectorizer()
    X_vectorized = vectorizer.fit_transform(X)
    model = MultinomialNB()
    model.fit(X_vectorized, y)
    return vectorizer, model
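
# In a real application the model would be trained on a labeled corpus rather
# than six hand-written headlines, with a held-out test set to measure
# accuracy. A minimal sketch, assuming a hypothetical CSV with 'text' and
# 'label' columns:
#
#   from sklearn.model_selection import train_test_split
#   df = pd.read_csv('news_dataset.csv')
#   X = vectorizer.fit_transform(df['text'].map(preprocess_text))
#   X_train, X_test, y_train, y_test = train_test_split(X, df['label'], test_size=0.2, random_state=42)
#   model.fit(X_train, y_train)
#   print(f"Held-out accuracy: {model.score(X_test, y_test):.2f}")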
def analyze_sentiment(text):
    analysis = TextBlob(text)
    # Get polarity (-1 to 1) and convert to percentage
    sentiment_score = analysis.sentiment.polarity
    sentiment_percentage = (sentiment_score + 1) * 50
    if sentiment_score > 0:
        sentiment = "Positive"
    elif sentiment_score < 0:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
    return sentiment, sentiment_percentage
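
# The (polarity + 1) * 50 rescaling maps TextBlob's polarity range [-1, 1]
# onto a 0-100 scale: -1.0 becomes 0%, 0.0 becomes 50% (neutral), and 1.0
# becomes 100%. For instance, a clearly positive phrase such as "great news"
# has positive polarity and therefore lands above 50%.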
def main():
    st.title("News Analyzer")
    st.write("Analyze news for potential fake content and sentiment")
    # Create and save model if it doesn't exist
    model_path = Path("fake_news_model.pkl")
    vectorizer_path = Path("vectorizer.pkl")
    if not model_path.exists() or not vectorizer_path.exists():
        vectorizer, model = train_fake_news_model()
        with open(model_path, 'wb') as f:
            pickle.dump(model, f)
        with open(vectorizer_path, 'wb') as f:
            pickle.dump(vectorizer, f)
    else:
        with open(model_path, 'rb') as f:
            model = pickle.load(f)
        with open(vectorizer_path, 'rb') as f:
            vectorizer = pickle.load(f)
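    # Note: Streamlit re-runs this whole script on every interaction, so the
    # pickle files above act as a simple on-disk cache. An alternative sketch
    # (not part of the original code) keeps the trained objects in memory:
    #
    #   @st.cache_resource
    #   def get_model():
    #       return train_fake_news_model()
    #
    #   vectorizer, model = get_model()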
    # Text input
    text_input = st.text_area("Enter the news text to analyze:", height=150)
    if st.button("Analyze"):
        if text_input.strip() == "":
            st.warning("Please enter some text to analyze.")
        else:
            st.write("---")
            # Fake news detection
            processed_text = preprocess_text(text_input)
            vectorized_text = vectorizer.transform([processed_text])
            prediction = model.predict(vectorized_text)[0]
            # Sentiment analysis
            sentiment, sentiment_score = analyze_sentiment(text_input)
            # Display results
            col1, col2 = st.columns(2)
            with col1:
                st.subheader("Authenticity Analysis")
                if prediction == 1:
                    st.error("⚠️ This text might be fake news!")
                    st.write("The content shows patterns similar to unreliable news sources.")
                else:
                    st.success("✅ This text appears to be legitimate.")
                    st.write("The content shows patterns similar to reliable news sources.")
            with col2:
                st.subheader("Sentiment Analysis")
                st.write(f"Sentiment: {sentiment}")
                st.progress(sentiment_score / 100)
                st.write(f"Sentiment Score: {sentiment_score:.1f}%")
    st.write("---")
    st.write("Note: This is a simplified demo model. For real applications, a more comprehensive dataset and advanced models would be needed.")

if __name__ == "__main__":
    main()
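
To run the app, install the dependencies and start Streamlit from a terminal (the file name app.py is just an example; use whatever name you saved the script under):

pip install streamlit nltk scikit-learn pandas textblob
streamlit run app.py

The first launch downloads the NLTK data and trains the tiny demo model, so it may take a few extra seconds.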