Ploomber AI Editor | news-analyzer-f622

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from textblob import TextBlob
import nltk
from nltk.corpus import stopwords
import pickle
import re

# Download required NLTK data
@st.cache_resource
def download_nltk_data():
    """Download the NLTK resources used by this app.

    Decorated with ``st.cache_resource`` so the downloads are not repeated
    on every Streamlit rerun of the script.
    """
    # 'punkt' holds the sentence tokenizer data for older NLTK releases;
    # newer releases load it from 'punkt_tab' instead, so fetch both to keep
    # nltk.sent_tokenize working regardless of the installed NLTK version.
    for resource in ('stopwords', 'punkt', 'punkt_tab'):
        nltk.download(resource)


download_nltk_data()

# Function to clean text
def clean_text(text):
    """Normalize raw text before it is vectorized.

    The text is lower-cased, apostrophes are dropped, every other run of
    characters that is neither an ASCII letter nor whitespace is replaced by
    a space, and whitespace is collapsed to single spaces.

    Args:
        text: The raw string to clean.

    Returns:
        str: The cleaned text containing only ASCII letters separated by
        single spaces (empty string if nothing remains).
    """
    text = text.lower()
    # Drop apostrophes outright so contractions stay one token
    # ("won't" -> "wont") rather than being split into fragments.
    text = re.sub(r"['\u2019]", '', text)
    # Substitute a space, not an empty string: otherwise words separated only
    # by punctuation or digits ("fake-news", "end.start") get glued together.
    text = re.sub(r'[^a-zA-Z\s]+', ' ', text)
    # Collapse the whitespace runs the substitutions can leave behind.
    return ' '.join(text.split())

# Function to generate fake model for demo purposes
@st.cache_resource
def create_fake_model():
    """Build a toy TF-IDF + logistic-regression classifier for the demo.

    The model is fitted on four hard-coded sentences, so its predictions are
    placeholders only and carry no real predictive value.

    Returns:
        tuple: ``(vectorizer, classifier)`` - the fitted ``TfidfVectorizer``
        and the ``LogisticRegression`` trained on its features. Training
        label 1 marks a "fake" example and label 0 a "real" one.
    """
    vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
    classifier = LogisticRegression()

    # Tiny placeholder training set (1 = fake, 0 = real)
    fake_texts = [
        "This is a fake news article",
        "This is a real news article",
        "Breaking news: something happened",
        "You won't believe what happened next"
    ]
    fake_labels = [1, 0, 0, 1]

    # Run the training texts through the same clean_text step that is applied
    # to user input before prediction, so the learned vocabulary matches the
    # tokens the model sees at inference time.
    X = vectorizer.fit_transform([clean_text(text) for text in fake_texts])
    classifier.fit(X, fake_labels)

    return vectorizer, classifier


# Initialize the fake model
vectorizer, classifier = create_fake_model()

def analyze_sentiment(text):
    """Score *text* with TextBlob.

    Args:
        text: The string to analyze.

    Returns:
        tuple: ``(polarity, subjectivity)`` where polarity runs from -1 to 1
        and subjectivity from 0 to 1.
    """
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity, sentiment.subjectivity

# Page header
st.title("News Analyzer")
st.write("Enter a news article to analyze its authenticity and sentiment")

# Text input
news_text = st.text_area("Enter the news text here:", height=200)

# Only offer analysis when there is real content; whitespace-only input
# would otherwise be scored as if it were an article.
if news_text.strip():
    st.write("---")

    # Analyze the text
    if st.button("Analyze"):
        # Clean the text
        cleaned_text = clean_text(news_text)

        # Fake news detection: predict_proba returns one row per input, with
        # columns ordered by class label (index 0 = real, index 1 = fake).
        X = vectorizer.transform([cleaned_text])
        prediction = classifier.predict_proba(X)[0]

        # Sentiment analysis runs on the original text, not the cleaned one
        polarity, subjectivity = analyze_sentiment(news_text)

        # Display results side by side
        col1, col2 = st.columns(2)

        with col1:
            st.subheader("Authenticity Analysis")
            st.write("Probability of being fake news:", f"{prediction[1]:.2%}")
            st.write("Probability of being real news:", f"{prediction[0]:.2%}")

            # Visual indicator
            if prediction[1] > 0.5:
                st.error("⚠️ This might be fake news!")
            else:
                st.success("✅ This appears to be legitimate news")

        with col2:
            st.subheader("Sentiment Analysis")
            st.write("Polarity:", f"{polarity:.2f}")
            st.write("Subjectivity:", f"{subjectivity:.2f}")

            # Sentiment interpretation
            if polarity > 0:
                sentiment = "Positive"
                color = "green"
            elif polarity < 0:
                sentiment = "Negative"
                color = "red"
            else:
                sentiment = "Neutral"
                color = "gray"

            # color and sentiment are fixed literals chosen above, so the
            # inline HTML contains no user-supplied content.
            st.markdown(
                f"Overall sentiment: <span style='color: {color}'>{sentiment}</span>",
                unsafe_allow_html=True,
            )

            # Subjectivity interpretation
            if subjectivity > 0.5:
                st.write("This text appears to be more subjective/opinionated")
            else:
                st.write("This text appears to be more objective/factual")

        # Display text statistics
        st.write("---")
        st.subheader("Text Statistics")
        words = len(news_text.split())
        sentences = len(nltk.sent_tokenize(news_text))

        stats_col1, stats_col2, stats_col3 = st.columns(3)
        stats_col1.metric("Word Count", words)
        stats_col2.metric("Sentence Count", sentences)
        # Guard against division by zero when no sentence was detected
        stats_col3.metric("Average Words per Sentence", round(words/sentences, 1) if sentences > 0 else 0)

# Add explanation
st.write("---")
st.write("""
### How it works
- **Fake News Detection**: The system uses machine learning to analyze the text patterns and predict if the content might be fake news.
- **Sentiment Analysis**: The text is analyzed for its emotional tone (positive/negative) and subjectivity (objective/subjective).
- **Text Statistics**: Basic metrics about the text structure are provided.

**Note**: This is a demonstration model and should not be used as the sole source for determining news authenticity.
""")
Loading code editor...
Click Save & Run to preview your app
Terminal