Create a Streamlit app that scrapes Wikipedia population data
To upload files, please first save the app
import streamlit as st
import pandas as pd
import requests
from bs4 import BeautifulSoup
st.title("World Population from Wikipedia")

# NOTE: The request goes through corsproxy.io because the app runs in a WASM
# environment. When running locally, the corsproxy.io prefix can be removed
# and the Wikipedia address used directly.
url = (
    "https://corsproxy.io/?"
    "https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population"
)
def _parse_int(text):
    """Convert a table cell such as ``'1,234[a]'`` to an ``int``.

    Anything from the first ``[`` on (a footnote marker) and all thousands
    separators are dropped before conversion.

    Raises:
        ValueError: the remaining text is not a whole number.
    """
    return int(text.split('[')[0].replace(',', '').strip())


@st.cache_data
def get_population_data():
    """Scrape the Wikipedia population table into a DataFrame.

    Downloads the page at the module-level ``url``, takes the first table
    with class ``wikitable`` and reads the first three cells of every row
    after the header as rank, country and population. Rows whose rank or
    population cell is not a whole number are skipped.

    Returns:
        pandas.DataFrame: one row per parsed entry with the columns
        ``Rank`` (int), ``Country`` (str) and ``Population`` (int). The
        three columns are present even when no row could be parsed.

    Raises:
        requests.RequestException: the request failed, timed out or came
            back with an HTTP error status.
        ValueError: the page contains no ``wikitable`` table.
    """
    # A timeout keeps the app from hanging on a stalled connection, and the
    # status check stops an error page from being parsed as if it were the
    # article.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the main population table
    table = soup.find('table', {'class': 'wikitable'})
    if table is None:
        raise ValueError("no table with class 'wikitable' found on the page")

    # Extract data from table
    data = []
    for row in table.find_all('tr')[1:]:  # Skip header row
        cols = row.find_all(['td', 'th'])
        if len(cols) < 3:  # Ensure we have enough columns
            continue
        country = cols[1].text.strip()
        try:
            rank = _parse_int(cols[0].text)
            population = _parse_int(cols[2].text)
        except ValueError:
            # Non-numeric rank or population: not a usable data row.
            continue
        data.append({
            'Rank': rank,
            'Country': country,
            'Population': population,
        })

    # Naming the columns keeps them present when ``data`` is empty.
    return pd.DataFrame(data, columns=['Rank', 'Country', 'Population'])
# Load the data. Only this step is guarded, so that problems in the UI code
# below are not misreported as a fetch failure.
try:
    df = get_population_data()
except Exception as e:
    st.error(f"An error occurred while fetching the data: {str(e)}")
    st.write("This might be due to changes in Wikipedia's page structure or network issues.")
else:
    if df.empty:
        # Nothing was parsed; the column lookups below would fail.
        st.warning("No population rows could be parsed from the Wikipedia table.")
    else:
        # Add a search box
        search = st.text_input("Search for a country:")

        # Filter the dataframe based on search
        if search:
            # regex=False matches the typed text literally, so input such as
            # "(" or "+" does not raise a regular-expression error.
            filtered_df = df[df['Country'].str.contains(search, case=False, regex=False)]
        else:
            filtered_df = df

        # Display the data
        st.write(f"Showing {len(filtered_df)} countries")
        st.dataframe(filtered_df)

        # Create a bar chart of top 10 countries.
        # head(10) takes the first ten parsed rows in page order; this
        # presumably relies on the table being sorted by population.
        st.subheader("Top 10 Countries by Population")
        top_10 = df.head(10)
        st.bar_chart(data=top_10.set_index('Country')['Population'])
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?