A bot to scan and process receipts.
To upload files, please save the app first.
import streamlit as st
from PIL import Image
import pytesseract
import pandas as pd
import io
import re
from datetime import datetime
# Page heading for the app.
st.title("Receipt Scanner & Processor")
# Processed receipts are kept in session state; seed the list only when the
# key is missing so existing entries are left untouched.
st.session_state.setdefault("receipts", [])
# Numeric dates such as 01/15/2023, 1-5-23 or 25/12/2023.
_DATE_RE = re.compile(r'\d{1,2}[-/]\d{1,2}[-/]\d{2,4}')
# Month-first formats are tried first (the original interpretation); the
# day-first formats are only reached when a month-first parse is impossible.
_DATE_FORMATS = ('%m/%d/%Y', '%m/%d/%y', '%d/%m/%Y', '%d/%m/%y')
# "total" not glued to a preceding letter (so "SUBTOTAL" is rejected), followed
# by an optional ':' / '$' and an amount, with or without thousands commas.
_TOTAL_RE = re.compile(
    r'(?i)(?<![a-z])total[:\s]*[$]?\s*'
    r'(\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+\.?\d*)'
)
# Detects a "sub" / "sub-" / "sub " prefix immediately before a "total" match.
_SUB_PREFIX_RE = re.compile(r'(?i)sub[\s\-]*$')


def _parse_date(raw):
    """Return *raw* as 'YYYY-MM-DD', or None if no known format fits."""
    normalized = raw.replace('-', '/')
    for fmt in _DATE_FORMATS:
        try:
            return datetime.strptime(normalized, fmt).strftime('%Y-%m-%d')
        except ValueError:
            continue
    return None


def _find_date(lines):
    """Return the first parseable date found in *lines*, or None."""
    for line in lines:
        for match in _DATE_RE.finditer(line):
            parsed = _parse_date(match.group())
            if parsed is not None:
                return parsed
    return None


def _find_total(lines):
    """Return the first amount labelled "total" (not "subtotal"), or None."""
    for line in lines:
        for match in _TOTAL_RE.finditer(line):
            # Skip "Sub Total" / "Sub-total": the text before the match ends in "sub".
            if _SUB_PREFIX_RE.search(line[:match.start()]):
                continue
            return float(match.group(1).replace(',', ''))
    return None


def extract_receipt_info(text):
    """Extract key information from receipt text.

    Args:
        text: Raw receipt text (e.g. OCR output), lines separated by '\\n'.

    Returns:
        A dict with the keys:
            'date':  first recognizable date as 'YYYY-MM-DD', or None.
            'total': amount from the first "total" line as a float, or None.
            'store': first non-blank line of the text (stripped), or None.
            'items': always an empty list; line items are not parsed.
    """
    lines = text.split('\n')
    # The store name is assumed to be the first non-blank line; leading blank
    # lines are skipped rather than reported as an empty store name.
    store = next((s for s in (ln.strip() for ln in lines) if s), None)
    return {
        'date': _find_date(lines),
        'total': _find_total(lines),
        'store': store,
        'items': [],
    }
# File uploader: accepts one PNG/JPEG receipt image.
uploaded_file = st.file_uploader("Upload receipt image", type=['png', 'jpg', 'jpeg'])
if uploaded_file is not None:
    # Display the uploaded image so the user can confirm it before processing.
    image = Image.open(uploaded_file)
    st.image(image, caption='Uploaded Receipt', use_column_width=True)

    # OCR only runs when the user explicitly asks for it.
    if st.button('Process Receipt'):
        with st.spinner('Processing receipt...'):
            # Convert image to text using OCR
            text = pytesseract.image_to_string(image)
            # Extract structured fields from the OCR text
            receipt_info = extract_receipt_info(text)
            # Record when this receipt was processed
            receipt_info['processed_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            # Keep the result in session state so it appears in the history
            st.session_state.receipts.append(receipt_info)

        # Show extracted information
        st.subheader("Extracted Information")
        st.write("Store:", receipt_info['store'] or "Not found")
        st.write("Date:", receipt_info['date'] or "Not found")
        # Compare against None explicitly: a parsed total of 0.00 is a valid
        # value and must not be reported as "Not found".
        total = receipt_info['total']
        st.write("Total:", f"${total:.2f}" if total is not None else "Not found")

        # Show raw OCR text for troubleshooting the extraction
        with st.expander("Show Raw Text"):
            st.text(text)
# Display history of processed receipts
if st.session_state.receipts:
    st.subheader("Processing History")
    df = pd.DataFrame(st.session_state.receipts)
    st.table(df)

    # Export option. The download button is rendered directly rather than
    # nested under a separate st.button: a button is only True for the single
    # rerun after it is clicked, so a nested download button would disappear
    # on the next interaction and the export would need two clicks.
    st.download_button(
        label="Download CSV",
        data=df.to_csv(index=False),
        file_name="receipts_export.csv",
        mime="text/csv",
    )
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?