# App to load image documents, search for a CNPJ using Gemini, and output the CNPJ as structured JSON.
# To upload files, please save the app first.
import json
import re

import pytesseract
import streamlit as st
from PIL import Image
# Formatted CNPJ (Brazilian company registration number): NN.NNN.NNN/NNNN-NN.
# Compiled once so repeated Streamlit reruns do not rebuild the pattern.
_CNPJ_PATTERN = re.compile(r'\b\d{2}\.\d{3}\.\d{3}/\d{4}-\d{2}\b')


def find_cnpjs(text):
    """Return the formatted CNPJ numbers found in *text*.

    Only the punctuated form ``NN.NNN.NNN/NNNN-NN`` is recognised; check
    digits are not validated. Duplicates are dropped while the order of
    first appearance is kept, so a CNPJ printed in both the header and the
    footer of a document is reported once.

    Args:
        text: Text to scan (typically OCR output). ``None`` or an empty
            string yields an empty list.

    Returns:
        A list of unique CNPJ strings in order of first occurrence.
    """
    if not text:
        return []
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(_CNPJ_PATTERN.findall(text)))


def main():
    """Render the page: upload an image, OCR it, and show any CNPJ as JSON."""
    st.title('CNPJ Document Search')

    # Upload an image
    uploaded_file = st.file_uploader('Choose an image...', type=['jpg', 'png', 'jpeg'])
    if uploaded_file is None:
        # Nothing to process until the user picks a file.
        return

    # Open and preview the image file
    image = Image.open(uploaded_file)
    st.image(image, caption='Uploaded Image.', use_column_width=True)
    st.write('---')

    # Use pytesseract to extract text
    extracted_text = pytesseract.image_to_string(image)
    st.subheader('Extracted Text')
    st.write(extracted_text)

    # Output CNPJ in structured JSON format
    cnpj_results = {'CNPJ': find_cnpjs(extracted_text)}
    st.subheader('CNPJ Results')
    st.json(cnpj_results)


# `streamlit run` executes the script as ``__main__``, so the page still
# renders on every rerun, while importing this module (e.g. to unit-test
# find_cnpjs) has no UI side effects.
if __name__ == '__main__':
    main()
# Hi! I can help you with any questions about Streamlit and Python. What would you like to know?