# App to load image documents, search them for a CNPJ using Gemini, and output the CNPJ as structured JSON.
# To upload files, please first save the app.
import streamlit as st
import google.generativeai as genai
from PIL import Image
import json
import os
# Configure page. This is the first Streamlit call in the script, so the
# warning below is rendered with the page settings already applied.
st.set_page_config(page_title="CNPJ Extractor", layout="wide")

# Initialize Gemini
# Sentinel used when no real key is configured anywhere; kept so that
# GOOGLE_API_KEY is always a string, as before.
_PLACEHOLDER_API_KEY = "your-api-key-here"


def _load_api_key():
    """Return the Gemini API key from Streamlit secrets, else the environment, else the placeholder."""
    try:
        key = st.secrets.get("GOOGLE_API_KEY")
    except FileNotFoundError:
        # NOTE(review): Streamlit is understood to raise FileNotFoundError (or a
        # subclass of it) when no secrets.toml exists -- confirm against the
        # deployed Streamlit version. Treat it as "no secret configured".
        key = None
    return key or os.environ.get("GOOGLE_API_KEY") or _PLACEHOLDER_API_KEY


GOOGLE_API_KEY = _load_api_key()
if GOOGLE_API_KEY == _PLACEHOLDER_API_KEY:
    # Surface the misconfiguration up front instead of letting every request
    # fail later with an opaque authentication error.
    st.warning(
        "GOOGLE_API_KEY is not configured. Add it to the Streamlit secrets or set "
        "the GOOGLE_API_KEY environment variable; extraction requests will fail until then."
    )
genai.configure(api_key=GOOGLE_API_KEY)
# NOTE(review): confirm that this model ID is still served by the Gemini API.
model = genai.GenerativeModel('gemini-pro-vision')
def _extract_json_object(text):
    """Return the outermost ``{...}`` span of *text*, or the stripped text if there is none."""
    text = text.strip()
    start = text.find("{")
    end = text.rfind("}")
    if start != -1 and end > start:
        # Slicing between the braces discards Markdown code fences (with or
        # without a closing fence) and any prose placed around the JSON.
        return text[start:end + 1]
    return text


def extract_cnpj(image):
    """Extract a CNPJ from an image using Gemini.

    Args:
        image: The image to inspect. It is passed unchanged, together with the
            prompt, to ``model.generate_content``.

    Returns:
        A dict that always contains the keys ``"cnpj"`` and ``"confidence"``.
        Both are ``None`` when the request fails, the response is not valid
        JSON, or the response is not a JSON object; in those cases the error
        is also shown in the app via ``st.error``.
    """
    prompt = """
    Look at this image and find any CNPJ numbers.
    A CNPJ is a 14-digit number usually formatted as XX.XXX.XXX/XXXX-XX.
    Return the result as a JSON with the following structure:
    {
        "cnpj": "XX.XXX.XXX/XXXX-XX",
        "confidence": "high/medium/low"
    }
    If no CNPJ is found, return {"cnpj": null, "confidence": null}
    Only return the JSON, no additional text.
    """
    empty_result = {"cnpj": None, "confidence": None}

    try:
        response = model.generate_content([prompt, image])
        result = json.loads(_extract_json_object(response.text))
    except Exception as e:
        # Boundary handler: any API or parsing failure is reported in the UI
        # and turned into an empty result instead of crashing the app.
        st.error(f"Error processing image: {str(e)}")
        return empty_result

    if not isinstance(result, dict):
        # Valid JSON, but not the object shape the prompt asked for.
        st.error("Error processing image: unexpected response format")
        return empty_result

    # Guarantee both expected keys so callers can index them safely; any
    # additional keys returned by the model are preserved.
    return {**empty_result, **result}
# Main app
# The emoji in the title, expander label and footer were mis-decoded UTF-8 in
# the original text and are restored here.
# NOTE(review): the title emoji could not be recovered with certainty; confirm it.
st.title("🔍 CNPJ Extractor")
st.write("Upload an image containing a CNPJ number and the AI will extract it.")

# File uploader
uploaded_file = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    try:
        image = Image.open(uploaded_file)
    except OSError as exc:
        # Pillow reports unreadable or corrupt files with OSError subclasses;
        # show the problem instead of crashing the script run.
        st.error(f"Could not read the uploaded image: {exc}")
        image = None

    if image is not None:
        # Display the uploaded image
        st.image(image, caption="Uploaded Image", use_column_width=True)

        # Add a button to process the image
        if st.button("Extract CNPJ"):
            with st.spinner("Processing image..."):
                result = extract_cnpj(image)

            # Read fields defensively: the model output may not be a dict or
            # may be missing a key, and indexing it directly would raise.
            fields = result if isinstance(result, dict) else {}
            cnpj = fields.get("cnpj")

            # Display results
            st.subheader("Results")
            if cnpj:
                st.success(f"CNPJ found: {cnpj}")
                st.info(f"Confidence: {fields.get('confidence')}")
            else:
                st.warning("No CNPJ found in the image.")

            # Display raw JSON
            st.subheader("Raw JSON Output")
            st.json(result)

# Add instructions
with st.expander("ℹ️ Instructions"):
    st.markdown("""
    1. Upload an image containing a CNPJ number
    2. Click 'Extract CNPJ' to process the image
    3. The AI will attempt to find and extract any CNPJ numbers
    4. Results will be displayed in structured format

    Note: For best results, ensure the CNPJ is clearly visible in the image.
    """)

# Footer
st.markdown("---")
st.markdown("Made with ❤️ using Streamlit and Google Gemini")
# Hi! I can help you with any questions about Streamlit and Python. What would you like to know?