# App that loads image documents, searches them for a CNPJ using Gemini, and outputs the CNPJ as structured JSON.
# Note: to upload files, please save the app first.
import streamlit as st
import google.generativeai as genai
from PIL import Image
import json
import os
# Configure the page
st.set_page_config(page_title="CNPJ Extractor", layout="wide")

# Initialize Gemini API
# The key typed into the password field takes precedence. When the field is
# left empty, fall back to the GOOGLE_API_KEY environment variable so that a
# deployed instance does not require the key to be pasted in every session.
GOOGLE_API_KEY = (
    st.text_input("Enter your Google API Key", type="password")
    or os.environ.get("GOOGLE_API_KEY", "")
)
# Only configure the SDK once a non-empty key is available.
if GOOGLE_API_KEY:
    genai.configure(api_key=GOOGLE_API_KEY)
# Prompt sent to Gemini together with the image. It lives at module level so
# the text stays flush-left and picks up no function-body indentation.
_CNPJ_PROMPT = """
Please analyze this image and extract any CNPJ numbers you find.
A CNPJ is a Brazilian company registration number in the format XX.XXX.XXX/XXXX-XX.
Return the result as a JSON string with the following structure:
{
"cnpj": "XX.XXX.XXX/XXXX-XX",
"confidence": "high/medium/low"
}
If no CNPJ is found, return {"cnpj": null, "confidence": null}
"""


def _parse_cnpj_response(response_text):
    """Extract and decode the JSON object embedded in the model's reply.

    The reply may surround the JSON with extra text, so the span from the
    first ``{`` to the last ``}`` is decoded.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The decoded dict, guaranteed to contain the keys ``"cnpj"`` and
        ``"confidence"`` (set to ``None`` when the model omitted them).

    Raises:
        ValueError: If the text contains no ``{...}`` span or the span is not
            valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    start = response_text.find("{")
    end = response_text.rfind("}")
    if start == -1 or end < start:
        # Fail with a clear message instead of decoding an empty slice.
        raise ValueError("No JSON object found in model response")
    result = json.loads(response_text[start:end + 1])
    # Callers index both keys, so make sure they always exist.
    result.setdefault("cnpj", None)
    result.setdefault("confidence", None)
    return result


def extract_cnpj(image, model_name='gemini-pro-vision'):
    """Use Gemini to extract a CNPJ from an image.

    Args:
        image: The image passed to the model alongside the prompt.
        model_name: Name of the Gemini model to use.
            NOTE(review): 'gemini-pro-vision' may no longer be served by the
            API -- confirm a currently supported vision-capable model.

    Returns:
        A dict with the keys ``"cnpj"`` and ``"confidence"``. When the reply
        cannot be read or decoded, both are ``None`` and an ``"error"`` key
        holds the failure message.

    Raises:
        Whatever ``generate_content`` raises; the API call itself is not
        guarded here and is left to the caller.
    """
    model = genai.GenerativeModel(model_name)
    response = model.generate_content([_CNPJ_PROMPT, image])
    try:
        # Reading response.text is kept inside the try on purpose.
        # NOTE(review): it may raise for replies without a text part -- confirm.
        return _parse_cnpj_response(response.text)
    except Exception as e:
        # Best effort: report the failure in-band rather than crashing the UI.
        return {"cnpj": None, "confidence": None, "error": str(e)}
def main():
    """Render the page: upload an image, run the extraction, show the result.

    Reads the module-level ``GOOGLE_API_KEY``. Without a key only a warning is
    shown. With a key and an uploaded file, the image is displayed and passed
    to ``extract_cnpj``, and the outcome is rendered together with the raw JSON.
    """
    st.title("🔍 CNPJ Extractor from Images")
    # Built from single-line literals so the rendered text carries no
    # accidental leading indentation.
    st.write(
        "\n"
        "Upload an image containing a CNPJ (Brazilian company registration number) and\n"
        "this app will attempt to extract it using Google's Gemini AI.\n"
    )

    # File uploader
    uploaded_file = st.file_uploader(
        "Choose an image file",
        type=['png', 'jpg', 'jpeg']
    )

    if not GOOGLE_API_KEY:
        st.warning("Please enter your Google API Key to proceed")
        return
    if not uploaded_file:
        # Nothing to analyze yet.
        return

    # Display the uploaded image
    try:
        image = Image.open(uploaded_file)
    except OSError as e:
        # Unreadable or unrecognized image data: report it instead of
        # letting the exception escape to the page.
        st.error(f"An error occurred: {str(e)}")
        return
    # NOTE(review): recent Streamlit releases may deprecate use_column_width --
    # confirm against the version this app is pinned to.
    st.image(image, caption="Uploaded Image", use_column_width=True)

    with st.spinner("Analyzing image..."):
        try:
            result = extract_cnpj(image)
            # Display results in a nice format
            st.subheader("Results")
            if result.get("error"):
                # A failed extraction is not the same as "no CNPJ present".
                st.error(f"An error occurred: {result['error']}")
            elif result.get("cnpj"):
                st.success(f"CNPJ found: {result['cnpj']}")
                # .get: the model may omit the confidence field.
                st.info(f"Confidence: {result.get('confidence')}")
            else:
                st.warning("No CNPJ found in the image")
            # Display raw JSON
            st.subheader("Raw JSON Output")
            st.json(result)
        except Exception as e:
            # Top-level UI boundary: surface any failure (e.g. API errors
            # raised by extract_cnpj) to the user.
            st.error(f"An error occurred: {str(e)}")
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
# Hi! I can help you with any questions about Streamlit and Python. What would you like to know?