Create an app to compare two documents (for example, PDFs). The app needs to display a preview of each document uploaded by the user in separate tabs, plus one tab with the comparison result provided by an LLM.
Drop files here
or click to upload
import streamlit as st
import PyPDF2
import io
import os
from dotenv import load_dotenv
from openai import OpenAI
import tempfile
# Populate the process environment via python-dotenv before reading any settings
load_dotenv()
# Module-level OpenAI client, keyed by the OPENAI_API_KEY environment variable
# (the lookup yields None when the variable is unset)
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source; the
            app passes the object returned by ``st.file_uploader``.

    Returns:
        The text of all pages that produced text, joined with newlines.
        An empty string when no page produced any text.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # `or ""` treats a page that yields a falsy value as empty instead of
    # letting it break the concatenation.
    page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    # Join with a newline so the last word of one page does not fuse with the
    # first word of the next; pages without text are skipped so a document
    # with no extractable text still yields "".
    return "\n".join(page_text for page_text in page_texts if page_text)
def compare_documents(text1, text2, *, model="gpt-3.5-turbo", max_tokens=1000):
    """Compare two documents using an OpenAI chat model.

    Args:
        text1: Text of the first document.
        text2: Text of the second document.
        model: Name of the chat model to request. Defaults to the model the
            app has always used.
        max_tokens: Upper bound passed to the API for the length of the
            generated analysis.

    Returns:
        The model's analysis as returned by the API, or a string starting
        with ``"Error in comparison: "`` when a document has no text or the
        request fails. Failures are returned rather than raised so the
        caller can display them directly.
    """
    # Skip the API call when there is nothing to compare (e.g. a PDF from
    # which no text could be extracted).
    missing = [
        label
        for label, text in (("Document 1", text1), ("Document 2", text2))
        if not text or not text.strip()
    ]
    if missing:
        return f"Error in comparison: no text to compare in {' and '.join(missing)}"

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a document comparison expert. Compare the following two documents and provide a detailed analysis of their similarities and differences. Focus on key points, content structure, and important variations."},
                {"role": "user", "content": f"Document 1:\n{text1}\n\nDocument 2:\n{text2}"}
            ],
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and any failure is
        # reported to the user as text instead of a traceback.
        return f"Error in comparison: {str(e)}"
# App title
st.title("Document Comparison Tool")
st.write("Upload two PDF documents to compare their contents")

# File uploaders, one per column
col1, col2 = st.columns(2)
with col1:
    file1 = st.file_uploader("Upload first document", type=['pdf'], key="file1")
with col2:
    file2 = st.file_uploader("Upload second document", type=['pdf'], key="file2")

# Session state keeps the comparison result across script reruns, together
# with a fingerprint of the texts it was computed for.
if 'comparison_result' not in st.session_state:
    st.session_state.comparison_result = None
if 'comparison_inputs' not in st.session_state:
    st.session_state.comparison_inputs = None

# Only proceed if both files are uploaded
if file1 and file2:
    # Create tabs for document previews and comparison
    tab1, tab2, tab3 = st.tabs(["Document 1", "Document 2", "Comparison Results"])

    # Extract text from both documents
    text1 = extract_text_from_pdf(file1)
    text2 = extract_text_from_pdf(file2)

    # Drop a result that was computed for different documents so a stale
    # analysis is never shown next to newly uploaded files. The built-in hash
    # is sufficient here: it is only compared with values stored earlier in
    # this session state.
    inputs_fingerprint = hash((text1, text2))
    if st.session_state.comparison_inputs != inputs_fingerprint:
        st.session_state.comparison_result = None
        st.session_state.comparison_inputs = inputs_fingerprint

    # Display document previews. The two text areas use distinct labels:
    # with the same label and no key, two documents with identical text would
    # produce identical widgets and Streamlit would reject the duplicate.
    with tab1:
        st.subheader("Document 1 Preview")
        st.text_area("Document 1 content", text1, height=400)
    with tab2:
        st.subheader("Document 2 Preview")
        st.text_area("Document 2 content", text2, height=400)

    with tab3:
        st.subheader("Comparison Analysis")
        if st.button("Compare Documents"):
            with st.spinner("Analyzing documents..."):
                st.session_state.comparison_result = compare_documents(text1, text2)
        # Rendered outside the button branch so the result is still shown on
        # later reruns, when the button reads as not pressed.
        if st.session_state.comparison_result:
            st.markdown(st.session_state.comparison_result)
else:
    st.info("Please upload both documents to start the comparison")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?