"""RAG (Retrieval-Augmented Generation) application.

Upload a PDF document; users can then ask questions and the AI answers
using only the content of that particular document.

Note: when running in an online Streamlit editor, save the app before
uploading files.
"""
import streamlit as st
import tempfile
import os
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
# --- Page configuration -------------------------------------------------
st.set_page_config(page_title="Chat with PDF", page_icon="📚")
st.title("Chat with PDF 📚")

# --- Session state ------------------------------------------------------
# Ensure every key this app relies on exists before any widget renders.
_SESSION_DEFAULTS = (
    ("conversation", None),     # the ConversationalRetrievalChain, once built
    ("chat_history", []),       # list of (question, answer) tuples
    ("processComplete", None),  # truthy once a PDF has been processed
)
for _key, _default in _SESSION_DEFAULTS:
    if _key not in st.session_state:
        st.session_state[_key] = _default

# --- Sidebar inputs -----------------------------------------------------
# These names are read as module-level globals by process_pdf() below.
with st.sidebar:
    openai_api_key = st.text_input("OpenAI API Key", type="password")
    process_button = st.button("Process")
    uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
def process_pdf():
    """Build the conversational RAG chain from the uploaded PDF.

    Extracts text from the uploaded PDF, splits it into overlapping
    chunks, embeds the chunks into a FAISS vector store, and stores a
    ``ConversationalRetrievalChain`` (with conversation buffer memory) in
    ``st.session_state.conversation``. Sets
    ``st.session_state.processComplete`` on success.

    Reads the module-level globals ``uploaded_file`` and
    ``openai_api_key``; silently returns if either is missing (the
    caller is expected to surface that to the user).
    """
    if not (uploaded_file and openai_api_key):
        return

    # PdfReader accepts a file-like object, so the uploaded buffer can be
    # read directly — no temporary file on disk (the previous approach
    # leaked the temp file if any later step raised before os.unlink).
    pdf_reader = PdfReader(uploaded_file)

    # extract_text() may return None for pages without extractable text
    # (e.g. scanned images); treat those as empty instead of crashing.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    if not text.strip():
        st.error("No extractable text found in the uploaded PDF.")
        return

    # Overlapping chunks preserve context across chunk boundaries.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)

    # Embed the chunks and index them for similarity search.
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    vectorstore = FAISS.from_texts(texts=chunks, embedding=embeddings)

    # temperature=0 keeps answers deterministic and grounded in the
    # retrieved document chunks rather than free-form generation.
    llm = ChatOpenAI(temperature=0, openai_api_key=openai_api_key)
    memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True
    )
    st.session_state.conversation = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )
    st.session_state.processComplete = True
# Process the PDF when the sidebar button is clicked. Previously a click
# with no file or no API key silently did nothing; now the user is told
# exactly what is missing.
if process_button:
    if not uploaded_file:
        st.warning("Please upload a PDF file first.")
    elif not openai_api_key:
        st.warning("Please enter your OpenAI API key first.")
    else:
        with st.spinner("Processing PDF..."):
            process_pdf()
# Chat interface — available only once a PDF has been processed.
if st.session_state.processComplete:
    user_question = st.chat_input("Ask a question about your PDF:")
    if user_question:
        with st.spinner("Searching document and generating response..."):
            # Run the retrieval chain and record the exchange.
            result = st.session_state.conversation({"question": user_question})
            st.session_state.chat_history.append((user_question, result["answer"]))
    # Replay the accumulated conversation so far.
    for asked, answered in st.session_state.chat_history:
        with st.chat_message("user"):
            st.write(asked)
        with st.chat_message("assistant"):
            st.write(answered)
else:
    # Guide the user toward whichever setup step is still missing.
    if not uploaded_file:
        st.info("Please upload a PDF file to begin.")
    elif not openai_api_key:
        st.info("Please enter your OpenAI API key in the sidebar.")
    else:
        st.info("Click 'Process' in the sidebar to start the conversation.")
# End of app.