"""RAG (Retrieval-Augmented Generation) application.

Upload a PDF document; users can then ask questions and the AI answers
using only the content of that particular document.

Note: when running in an online Streamlit editor, save the app before
uploading files.
"""
import streamlit as st
import tempfile
import os
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
# --- Page configuration -------------------------------------------------
st.set_page_config(page_title="Chat with PDF", page_icon="📚")
st.title("Chat with PDF 📚")

# --- Session state ------------------------------------------------------
# Ensure every key this app relies on exists before any widget renders.
_SESSION_DEFAULTS = (
    ("conversation", None),     # the ConversationalRetrievalChain, once built
    ("chat_history", []),       # list of (question, answer) tuples
    ("processComplete", None),  # truthy once a PDF has been processed
)
for _key, _default in _SESSION_DEFAULTS:
    if _key not in st.session_state:
        st.session_state[_key] = _default

# --- Sidebar inputs -----------------------------------------------------
# These names are read as module-level globals by process_pdf() below.
with st.sidebar:
    openai_api_key = st.text_input("OpenAI API Key", type="password")
    process_button = st.button("Process")
    uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
def process_pdf():
    """Build the conversational RAG chain from the uploaded PDF.

    Extracts text from the uploaded PDF, splits it into overlapping
    chunks, embeds the chunks into a FAISS vector store, and stores a
    ``ConversationalRetrievalChain`` (with conversation buffer memory) in
    ``st.session_state.conversation``. Sets
    ``st.session_state.processComplete`` on success.

    Reads the module-level globals ``uploaded_file`` and
    ``openai_api_key``; silently returns if either is missing (the
    caller is expected to surface that to the user).
    """
    if not (uploaded_file and openai_api_key):
        return

    # PdfReader accepts a file-like object, so the uploaded buffer can be
    # read directly — no temporary file on disk (the previous approach
    # leaked the temp file if any later step raised before os.unlink).
    pdf_reader = PdfReader(uploaded_file)

    # extract_text() may return None for pages without extractable text
    # (e.g. scanned images); treat those as empty instead of crashing.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    if not text.strip():
        st.error("No extractable text found in the uploaded PDF.")
        return

    # Overlapping chunks preserve context across chunk boundaries.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)

    # Embed the chunks and index them for similarity search.
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    vectorstore = FAISS.from_texts(texts=chunks, embedding=embeddings)

    # temperature=0 keeps answers deterministic and grounded in the
    # retrieved document chunks rather than free-form generation.
    llm = ChatOpenAI(temperature=0, openai_api_key=openai_api_key)
    memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True
    )
    st.session_state.conversation = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )
    st.session_state.processComplete = True
# Process the PDF when the sidebar button is clicked. Previously a click
# with no file or no API key silently did nothing; now the user is told
# exactly what is missing.
if process_button:
    if not uploaded_file:
        st.warning("Please upload a PDF file first.")
    elif not openai_api_key:
        st.warning("Please enter your OpenAI API key first.")
    else:
        with st.spinner("Processing PDF..."):
            process_pdf()
# Chat interface — available only once a PDF has been processed.
if st.session_state.processComplete:
    user_question = st.chat_input("Ask a question about your PDF:")
    if user_question:
        with st.spinner("Searching document and generating response..."):
            # Run the retrieval chain and record the exchange.
            result = st.session_state.conversation({"question": user_question})
            st.session_state.chat_history.append((user_question, result["answer"]))
    # Replay the accumulated conversation so far.
    for asked, answered in st.session_state.chat_history:
        with st.chat_message("user"):
            st.write(asked)
        with st.chat_message("assistant"):
            st.write(answered)
else:
    # Guide the user toward whichever setup step is still missing.
    if not uploaded_file:
        st.info("Please upload a PDF file to begin.")
    elif not openai_api_key:
        st.info("Please enter your OpenAI API key in the sidebar.")
    else:
        st.info("Click 'Process' in the sidebar to start the conversation.")
# End of app.