RAG Chatbot
A rudimentary retrieval-augmented generation (RAG) chatbot.
What is RAG?
What the hell?
What do we need?
Packages
pip install sentence-transformers
pip install openai
pip install numpy

Here's the code
```python
import os

import numpy as np
import openai
from sentence_transformers import SentenceTransformer, util
# Prefer supplying the OpenAI API key through the OPENAI_API_KEY environment
# variable (or another secure source) instead of hard-coding it in this file.
# The placeholder below is only used when that variable is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY', 'API KEY HERE, KEEP THIS A SECRET')
def read_file_in_chunks(file_path, chunk_size=1000):
    """
    Generator to read a text file in chunks.

    Args:
        file_path: Path of the file to read; it is opened as UTF-8 text.
        chunk_size: Maximum number of characters per chunk. The value is
            passed unchanged to ``file.read``.

    Yields:
        Consecutive, non-overlapping pieces of the file's text, each at most
        ``chunk_size`` characters long. Nothing is yielded for an empty file.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        # read() returns '' once the file is exhausted; iter() treats that
        # empty string as the sentinel that ends the iteration.
        yield from iter(lambda: file.read(chunk_size), '')
# Loaded SentenceTransformer models keyed by model name, so the weights are
# loaded once per process instead of once per question.
_EMBEDDING_MODELS = {}


def _get_embedding_model(model_name='all-MiniLM-L6-v2'):
    """Return the SentenceTransformer for ``model_name``, loading it on first use."""
    if model_name not in _EMBEDDING_MODELS:
        _EMBEDDING_MODELS[model_name] = SentenceTransformer(model_name)
    return _EMBEDDING_MODELS[model_name]


def find_most_relevant_chunk(question, chunks):
    """
    Find the most relevant chunk using sentence embeddings for semantic search.

    Args:
        question: The user's question as a string.
        chunks: Iterable of text chunks to search. It is materialised into a
            list, so a generator is accepted as well.

    Returns:
        The chunk whose embedding has the highest cosine similarity to the
        question's embedding (the earliest such chunk on a tie), or ``None``
        when ``chunks`` is empty.
    """
    chunks = list(chunks)
    if not chunks:
        # Nothing to rank; return before paying for a model load.
        return None

    model = _get_embedding_model()
    question_embedding = model.encode(question, convert_to_tensor=True)
    # Encode all chunks in a single batch rather than one call per chunk.
    chunk_embeddings = model.encode(chunks, convert_to_tensor=True)

    # The similarity matrix has one row (the question) and one column per
    # chunk; convert that row to plain floats so the comparison below does
    # not depend on tensor truthiness.
    scores = util.pytorch_cos_sim(question_embedding, chunk_embeddings)[0].tolist()
    # max() returns the first maximal index, matching a strict ">" scan.
    best_index = max(range(len(chunks)), key=scores.__getitem__)
    return chunks[best_index]
def ask_openai(question, context):
    """
    Ask a question to the OpenAI API with the provided context.

    The prompt is the context, a blank line, then the question followed by
    an ``Answer:`` cue. The request goes to the completions endpoint.

    Args:
        question: The user's question.
        context: Text placed ahead of the question in the prompt.

    Returns:
        The stripped text of the first completion choice. If anything in the
        request fails, the exception's message is returned as a string
        instead of being raised, so the caller always gets printable text.
    """
    request = {
        "model": "davinci-002",  # Update this to the latest or most suitable model
        "prompt": f"{context}\n\nQuestion: {question}\nAnswer:",
        "temperature": 0.5,
        "max_tokens": 300,
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
        # Generation stops at the first newline, so answers are single-line.
        "stop": ["\n"],
    }
    try:
        if hasattr(openai, "completions"):
            # openai>=1.0: the legacy openai.Completion interface was removed.
            response = openai.completions.create(**request)
        else:
            # openai<1.0: legacy interface; it accepts ``model`` as well.
            response = openai.Completion.create(**request)
        return response.choices[0].text.strip()
    except Exception as e:
        # Deliberate best-effort: report the failure as text to the caller.
        return str(e)
# Example usage: an interactive question/answer loop over one text file.
file_path = 'PATH TO DATA FILE GOES HERE'  # Ensure this path is correct
# Read the document once; every question searches the same list of chunks.
chunks = list(read_file_in_chunks(file_path))

while True:
    # Prompt the user to enter a question
    try:
        question = input("Please enter your question (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Treat Ctrl-D / Ctrl-C at the prompt as a request to quit.
        break
    if question.lower() == 'exit':
        break
    if not question:
        # Do not spend an embedding pass and an API call on a blank line.
        continue

    relevant_chunk = find_most_relevant_chunk(question, chunks)
    if relevant_chunk:
        answer = ask_openai(question, relevant_chunk)
        # Print the answer in green and reset the color after
        print(f"\033[92mAnswer:\n{answer}\033[0m")
    else:
        print("\033[92mCould not find a relevant section in the text for your question.\033[0m")
```

Last updated