DocumentQnA
In [1]:
Copied!
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
import vexpresso
from vexpresso import DaftCollection
import numpy as np
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
import vexpresso
from vexpresso import DaftCollection
import numpy as np
In [2]:
Copied!
# Load the State of the Union transcript; the path is relative to the working directory.
loader = TextLoader('data/state_of_the_union.txt')
documents = loader.load()
# Load the State of the Union transcript; the path is relative to the working directory.
loader = TextLoader('data/state_of_the_union.txt')
documents = loader.load()
In [3]:
Copied!
# Break the loaded documents into chunks (chunk_size=1000, no overlap between
# neighbouring chunks) so each piece can be embedded on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(documents)
# Break the loaded documents into chunks (chunk_size=1000, no overlap between
# neighbouring chunks) so each piece can be embedded on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(documents)
In [4]:
Copied!
# Build a vexpresso DaftCollection from the split documents.
collection = DaftCollection.from_documents(texts)
# Build a vexpresso DaftCollection from the split documents.
collection = DaftCollection.from_documents(texts)
2023-06-09 10:36:45.044 | INFO | daft.context:runner:80 - Using PyRunner
In [5]:
Copied!
# Preview the first 5 rows of the collection.
collection.show(5)
# Preview the first 5 rows of the collection.
collection.show(5)
Out[5]:
| source Utf8 | text Utf8 | vexpresso_index Int64 |
|---|---|---|
| data/state_of_the_union.txt | Madam Speaker, Madam Vice President, our First Lady and S... | 0 |
| data/state_of_the_union.txt | Groups of citizens blocking tanks with their bodies. Ever... | 1 |
| data/state_of_the_union.txt | Putin’s latest attack on Ukraine was premeditated and unp... | 2 |
| data/state_of_the_union.txt | We are inflicting pain on Russia and supporting the peopl... | 3 |
| data/state_of_the_union.txt | And tonight I am announcing that we will join our allies ... | 4 |
In [6]:
Copied!
# Embed the "text" column with OpenAI embeddings and store the vectors in a new
# "text_embeddings" column. NOTE(review): .execute() presumably materializes the
# pending computation -- confirm against the vexpresso docs.
collection = collection.embed(
    "text",
    embedding_fn=OpenAIEmbeddings(),
    to="text_embeddings",
).execute()
# Embed the "text" column with OpenAI embeddings and store the vectors in a new
# "text_embeddings" column. NOTE(review): .execute() presumably materializes the
# pending computation -- confirm against the vexpresso docs.
collection = collection.embed(
    "text",
    embedding_fn=OpenAIEmbeddings(),
    to="text_embeddings",
).execute()
In [7]:
Copied!
# Preview the first 5 rows again to check the newly added embedding column.
collection.show(5)
# Preview the first 5 rows again to check the newly added embedding column.
collection.show(5)
Out[7]:
| source Utf8 | text Utf8 | vexpresso_index Int64 | text_embeddings Python |
|---|---|---|---|
| data/state_of_the_union.txt | Madam Speaker, Madam Vice President, our First Lady and S... | 0 | [-0.0035738709417817522, -0.010346638845427497, -0.018503... |
| data/state_of_the_union.txt | Groups of citizens blocking tanks with their bodies. Ever... | 1 | [-0.005130805271914687, -0.023566637815669266, 0.00297891... |
| data/state_of_the_union.txt | Putin’s latest attack on Ukraine was premeditated and unp... | 2 | [0.008376386985870597, -0.006582389149827369, 0.003706716... |
| data/state_of_the_union.txt | We are inflicting pain on Russia and supporting the peopl... | 3 | [-0.011775417413068503, -0.004066209363947368, 0.00990651... |
| data/state_of_the_union.txt | And tonight I am announcing that we will join our allies ... | 4 | [-0.019793263236145078, -0.031717077748406, 0.01474192684... |
In [8]:
Copied!
# Expose the collection as a LangChain vector store for the QA chain.
# NOTE(review): presumably "text" names the content column and "text_embeddings"
# the vector column -- confirm the argument roles against the vexpresso docs.
vecdb = collection.to_langchain("text", "text_embeddings")
# Expose the collection as a LangChain vector store for the QA chain.
# NOTE(review): presumably "text" names the content column and "text_embeddings"
# the vector column -- confirm the argument roles against the vexpresso docs.
vecdb = collection.to_langchain("text", "text_embeddings")
In [9]:
Copied!
# Build a "stuff" question-answering chain on top of the vector store.
# RetrievalQA is the replacement LangChain recommends for the deprecated VectorDBQA;
# it takes a retriever, so the vector store is wrapped with as_retriever().
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=vecdb.as_retriever())
# Build a "stuff" question-answering chain on top of the vector store.
# RetrievalQA is the replacement LangChain recommends for the deprecated VectorDBQA;
# it takes a retriever, so the vector store is wrapped with as_retriever().
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=vecdb.as_retriever())
In [10]:
Copied!
# Ask a question about the speech; the bare last expression makes the notebook
# display the chain's answer as the cell output.
query = "What did the president say about Ketanji Brown Jackson"
qa.run(query)
# Ask a question about the speech; the bare last expression makes the notebook
# display the chain's answer as the cell output.
query = "What did the president say about Ketanji Brown Jackson"
qa.run(query)
Out[10]:
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support since she's been nominated."