Skip to content

Commit d4fd716

Browse files
(docs) MMR page switch to AstraDBVectorStore (=data api) (#474)
* mmr page switch to AstraDBVectorStore (=data api) * Update docs/modules/examples/pages/mmr.adoc Co-authored-by: Christophe Bornet <cbornet@hotmail.com> * Update docs/modules/examples/pages/mmr.adoc Co-authored-by: Christophe Bornet <cbornet@hotmail.com> * Update docs/modules/examples/pages/mmr.adoc Co-authored-by: Christophe Bornet <cbornet@hotmail.com> * Update docs/modules/examples/pages/mmr.adoc Co-authored-by: Christophe Bornet <cbornet@hotmail.com> --------- Co-authored-by: Christophe Bornet <cbornet@hotmail.com>
1 parent 5684acf commit d4fd716

File tree

2 files changed

+79
-95
lines changed

2 files changed

+79
-95
lines changed

docs/modules/examples/pages/mmr.adoc

+39-50
Original file line number | Diff line number | Diff line change
@@ -25,17 +25,12 @@ DB Access Token] with Database Administrator permissions.
2525
+
2626
[source,text]
2727
----
28-
ASTRA_DB_ID=aad075g999-8ab4-4d81-aa7d-7f58dbed3ead
28+
ASTRA_DB_API_ENDPOINT=https://...
2929
ASTRA_DB_APPLICATION_TOKEN=AstraCS:...
30-
OPENAI_API_KEY=sk-...
3130
ASTRA_DB_KEYSPACE=default_keyspace #optional
31+
OPENAI_API_KEY=sk-...
3232
----
3333
+
34-
[NOTE]
35-
====
36-
The `ASTRA_DB_ID` can be found in the {db-serverless} API Endpoint that's displayed for your vector-enabled database in {astra_ui}. If your API Endpoint is `https://aad075g999-8ab4-4d81-aa7d-7f58dbed3ead-us-east-2.apps.astra.datastax.com`, then your `ASTRA_DB_ID` is `aad075g999-8ab4-4d81-aa7d-7f58dbed3ead`.
37-
====
38-
+
3934
. Install the following dependencies:
4035
+
4136
[source,python]
@@ -52,12 +47,11 @@ See the https://docs.datastax.com/en/ragstack/docs/prerequisites.html[Prerequisi
5247
[source,python]
5348
----
5449
import os
55-
import cassio
5650
from dotenv import load_dotenv
5751
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
5852
from langchain_openai import OpenAI, OpenAIEmbeddings
5953
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
60-
from langchain_community.vectorstores import Cassandra
54+
from langchain_astradb import AstraDBVectorStore
6155
6256
load_dotenv()
6357
----
@@ -74,19 +68,14 @@ myEmbedding = OpenAIEmbeddings()
7468
+
7569
[source,python]
7670
----
77-
cassio.init(
78-
database_id=os.environ["ASTRA_DB_ID"],
79-
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
80-
keyspace=os.environ.get("ASTRA_DB_KEYSPACE"), # this is optional
81-
)
82-
83-
myCassandraVStore = Cassandra(
71+
my_astra_db_vstore = AstraDBVectorStore(
8472
embedding=myEmbedding,
85-
session=None,
86-
keyspace=None,
87-
table_name='vs_test2',
73+
api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
74+
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
75+
namespace=os.environ.get("ASTRA_DB_KEYSPACE"), # this is optional
76+
collection_name="mmr_test",
8877
)
89-
index = VectorStoreIndexWrapper(vectorstore=myCassandraVStore)
78+
index = VectorStoreIndexWrapper(vectorstore=my_astra_db_vstore)
9079
----
9180

9281
== Populate the vector store
@@ -98,20 +87,20 @@ Note that the last sentence's content is considerably different from the others.
9887
----
9988
# declare data
10089
101-
BASE_SENTENCE_0 = ('The frogs and the toads were meeting in the night '
102-
'for a party under the moon.')
90+
BASE_SENTENCE_0 = ("The frogs and the toads were meeting in the night "
91+
"for a party under the moon.")
10392
104-
BASE_SENTENCE_1 = ('There was a party under the moon, that all toads, '
105-
'with the frogs, decided to throw that night.')
93+
BASE_SENTENCE_1 = ("There was a party under the moon, that all toads, "
94+
"with the frogs, decided to throw that night.")
10695
107-
BASE_SENTENCE_2 = ('And the frogs and the toads said: "Let us have a party '
108-
'tonight, as the moon is shining".')
96+
BASE_SENTENCE_2 = ("And the frogs and the toads said: \"Let us have a party "
97+
"tonight, as the moon is shining\".")
10998
110-
BASE_SENTENCE_3 = ('I remember that night... toads, along with frogs, '
111-
'were all busy planning a moonlit celebration.')
99+
BASE_SENTENCE_3 = ("I remember that night... toads, along with frogs, "
100+
"were all busy planning a moonlit celebration.")
112101
113-
DIFFERENT_SENTENCE = ('For the party, frogs and toads set a rule: '
114-
'everyone was to wear a purple hat.')
102+
DIFFERENT_SENTENCE = ("For the party, frogs and toads set a rule: "
103+
"everyone was to wear a purple hat.")
115104
116105
# insert into index
117106
texts = [
@@ -122,23 +111,23 @@ texts = [
122111
DIFFERENT_SENTENCE,
123112
]
124113
metadatas = [
125-
{'source': 'Barney\'s story at the pub'},
126-
{'source': 'Barney\'s story at the pub'},
127-
{'source': 'Barney\'s story at the pub'},
128-
{'source': 'Barney\'s story at the pub'},
129-
{'source': 'The chronicles at the village library'},
114+
{"source": "Barney's story at the pub"},
115+
{"source": "Barney's story at the pub"},
116+
{"source": "Barney's story at the pub"},
117+
{"source": "Barney's story at the pub"},
118+
{"source": "The chronicles at the village library"},
130119
]
131120
----
132121
+
133122
. Load the sentences into the vector store and print their IDs.
134123
+
135124
[source,python]
136125
----
137-
ids = myCassandraVStore.add_texts(
126+
ids = my_astra_db_vstore.add_texts(
138127
texts,
139128
metadatas=metadatas,
140129
)
141-
print('\n'.join(ids))
130+
print("\n".join(ids))
142131
----
143132

144133
== Create and compare retrievers
@@ -152,17 +141,17 @@ Ask them a question, and see how the MMR response differs from the similarity re
152141
+
153142
[source,python]
154143
----
155-
QUESTION = 'Tell me about the party that night.'
144+
QUESTION = "Tell me about the party that night."
156145
----
157146
+
158147
. Create a retriever with similarity search.
159148
+
160149
[source,python]
161150
----
162-
retrieverSim = myCassandraVStore.as_retriever(
163-
search_type='similarity',
151+
retrieverSim = my_astra_db_vstore.as_retriever(
152+
search_type="similarity",
164153
search_kwargs={
165-
'k': 2,
154+
"k": 2,
166155
},
167156
)
168157
@@ -172,19 +161,19 @@ chainSimSrc = RetrievalQAWithSourcesChain.from_chain_type(
172161
)
173162
174163
responseSimSrc = chainSimSrc.invoke({chainSimSrc.question_key: QUESTION})
175-
print('Similarity-based chain:')
176-
print(f' ANSWER : {responseSimSrc["answer"].strip()}')
177-
print(f' SOURCES: {responseSimSrc["sources"].strip()}')
164+
print("Similarity-based chain:")
165+
print(f" ANSWER : {responseSimSrc['answer'].strip()}")
166+
print(f" SOURCES: {responseSimSrc['sources'].strip()}")
178167
----
179168
+
180169
. Create a retriever with MMR search.
181170
+
182171
[source,python]
183172
----
184-
retrieverMMR = myCassandraVStore.as_retriever(
185-
search_type='mmr',
173+
retrieverMMR = my_astra_db_vstore.as_retriever(
174+
search_type="mmr",
186175
search_kwargs={
187-
'k': 2,
176+
"k": 2,
188177
},
189178
)
190179
@@ -194,9 +183,9 @@ chainMMRSrc = RetrievalQAWithSourcesChain.from_chain_type(
194183
)
195184
196185
responseMMRSrc = chainMMRSrc.invoke({chainMMRSrc.question_key: QUESTION})
197-
print('MMR-based chain:')
198-
print(f' ANSWER : {responseMMRSrc["answer"].strip()}')
199-
print(f' SOURCES: {responseMMRSrc["sources"].strip()}')
186+
print("MMR-based chain:")
187+
print(f" ANSWER : {responseMMRSrc['answer'].strip()}")
188+
print(f" SOURCES: {responseMMRSrc['sources'].strip()}")
200189
----
201190
+
202191
. Run the code and observe the differences in the responses.

docs/modules/examples/partials/mmr-example.adoc

+40-45
Original file line number | Diff line number | Diff line change
@@ -4,50 +4,45 @@
44
[source,python]
55
----
66
import os
7-
import cassio
87
from dotenv import load_dotenv
98
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
109
from langchain_openai import OpenAI, OpenAIEmbeddings
1110
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
12-
from langchain_community.vectorstores import Cassandra
11+
from langchain_astradb import AstraDBVectorStore
1312
1413
# Load environment variables
1514
load_dotenv()
1615
17-
# Initialize OpenAI and embeddings
16+
# Initialize the OpenAI model and embeddings.
1817
llm = OpenAI(temperature=0)
1918
myEmbedding = OpenAIEmbeddings()
2019
21-
cassio.init(
22-
database_id=os.environ["ASTRA_DB_ID"],
23-
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
24-
keyspace=os.environ.get("ASTRA_DB_KEYSPACE"), # this is optional
25-
)
26-
27-
myCassandraVStore = Cassandra(
20+
# Initialize the vector store.
21+
myAstraDBVStore = AstraDBVectorStore(
2822
embedding=myEmbedding,
29-
session=None,
30-
keyspace=None,
31-
table_name='vs_test2',
23+
api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
24+
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
25+
namespace=os.environ.get("ASTRA_DB_KEYSPACE"), # this is optional
26+
collection_name="mmr_test",
3227
)
33-
index = VectorStoreIndexWrapper(vectorstore=myCassandraVStore)
28+
index = VectorStoreIndexWrapper(vectorstore=myAstraDBVStore)
3429
3530
# declare data
3631
37-
BASE_SENTENCE_0 = ('The frogs and the toads were meeting in the night '
38-
'for a party under the moon.')
32+
BASE_SENTENCE_0 = ("The frogs and the toads were meeting in the night "
33+
"for a party under the moon.")
3934
40-
BASE_SENTENCE_1 = ('There was a party under the moon, that all toads, '
41-
'with the frogs, decided to throw that night.')
35+
BASE_SENTENCE_1 = ("There was a party under the moon, that all toads, "
36+
"with the frogs, decided to throw that night.")
4237
43-
BASE_SENTENCE_2 = ('And the frogs and the toads said: "Let us have a party '
44-
'tonight, as the moon is shining".')
38+
BASE_SENTENCE_2 = ("And the frogs and the toads said: \"Let us have a party "
39+
"tonight, as the moon is shining\".")
4540
46-
BASE_SENTENCE_3 = ('I remember that night... toads, along with frogs, '
47-
'were all busy planning a moonlit celebration.')
41+
BASE_SENTENCE_3 = ("I remember that night... toads, along with frogs, "
42+
"were all busy planning a moonlit celebration.")
4843
49-
DIFFERENT_SENTENCE = ('For the party, frogs and toads set a rule: '
50-
'everyone was to wear a purple hat.')
44+
DIFFERENT_SENTENCE = ("For the party, frogs and toads set a rule: "
45+
"everyone was to wear a purple hat.")
5146
5247
# insert into index
5348
texts = [
@@ -58,29 +53,29 @@ texts = [
5853
DIFFERENT_SENTENCE,
5954
]
6055
metadatas = [
61-
{'source': 'Barney\'s story at the pub'},
62-
{'source': 'Barney\'s story at the pub'},
63-
{'source': 'Barney\'s story at the pub'},
64-
{'source': 'Barney\'s story at the pub'},
65-
{'source': 'The chronicles at the village library'},
56+
{"source": "Barney's story at the pub"},
57+
{"source": "Barney's story at the pub"},
58+
{"source": "Barney's story at the pub"},
59+
{"source": "Barney's story at the pub"},
60+
{"source": "The chronicles at the village library"},
6661
]
6762
68-
# add texts to vector store and print first
69-
ids = myCassandraVStore.add_texts(
63+
# add texts to vector store and print IDs
64+
ids = myAstraDBVStore.add_texts(
7065
texts,
7166
metadatas=metadatas,
7267
)
73-
print('\n'.join(ids))
68+
print("\n".join(ids))
7469
7570
# query the index
7671
77-
QUESTION = 'Tell me about the party that night.'
72+
QUESTION = "Tell me about the party that night."
7873
7974
# manual creation of the "retriever" with the 'similarity' search type
80-
retrieverSim = myCassandraVStore.as_retriever(
81-
search_type='similarity',
75+
retrieverSim = myAstraDBVStore.as_retriever(
76+
search_type="similarity",
8277
search_kwargs={
83-
'k': 2,
78+
"k": 2,
8479
},
8580
)
8681
@@ -91,18 +86,18 @@ chainSimSrc = RetrievalQAWithSourcesChain.from_chain_type(
9186
9287
# Run the chain and print results with sources
9388
responseSimSrc = chainSimSrc.invoke({chainSimSrc.question_key: QUESTION})
94-
print('Similarity-based chain:')
95-
print(f' ANSWER : {responseSimSrc["answer"].strip()}')
96-
print(f' SOURCES: {responseSimSrc["sources"].strip()}')
89+
print("Similarity-based chain:")
90+
print(f" ANSWER : {responseSimSrc['answer'].strip()}")
91+
print(f" SOURCES: {responseSimSrc['sources'].strip()}")
9792
9893
9994
# mmr search with sources
10095
10196
# manual creation of the "retriever" with the 'MMR' search type
102-
retrieverMMR = myCassandraVStore.as_retriever(
103-
search_type='mmr',
97+
retrieverMMR = myAstraDBVStore.as_retriever(
98+
search_type="mmr",
10499
search_kwargs={
105-
'k': 2,
100+
"k": 2,
106101
},
107102
)
108103
@@ -113,8 +108,8 @@ chainMMRSrc = RetrievalQAWithSourcesChain.from_chain_type(
113108
114109
# Run the chain and print results with sources
115110
responseMMRSrc = chainMMRSrc.invoke({chainMMRSrc.question_key: QUESTION})
116-
print('MMR-based chain:')
117-
print(f' ANSWER : {responseMMRSrc["answer"].strip()}')
118-
print(f' SOURCES: {responseMMRSrc["sources"].strip()}')
111+
print("MMR-based chain:")
112+
print(f" ANSWER : {responseMMRSrc['answer'].strip()}")
113+
print(f" SOURCES: {responseMMRSrc['sources'].strip()}")
119114
----
120115
====

0 commit comments

Comments
 (0)