@@ -25,17 +25,12 @@ DB Access Token] with Database Administrator permissions.
25
25
+
26
26
[source,text]
27
27
----
28
- ASTRA_DB_ID=aad075g999-8ab4-4d81-aa7d-7f58dbed3ead
28
+ ASTRA_DB_API_ENDPOINT=https://...
29
29
ASTRA_DB_APPLICATION_TOKEN=AstraCS:...
30
- OPENAI_API_KEY=sk-...
31
30
ASTRA_DB_KEYSPACE=default_keyspace #optional
31
+ OPENAI_API_KEY=sk-...
32
32
----
33
33
+
34
- [NOTE]
35
- ====
36
- The `ASTRA_DB_ID` can be found in the {db-serverless} API Endpoint that's displayed for your vector-enabled database in {astra_ui}. If your API Endpoint is `https://aad075g999-8ab4-4d81-aa7d-7f58dbed3ead-us-east-2.apps.astra.datastax.com`, then your `ASTRA_DB_ID` is `aad075g999-8ab4-4d81-aa7d-7f58dbed3ead`.
37
- ====
38
- +
39
34
. Install the following dependencies:
40
35
+
41
36
[source,python]
@@ -52,12 +47,11 @@ See the https://docs.datastax.com/en/ragstack/docs/prerequisites.html[Prerequisi
52
47
[source,python]
53
48
----
54
49
import os
55
- import cassio
56
50
from dotenv import load_dotenv
57
51
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
58
52
from langchain_openai import OpenAI, OpenAIEmbeddings
59
53
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
60
- from langchain_community.vectorstores import Cassandra
54
+ from langchain_astradb import AstraDBVectorStore
61
55
62
56
load_dotenv()
63
57
----
@@ -74,19 +68,14 @@ myEmbedding = OpenAIEmbeddings()
74
68
+
75
69
[source,python]
76
70
----
77
- cassio.init(
78
- database_id=os.environ["ASTRA_DB_ID"],
79
- token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
80
- keyspace=os.environ.get("ASTRA_DB_KEYSPACE"), # this is optional
81
- )
82
-
83
- myCassandraVStore = Cassandra(
71
+ my_astra_db_vstore = AstraDBVectorStore(
84
72
embedding=myEmbedding,
85
- session=None,
86
- keyspace=None,
87
- table_name='vs_test2',
73
+ api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
74
+ token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
75
+ namespace=os.environ.get("ASTRA_DB_KEYSPACE"), # this is optional
76
+ collection_name="mmr_test",
88
77
)
89
- index = VectorStoreIndexWrapper(vectorstore=myCassandraVStore )
78
+ index = VectorStoreIndexWrapper(vectorstore=my_astra_db_vstore )
90
79
----
91
80
92
81
== Populate the vector store
@@ -98,20 +87,20 @@ Note that the last sentence's content is considerably different from the others.
98
87
----
99
88
# declare data
100
89
101
- BASE_SENTENCE_0 = (' The frogs and the toads were meeting in the night '
102
- ' for a party under the moon.' )
90
+ BASE_SENTENCE_0 = (" The frogs and the toads were meeting in the night "
91
+ " for a party under the moon." )
103
92
104
- BASE_SENTENCE_1 = (' There was a party under the moon, that all toads, '
105
- ' with the frogs, decided to throw that night.' )
93
+ BASE_SENTENCE_1 = (" There was a party under the moon, that all toads, "
94
+ " with the frogs, decided to throw that night." )
106
95
107
- BASE_SENTENCE_2 = (' And the frogs and the toads said: "Let us have a party '
108
- ' tonight, as the moon is shining".' )
96
+ BASE_SENTENCE_2 = (" And the frogs and the toads said: \"Let us have a party "
97
+ " tonight, as the moon is shining\"." )
109
98
110
- BASE_SENTENCE_3 = (' I remember that night... toads, along with frogs, '
111
- ' were all busy planning a moonlit celebration.' )
99
+ BASE_SENTENCE_3 = (" I remember that night... toads, along with frogs, "
100
+ " were all busy planning a moonlit celebration." )
112
101
113
- DIFFERENT_SENTENCE = (' For the party, frogs and toads set a rule: '
114
- ' everyone was to wear a purple hat.' )
102
+ DIFFERENT_SENTENCE = (" For the party, frogs and toads set a rule: "
103
+ " everyone was to wear a purple hat." )
115
104
116
105
# insert into index
117
106
texts = [
@@ -122,23 +111,23 @@ texts = [
122
111
DIFFERENT_SENTENCE,
123
112
]
124
113
metadatas = [
125
- {' source': ' Barney\'s story at the pub' },
126
- {' source': ' Barney\'s story at the pub' },
127
- {' source': ' Barney\'s story at the pub' },
128
- {' source': ' Barney\'s story at the pub' },
129
- {' source': ' The chronicles at the village library' },
114
+ {" source": " Barney's story at the pub" },
115
+ {" source": " Barney's story at the pub" },
116
+ {" source": " Barney's story at the pub" },
117
+ {" source": " Barney's story at the pub" },
118
+ {" source": " The chronicles at the village library" },
130
119
]
131
120
----
132
121
+
133
122
. Load the sentences into the vector store and print their IDs.
134
123
+
135
124
[source,python]
136
125
----
137
- ids = myCassandraVStore .add_texts(
126
+ ids = my_astra_db_vstore .add_texts(
138
127
texts,
139
128
metadatas=metadatas,
140
129
)
141
- print('\n' .join(ids))
130
+ print("\n" .join(ids))
142
131
----
143
132
144
133
== Create and compare retrievers
@@ -152,17 +141,17 @@ Ask them a question, and see how the MMR response differs from the similarity re
152
141
+
153
142
[source,python]
154
143
----
155
- QUESTION = ' Tell me about the party that night.'
144
+ QUESTION = " Tell me about the party that night."
156
145
----
157
146
+
158
147
. Create a retriever with similarity search.
159
148
+
160
149
[source,python]
161
150
----
162
- retrieverSim = myCassandraVStore .as_retriever(
163
- search_type=' similarity' ,
151
+ retriever_sim = my_astra_db_vstore .as_retriever(
152
+ search_type=" similarity" ,
164
153
search_kwargs={
165
- 'k' : 2,
154
+ "k" : 2,
166
155
},
167
156
)
168
157
@@ -172,19 +161,19 @@ chainSimSrc = RetrievalQAWithSourcesChain.from_chain_type(
172
161
)
173
162
174
163
responseSimSrc = chainSimSrc.invoke({chainSimSrc.question_key: QUESTION})
175
- print(' Similarity-based chain:' )
176
- print(f' ANSWER : {responseSimSrc[" answer" ].strip()}' )
177
- print(f' SOURCES: {responseSimSrc[" sources" ].strip()}' )
164
+ print(" Similarity-based chain:" )
165
+ print(f" ANSWER : {responseSimSrc[' answer' ].strip()}" )
166
+ print(f" SOURCES: {responseSimSrc[' sources' ].strip()}" )
178
167
----
179
168
+
180
169
. Create a retriever with MMR search.
181
170
+
182
171
[source,python]
183
172
----
184
- retrieverMMR = myCassandraVStore .as_retriever(
185
- search_type=' mmr' ,
173
+ retrieverMMR = my_astra_db_vstore .as_retriever(
174
+ search_type=" mmr" ,
186
175
search_kwargs={
187
- 'k' : 2,
176
+ "k" : 2,
188
177
},
189
178
)
190
179
@@ -194,9 +183,9 @@ chainMMRSrc = RetrievalQAWithSourcesChain.from_chain_type(
194
183
)
195
184
196
185
responseMMRSrc = chainMMRSrc.invoke({chainMMRSrc.question_key: QUESTION})
197
- print(' MMR-based chain:' )
198
- print(f' ANSWER : {responseMMRSrc[" answer" ].strip()}' )
199
- print(f' SOURCES: {responseMMRSrc[" sources" ].strip()}' )
186
+ print(" MMR-based chain:" )
187
+ print(f" ANSWER : {responseMMRSrc[' answer' ].strip()}" )
188
+ print(f" SOURCES: {responseMMRSrc[' sources' ].strip()}" )
200
189
----
201
190
+
202
191
. Run the code and observe the differences in the responses.
0 commit comments