Add ruff rules for bandit #555

Merged 2 commits on Jul 10, 2024
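
This PR enables the Bandit-derived security checks that Ruff ships as its flake8-bandit ("S") rule set; the edits below fix or suppress the findings those rules raise. The configuration change itself is not visible in this diff excerpt, but a typical pyproject.toml snippet for turning the rules on looks roughly like this (a sketch, not the exact change from this PR):

    # pyproject.toml (hypothetical sketch; section names per Ruff's documented config)
    [tool.ruff.lint]
    extend-select = ["S"]  # flake8-bandit security rules

    [tool.ruff.lint.per-file-ignores]
    # assert is expected in pytest code, so S101 is commonly ignored for tests
    "**/tests/**" = ["S101"]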
2 changes: 1 addition & 1 deletion docker/examples/basic/app.py
@@ -32,4 +32,4 @@ def hello():

# Execute the application
if __name__ == "__main__":
-app.run(port=8080, host="0.0.0.0")
+app.run(port=8080)
2 changes: 1 addition & 1 deletion docker/examples/multistage/app.py
@@ -32,4 +32,4 @@ def hello():

# Execute the application
if __name__ == "__main__":
-app.run(port=8080, host="0.0.0.0")
+app.run(port=8080)
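
Both app.py changes above address Bandit's hardcoded-bind-all-interfaces check (S104 in Ruff) by dropping the explicit 0.0.0.0 binding, so Flask falls back to listening on localhost. If a containerized example does need to listen on every interface, one option is to take the host from the environment rather than hardcoding it. A minimal sketch of that pattern — the APP_HOST variable is hypothetical and not part of these examples:

    import os

    from flask import Flask

    app = Flask(__name__)


    @app.route("/")
    def hello():
        return "Hello, world!"


    # Execute the application
    if __name__ == "__main__":
        # Default to localhost; opt in to 0.0.0.0 explicitly via the environment.
        app.run(port=8080, host=os.getenv("APP_HOST", "127.0.0.1"))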
4 changes: 2 additions & 2 deletions docs/modules/examples/pages/langchain-unstructured-astra.adoc
@@ -86,7 +86,7 @@ load_dotenv()
url = "https://raw.githubusercontent.com/datastax/ragstack-ai/48bc55e7dc4de6a8b79fcebcedd242dc1254dd63/examples/notebooks/resources/attention_pages_9_10.pdf"
file_path = "./attention_pages_9_10.pdf"

-response = requests.get(url)
+response = requests.get(url, timeout=30)
if response.status_code == 200:
with open(file_path, "wb") as file:
file.write(response.content)
@@ -264,7 +264,7 @@ load_dotenv()
url = "https://raw.githubusercontent.com/datastax/ragstack-ai/48bc55e7dc4de6a8b79fcebcedd242dc1254dd63/examples/notebooks/resources/attention_pages_9_10.pdf"
file_path = "./attention_pages_9_10.pdf"

-response = requests.get(url)
+response = requests.get(url, timeout=30)
if response.status_code == 200:
with open(file_path, "wb") as file:
file.write(response.content)
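
The repeated requests.get edits in these documentation pages satisfy Bandit's request-without-timeout check (S113 in Ruff): a request with no timeout can hang indefinitely. A small helper capturing the same pattern — a sketch rather than code from this repository, and it uses raise_for_status() instead of the status-code check shown in the docs:

    import requests


    def download(url: str, dest: str, timeout: float = 30.0) -> None:
        """Fetch a file over HTTP with an explicit timeout and write it to disk."""
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # fail loudly instead of silently saving an error page
        with open(dest, "wb") as file:
            file.write(response.content)


    download("https://arxiv.org/pdf/1706.03762.pdf", "./attention.pdf")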
14 changes: 10 additions & 4 deletions docs/modules/examples/pages/langchain_multimodal_gemini.adoc
@@ -47,7 +47,10 @@ Let's see if Gemini Pro Vision can identify a part to an espresso machine and te
----
import requests

-source_img_data = requests.get('https://drive.google.com/uc?export=view&id=15ddcn-AIxpvRdWcFGvIr77XLWdo4Maof').content
+source_img_data = requests.get(
+'https://drive.google.com/uc?export=view&id=15ddcn-AIxpvRdWcFGvIr77XLWdo4Maof',
+timeout=30,
+).content
with open('coffee_maker_part.png', 'wb') as handler:
handler.write(source_img_data)
----
@@ -162,7 +165,7 @@ for i in range(len(df)):

# Download this product's image and save it to the Colab filesystem.
# In a production system this binary data would be stored in Google Cloud Storage
-img_data = requests.get(image).content
+img_data = requests.get(image, timeout=30).content
with open(f'{name}.png', 'wb') as handler:
handler.write(img_data)

@@ -303,7 +306,7 @@ from langchain.schema.messages import HumanMessage
from vertexai.preview.vision_models import MultiModalEmbeddingModel, Image
from astrapy.db import AstraDB

-source_img_data = requests.get('https://drive.google.com/uc?export=view&id=15ddcn-AIxpvRdWcFGvIr77XLWdo4Maof').content
+source_img_data = requests.get(
+'https://drive.google.com/uc?export=view&id=15ddcn-AIxpvRdWcFGvIr77XLWdo4Maof',
+timeout=30,
+).content
with open('coffee_maker_part.png', 'wb') as handler:
handler.write(source_img_data)

@@ -343,7 +349,7 @@ for i in range(len(df)):

# Download this product's image and save it to your local filesystem.
# In a production system this binary data would be stored in Google Cloud Storage
-img_data = requests.get(image).content
+img_data = requests.get(image, timeout=30).content
with open(f'{name}.png', 'wb') as handler:
handler.write(img_data)

2 changes: 1 addition & 1 deletion docs/modules/examples/pages/llama-parse-astra.adoc
@@ -83,7 +83,7 @@ Settings.embed_model = OpenAIEmbedding(
url = "https://arxiv.org/pdf/1706.03762.pdf"
file_path = "./attention.pdf"

-response = requests.get(url)
+response = requests.get(url, timeout=30)
if response.status_code == 200:
with open(file_path, "wb") as file:
file.write(response.content)
2 changes: 1 addition & 1 deletion docs/modules/examples/partials/llama-parse.adoc
@@ -31,7 +31,7 @@ Settings.embed_model = OpenAIEmbedding(
# Download a PDF for indexing
url = "https://arxiv.org/pdf/1706.03762.pdf"
file_path = "./attention.pdf"
-response = requests.get(url)
+response = requests.get(url, timeout=30)
if response.status_code == 200:
with open(file_path, "wb") as file:
file.write(response.content)
2 changes: 1 addition & 1 deletion examples/evaluation/tru_dashboard.py
@@ -1,4 +1,4 @@
import tru_shared

tru = tru_shared.init_tru()
-tru.run_dashboard(address="0.0.0.0", port=8501, force=True)
+tru.run_dashboard(force=True)
7 changes: 5 additions & 2 deletions examples/notebooks/conftest.py
@@ -1,5 +1,6 @@
import logging
import os
+import tempfile
import time

from astrapy.db import AstraDB
@@ -16,9 +17,11 @@ def get_required_env(name) -> str:

# vertex-ai
if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
with open("/tmp/gcloud-account-key.json", "w") as f:
with tempfile.NamedTemporaryFile(
prefix="gcloud-account-key", suffix=".json", mode="w", delete=False
) as f:
f.write(os.getenv("GCLOUD_ACCOUNT_KEY_JSON", ""))
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/tmp/gcloud-account-key.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = f.name

client = AstraDB(
token=get_required_env("ASTRA_DB_APPLICATION_TOKEN"),
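
Writing credentials to a fixed path such as /tmp/gcloud-account-key.json trips Bandit's hardcoded-temporary-file check (S108 in Ruff). NamedTemporaryFile with delete=False creates an unpredictably named file that survives the with block, so its path can still be handed to GOOGLE_APPLICATION_CREDENTIALS. The same idea in isolation — a sketch, not the exact conftest code:

    import os
    import tempfile

    # A uniquely named file instead of a predictable /tmp path.
    with tempfile.NamedTemporaryFile(
        prefix="gcloud-account-key", suffix=".json", mode="w", delete=False
    ) as f:
        f.write(os.getenv("GCLOUD_ACCOUNT_KEY_JSON", ""))

    # delete=False keeps the file on disk after the context manager exits.
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = f.name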
2 changes: 1 addition & 1 deletion examples/notebooks/langchain-unstructured-astra.ipynb
@@ -69,7 +69,7 @@
"import requests\n",
"\n",
"url = \"https://raw.githubusercontent.com/datastax/ragstack-ai/main/examples/notebooks/resources/attention_pages_9_10.pdf\"\n",
"response = requests.get(url)\n",
"response = requests.get(url, timeout=30)\n",
"with open(\"attention_pages_9_10.pdf\", \"wb\") as file:\n",
" file.write(response.content)"
]
12 changes: 8 additions & 4 deletions examples/notebooks/langchain_multimodal_gemini.ipynb
@@ -146,12 +146,15 @@
" auth.authenticate_user()\n",
"except ImportError:\n",
" import os\n",
" import tempfile\n",
"\n",
" if \"GOOGLE_APPLICATION_CREDENTIALS\" not in os.environ:\n",
" credentials = getpass(\"Enter Google JSON credentials file: \")\n",
" with open(\"/tmp/gcloud-account-key.json\", \"w\") as f:\n",
" with tempfile.NamedTemporaryFile(\n",
" prefix=\"gcloud-account-key\", suffix=\".json\", delete=False\n",
" ) as f:\n",
" f.write(credentials)\n",
" os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = \"/tmp/gcloud-account-key.json\""
" os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = f.name"
]
},
{
@@ -194,7 +197,8 @@
"import requests\n",
"\n",
"source_img_data = requests.get(\n",
" \"https://drive.google.com/uc?export=view&id=15ddcn-AIxpvRdWcFGvIr77XLWdo4Maof\"\n",
" \"https://drive.google.com/uc?export=view&id=15ddcn-AIxpvRdWcFGvIr77XLWdo4Maof\",\n",
" timeout=30,\n",
").content\n",
"with open(\"coffee_maker_part.png\", \"wb\") as handler:\n",
" handler.write(source_img_data)"
@@ -607,7 +611,7 @@
"\n",
" # Download this product's image and save it to the Colab filesystem.\n",
" # In a production system this binary data would be stored in Google Cloud Storage\n",
" img_data = requests.get(image).content\n",
" img_data = requests.get(image, timeout=30).content\n",
" with open(f\"{name}.png\", \"wb\") as handler:\n",
" handler.write(img_data)\n",
"\n",
2 changes: 1 addition & 1 deletion examples/notebooks/llama-parse-astra.ipynb
@@ -148,7 +148,7 @@
"file_path = \"./attention.pdf\"\n",
"\n",
"# Perform the HTTP request\n",
"response = requests.get(url)\n",
"response = requests.get(url, timeout=30)\n",
"\n",
"# Check if the request was successful\n",
"if response.status_code == 200:\n",
7 changes: 5 additions & 2 deletions libs/e2e-tests/e2e_tests/conftest.py
@@ -1,6 +1,7 @@
import logging
import os
import pathlib
+import tempfile
import time

import pytest
@@ -197,6 +198,8 @@ def _report_to_file(stats_str: str, filename: str, report_lines: list):

# vertex-ai
if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
with open("/tmp/gcloud-account-key.json", "w") as f:
with tempfile.NamedTemporaryFile(
prefix="gcloud-account-key", suffix=".json", mode="w", delete=False
) as f:
f.write(os.getenv("GCLOUD_ACCOUNT_KEY_JSON", ""))
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/tmp/gcloud-account-key.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = f.name
4 changes: 2 additions & 2 deletions libs/e2e-tests/e2e_tests/langchain/test_astra.py
@@ -74,7 +74,7 @@ def test_wrong_connection_parameters(vectorstore: AstraDBVectorStore):
AstraDBVectorStore(
collection_name="something",
embedding=MockEmbeddings(),
token="xxxxx",
token="xxxxx", # noqa: S106
# we assume that post 1234 is not open locally
api_endpoint="https://locahost:1234",
)
@@ -90,7 +90,7 @@ def test_wrong_connection_parameters(vectorstore: AstraDBVectorStore):
AstraDBVectorStore(
collection_name="something",
embedding=MockEmbeddings(),
token="this-is-a-wrong-token",
token="this-is-a-wrong-token", # noqa: S106
api_endpoint=api_endpoint,
)
pytest.fail("Should have thrown exception")
2 changes: 1 addition & 1 deletion libs/e2e-tests/e2e_tests/langchain/test_cassandra_tool.py
@@ -39,7 +39,7 @@ def test_tool_with_openai_tool():
f"""
INSERT INTO default_keyspace.tool_table_users (user_id, user_name)
VALUES ({user_id}, 'my_user');
"""
""" # noqa: S608
)
db = CassandraDatabase()

4 changes: 2 additions & 2 deletions libs/e2e-tests/e2e_tests/llama_index/test_astra.py
@@ -97,7 +97,7 @@ def test_ingest_errors(environment: Environment):
def test_wrong_connection_parameters(environment: Environment):
try:
AstraDBVectorStore(
token="xxxxx",
token="xxxxx", # noqa: S106
# we assume that post 1234 is not open locally
api_endpoint="https://locahost:1234",
collection_name="something",
@@ -113,7 +113,7 @@ def test_wrong_connection_parameters(environment: Environment):
try:
print("api_endpoint:", api_endpoint)
AstraDBVectorStore(
token="this-is-a-wrong-token",
token="this-is-a-wrong-token", # noqa: S106
api_endpoint=api_endpoint,
collection_name="something",
embedding_dimension=1536,
@@ -36,7 +36,7 @@ def test_tool_with_openai_tool():
f"""
INSERT INTO default_keyspace.tool_table_users (user_id, user_name)
VALUES ({user_id}, 'my_user');
"""
""" # noqa: S608
)
db = CassandraDatabase()

21 changes: 17 additions & 4 deletions libs/knowledge-graph/ragstack_knowledge_graph/knowledge_graph.py
@@ -1,4 +1,5 @@
import json
+import re
from itertools import repeat
from typing import Any, Dict, Iterable, Optional, Sequence, Tuple, Union, cast

@@ -29,6 +30,9 @@ def _parse_node(row: Any) -> Node:
)


+_CQL_IDENTIFIER_PATTERN = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*")


class CassandraKnowledgeGraph:
"""Cassandra Knowledge Graph.

@@ -56,6 +60,15 @@ def __init__(
session = check_resolve_session(session)
keyspace = check_resolve_keyspace(keyspace)

+if not _CQL_IDENTIFIER_PATTERN.fullmatch(keyspace):
+raise ValueError(f"Invalid keyspace: {keyspace}")
+
+if not _CQL_IDENTIFIER_PATTERN.fullmatch(node_table):
+raise ValueError(f"Invalid node table name: {node_table}")
+
+if not _CQL_IDENTIFIER_PATTERN.fullmatch(edge_table):
+raise ValueError(f"Invalid edge table name: {edge_table}")

self._text_embeddings = text_embeddings
self._text_embeddings_dim = (
# Embedding vectors must have dimension:
@@ -78,23 +91,23 @@ def __init__(
f"""INSERT INTO {keyspace}.{node_table} (
name, type, text_embedding, properties_json
) VALUES (?, ?, ?, ?)
"""
""" # noqa: S608
)

self._insert_relationship = self._session.prepare(
f"""
INSERT INTO {keyspace}.{edge_table} (
source_name, source_type, target_name, target_type, edge_type
) VALUES (?, ?, ?, ?, ?)
"""
""" # noqa: S608
)

self._query_relationship = self._session.prepare(
f"""
SELECT name, type, properties_json
FROM {keyspace}.{node_table}
WHERE name = ? AND type = ?
"""
""" # noqa: S608
)

self._query_nodes_by_embedding = self._session.prepare(
Expand All @@ -103,7 +116,7 @@ def __init__(
FROM {keyspace}.{node_table}
ORDER BY text_embedding ANN OF ?
LIMIT ?
"""
""" # noqa: S608
)

def _apply_schema(self) -> None:
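
The # noqa: S608 markers above suppress Ruff's hardcoded-SQL-expression check for the f-string-built CQL statements. The suppression is reasonable here because the interpolated keyspace and table names are now validated against _CQL_IDENTIFIER_PATTERN, and all values are bound through prepared-statement placeholders rather than interpolated. A minimal sketch of that pattern with the cassandra-driver, assuming a reachable cluster and an existing table (the keyspace and table names here are placeholders):

    import re

    from cassandra.cluster import Cluster

    _CQL_IDENTIFIER_PATTERN = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*")

    keyspace, table = "default_keyspace", "nodes"
    for identifier in (keyspace, table):
        # Reject identifiers that could smuggle CQL syntax into the statement.
        if not _CQL_IDENTIFIER_PATTERN.fullmatch(identifier):
            raise ValueError(f"Invalid CQL identifier: {identifier}")

    session = Cluster().connect()
    # Identifiers are validated above; values go through bound parameters.
    insert = session.prepare(
        f"INSERT INTO {keyspace}.{table} (name, type) VALUES (?, ?)"  # noqa: S608
    )
    session.execute(insert, ("example-node", "document"))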
10 changes: 8 additions & 2 deletions libs/knowledge-graph/ragstack_knowledge_graph/runnables.py
@@ -39,8 +39,14 @@ def extract_entities(
`{format_instructions}` which describe how to produce the output.
"""
prompt = ChatPromptTemplate.from_messages([keyword_extraction_prompt])
assert "question" in prompt.input_variables
assert "format_instructions" in prompt.input_variables
if "question" not in prompt.input_variables:
raise ValueError(
"Missing 'question' placeholder in extraction prompt template."
)
if "format_instructions" not in prompt.input_variables:
raise ValueError(
"Missing 'format_instructions' placeholder in extraction prompt template."
)

class SimpleNode(BaseModel):
"""Represents a node in a graph with associated properties."""
1 change: 1 addition & 0 deletions libs/knowledge-store/notebooks/astra_support.ipynb
@@ -68,6 +68,7 @@
" \"User-Agent\": \"Mozilla/5.0 (X11; Linux x86_64; rv:58.0) Gecko/20100101 \"\n",
" \"Firefox/58.0\",\n",
" },\n",
" timeout=30,\n",
" )\n",
" xml = r.text\n",
"\n",
6 changes: 5 additions & 1 deletion libs/knowledge-store/ragstack_knowledge_store/_mmr_helper.py
@@ -127,7 +127,11 @@ def _pop_candidate(self, candidate_id: str) -> NDArray[np.float32]:
"""
# Get the embedding for the id.
index = self.candidate_id_to_index.pop(candidate_id)
-assert self.candidates[index].id == candidate_id
+if not self.candidates[index].id == candidate_id:
+raise ValueError(
+"ID in self.candidate_id_to_index doesn't match the ID of the "
+"corresponding index in self.candidates"
+)
embedding: NDArray[np.float32] = self.candidate_embeddings[index].copy()

# Swap that index with the last index in the candidates and