Skip to content

Commit 9fe63e1

Browse files
authored
Align knowledge-graph and knowledge-store linting (#540)
1 parent b6024c7 commit 9fe63e1

File tree

10 files changed

+76
-86
lines changed

10 files changed

+76
-86
lines changed

libs/knowledge-graph/notebooks/notebook.ipynb

+26-11
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@
5555
"\n",
5656
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter OpenAI API Key: \")\n",
5757
"os.environ[\"ASTRA_DB_DATABASE_ID\"] = input(\"Enter Astra DB Database ID: \")\n",
58-
"os.environ[\"ASTRA_DB_APPLICATION_TOKEN\"] = getpass.getpass(\"Enter Astra DB Application Token: \")\n",
58+
"os.environ[\"ASTRA_DB_APPLICATION_TOKEN\"] = getpass.getpass(\n",
59+
" \"Enter Astra DB Application Token: \"\n",
60+
")\n",
5961
"\n",
6062
"keyspace = input(\"Enter Astra DB Keyspace (Empty for default): \")\n",
6163
"if keyspace:\n",
@@ -75,8 +77,8 @@
7577
"text": [
7678
"Requirement already satisfied: python-dotenv in /Users/benjamin.chambers/Library/Caches/pypoetry/virtualenvs/knowledge-graph-bxUBmW8M-py3.11/lib/python3.11/site-packages (1.0.1)\n",
7779
"\n",
78-
"\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m23.3.1\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m24.0\u001B[0m\n",
79-
"\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n",
80+
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
81+
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
8082
"Note: you may need to restart the kernel to use updated packages.\n"
8183
]
8284
},
@@ -96,6 +98,7 @@
9698
"# See `env.template` for an example of what you should have there.\n",
9799
"%pip install python-dotenv\n",
98100
"import dotenv\n",
101+
"\n",
99102
"dotenv.load_dotenv()"
100103
]
101104
},
@@ -114,6 +117,7 @@
114117
"source": [
115118
"# Initialize cassandra connection from environment variables.\n",
116119
"import cassio\n",
120+
"\n",
117121
"cassio.init(auto=True)"
118122
]
119123
},
@@ -132,6 +136,7 @@
132136
"source": [
133137
"# Create graph store.\n",
134138
"from ragstack_knowledge_graph.cassandra_graph_store import CassandraGraphStore\n",
139+
"\n",
135140
"graph_store = CassandraGraphStore()"
136141
]
137142
},
@@ -342,6 +347,7 @@
342347
"source": [
343348
"# Render the extracted graph to GraphViz.\n",
344349
"from ragstack_knowledge_graph.render import render_graph_documents\n",
350+
"\n",
345351
"render_graph_documents(graph_documents)"
346352
]
347353
},
@@ -450,7 +456,8 @@
450456
"source": [
451457
"# Example showing extracted entities (nodes)\n",
452458
"from ragstack_knowledge_graph import extract_entities\n",
453-
"extract_entities(llm).invoke({ \"question\": \"Who is Marie Curie?\"})"
459+
"\n",
460+
"extract_entities(llm).invoke({\"question\": \"Who is Marie Curie?\"})"
454461
]
455462
},
456463
{
@@ -474,15 +481,19 @@
474481
"outputs": [],
475482
"source": [
476483
"from operator import itemgetter\n",
477-
"from langchain_core.runnables import RunnableLambda, RunnablePassthrough\n",
484+
"\n",
478485
"from langchain_core.prompts import ChatPromptTemplate\n",
479-
"from ragstack_knowledge_graph import extract_entities\n",
486+
"from langchain_core.runnables import RunnableLambda, RunnablePassthrough\n",
480487
"from langchain_openai import ChatOpenAI\n",
481-
"llm = ChatOpenAI(model_name = \"gpt-4\")\n",
488+
"from ragstack_knowledge_graph import extract_entities\n",
489+
"\n",
490+
"llm = ChatOpenAI(model_name=\"gpt-4\")\n",
491+
"\n",
482492
"\n",
483493
"def _combine_relations(relations):\n",
484494
" return \"\\n\".join(map(repr, relations))\n",
485495
"\n",
496+
"\n",
486497
"ANSWER_PROMPT = (\n",
487498
" \"The original question is given below.\"\n",
488499
" \"This question has been used to retrieve information from a knowledge graph.\"\n",
@@ -494,10 +505,14 @@
494505
")\n",
495506
"\n",
496507
"chain = (\n",
497-
" { \"question\": RunnablePassthrough() }\n",
498-
" | RunnablePassthrough.assign(entities = extract_entities(llm))\n",
499-
" | RunnablePassthrough.assign(triples = itemgetter(\"entities\") | graph_store.as_runnable())\n",
500-
" | RunnablePassthrough.assign(context = itemgetter(\"triples\") | RunnableLambda(_combine_relations))\n",
508+
" {\"question\": RunnablePassthrough()}\n",
509+
" | RunnablePassthrough.assign(entities=extract_entities(llm))\n",
510+
" | RunnablePassthrough.assign(\n",
511+
" triples=itemgetter(\"entities\") | graph_store.as_runnable()\n",
512+
" )\n",
513+
" | RunnablePassthrough.assign(\n",
514+
" context=itemgetter(\"triples\") | RunnableLambda(_combine_relations)\n",
515+
" )\n",
501516
" | ChatPromptTemplate.from_messages([ANSWER_PROMPT])\n",
502517
" | llm\n",
503518
")"

libs/knowledge-graph/pyproject.toml

-20
Original file line numberDiff line numberDiff line change
@@ -40,26 +40,6 @@ setuptools = "^70.0.0"
4040
requires = ["poetry-core"]
4141
build-backend = "poetry.core.masonry.api"
4242

43-
[tool.ruff.lint]
44-
# Enable Pyflakes `E` and `F` codes by default.
45-
select = [
46-
# Pyflakes
47-
"F",
48-
# Pycodestyle
49-
"E",
50-
"W",
51-
# isort
52-
"I001",
53-
]
54-
ignore = []
55-
56-
# Allow unused variables when underscore-prefixed.
57-
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
58-
59-
[tool.ruff.lint.mccabe]
60-
# Unlike Flake8, default to a complexity level of 10.
61-
max-complexity = 10
62-
6343
[tool.mypy]
6444
strict = true
6545
warn_unreachable = true

libs/knowledge-graph/tests/conftest.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,10 @@
77
from langchain.graphs.graph_document import GraphDocument, Node, Relationship
88
from langchain_core.documents import Document
99
from langchain_core.language_models import BaseChatModel
10+
from ragstack_knowledge_graph.cassandra_graph_store import CassandraGraphStore
1011
from testcontainers.core.container import DockerContainer
1112
from testcontainers.core.waiting_utils import wait_for_logs
1213

13-
from ragstack_knowledge_graph.cassandra_graph_store import CassandraGraphStore
14-
1514
load_dotenv()
1615

1716

libs/knowledge-graph/tests/test_extraction.py

-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from langchain_community.graphs.graph_document import Node, Relationship
55
from langchain_core.documents import Document
66
from langchain_core.language_models import BaseChatModel
7-
87
from ragstack_knowledge_graph.extraction import (
98
KnowledgeSchema,
109
KnowledgeSchemaExtractor,

libs/knowledge-graph/tests/test_knowledge_graph.py

-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import pytest
44
from cassandra.cluster import Session
5-
65
from ragstack_knowledge_graph.knowledge_graph import CassandraKnowledgeGraph
76
from ragstack_knowledge_graph.traverse import Node, Relation
87

libs/knowledge-graph/tests/test_schema_inference.py

-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import pytest
44
from langchain_core.documents import Document
55
from langchain_core.language_models import BaseChatModel
6-
76
from ragstack_knowledge_graph.schema_inference import KnowledgeSchemaInferer
87

98
MARIE_CURIE_SOURCE = """

libs/knowledge-store/notebooks/astra_support.ipynb

+14-12
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@
4040
"metadata": {},
4141
"outputs": [],
4242
"source": [
43+
"import requests\n",
44+
"from bs4 import BeautifulSoup\n",
45+
"\n",
4346
"# Use sitemaps to crawl the content\n",
4447
"SITEMAPS = [\n",
4548
" \"https://docs.datastax.com/en/sitemap-astra-db-vector.xml\",\n",
@@ -54,9 +57,6 @@
5457
"\n",
5558
"SITE_PREFIX = \"astra\"\n",
5659
"\n",
57-
"from bs4 import BeautifulSoup\n",
58-
"import requests\n",
59-
"\n",
6060
"\n",
6161
"def load_pages(sitemap_url):\n",
6262
" r = requests.get(\n",
@@ -99,14 +99,14 @@
9999
"metadata": {},
100100
"outputs": [],
101101
"source": [
102+
"from typing import AsyncIterator, Iterable\n",
103+
"\n",
102104
"from langchain_community.document_loaders import AsyncHtmlLoader\n",
103-
"from bs4 import BeautifulSoup\n",
104105
"from langchain_core.documents import Document\n",
105-
"from typing import AsyncIterator, Iterable\n",
106-
"from ragstack_knowledge_store.graph_store import CONTENT_ID\n",
107106
"from markdownify import MarkdownConverter\n",
107+
"from ragstack_knowledge_store.graph_store import CONTENT_ID\n",
108+
"from ragstack_langchain.graph_store.extractors import HtmlInput, HtmlLinkExtractor\n",
108109
"from ragstack_langchain.graph_store.links import add_links\n",
109-
"from ragstack_langchain.graph_store.extractors import HtmlLinkExtractor, HtmlInput\n",
110110
"\n",
111111
"markdown_converter = MarkdownConverter(heading_style=\"ATX\")\n",
112112
"html_link_extractor = HtmlLinkExtractor()\n",
@@ -228,7 +228,7 @@
228228
" import cassio\n",
229229
"\n",
230230
" cassio.init(auto=True)\n",
231-
" from cassio.config import check_resolve_session, check_resolve_keyspace\n",
231+
" from cassio.config import check_resolve_keyspace, check_resolve_session\n",
232232
"\n",
233233
" session = check_resolve_session()\n",
234234
" keyspace = check_resolve_keyspace()\n",
@@ -252,7 +252,9 @@
252252
"cassio.init(auto=True)\n",
253253
"embeddings = OpenAIEmbeddings()\n",
254254
"graph_store = CassandraGraphStore(\n",
255-
" embeddings, node_table=f\"{SITE_PREFIX}_nodes\", targets_table=f\"{SITE_PREFIX}_targets\"\n",
255+
" embeddings,\n",
256+
" node_table=f\"{SITE_PREFIX}_nodes\",\n",
257+
" targets_table=f\"{SITE_PREFIX}_targets\",\n",
256258
")"
257259
]
258260
},
@@ -304,10 +306,10 @@
304306
"metadata": {},
305307
"outputs": [],
306308
"source": [
307-
"from langchain_openai import ChatOpenAI\n",
309+
"from langchain_core.output_parsers import StrOutputParser\n",
308310
"from langchain_core.prompts import ChatPromptTemplate\n",
309311
"from langchain_core.runnables import RunnablePassthrough\n",
310-
"from langchain_core.output_parsers import StrOutputParser\n",
312+
"from langchain_openai import ChatOpenAI\n",
311313
"\n",
312314
"llm = ChatOpenAI(model=\"gpt-4o\")\n",
313315
"\n",
@@ -348,7 +350,7 @@
348350
"metadata": {},
349351
"outputs": [],
350352
"source": [
351-
"from IPython.display import display, Markdown\n",
353+
"from IPython.display import Markdown, display\n",
352354
"\n",
353355
"\n",
354356
"# Helper method to render markdown in responses to a chain.\n",

libs/knowledge-store/notebooks/pdf_keybert.ipynb

+14-7
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@
7272
"\n",
7373
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter OpenAI API Key: \")\n",
7474
"os.environ[\"ASTRA_DB_DATABASE_ID\"] = input(\"Enter Astra DB Database ID: \")\n",
75-
"os.environ[\"ASTRA_DB_APPLICATION_TOKEN\"] = getpass.getpass(\"Enter Astra DB Application Token: \")\n",
75+
"os.environ[\"ASTRA_DB_APPLICATION_TOKEN\"] = getpass.getpass(\n",
76+
" \"Enter Astra DB Application Token: \"\n",
77+
")\n",
7678
"\n",
7779
"keyspace = input(\"Enter Astra DB Keyspace (Empty for default): \")\n",
7880
"if keyspace:\n",
@@ -91,6 +93,7 @@
9193
"# See `env.template` for an example of what you should have there.\n",
9294
"%pip install python-dotenv\n",
9395
"import dotenv\n",
96+
"\n",
9497
"dotenv.load_dotenv()"
9598
]
9699
},
@@ -129,6 +132,7 @@
129132
"source": [
130133
"# Initialize cassandra connection from environment variables.\n",
131134
"import cassio\n",
135+
"\n",
132136
"cassio.init(auto=True)"
133137
]
134138
},
@@ -139,8 +143,9 @@
139143
"outputs": [],
140144
"source": [
141145
"# Create graph store.\n",
142-
"from ragstack_knowledge_store import KnowledgeStore\n",
143-
"graph_store = GraphStore(embeddings)"
146+
"from ragstack_langchain.graph_store import CassandraGraphStore\n",
147+
"\n",
148+
"graph_store = CassandraGraphStore(embeddings)"
144149
]
145150
},
146151
{
@@ -191,10 +196,11 @@
191196
"from keybert import KeyBERT\n",
192197
"\n",
193198
"kw_model = KeyBERT()\n",
194-
"keywords = kw_model.extract_keywords([doc.page_content for doc in pages],\n",
195-
" stop_words='english')\n",
199+
"keywords = kw_model.extract_keywords(\n",
200+
" [doc.page_content for doc in pages], stop_words=\"english\"\n",
201+
")\n",
196202
"\n",
197-
"for (doc, kws) in zip(pages, keywords):\n",
203+
"for doc, kws in zip(pages, keywords):\n",
198204
" # Consider only taking keywords within a certain distance?\n",
199205
" doc.metadata[\"keywords\"] = [kw for (kw, _) in kws]\n",
200206
"pages[0]"
@@ -239,9 +245,9 @@
239245
"outputs": [],
240246
"source": [
241247
"# Retrieve and generate using the relevant snippets of the blog.\n",
242-
"from langchain_core.runnables import RunnablePassthrough\n",
243248
"from langchain_core.output_parsers import StrOutputParser\n",
244249
"from langchain_core.prompts import ChatPromptTemplate\n",
250+
"from langchain_core.runnables import RunnablePassthrough\n",
245251
"\n",
246252
"retriever0 = graph_store.as_retriever(depth=0)\n",
247253
"retriever1 = graph_store.as_retriever(depth=1)\n",
@@ -253,6 +259,7 @@
253259
"\"\"\"\n",
254260
"prompt = ChatPromptTemplate.from_template(template)\n",
255261
"\n",
262+
"\n",
256263
"def format_docs(docs):\n",
257264
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
258265
"\n",

libs/knowledge-store/pyproject.toml

-24
Original file line numberDiff line numberDiff line change
@@ -33,30 +33,6 @@ numpy = [
3333
requires = ["poetry-core"]
3434
build-backend = "poetry.core.masonry.api"
3535

36-
37-
[tool.ruff]
38-
line-length = 98
39-
40-
[tool.ruff.lint]
41-
# Enable Pyflakes `E` and `F` codes by default.
42-
select = [
43-
# Pyflakes
44-
"F",
45-
# Pycodestyle
46-
"E",
47-
"W",
48-
# isort
49-
"I001",
50-
]
51-
ignore = []
52-
53-
# Allow unused variables when underscore-prefixed.
54-
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
55-
56-
[tool.ruff.lint.mccabe]
57-
# Unlike Flake8, default to a complexity level of 10.
58-
max-complexity = 10
59-
6036
[tool.mypy]
6137
strict = true
6238
warn_unreachable = true

0 commit comments

Comments
 (0)