Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Resume RAGulate Queries #552

Merged
merged 9 commits on Jul 9, 2024
24 changes: 13 additions & 11 deletions libs/ragulate/ragstack_ragulate/pipelines/query_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ def __init__(
# so we can just delete a single "app" instead of the whole
# database.
self._tru.reset_database()


total_existing_queries = 0
for dataset in datasets:
queries, golden_set = dataset.get_queries_and_golden_set()
if self.sample_percent < 1.0:
Expand All @@ -99,6 +100,8 @@ def __init__(
app_ids=[dataset.name]
)
existing_queries = existing_records["input"].dropna().tolist()
total_existing_queries += len(existing_queries)

queries = [query for query in queries if query not in existing_queries]

self._queries[dataset.name] = queries
Expand All @@ -108,6 +111,9 @@ def __init__(
metric_count = 4
self._total_feedbacks = self._total_queries * metric_count

# Set finished queries count to total existing queries
self._finished_queries = total_existing_queries

def signal_handler(self, sig, frame):
self._sigint_received = True
self.stop_evaluation("sigint")
Expand Down Expand Up @@ -181,17 +187,14 @@ def query(self):

time.sleep(0.1)
logger.info(
f"Starting query {self.recipe_name} "
f"on {self.script_path}/{self.method_name} "
f"with ingredients: {self.ingredients} "
f"on datasets: {self.dataset_names()}"
f"Starting query {self.recipe_name} on {self.script_path}/{self.method_name} with ingredients: {self.ingredients} on datasets: {self.dataset_names()}"
)
logger.info(
"Progress postfix legend: (q)ueries completed; Evaluations (d)one, "
"(r)unning, (w)aiting, (f)ailed, (s)kipped"
"Progress postfix legend: (q)ueries completed; Evaluations (d)one, (r)unning, (w)aiting, (f)ailed, (s)kipped"
)

self._progress = tqdm(total=(self._total_queries + self._total_feedbacks))
self._progress = tqdm(total=(self._total_queries + self._total_feedbacks), initial=self._finished_queries)


for dataset_name in self._queries:
feedback_functions = [
Expand All @@ -217,10 +220,9 @@ def query(self):
with recorder:
pipeline.invoke(query)
except Exception as e:
# TODO: figure out why the logger isn't working after tru-lens starts. For now use print(). # noqa: E501
# TODO: figure out why the logger isn't working after tru-lens starts. For now use print()
print(
f"ERROR: Query: '{query}' caused exception, skipping. "
f"Exception {e}"
f"ERROR: Query: '{query}' caused exception, skipping. Exception {e}"
)
logger.error(f"Query: '{query}' caused exception: {e}, skipping.")
finally:
Expand Down
Loading