Skip to content

Commit 9b17ff3

Browse files
edumuellerFS, Lee-W, and uranusjr
authored
Add post endpoint for dataset events (#37570)
Co-authored-by: Wei Lee <weilee.rx@gmail.com> Co-authored-by: Tzu-ping Chung <uranusjr@gmail.com>
1 parent c6ba13a commit 9b17ff3

File tree

9 files changed

+250
-47
lines changed

9 files changed

+250
-47
lines changed

airflow/api_connexion/endpoints/dataset_endpoint.py

+48-1
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@
2020
from typing import TYPE_CHECKING
2121

2222
from connexion import NoContent
23+
from marshmallow import ValidationError
2324
from sqlalchemy import delete, func, select
2425
from sqlalchemy.orm import joinedload, subqueryload
2526

2627
from airflow.api_connexion import security
27-
from airflow.api_connexion.exceptions import NotFound
28+
from airflow.api_connexion.endpoints.request_dict import get_json_request_dict
29+
from airflow.api_connexion.exceptions import BadRequest, NotFound
2830
from airflow.api_connexion.parameters import apply_sorting, check_limit, format_datetime, format_parameters
2931
from airflow.api_connexion.schemas.dataset_schema import (
3032
DagScheduleDatasetReference,
@@ -33,22 +35,32 @@
3335
QueuedEvent,
3436
QueuedEventCollection,
3537
TaskOutletDatasetReference,
38+
create_dataset_event_schema,
3639
dataset_collection_schema,
3740
dataset_event_collection_schema,
41+
dataset_event_schema,
3842
dataset_schema,
3943
queued_event_collection_schema,
4044
queued_event_schema,
4145
)
46+
from airflow.datasets import Dataset
47+
from airflow.datasets.manager import dataset_manager
4248
from airflow.models.dataset import DatasetDagRunQueue, DatasetEvent, DatasetModel
49+
from airflow.security import permissions
50+
from airflow.utils import timezone
4351
from airflow.utils.db import get_query_count
52+
from airflow.utils.log.action_logger import action_event_from_permission
4453
from airflow.utils.session import NEW_SESSION, provide_session
54+
from airflow.www.decorators import action_logging
4555
from airflow.www.extensions.init_auth_manager import get_auth_manager
4656

4757
if TYPE_CHECKING:
4858
from sqlalchemy.orm import Session
4959

5060
from airflow.api_connexion.types import APIResponse
5161

62+
RESOURCE_EVENT_PREFIX = "dataset"
63+
5264

5365
@security.requires_access_dataset("GET")
5466
@provide_session
@@ -311,3 +323,38 @@ def delete_dataset_queued_events(
311323
"Queue event not found",
312324
detail=f"Queue event with dataset uri: `{uri}` was not found",
313325
)
326+
327+
328+
@security.requires_access_dataset("POST")
@provide_session
@action_logging(
    event=action_event_from_permission(
        prefix=RESOURCE_EVENT_PREFIX,
        permission=permissions.ACTION_CAN_CREATE,
    ),
)
def create_dataset_event(session: Session = NEW_SESSION) -> APIResponse:
    """
    Create a dataset event for an existing dataset from the JSON request body.

    The body is validated with ``create_dataset_event_schema``. The event is then
    registered through ``dataset_manager.register_dataset_change`` (no task instance
    is passed) and its ``extra`` is tagged with ``from_rest_api = True``.

    :param session: SQLAlchemy session used for the dataset lookup and the event registration.
    :return: The created event, serialized with ``dataset_event_schema``.
    :raises BadRequest: If the request body fails schema validation.
    :raises NotFound: If no dataset with the requested URI exists, or the dataset
        manager does not return an event for it.
    """
    body = get_json_request_dict()
    try:
        json_body = create_dataset_event_schema.load(body)
    except ValidationError as err:
        # Chain the cause so the validation failure stays visible in tracebacks.
        raise BadRequest(detail=str(err)) from err

    uri = json_body["dataset_uri"]
    dataset = session.scalar(select(DatasetModel).where(DatasetModel.uri == uri).limit(1))
    if not dataset:
        raise NotFound(title="Dataset not found", detail=f"Dataset with uri: '{uri}' not found")
    timestamp = timezone.utcnow()
    # ``extra`` is optional and nullable in the API spec: treat both a missing key
    # and an explicit null as an empty mapping before tagging the event.
    extra = json_body.get("extra") or {}
    extra["from_rest_api"] = True
    dataset_event = dataset_manager.register_dataset_change(
        dataset=Dataset(uri),
        timestamp=timestamp,
        extra=extra,
        session=session,
    )
    # The manager returns None when it cannot find the dataset model itself.
    if not dataset_event:
        raise NotFound(title="Dataset not found", detail=f"Dataset with uri: '{uri}' not found")
    return dataset_event_schema.dump(dataset_event)

airflow/api_connexion/openapi/v1.yaml

+50-9
Original file line numberDiff line numberDiff line change
@@ -2129,21 +2129,21 @@ paths:
21292129
$ref: "#/components/responses/NotFound"
21302130

21312131
/datasets/events:
2132-
parameters:
2133-
- $ref: "#/components/parameters/PageLimit"
2134-
- $ref: "#/components/parameters/PageOffset"
2135-
- $ref: "#/components/parameters/OrderBy"
2136-
- $ref: "#/components/parameters/FilterDatasetID"
2137-
- $ref: "#/components/parameters/FilterSourceDAGID"
2138-
- $ref: "#/components/parameters/FilterSourceTaskID"
2139-
- $ref: "#/components/parameters/FilterSourceRunID"
2140-
- $ref: "#/components/parameters/FilterSourceMapIndex"
21412132
get:
21422133
summary: Get dataset events
21432134
description: Get dataset events
21442135
x-openapi-router-controller: airflow.api_connexion.endpoints.dataset_endpoint
21452136
operationId: get_dataset_events
21462137
tags: [Dataset]
2138+
parameters:
2139+
- $ref: "#/components/parameters/PageLimit"
2140+
- $ref: "#/components/parameters/PageOffset"
2141+
- $ref: "#/components/parameters/OrderBy"
2142+
- $ref: "#/components/parameters/FilterDatasetID"
2143+
- $ref: "#/components/parameters/FilterSourceDAGID"
2144+
- $ref: "#/components/parameters/FilterSourceTaskID"
2145+
- $ref: "#/components/parameters/FilterSourceRunID"
2146+
- $ref: "#/components/parameters/FilterSourceMapIndex"
21472147
responses:
21482148
"200":
21492149
description: Success.
@@ -2157,6 +2157,33 @@ paths:
21572157
$ref: "#/components/responses/PermissionDenied"
21582158
"404":
21592159
$ref: "#/components/responses/NotFound"
2160+
post:
2161+
summary: Create dataset event
2162+
description: Create dataset event
2163+
x-openapi-router-controller: airflow.api_connexion.endpoints.dataset_endpoint
2164+
operationId: create_dataset_event
2165+
tags: [Dataset]
2166+
requestBody:
2167+
required: true
2168+
content:
2169+
application/json:
2170+
schema:
2171+
$ref: '#/components/schemas/CreateDatasetEvent'
2172+
responses:
2173+
'200':
2174+
description: Success.
2175+
content:
2176+
application/json:
2177+
schema:
2178+
$ref: '#/components/schemas/DatasetEvent'
2179+
"400":
2180+
$ref: "#/components/responses/BadRequest"
2181+
'401':
2182+
$ref: '#/components/responses/Unauthenticated'
2183+
'403':
2184+
$ref: '#/components/responses/PermissionDenied'
2185+
'404':
2186+
$ref: '#/components/responses/NotFound'
21602187

21612188
/config:
21622189
get:
@@ -4290,6 +4317,20 @@ components:
42904317
description: The dataset event creation time
42914318
nullable: false
42924319

4320+
CreateDatasetEvent:
4321+
type: object
4322+
required:
4323+
- dataset_uri
4324+
properties:
4325+
dataset_uri:
4326+
type: string
4327+
description: The URI of the dataset
4328+
nullable: false
4329+
extra:
4330+
type: object
4331+
description: The dataset event extra
4332+
nullable: true
4333+
42934334
QueuedEvent:
42944335
type: object
42954336
properties:

airflow/api_connexion/schemas/dataset_schema.py

+8
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,16 @@ class DatasetEventCollectionSchema(Schema):
147147
total_entries = fields.Int()
148148

149149

150+
class CreateDatasetEventSchema(Schema):
    """
    Schema for the request body used to create a dataset event.

    Mirrors the ``CreateDatasetEvent`` object of the OpenAPI spec.
    """

    # URI of the dataset the event is created for. Mandatory, as in the API spec,
    # so a body without it fails validation instead of a later key lookup.
    dataset_uri = fields.String(required=True)
    # Optional JSON object carried along with the created event.
    extra = JsonObjectField()
155+
156+
150157
dataset_event_schema = DatasetEventSchema()
151158
dataset_event_collection_schema = DatasetEventCollectionSchema()
159+
create_dataset_event_schema = CreateDatasetEventSchema()
152160

153161

154162
class QueuedEvent(NamedTuple):

airflow/datasets/manager.py

+25-12
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,14 @@ def create_datasets(self, dataset_models: list[DatasetModel], session: Session)
5656
self.notify_dataset_created(dataset=Dataset(uri=dataset_model.uri, extra=dataset_model.extra))
5757

5858
def register_dataset_change(
59-
self, *, task_instance: TaskInstance, dataset: Dataset, extra=None, session: Session, **kwargs
60-
) -> None:
59+
self,
60+
*,
61+
task_instance: TaskInstance | None = None,
62+
dataset: Dataset,
63+
extra=None,
64+
session: Session,
65+
**kwargs,
66+
) -> DatasetEvent | None:
6167
"""
6268
Register dataset related changes.
6369
@@ -71,17 +77,23 @@ def register_dataset_change(
7177
)
7278
if not dataset_model:
7379
self.log.warning("DatasetModel %s not found", dataset)
74-
return
75-
session.add(
76-
DatasetEvent(
77-
dataset_id=dataset_model.id,
78-
source_task_id=task_instance.task_id,
79-
source_dag_id=task_instance.dag_id,
80-
source_run_id=task_instance.run_id,
81-
source_map_index=task_instance.map_index,
82-
extra=extra,
80+
return None
81+
82+
event_kwargs = {
83+
"dataset_id": dataset_model.id,
84+
"extra": extra,
85+
}
86+
if task_instance:
87+
event_kwargs.update(
88+
{
89+
"source_task_id": task_instance.task_id,
90+
"source_dag_id": task_instance.dag_id,
91+
"source_run_id": task_instance.run_id,
92+
"source_map_index": task_instance.map_index,
93+
}
8394
)
84-
)
95+
dataset_event = DatasetEvent(**event_kwargs)
96+
session.add(dataset_event)
8597
session.flush()
8698

8799
self.notify_dataset_changed(dataset=dataset)
@@ -90,6 +102,7 @@ def register_dataset_change(
90102
if dataset_model.consuming_dags:
91103
self._queue_dagruns(dataset_model, session)
92104
session.flush()
105+
return dataset_event
93106

94107
def notify_dataset_created(self, dataset: Dataset):
95108
"""Run applicable notification actions when a dataset is created."""

airflow/providers/fab/auth_manager/security_manager/override.py

+2
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ class FabAirflowSecurityManagerOverride(AirflowSecurityManagerV2):
248248
(permissions.ACTION_CAN_CREATE, permissions.RESOURCE_DAG_RUN),
249249
(permissions.ACTION_CAN_EDIT, permissions.RESOURCE_DAG_RUN),
250250
(permissions.ACTION_CAN_DELETE, permissions.RESOURCE_DAG_RUN),
251+
(permissions.ACTION_CAN_CREATE, permissions.RESOURCE_DATASET),
251252
]
252253
# [END security_user_perms]
253254

@@ -275,6 +276,7 @@ class FabAirflowSecurityManagerOverride(AirflowSecurityManagerV2):
275276
(permissions.ACTION_CAN_DELETE, permissions.RESOURCE_VARIABLE),
276277
(permissions.ACTION_CAN_DELETE, permissions.RESOURCE_XCOM),
277278
(permissions.ACTION_CAN_DELETE, permissions.RESOURCE_DATASET),
279+
(permissions.ACTION_CAN_CREATE, permissions.RESOURCE_DATASET),
278280
]
279281
# [END security_op_perms]
280282

airflow/www/static/js/components/Table/Cells.tsx

+3
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ export const TaskInstanceLink = ({ cell: { value, row } }: CellProps) => {
150150
const { sourceRunId, sourceDagId, sourceMapIndex } = row.original;
151151
const gridUrl = getMetaValue("grid_url");
152152
const dagId = getMetaValue("dag_id");
153+
if (!value || !sourceRunId || !sourceDagId || !gridUrl) {
154+
return null;
155+
}
153156
const stringToReplace = dagId || "__DAG_ID__";
154157
const url = `${gridUrl?.replace(
155158
stringToReplace,

airflow/www/static/js/types/api-generated.ts

+34-25
Original file line numberDiff line numberDiff line change
@@ -684,31 +684,8 @@ export interface paths {
684684
"/datasets/events": {
685685
/** Get dataset events */
686686
get: operations["get_dataset_events"];
687-
parameters: {
688-
query: {
689-
/** The numbers of items to return. */
690-
limit?: components["parameters"]["PageLimit"];
691-
/** The number of items to skip before starting to collect the result set. */
692-
offset?: components["parameters"]["PageOffset"];
693-
/**
694-
* The name of the field to order the results by.
695-
* Prefix a field name with `-` to reverse the sort order.
696-
*
697-
* *New in version 2.1.0*
698-
*/
699-
order_by?: components["parameters"]["OrderBy"];
700-
/** The Dataset ID that updated the dataset. */
701-
dataset_id?: components["parameters"]["FilterDatasetID"];
702-
/** The DAG ID that updated the dataset. */
703-
source_dag_id?: components["parameters"]["FilterSourceDAGID"];
704-
/** The task ID that updated the dataset. */
705-
source_task_id?: components["parameters"]["FilterSourceTaskID"];
706-
/** The DAG run ID that updated the dataset. */
707-
source_run_id?: components["parameters"]["FilterSourceRunID"];
708-
/** The map index that updated the dataset. */
709-
source_map_index?: components["parameters"]["FilterSourceMapIndex"];
710-
};
711-
};
687+
/** Create dataset event */
688+
post: operations["create_dataset_event"];
712689
};
713690
"/config": {
714691
get: operations["get_config"];
@@ -1825,6 +1802,12 @@ export interface components {
18251802
/** @description The dataset event creation time */
18261803
timestamp?: string;
18271804
};
1805+
CreateDatasetEvent: {
1806+
/** @description The URI of the dataset */
1807+
dataset_uri: string;
1808+
/** @description The dataset event extra */
1809+
extra?: { [key: string]: unknown } | null;
1810+
};
18281811
QueuedEvent: {
18291812
/** @description The dataset uri. */
18301813
uri?: string;
@@ -4598,6 +4581,26 @@ export interface operations {
45984581
404: components["responses"]["NotFound"];
45994582
};
46004583
};
4584+
/** Create dataset event */
4585+
create_dataset_event: {
4586+
responses: {
4587+
/** Success. */
4588+
200: {
4589+
content: {
4590+
"application/json": components["schemas"]["DatasetEvent"];
4591+
};
4592+
};
4593+
400: components["responses"]["BadRequest"];
4594+
401: components["responses"]["Unauthenticated"];
4595+
403: components["responses"]["PermissionDenied"];
4596+
404: components["responses"]["NotFound"];
4597+
};
4598+
requestBody: {
4599+
content: {
4600+
"application/json": components["schemas"]["CreateDatasetEvent"];
4601+
};
4602+
};
4603+
};
46014604
get_config: {
46024605
parameters: {
46034606
query: {
@@ -5167,6 +5170,9 @@ export type DatasetCollection = CamelCasedPropertiesDeep<
51675170
export type DatasetEvent = CamelCasedPropertiesDeep<
51685171
components["schemas"]["DatasetEvent"]
51695172
>;
5173+
export type CreateDatasetEvent = CamelCasedPropertiesDeep<
5174+
components["schemas"]["CreateDatasetEvent"]
5175+
>;
51705176
export type QueuedEvent = CamelCasedPropertiesDeep<
51715177
components["schemas"]["QueuedEvent"]
51725178
>;
@@ -5481,6 +5487,9 @@ export type GetDatasetVariables = CamelCasedPropertiesDeep<
54815487
export type GetDatasetEventsVariables = CamelCasedPropertiesDeep<
54825488
operations["get_dataset_events"]["parameters"]["query"]
54835489
>;
5490+
export type CreateDatasetEventVariables = CamelCasedPropertiesDeep<
5491+
operations["create_dataset_event"]["requestBody"]["content"]["application/json"]
5492+
>;
54845493
export type GetConfigVariables = CamelCasedPropertiesDeep<
54855494
operations["get_config"]["parameters"]["query"]
54865495
>;

0 commit comments

Comments
 (0)