Pyspark (dagster-pyspark)
- dagster_pyspark.PySparkResource ResourceDefinition [source]
- This resource provides access to a PySpark SparkSession for executing PySpark code within Dagster.
- Example:

 from dagster import job, op
 from dagster_pyspark import PySparkResource

 @op
 def my_op(pyspark: PySparkResource):
     spark_session = pyspark.spark_session
     dataframe = spark_session.read.json("examples/src/main/resources/people.json")

 @job(
     resource_defs={
         "pyspark": PySparkResource(
             spark_config={
                 "spark.executor.memory": "2g"
             }
         )
     }
 )
 def my_spark_job():
     my_op()
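- The same resource can also be attached to software-defined assets through Definitions. A minimal sketch, assuming a Dagster release where PySparkResource is a ConfigurableResource that can be requested as a typed parameter (the people_count asset name and the JSON path are illustrative):

 from dagster import Definitions, asset
 from dagster_pyspark import PySparkResource

 @asset
 def people_count(pyspark: PySparkResource) -> int:
     # Read a JSON file and return a plain int so the default IO manager can store it.
     spark_session = pyspark.spark_session
     dataframe = spark_session.read.json("examples/src/main/resources/people.json")
     return dataframe.count()

 defs = Definitions(
     assets=[people_count],
     resources={
         "pyspark": PySparkResource(spark_config={"spark.executor.memory": "2g"}),
     },
 )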
Legacy
- dagster_pyspark.pyspark_resource ResourceDefinition [source]
- This resource provides access to a PySpark SparkSession for executing PySpark code within Dagster.
- Example:

 from dagster import job, op
 from dagster_pyspark import pyspark_resource

 @op(required_resource_keys={"pyspark"})
 def my_op(context):
     spark_session = context.resources.pyspark.spark_session
     dataframe = spark_session.read.json("examples/src/main/resources/people.json")

 my_pyspark_resource = pyspark_resource.configured(
     {"spark_conf": {"spark.executor.memory": "2g"}}
 )

 @job(resource_defs={"pyspark": my_pyspark_resource})
 def my_spark_job():
     my_op()
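- Instead of calling .configured, the same spark_conf values can typically be supplied at launch time through run config. A minimal sketch under that assumption (spark_job_from_run_config and count_people are illustrative names):

 from dagster import job, op
 from dagster_pyspark import pyspark_resource

 @op(required_resource_keys={"pyspark"})
 def count_people(context):
     spark_session = context.resources.pyspark.spark_session
     count = spark_session.read.json("examples/src/main/resources/people.json").count()
     context.log.info(str(count))

 @job(resource_defs={"pyspark": pyspark_resource})
 def spark_job_from_run_config():
     count_people()

 if __name__ == "__main__":
     # The "spark_conf" key mirrors the configured() example above.
     spark_job_from_run_config.execute_in_process(
         run_config={
             "resources": {
                 "pyspark": {"config": {"spark_conf": {"spark.executor.memory": "2g"}}}
             }
         }
     )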