Run black pre-commit on python code in docs
@@ -99,8 +99,6 @@ repos:
      - id: markdownlint
        args: ["--fix"]

  # Run tools on Jupyter notebooks

  # strip output from Jupyter notebooks
  - repo: https://github.com/kynan/nbstripout
    rev: 0.5.0
@@ -118,6 +116,13 @@ repos:
        args: [--target-version=py39]
      - id: nbqa-flake8

  # Run black on python code blocks in documentation files.
  - repo: https://github.com/asottile/blacken-docs
    rev: v1.12.1
    hooks:
      - id: blacken-docs
        args: [--target-version=py39]

# Docker hooks do not work in pre-commit.ci
# See: <https://github.com/pre-commit-ci/issues/issues/11>
ci:
@@ -300,34 +300,44 @@ A few suggestions have been made regarding using Docker Stacks with spark.

Using Spark session for hadoop 2.7.3

```py
```python
import os

# !ls /usr/local/spark/jars/hadoop* # to figure out what version of hadoop
os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages "org.apache.hadoop:hadoop-aws:2.7.3" pyspark-shell'
os.environ[
    "PYSPARK_SUBMIT_ARGS"
] = '--packages "org.apache.hadoop:hadoop-aws:2.7.3" pyspark-shell'

import pyspark

myAccessKey = input()
mySecretKey = input()

spark = pyspark.sql.SparkSession.builder \
    .master("local[*]") \
    .config("spark.hadoop.fs.s3a.access.key", myAccessKey) \
    .config("spark.hadoop.fs.s3a.secret.key", mySecretKey) \
spark = (
    pyspark.sql.SparkSession.builder.master("local[*]")
    .config("spark.hadoop.fs.s3a.access.key", myAccessKey)
    .config("spark.hadoop.fs.s3a.secret.key", mySecretKey)
    .getOrCreate()
)

df = spark.read.parquet("s3://myBucket/myKey")
```

Using Spark context for hadoop 2.6.0

```py
```python
import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.amazonaws:aws-java-sdk:1.10.34,org.apache.hadoop:hadoop-aws:2.6.0 pyspark-shell'

os.environ[
    "PYSPARK_SUBMIT_ARGS"
] = "--packages com.amazonaws:aws-java-sdk:1.10.34,org.apache.hadoop:hadoop-aws:2.6.0 pyspark-shell"

import pyspark

sc = pyspark.SparkContext("local[*]")

from pyspark.sql import SQLContext

sqlContext = SQLContext(sc)

hadoopConf = sc._jsc.hadoopConfiguration()
@@ -346,14 +356,20 @@ Ref: <https://github.com/jupyter/docker-stacks/issues/127>

```python
import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars /home/jovyan/spark-streaming-kafka-assembly_2.10-1.6.1.jar pyspark-shell'

os.environ[
    "PYSPARK_SUBMIT_ARGS"
] = "--jars /home/jovyan/spark-streaming-kafka-assembly_2.10-1.6.1.jar pyspark-shell"
import pyspark
from pyspark.streaming.kafka import KafkaUtils
from pyspark.streaming import StreamingContext

sc = pyspark.SparkContext()
ssc = StreamingContext(sc,1)
ssc = StreamingContext(sc, 1)
broker = "<my_broker_ip>"
directKafkaStream = KafkaUtils.createDirectStream(ssc, ["test1"], {"metadata.broker.list": broker})
directKafkaStream = KafkaUtils.createDirectStream(
    ssc, ["test1"], {"metadata.broker.list": broker}
)
directKafkaStream.pprint()
ssc.start()
```
@@ -25,7 +25,7 @@ Those logs are still available but only in the container's logs.
If you want to make them appear in the notebook, you can overwrite the configuration in a user level IPython kernel profile.
To do that you have to uncomment the following line in your `~/.ipython/profile_default/ipython_kernel_config.py` and restart the kernel.

```Python
```python
c.IPKernelApp.capture_fd_output = True
```
@@ -91,7 +91,7 @@ In a Python notebook.
from pyspark.sql import SparkSession

# Spark session & context
spark = SparkSession.builder.master('local').getOrCreate()
spark = SparkSession.builder.master("local").getOrCreate()
sc = spark.sparkContext

# Sum of the first 100 whole numbers
@@ -143,7 +143,7 @@ sdf_len(sc, 100, repartition = 1) %>%
Spylon kernel instantiates a `SparkContext` for you in variable `sc` after you configure Spark
options in a `%%init_spark` magic cell.

```python
```text
%%init_spark
# Configure Spark to use a local master
launcher.master = "local"
@@ -183,7 +183,7 @@ see [Spark Configuration][spark-conf] for more information.
from pyspark.sql import SparkSession

# Spark session & context
spark = SparkSession.builder.master('spark://master:7077').getOrCreate()
spark = SparkSession.builder.master("spark://master:7077").getOrCreate()
sc = spark.sparkContext

# Sum of the first 100 whole numbers
@@ -234,7 +234,7 @@ sdf_len(sc, 100, repartition = 1) %>%
Spylon kernel instantiates a `SparkContext` for you in variable `sc` after you configure Spark
options in a `%%init_spark` magic cell.

```python
```text
%%init_spark
# Configure Spark to use a local master
launcher.master = "spark://master:7077"
@@ -264,8 +264,7 @@ from pyspark.sql import SparkSession
spark = (
    SparkSession.builder.appName("elasticsearch")
    .config(
        "spark.jars.packages",
        "org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0"
        "spark.jars.packages", "org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0"
    )
    .getOrCreate()
)
@@ -292,7 +291,7 @@ The `jupyter/tensorflow-notebook` image supports the use of
```python
import tensorflow as tf

hello = tf.Variable('Hello World!')
hello = tf.Variable("Hello World!")

sess = tf.Session()
init = tf.global_variables_initializer()
@@ -306,7 +305,7 @@ sess.run(hello)
```python
import tensorflow as tf

hello = tf.Variable('Hello Distributed World!')
hello = tf.Variable("Hello Distributed World!")

server = tf.train.Server.create_local_server()
sess = tf.Session(server.target)
@@ -55,6 +55,7 @@ All the taggers are inherited from `TaggerInterface`:
```python
class TaggerInterface:
    """Common interface for all taggers"""

    @staticmethod
    def tag_value(container) -> str:
        raise NotImplementedError
@@ -84,6 +85,7 @@ All the other manifest classes are inherited from `ManifestInterface`:
```python
class ManifestInterface:
    """Common interface for all manifests"""

    @staticmethod
    def markdown_piece(container) -> str:
        raise NotImplementedError
@@ -97,11 +99,9 @@ class ManifestInterface:
class AptPackagesManifest(ManifestInterface):
    @staticmethod
    def markdown_piece(container) -> str:
        return "\n".join([
            "## Apt Packages",
            "",
            quoted_output(container, "apt list --installed")
        ])
        return "\n".join(
            ["## Apt Packages", "", quoted_output(container, "apt list --installed")]
        )
```

- `quoted_output` simply runs the command inside the container using `DockerRunner.run_simple_command` and wraps the output in triple backticks to create a valid piece of markdown.
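For illustration only, a minimal sketch of what such a helper could look like; the exact signature of `DockerRunner.run_simple_command` and the wrapping details are assumptions here, not the repository's actual implementation:

```python
# Hypothetical sketch: assumes DockerRunner.run_simple_command(container, cmd)
# returns the command's stdout as a string; the real helper may differ.
def quoted_output(container, cmd: str) -> str:
    output = DockerRunner.run_simple_command(container, cmd)
    fence = "`" * 3  # triple backticks
    # Wrap the raw output in a fenced block so it renders verbatim in markdown
    return "\n".join([fence, output, fence])
```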