From 6ee21a77d2a4c915602f275d359e29e0069994bd Mon Sep 17 00:00:00 2001
From: Ayaz Salikhov
Date: Thu, 17 Mar 2022 15:59:28 +0200
Subject: [PATCH] Run black pre-commit on python code in docs

---
 .pre-commit-config.yaml |  9 +++++++--
 docs/using/recipes.md   | 40 ++++++++++++++++++++++++++++------------
 docs/using/specifics.md | 17 ++++++++---------
 tagging/README.md       | 10 +++++-----
 4 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8d9a956e..e9928ce9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -99,8 +99,6 @@ repos:
       - id: markdownlint
         args: ["--fix"]
 
-  # Run tools on Jupyter notebooks
-
   # strip output from Jupyter notebooks
   - repo: https://github.com/kynan/nbstripout
     rev: 0.5.0
@@ -118,6 +116,13 @@ repos:
         args: [--target-version=py39]
       - id: nbqa-flake8
 
+  # Run black on python code blocks in documentation files.
+  - repo: https://github.com/asottile/blacken-docs
+    rev: v1.12.1
+    hooks:
+      - id: blacken-docs
+        args: [--target-version=py39]
+
 # Docker hooks do not work in pre-commit.ci
 # See:
 ci:
diff --git a/docs/using/recipes.md b/docs/using/recipes.md
index 8ea7735a..d35de54d 100644
--- a/docs/using/recipes.md
+++ b/docs/using/recipes.md
@@ -300,34 +300,44 @@ A few suggestions have been made regarding using Docker Stacks with spark.
 
 Using Spark session for hadoop 2.7.3
 
-```py
+```python
 import os
+
 # !ls /usr/local/spark/jars/hadoop* # to figure out what version of hadoop
-os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages "org.apache.hadoop:hadoop-aws:2.7.3" pyspark-shell'
+os.environ[
+    "PYSPARK_SUBMIT_ARGS"
+] = '--packages "org.apache.hadoop:hadoop-aws:2.7.3" pyspark-shell'
 import pyspark
+
 myAccessKey = input()
 mySecretKey = input()
-spark = pyspark.sql.SparkSession.builder \
-    .master("local[*]") \
-    .config("spark.hadoop.fs.s3a.access.key", myAccessKey) \
-    .config("spark.hadoop.fs.s3a.secret.key", mySecretKey) \
-    .getOrCreate()
+spark = (
+    pyspark.sql.SparkSession.builder.master("local[*]")
+    .config("spark.hadoop.fs.s3a.access.key", myAccessKey)
+    .config("spark.hadoop.fs.s3a.secret.key", mySecretKey)
+    .getOrCreate()
+)
 
 df = spark.read.parquet("s3://myBucket/myKey")
 ```
 
 Using Spark context for hadoop 2.6.0
 
-```py
+```python
 import os
-os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.amazonaws:aws-java-sdk:1.10.34,org.apache.hadoop:hadoop-aws:2.6.0 pyspark-shell'
+
+os.environ[
+    "PYSPARK_SUBMIT_ARGS"
+] = "--packages com.amazonaws:aws-java-sdk:1.10.34,org.apache.hadoop:hadoop-aws:2.6.0 pyspark-shell"
 import pyspark
+
 sc = pyspark.SparkContext("local[*]")
 from pyspark.sql import SQLContext
+
 sqlContext = SQLContext(sc)
 
 hadoopConf = sc._jsc.hadoopConfiguration()
@@ -346,14 +356,20 @@ Ref:
 
 ```python
 import os
-os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars /home/jovyan/spark-streaming-kafka-assembly_2.10-1.6.1.jar pyspark-shell'
+
+os.environ[
+    "PYSPARK_SUBMIT_ARGS"
+] = "--jars /home/jovyan/spark-streaming-kafka-assembly_2.10-1.6.1.jar pyspark-shell"
 import pyspark
 from pyspark.streaming.kafka import KafkaUtils
 from pyspark.streaming import StreamingContext
+
 sc = pyspark.SparkContext()
-ssc = StreamingContext(sc,1)
+ssc = StreamingContext(sc, 1)
 broker = ""
-directKafkaStream = KafkaUtils.createDirectStream(ssc, ["test1"], {"metadata.broker.list": broker})
+directKafkaStream = KafkaUtils.createDirectStream(
+    ssc, ["test1"], {"metadata.broker.list": broker}
+)
 directKafkaStream.pprint()
 ssc.start()
 ```
diff --git a/docs/using/specifics.md b/docs/using/specifics.md
index 3bed66fc..c0269238 100644
--- a/docs/using/specifics.md
+++ b/docs/using/specifics.md
@@ -25,7 +25,7 @@ Those logs are still available but only in the container's logs.
 If you want to make them appear in the notebook, you can overwrite the configuration in a user level IPython kernel profile.
 To do that you have to uncomment the following line in your `~/.ipython/profile_default/ipython_kernel_config.py` and restart the kernel.
 
-```Python
+```python
 c.IPKernelApp.capture_fd_output = True
 ```
 
@@ -91,7 +91,7 @@ In a Python notebook.
 from pyspark.sql import SparkSession
 
 # Spark session & context
-spark = SparkSession.builder.master('local').getOrCreate()
+spark = SparkSession.builder.master("local").getOrCreate()
 sc = spark.sparkContext
 
 # Sum of the first 100 whole numbers
@@ -143,7 +143,7 @@ sdf_len(sc, 100, repartition = 1) %>%
 
 Spylon kernel instantiates a `SparkContext` for you in variable `sc` after you configure Spark options in a `%%init_spark` magic cell.
 
-```python
+```text
 %%init_spark
 # Configure Spark to use a local master
 launcher.master = "local"
@@ -183,7 +183,7 @@ see [Spark Configuration][spark-conf] for more information.
 from pyspark.sql import SparkSession
 
 # Spark session & context
-spark = SparkSession.builder.master('spark://master:7077').getOrCreate()
+spark = SparkSession.builder.master("spark://master:7077").getOrCreate()
 sc = spark.sparkContext
 
 # Sum of the first 100 whole numbers
@@ -234,7 +234,7 @@ sdf_len(sc, 100, repartition = 1) %>%
 
 Spylon kernel instantiates a `SparkContext` for you in variable `sc` after you configure Spark options in a `%%init_spark` magic cell.
 
-```python
+```text
 %%init_spark
 # Configure Spark to use a local master
 launcher.master = "spark://master:7077"
@@ -264,8 +264,7 @@ from pyspark.sql import SparkSession
 spark = (
     SparkSession.builder.appName("elasticsearch")
     .config(
-        "spark.jars.packages",
-        "org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0"
+        "spark.jars.packages", "org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0"
     )
     .getOrCreate()
 )
@@ -292,7 +291,7 @@ The `jupyter/tensorflow-notebook` image supports the use of
 ```python
 import tensorflow as tf
 
-hello = tf.Variable('Hello World!')
+hello = tf.Variable("Hello World!")
 
 sess = tf.Session()
 init = tf.global_variables_initializer()
@@ -306,7 +305,7 @@ sess.run(hello)
 ```python
 import tensorflow as tf
 
-hello = tf.Variable('Hello Distributed World!')
+hello = tf.Variable("Hello Distributed World!")
 
 server = tf.train.Server.create_local_server()
 sess = tf.Session(server.target)
diff --git a/tagging/README.md b/tagging/README.md
index c5eee5d0..3ba12333 100644
--- a/tagging/README.md
+++ b/tagging/README.md
@@ -55,6 +55,7 @@ All the taggers are inherited from `TaggerInterface`:
 ```python
 class TaggerInterface:
     """Common interface for all taggers"""
+
     @staticmethod
     def tag_value(container) -> str:
         raise NotImplementedError
@@ -84,6 +85,7 @@ All the other manifest classes are inherited from `ManifestInterface`:
 ```python
 class ManifestInterface:
     """Common interface for all manifests"""
+
     @staticmethod
     def markdown_piece(container) -> str:
         raise NotImplementedError
@@ -97,11 +99,9 @@ class ManifestInterface:
 class AptPackagesManifest(ManifestInterface):
     @staticmethod
     def markdown_piece(container) -> str:
-        return "\n".join([
-            "## Apt Packages",
-            "",
-            quoted_output(container, "apt list --installed")
-        ])
+        return "\n".join(
+            ["## Apt Packages", "", quoted_output(container, "apt list --installed")]
+        )
 ```
 
 - `quoted_output` simply runs the command inside container using `DockerRunner.run_simple_command` and wraps it to triple quotes to create a valid markdown piece of file.