Run black pre-commit on python code in docs

Author: Ayaz Salikhov
Date: 2022-03-17 15:59:28 +02:00
Parent: f91b9ad11a
Commit: 6ee21a77d2
4 changed files with 48 additions and 28 deletions

Changed file 1 of 4

@@ -99,8 +99,6 @@ repos:
       - id: markdownlint
         args: ["--fix"]
-  # Run tools on Jupyter notebooks
   # strip output from Jupyter notebooks
   - repo: https://github.com/kynan/nbstripout
     rev: 0.5.0
@@ -118,6 +116,13 @@ repos:
         args: [--target-version=py39]
       - id: nbqa-flake8
+  # Run black on python code blocks in documentation files.
+  - repo: https://github.com/asottile/blacken-docs
+    rev: v1.12.1
+    hooks:
+      - id: blacken-docs
+        args: [--target-version=py39]
   # Docker hooks do not work in pre-commit.ci
   # See: <https://github.com/pre-commit-ci/issues/issues/11>
 ci:
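For context, blacken-docs applies black's usual formatting to fenced python blocks inside the documentation files; the diffs below show the result on the real docs. As a minimal illustration of the style it enforces (a hypothetical snippet, not taken from the repository), black with `--target-version=py39` normalizes quotes and line wrapping:

```python
# Before blacken-docs: single quotes and manual wrapping
# greeting = 'Hello'
# numbers = list(
#     range(10))

# After blacken-docs: double quotes, call collapsed onto one line
greeting = "Hello"
numbers = list(range(10))
```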

Changed file 2 of 4

@@ -300,34 +300,44 @@ A few suggestions have been made regarding using Docker Stacks with spark.
 Using Spark session for hadoop 2.7.3
-```py
+```python
 import os
 # !ls /usr/local/spark/jars/hadoop* # to figure out what version of hadoop
-os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages "org.apache.hadoop:hadoop-aws:2.7.3" pyspark-shell'
+os.environ[
+    "PYSPARK_SUBMIT_ARGS"
+] = '--packages "org.apache.hadoop:hadoop-aws:2.7.3" pyspark-shell'
 import pyspark
 myAccessKey = input()
 mySecretKey = input()
-spark = pyspark.sql.SparkSession.builder \
-    .master("local[*]") \
-    .config("spark.hadoop.fs.s3a.access.key", myAccessKey) \
-    .config("spark.hadoop.fs.s3a.secret.key", mySecretKey) \
-    .getOrCreate()
+spark = (
+    pyspark.sql.SparkSession.builder.master("local[*]")
+    .config("spark.hadoop.fs.s3a.access.key", myAccessKey)
+    .config("spark.hadoop.fs.s3a.secret.key", mySecretKey)
+    .getOrCreate()
+)
 df = spark.read.parquet("s3://myBucket/myKey")
 ```
 Using Spark context for hadoop 2.6.0
-```py
+```python
 import os
-os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.amazonaws:aws-java-sdk:1.10.34,org.apache.hadoop:hadoop-aws:2.6.0 pyspark-shell'
+os.environ[
+    "PYSPARK_SUBMIT_ARGS"
+] = "--packages com.amazonaws:aws-java-sdk:1.10.34,org.apache.hadoop:hadoop-aws:2.6.0 pyspark-shell"
 import pyspark
 sc = pyspark.SparkContext("local[*]")
 from pyspark.sql import SQLContext
 sqlContext = SQLContext(sc)
 hadoopConf = sc._jsc.hadoopConfiguration()
@@ -346,14 +356,20 @@ Ref: <https://github.com/jupyter/docker-stacks/issues/127>
 ```python
 import os
-os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars /home/jovyan/spark-streaming-kafka-assembly_2.10-1.6.1.jar pyspark-shell'
+os.environ[
+    "PYSPARK_SUBMIT_ARGS"
+] = "--jars /home/jovyan/spark-streaming-kafka-assembly_2.10-1.6.1.jar pyspark-shell"
 import pyspark
 from pyspark.streaming.kafka import KafkaUtils
 from pyspark.streaming import StreamingContext
 sc = pyspark.SparkContext()
 ssc = StreamingContext(sc, 1)
 broker = "<my_broker_ip>"
-directKafkaStream = KafkaUtils.createDirectStream(ssc, ["test1"], {"metadata.broker.list": broker})
+directKafkaStream = KafkaUtils.createDirectStream(
+    ssc, ["test1"], {"metadata.broker.list": broker}
+)
 directKafkaStream.pprint()
 ssc.start()
 ```

Changed file 3 of 4

@@ -25,7 +25,7 @@ Those logs are still available but only in the container's logs.
 If you want to make them appear in the notebook, you can overwrite the configuration in a user level IPython kernel profile.
 To do that you have to uncomment the following line in your `~/.ipython/profile_default/ipython_kernel_config.py` and restart the kernel.
-```Python
+```python
 c.IPKernelApp.capture_fd_output = True
 ```
@@ -91,7 +91,7 @@ In a Python notebook.
 from pyspark.sql import SparkSession
 # Spark session & context
-spark = SparkSession.builder.master('local').getOrCreate()
+spark = SparkSession.builder.master("local").getOrCreate()
 sc = spark.sparkContext
 # Sum of the first 100 whole numbers
@@ -143,7 +143,7 @@ sdf_len(sc, 100, repartition = 1) %>%
 Spylon kernel instantiates a `SparkContext` for you in variable `sc` after you configure Spark
 options in a `%%init_spark` magic cell.
-```python
+```text
 %%init_spark
 # Configure Spark to use a local master
 launcher.master = "local"
@@ -183,7 +183,7 @@ see [Spark Configuration][spark-conf] for more information.
 from pyspark.sql import SparkSession
 # Spark session & context
-spark = SparkSession.builder.master('spark://master:7077').getOrCreate()
+spark = SparkSession.builder.master("spark://master:7077").getOrCreate()
 sc = spark.sparkContext
 # Sum of the first 100 whole numbers
@@ -234,7 +234,7 @@ sdf_len(sc, 100, repartition = 1) %>%
 Spylon kernel instantiates a `SparkContext` for you in variable `sc` after you configure Spark
 options in a `%%init_spark` magic cell.
-```python
+```text
 %%init_spark
 # Configure Spark to use a local master
 launcher.master = "spark://master:7077"
@@ -264,8 +264,7 @@ from pyspark.sql import SparkSession
 spark = (
     SparkSession.builder.appName("elasticsearch")
     .config(
-        "spark.jars.packages",
-        "org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0"
+        "spark.jars.packages", "org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0"
     )
     .getOrCreate()
 )
@@ -292,7 +291,7 @@ The `jupyter/tensorflow-notebook` image supports the use of
 ```python
 import tensorflow as tf
-hello = tf.Variable('Hello World!')
+hello = tf.Variable("Hello World!")
 sess = tf.Session()
 init = tf.global_variables_initializer()
@@ -306,7 +305,7 @@ sess.run(hello)
 ```python
 import tensorflow as tf
-hello = tf.Variable('Hello Distributed World!')
+hello = tf.Variable("Hello Distributed World!")
 server = tf.train.Server.create_local_server()
 sess = tf.Session(server.target)

Changed file 4 of 4

@@ -55,6 +55,7 @@ All the taggers are inherited from `TaggerInterface`:
 ```python
 class TaggerInterface:
     """Common interface for all taggers"""
+
     @staticmethod
     def tag_value(container) -> str:
         raise NotImplementedError
@@ -84,6 +85,7 @@ All the other manifest classes are inherited from `ManifestInterface`:
 ```python
 class ManifestInterface:
     """Common interface for all manifests"""
+
     @staticmethod
     def markdown_piece(container) -> str:
         raise NotImplementedError
@@ -97,11 +99,9 @@ class ManifestInterface:
 class AptPackagesManifest(ManifestInterface):
     @staticmethod
     def markdown_piece(container) -> str:
-        return "\n".join([
-            "## Apt Packages",
-            "",
-            quoted_output(container, "apt list --installed")
-        ])
+        return "\n".join(
+            ["## Apt Packages", "", quoted_output(container, "apt list --installed")]
+        )
 ```
- `quoted_output` simply runs the command inside container using `DockerRunner.run_simple_command` and wraps it to triple quotes to create a valid markdown piece of file. - `quoted_output` simply runs the command inside container using `DockerRunner.run_simple_command` and wraps it to triple quotes to create a valid markdown piece of file.
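For orientation, here is a minimal sketch of what such a helper could look like, assuming `DockerRunner.run_simple_command(container, cmd)` returns the command's output as a plain string; the import path and exact signature are illustrative, not the repository's actual code:

```python
# Illustrative sketch only; the real helper ships with the tagging code
# and may differ in module path and signature.
from tagging.docker_runner import DockerRunner  # hypothetical import path


def quoted_output(container, cmd: str) -> str:
    """Run `cmd` inside the container and wrap the output in triple backticks."""
    output = DockerRunner.run_simple_command(container, cmd)
    # Triple backticks turn the raw command output into a valid markdown code block.
    return "\n".join(["```", output, "```"])
```

Manifest classes such as `AptPackagesManifest` above then only need to choose the heading and the command to run.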