mirror of https://github.com/jupyter/docker-stacks.git
Run black pre-commit on python code in docs
@@ -99,8 +99,6 @@ repos:
       - id: markdownlint
         args: ["--fix"]

-  # Run tools on Jupyter notebooks
-
   # strip output from Jupyter notebooks
   - repo: https://github.com/kynan/nbstripout
     rev: 0.5.0
@@ -118,6 +116,13 @@ repos:
         args: [--target-version=py39]
       - id: nbqa-flake8

+  # Run black on python code blocks in documentation files.
+  - repo: https://github.com/asottile/blacken-docs
+    rev: v1.12.1
+    hooks:
+      - id: blacken-docs
+        args: [--target-version=py39]
+
   # Docker hooks do not work in pre-commit.ci
   # See: <https://github.com/pre-commit-ci/issues/issues/11>
 ci:
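
The new blacken-docs hook is what produces the documentation reformatting shown in the hunks below: it runs black, targeting Python 3.9, over the fenced python code blocks in the documentation files. As a rough illustration only (not part of the commit, and assuming the black package is available), the same rewrite can be reproduced on a single snippet with black's Python API:

```python
# Illustration only: reformat one docs snippet the way the blacken-docs hook would.
# black.format_str, black.Mode and black.TargetVersion are black's public API.
import black

snippet = "spark = SparkSession.builder.master('local').getOrCreate()\n"
formatted = black.format_str(
    snippet, mode=black.Mode(target_versions={black.TargetVersion.PY39})
)
print(formatted)  # prints the snippet with double quotes, as in the diffs below
```
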
@@ -300,34 +300,44 @@ A few suggestions have been made regarding using Docker Stacks with spark.

 Using Spark session for hadoop 2.7.3

-```py
+```python
 import os

 # !ls /usr/local/spark/jars/hadoop* # to figure out what version of hadoop
-os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages "org.apache.hadoop:hadoop-aws:2.7.3" pyspark-shell'
+os.environ[
+    "PYSPARK_SUBMIT_ARGS"
+] = '--packages "org.apache.hadoop:hadoop-aws:2.7.3" pyspark-shell'

 import pyspark

 myAccessKey = input()
 mySecretKey = input()

-spark = pyspark.sql.SparkSession.builder \
-    .master("local[*]") \
-    .config("spark.hadoop.fs.s3a.access.key", myAccessKey) \
-    .config("spark.hadoop.fs.s3a.secret.key", mySecretKey) \
+spark = (
+    pyspark.sql.SparkSession.builder.master("local[*]")
+    .config("spark.hadoop.fs.s3a.access.key", myAccessKey)
+    .config("spark.hadoop.fs.s3a.secret.key", mySecretKey)
     .getOrCreate()
+)

 df = spark.read.parquet("s3://myBucket/myKey")
 ```

 Using Spark context for hadoop 2.6.0

-```py
+```python
 import os
-os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.amazonaws:aws-java-sdk:1.10.34,org.apache.hadoop:hadoop-aws:2.6.0 pyspark-shell'
+
+os.environ[
+    "PYSPARK_SUBMIT_ARGS"
+] = "--packages com.amazonaws:aws-java-sdk:1.10.34,org.apache.hadoop:hadoop-aws:2.6.0 pyspark-shell"

 import pyspark

 sc = pyspark.SparkContext("local[*]")

 from pyspark.sql import SQLContext

 sqlContext = SQLContext(sc)

 hadoopConf = sc._jsc.hadoopConfiguration()
@@ -346,14 +356,20 @@ Ref: <https://github.com/jupyter/docker-stacks/issues/127>

 ```python
 import os
-os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars /home/jovyan/spark-streaming-kafka-assembly_2.10-1.6.1.jar pyspark-shell'
+
+os.environ[
+    "PYSPARK_SUBMIT_ARGS"
+] = "--jars /home/jovyan/spark-streaming-kafka-assembly_2.10-1.6.1.jar pyspark-shell"
 import pyspark
 from pyspark.streaming.kafka import KafkaUtils
 from pyspark.streaming import StreamingContext

 sc = pyspark.SparkContext()
-ssc = StreamingContext(sc,1)
+ssc = StreamingContext(sc, 1)
 broker = "<my_broker_ip>"
-directKafkaStream = KafkaUtils.createDirectStream(ssc, ["test1"], {"metadata.broker.list": broker})
+directKafkaStream = KafkaUtils.createDirectStream(
+    ssc, ["test1"], {"metadata.broker.list": broker}
+)
 directKafkaStream.pprint()
 ssc.start()
 ```
@@ -25,7 +25,7 @@ Those logs are still available but only in the container's logs.
 If you want to make them appear in the notebook, you can overwrite the configuration in a user level IPython kernel profile.
 To do that you have to uncomment the following line in your `~/.ipython/profile_default/ipython_kernel_config.py` and restart the kernel.

-```Python
+```python
 c.IPKernelApp.capture_fd_output = True
 ```

@@ -91,7 +91,7 @@ In a Python notebook.
 from pyspark.sql import SparkSession

 # Spark session & context
-spark = SparkSession.builder.master('local').getOrCreate()
+spark = SparkSession.builder.master("local").getOrCreate()
 sc = spark.sparkContext

 # Sum of the first 100 whole numbers
@@ -143,7 +143,7 @@ sdf_len(sc, 100, repartition = 1) %>%
 Spylon kernel instantiates a `SparkContext` for you in variable `sc` after you configure Spark
 options in a `%%init_spark` magic cell.

-```python
+```text
 %%init_spark
 # Configure Spark to use a local master
 launcher.master = "local"
@@ -183,7 +183,7 @@ see [Spark Configuration][spark-conf] for more information.
 from pyspark.sql import SparkSession

 # Spark session & context
-spark = SparkSession.builder.master('spark://master:7077').getOrCreate()
+spark = SparkSession.builder.master("spark://master:7077").getOrCreate()
 sc = spark.sparkContext

 # Sum of the first 100 whole numbers
@@ -234,7 +234,7 @@ sdf_len(sc, 100, repartition = 1) %>%
 Spylon kernel instantiates a `SparkContext` for you in variable `sc` after you configure Spark
 options in a `%%init_spark` magic cell.

-```python
+```text
 %%init_spark
 # Configure Spark to use a local master
 launcher.master = "spark://master:7077"
@@ -264,8 +264,7 @@ from pyspark.sql import SparkSession
 spark = (
     SparkSession.builder.appName("elasticsearch")
     .config(
-        "spark.jars.packages",
-        "org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0"
+        "spark.jars.packages", "org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0"
     )
     .getOrCreate()
 )
@@ -292,7 +291,7 @@ The `jupyter/tensorflow-notebook` image supports the use of
 ```python
 import tensorflow as tf

-hello = tf.Variable('Hello World!')
+hello = tf.Variable("Hello World!")

 sess = tf.Session()
 init = tf.global_variables_initializer()
@@ -306,7 +305,7 @@ sess.run(hello)
 ```python
 import tensorflow as tf

-hello = tf.Variable('Hello Distributed World!')
+hello = tf.Variable("Hello Distributed World!")

 server = tf.train.Server.create_local_server()
 sess = tf.Session(server.target)
@@ -55,6 +55,7 @@ All the taggers are inherited from `TaggerInterface`:
 ```python
 class TaggerInterface:
     """Common interface for all taggers"""
+
     @staticmethod
     def tag_value(container) -> str:
         raise NotImplementedError
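
To make the interface concrete, a subclass only has to implement `tag_value`. The following is a hypothetical sketch, not code from the commit; it assumes `DockerRunner.run_simple_command(container, cmd)` returns the command's output as a string, as the note on `quoted_output` further down suggests:

```python
# Hypothetical example tagger (not part of this commit): tag an image with the
# Ubuntu release reported inside the running container, e.g. "ubuntu-20.04".
class UbuntuVersionTagger(TaggerInterface):
    """Tags the image with the Ubuntu version it is based on"""

    @staticmethod
    def tag_value(container) -> str:
        # assumed helper: runs the command in the container and returns its stdout
        release = DockerRunner.run_simple_command(container, "lsb_release -rs")
        return "ubuntu-" + release.strip()
```
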
@@ -84,6 +85,7 @@ All the other manifest classes are inherited from `ManifestInterface`:
 ```python
 class ManifestInterface:
     """Common interface for all manifests"""
+
     @staticmethod
     def markdown_piece(container) -> str:
         raise NotImplementedError
@@ -97,11 +99,9 @@ class ManifestInterface:
 class AptPackagesManifest(ManifestInterface):
     @staticmethod
     def markdown_piece(container) -> str:
-        return "\n".join([
-            "## Apt Packages",
-            "",
-            quoted_output(container, "apt list --installed")
-        ])
+        return "\n".join(
+            ["## Apt Packages", "", quoted_output(container, "apt list --installed")]
+        )
 ```

 - `quoted_output` simply runs the command inside container using `DockerRunner.run_simple_command` and wraps it to triple quotes to create a valid markdown piece of file.
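
Based on that description, a minimal sketch of `quoted_output` might look like the following (hypothetical reconstruction, not shown in the commit; it assumes `DockerRunner.run_simple_command(container, cmd)` returns the command's stdout as a string):

```python
# Hypothetical sketch of quoted_output: run a command inside the container and
# wrap its output in a fenced block so it can be embedded in the manifest markdown.
def quoted_output(container, cmd: str) -> str:
    output = DockerRunner.run_simple_command(container, cmd)
    fence = "`" * 3  # triple backticks, built here to avoid a literal nested fence
    return "\n".join([fence, output, fence])
```
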