Mirror of https://github.com/jupyter/docker-stacks.git (synced 2025-10-15 05:52:57 +00:00)
Fix notebooks
@@ -9,48 +9,37 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "from pyspark.sql import SparkSession\n",
     "from pyspark.sql.functions import pandas_udf\n",
     "\n",
     "# Spark session & context\n",
-    "spark = SparkSession.builder.master('local').getOrCreate()"
+    "spark = SparkSession.builder.master(\"local\").getOrCreate()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "+---+---+\n",
-      "| id|age|\n",
-      "+---+---+\n",
-      "|  1| 21|\n",
-      "+---+---+\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "df = spark.createDataFrame([(1, 21), (2, 30)], (\"id\", \"age\"))\n",
     "\n",
     "\n",
     "def filter_func(iterator):\n",
     "    for pdf in iterator:\n",
     "        yield pdf[pdf.id == 1]\n",
     "\n",
     "\n",
     "df.mapInPandas(filter_func, df.schema).show()"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -64,7 +53,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.6"
+   "version": "3.9.10"
   }
  },
  "nbformat": 4,
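For reference, the source strings of the two Python cells above assemble into the following script (a minimal sketch, not part of the commit; it assumes a local Spark 3.x installation with pandas and pyarrow available, which mapInPandas needs):

    from pyspark.sql import SparkSession
    from pyspark.sql.functions import pandas_udf  # imported by the notebook, unused below

    # Spark session & context
    spark = SparkSession.builder.master("local").getOrCreate()

    # Two-row DataFrame with columns "id" and "age"
    df = spark.createDataFrame([(1, 21), (2, 30)], ("id", "age"))


    def filter_func(iterator):
        # mapInPandas feeds an iterator of pandas DataFrames, one per batch
        for pdf in iterator:
            yield pdf[pdf.id == 1]


    df.mapInPandas(filter_func, df.schema).show()

Running it prints the one-row table (id 1, age 21) that the stripped stream output above had recorded.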
@@ -2,31 +2,14 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "output_type": "error",
-     "ename": "Error",
-     "evalue": "Jupyter cannot be started. Error attempting to locate jupyter: Data Science libraries jupyter and notebook are not installed in interpreter Python 3.7.7 64-bit ('jupyter': conda).",
-     "traceback": [
-      "Error: Jupyter cannot be started. Error attempting to locate jupyter: Data Science libraries jupyter and notebook are not installed in interpreter Python 3.7.7 64-bit ('jupyter': conda).",
-      "at b.startServer (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:270430)",
-      "at async b.createServer (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:269873)",
-      "at async connect (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:397876)",
-      "at async w.ensureConnectionAndNotebookImpl (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:556625)",
-      "at async w.ensureConnectionAndNotebook (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:556303)",
-      "at async w.clearResult (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:552346)",
-      "at async w.reexecuteCell (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:540374)",
-      "at async w.reexecuteCells (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:537541)"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from pyspark.sql import SparkSession\n",
     "\n",
     "# Spark session & context\n",
-    "spark = SparkSession.builder.master('local').getOrCreate()\n",
+    "spark = SparkSession.builder.master(\"local\").getOrCreate()\n",
     "sc = spark.sparkContext\n",
     "\n",
     "# Sum of the first 100 whole numbers\n",
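The visible source strings of this cell assemble into the sketch below. A sketch only: the hunk cuts off right after the comment, so the lines that actually compute the sum are not shown; the last two lines are an assumed completion mirroring the Scala cell later in this commit, whose recorded result is 5050.0.

    from pyspark.sql import SparkSession

    # Spark session & context
    spark = SparkSession.builder.master("local").getOrCreate()
    sc = spark.sparkContext

    # Sum of the first 100 whole numbers
    # NOTE: assumed completion -- the rest of the cell lies outside the hunk above
    rdd = sc.parallelize(range(100 + 1))
    rdd.sum()  # 5050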
@@ -38,7 +21,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -52,7 +35,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.6"
+   "version": "3.9.10"
   }
  },
  "nbformat": 4,
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -14,21 +14,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "rdd: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[8] at parallelize at <console>:28\n",
-       "res4: Double = 5050.0\n"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "// Sum of the first 100 whole numbers\n",
     "val rdd = sc.parallelize(0 to 100)\n",