From 9a328b260c515d504cc5db882e53e753e39b51fb Mon Sep 17 00:00:00 2001 From: Job Becht Date: Thu, 10 Oct 2024 14:29:16 +0200 Subject: [PATCH] Simplify scripts, remove browser check --- 01_query_word_cloud.py | 8 -------- 02_searches_time.py | 8 -------- 03_hansken_facet_heatmap.py | 14 +++----------- 04_types_in_piechart.py | 8 -------- 05_unique_values_treemap_chatmessage.py | 8 -------- 06_data_word_cloud.py | 11 ++--------- 6 files changed, 5 insertions(+), 52 deletions(-) diff --git a/01_query_word_cloud.py b/01_query_word_cloud.py index 9849a08..2d2305d 100644 --- a/01_query_word_cloud.py +++ b/01_query_word_cloud.py @@ -1,24 +1,16 @@ # %% [python] -import sys from wordcloud import WordCloud, STOPWORDS -from types import SimpleNamespace import matplotlib.pyplot as plt from hansken.connect import connect_project -# setup hansken connection -in_browser = 'js' in sys.modules - hansken_host = '' hansken_project = '5ee273fd-0978-4a0a-b8b0-2af2f8479214' context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/', project=hansken_project, keystore=f'http://{hansken_host}:9090/keystore/', - # Authentication is faked if we run in the browser, - # because an authenticated session should already be present - auth=SimpleNamespace() if in_browser else None, interactive=True) # Hansken SDK running on localhost diff --git a/02_searches_time.py b/02_searches_time.py index 6119dcb..12d8c57 100644 --- a/02_searches_time.py +++ b/02_searches_time.py @@ -5,27 +5,19 @@ # Replace `hansken_host` with the ip of a Hansken instance. 
# %% [python] -import sys import pandas as pd -from types import SimpleNamespace from matplotlib import pyplot from hansken.connect import connect_project from hansken.query import RangeFacet -# The line below finds out if we run in the browser by checking for the js module -in_browser = 'js' in sys.modules - hansken_host = '' hansken_project = '5ee273fd-0978-4a0a-b8b0-2af2f8479214' context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/', project=hansken_project, keystore=f'http://{hansken_host}:9090/keystore/', - # Authentication is faked if we run in the browser, - # because an authenticated session should already be present - auth=SimpleNamespace() if in_browser else None, interactive=True) # context = connect_project(endpoint='http://localhost:9091/gatekeeper/', diff --git a/03_hansken_facet_heatmap.py b/03_hansken_facet_heatmap.py index 1bfd25f..12a1316 100644 --- a/03_hansken_facet_heatmap.py +++ b/03_hansken_facet_heatmap.py @@ -1,10 +1,8 @@ # %% [markdown] # Plot searches over time -import sys import pandas as pd -from types import SimpleNamespace from matplotlib import pyplot as plt import seaborn as sns from matplotlib.colors import LogNorm @@ -14,10 +12,7 @@ from hansken.query import RangeFacet # %% [python] -# setup Hansken project context - -# The line below finds out if we run in the browser by checking for the js module -in_browser = 'js' in sys.modules +# setup Hansken project context hansken_host = '' hansken_project = '5ee273fd-0978-4a0a-b8b0-2af2f8479214' @@ -25,9 +20,6 @@ hansken_project = '5ee273fd-0978-4a0a-b8b0-2af2f8479214' context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/', project=hansken_project, keystore=f'http://{hansken_host}:9090/keystore/', - # Authentication is faked if we run in the browser, - # because an authenticated session should already be present - auth=SimpleNamespace() if in_browser else None, interactive=True) # Hansken SDK running on localhost @@ -36,9 +28,9 @@ context = 
connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/', # project='d42bd9c3-63db-474c-a36f-b87e1eb9e2d3', # keystore='http://localhost:9090/keystore/') -# %% +# %% -# Perform facet search in Hansken accross dates and present results in a heatmap +# Perform facet search in Hansken across dates and present results in a heatmap start = '2022-7-1T00:00Z' end = '2022-7-31T23:59Z' diff --git a/04_types_in_piechart.py b/04_types_in_piechart.py index 748ee3d..3fe4edf 100644 --- a/04_types_in_piechart.py +++ b/04_types_in_piechart.py @@ -3,25 +3,17 @@ ### Setup Hansken connection # %% [python] -import sys import plotly.express as px -from types import SimpleNamespace from hansken.connect import connect_project from hansken.query import TermFacet -# The line below finds out if we run in the browser by checking for the js module -in_browser = 'js' in sys.modules - hansken_host = '' hansken_project = '5ee273fd-0978-4a0a-b8b0-2af2f8479214' context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/', project=hansken_project, keystore=f'http://{hansken_host}:9090/keystore/', - # Authentication is faked if we run in the browser, - # because an authenticated session should already be present - auth=SimpleNamespace() if in_browser else None, interactive=True) # Hansken SDK running on localhost diff --git a/05_unique_values_treemap_chatmessage.py b/05_unique_values_treemap_chatmessage.py index 9f69b0c..c40c6cf 100644 --- a/05_unique_values_treemap_chatmessage.py +++ b/05_unique_values_treemap_chatmessage.py @@ -3,25 +3,17 @@ ### Setup Hansken connection # %% [python] -import sys import squarify -from types import SimpleNamespace import matplotlib.pyplot as plt from hansken.connect import connect_project -# The line below finds out if we run in the browser by checking for the js module -in_browser = 'js' in sys.modules - hansken_host = '' hansken_project = '5ee273fd-0978-4a0a-b8b0-2af2f8479214' context = 
connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/', project=hansken_project, keystore=f'http://{hansken_host}:9090/keystore/', - # Authentication is faked if we run in the browser, - # because an authenticated session should already be present - auth=SimpleNamespace() if in_browser else None, interactive=True) # Hansken SDK running on localhost diff --git a/06_data_word_cloud.py b/06_data_word_cloud.py index bb60a38..7bd1d1c 100644 --- a/06_data_word_cloud.py +++ b/06_data_word_cloud.py @@ -1,25 +1,17 @@ # %% [python] import io -import sys from wordcloud import WordCloud, STOPWORDS -from types import SimpleNamespace import matplotlib.pyplot as plt from hansken.connect import connect_project -# setup hansken connection -in_browser = 'js' in sys.modules - hansken_host = '' hansken_project = '5ee273fd-0978-4a0a-b8b0-2af2f8479214' context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/', project=hansken_project, keystore=f'http://{hansken_host}:9090/keystore/', - # Authentication is faked if we run in the browser, - # because an authenticated session should already be present - auth=SimpleNamespace() if in_browser else None, interactive=True) # Hansken SDK running on localhost @@ -37,8 +29,9 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/', words = "" with context.search("type:document") as search_result: for trace in search_result: + # verify text data stream is available if "text" in trace.data_types: - with io.TextIOWrapper(trace.open(stream='text'), encoding="utf-8") as content: + with io.TextIOWrapper(trace.open(stream='text'), encoding="utf-8", errors="ignore") as content: words += content.read() words