mirror of
https://github.com/NetherlandsForensicInstitute/hansken-python-workshop.git
synced 2026-02-14 13:39:50 +00:00
Simplify scripts, remove browser check
This commit is contained in:
@@ -1,24 +1,16 @@
|
||||
# %% [python]
|
||||
import sys
|
||||
from wordcloud import WordCloud, STOPWORDS
|
||||
from types import SimpleNamespace
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from hansken.connect import connect_project
|
||||
|
||||
# setup hansken connection
|
||||
in_browser = 'js' in sys.modules
|
||||
|
||||
hansken_host = ''
|
||||
hansken_project = '5ee273fd-0978-4a0a-b8b0-2af2f8479214'
|
||||
|
||||
context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
|
||||
project=hansken_project,
|
||||
keystore=f'http://{hansken_host}:9090/keystore/',
|
||||
# Authentication is faked if we run in the browser,
|
||||
# because an authenticated session should already be present
|
||||
auth=SimpleNamespace() if in_browser else None,
|
||||
interactive=True)
|
||||
|
||||
# Hansken SDK running on localhost
|
||||
|
||||
@@ -5,27 +5,19 @@
|
||||
# Replace `hansken_host` with the IP address of a Hansken instance.
|
||||
|
||||
# %% [python]
|
||||
import sys
|
||||
import pandas as pd
|
||||
|
||||
from types import SimpleNamespace
|
||||
from matplotlib import pyplot
|
||||
|
||||
from hansken.connect import connect_project
|
||||
from hansken.query import RangeFacet
|
||||
|
||||
# The line below finds out if we run in the browser by checking for the js module
|
||||
in_browser = 'js' in sys.modules
|
||||
|
||||
hansken_host = ''
|
||||
hansken_project = '5ee273fd-0978-4a0a-b8b0-2af2f8479214'
|
||||
|
||||
context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
|
||||
project=hansken_project,
|
||||
keystore=f'http://{hansken_host}:9090/keystore/',
|
||||
# Authentication is faked if we run in the browser,
|
||||
# because an authenticated session should already be present
|
||||
auth=SimpleNamespace() if in_browser else None,
|
||||
interactive=True)
|
||||
|
||||
# context = connect_project(endpoint='http://localhost:9091/gatekeeper/',
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
# %% [markdown]
|
||||
# Plot searches over time
|
||||
|
||||
import sys
|
||||
import pandas as pd
|
||||
|
||||
from types import SimpleNamespace
|
||||
from matplotlib import pyplot as plt
|
||||
import seaborn as sns
|
||||
from matplotlib.colors import LogNorm
|
||||
@@ -14,10 +12,7 @@ from hansken.query import RangeFacet
|
||||
|
||||
# %% [python]
|
||||
|
||||
# setup Hansken project context
|
||||
|
||||
# The line below finds out if we run in the browser by checking for the js module
|
||||
in_browser = 'js' in sys.modules
|
||||
# setup Hansken project context
|
||||
|
||||
hansken_host = ''
|
||||
hansken_project = '5ee273fd-0978-4a0a-b8b0-2af2f8479214'
|
||||
@@ -25,9 +20,6 @@ hansken_project = '5ee273fd-0978-4a0a-b8b0-2af2f8479214'
|
||||
context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
|
||||
project=hansken_project,
|
||||
keystore=f'http://{hansken_host}:9090/keystore/',
|
||||
# Authentication is faked if we run in the browser,
|
||||
# because an authenticated session should already be present
|
||||
auth=SimpleNamespace() if in_browser else None,
|
||||
interactive=True)
|
||||
|
||||
# Hansken SDK running on localhost
|
||||
@@ -36,9 +28,9 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
|
||||
# project='d42bd9c3-63db-474c-a36f-b87e1eb9e2d3',
|
||||
# keystore='http://localhost:9090/keystore/')
|
||||
|
||||
# %%
|
||||
# %%
|
||||
|
||||
# Perform facet search in Hansken across dates and present results in a heatmap
|
||||
# Perform facet search in Hansken across dates and present results in a heatmap
|
||||
|
||||
start = '2022-7-1T00:00Z'
|
||||
end = '2022-7-31T23:59Z'
|
||||
|
||||
@@ -3,25 +3,17 @@
|
||||
### Setup Hansken connection
|
||||
|
||||
# %% [python]
|
||||
import sys
|
||||
import plotly.express as px
|
||||
from types import SimpleNamespace
|
||||
|
||||
from hansken.connect import connect_project
|
||||
from hansken.query import TermFacet
|
||||
|
||||
# The line below finds out if we run in the browser by checking for the js module
|
||||
in_browser = 'js' in sys.modules
|
||||
|
||||
hansken_host = ''
|
||||
hansken_project = '5ee273fd-0978-4a0a-b8b0-2af2f8479214'
|
||||
|
||||
context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
|
||||
project=hansken_project,
|
||||
keystore=f'http://{hansken_host}:9090/keystore/',
|
||||
# Authentication is faked if we run in the browser,
|
||||
# because an authenticated session should already be present
|
||||
auth=SimpleNamespace() if in_browser else None,
|
||||
interactive=True)
|
||||
|
||||
# Hansken SDK running on localhost
|
||||
|
||||
@@ -3,25 +3,17 @@
|
||||
### Setup Hansken connection
|
||||
|
||||
# %% [python]
|
||||
import sys
|
||||
import squarify
|
||||
from types import SimpleNamespace
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from hansken.connect import connect_project
|
||||
|
||||
# The line below finds out if we run in the browser by checking for the js module
|
||||
in_browser = 'js' in sys.modules
|
||||
|
||||
hansken_host = ''
|
||||
hansken_project = '5ee273fd-0978-4a0a-b8b0-2af2f8479214'
|
||||
|
||||
context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
|
||||
project=hansken_project,
|
||||
keystore=f'http://{hansken_host}:9090/keystore/',
|
||||
# Authentication is faked if we run in the browser,
|
||||
# because an authenticated session should already be present
|
||||
auth=SimpleNamespace() if in_browser else None,
|
||||
interactive=True)
|
||||
|
||||
# Hansken SDK running on localhost
|
||||
|
||||
@@ -1,25 +1,17 @@
|
||||
# %% [python]
|
||||
import io
|
||||
import sys
|
||||
from wordcloud import WordCloud, STOPWORDS
|
||||
from types import SimpleNamespace
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from hansken.connect import connect_project
|
||||
|
||||
# setup hansken connection
|
||||
in_browser = 'js' in sys.modules
|
||||
|
||||
hansken_host = ''
|
||||
hansken_project = '5ee273fd-0978-4a0a-b8b0-2af2f8479214'
|
||||
|
||||
context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
|
||||
project=hansken_project,
|
||||
keystore=f'http://{hansken_host}:9090/keystore/',
|
||||
# Authentication is faked if we run in the browser,
|
||||
# because an authenticated session should already be present
|
||||
auth=SimpleNamespace() if in_browser else None,
|
||||
interactive=True)
|
||||
|
||||
# Hansken SDK running on localhost
|
||||
@@ -37,8 +29,9 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
|
||||
words = ""
|
||||
with context.search("type:document") as search_result:
|
||||
for trace in search_result:
|
||||
# verify text data stream is available
|
||||
if "text" in trace.data_types:
|
||||
with io.TextIOWrapper(trace.open(stream='text'), encoding="utf-8") as content:
|
||||
with io.TextIOWrapper(trace.open(stream='text'), encoding="utf-8", errors="ignore") as content:
|
||||
words += content.read()
|
||||
words
|
||||
|
||||
|
||||
Reference in New Issue
Block a user