mirror of
https://github.com/NetherlandsForensicInstitute/hansken-python-workshop.git
synced 2026-02-14 13:39:50 +00:00
Reformat code files
This commit is contained in:
@@ -11,32 +11,36 @@ from hansken.connect import connect_project
|
||||
in_browser = 'js' in sys.modules
|
||||
hansken_host = ''
|
||||
context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
|
||||
project='5ee273fd-0978-4a0a-b8b0-2af2f8479214',
|
||||
keystore=f'http://{hansken_host}:9091/keystore/',
|
||||
# Authentication is faked if we run in the browser,
|
||||
# because an authenticated session should already be present
|
||||
auth=SimpleNamespace() if in_browser else None,
|
||||
interactive=True)
|
||||
|
||||
project='5ee273fd-0978-4a0a-b8b0-2af2f8479214',
|
||||
keystore=f'http://{hansken_host}:9091/keystore/',
|
||||
# Authentication is faked if we run in the browser,
|
||||
# because an authenticated session should already be present
|
||||
auth=SimpleNamespace() if in_browser else None,
|
||||
interactive=True)
|
||||
|
||||
# Hansken SDK running on localhost
|
||||
|
||||
# context = connect_project(endpoint='http://localhost:9091/gatekeeper/',
|
||||
# project='d42bd9c3-63db-474c-a36f-b87e1eb9e2d3',
|
||||
# keystore='http://localhost:9090/keystore/')
|
||||
|
||||
# %% [markdown]
|
||||
### Collect words
|
||||
# The cell below searches for all `chatMessage` traces in the current project. The `chatMessage.message` property contains the actual message. All found messages are concatenated into a single long string.
|
||||
|
||||
# %% [python]
|
||||
words = ""
|
||||
with context.search("type:chatMessage") as searchResult:
|
||||
for result in searchResult:
|
||||
message = result.get("chatMessage.message")
|
||||
if message is not None:
|
||||
words += " " + message
|
||||
with context.search("type:chatMessage") as search_result:
|
||||
for result in search_result:
|
||||
message = result.get("chatMessage.message")
|
||||
if message is not None:
|
||||
words += " " + message
|
||||
words
|
||||
|
||||
# %% [markdown]
|
||||
### Draw Wordcloud
|
||||
# The cell below draws a wordcloud using the words occurring in the messages. `STOPWORDS` is used to ignore common English words.
|
||||
|
||||
# %% [python]
|
||||
# draw word cloud
|
||||
wc = WordCloud(stopwords=STOPWORDS, width=600, height=400).generate(words)
|
||||
|
||||
@@ -35,10 +35,11 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
|
||||
# Group the number of searches by the accessedOn property on a scale of a day. A Facet on a date requires a min and max
|
||||
facet = RangeFacet('browserHistory.accessedOn', scale='day', min="2022-01-01", max="2023-01-01")
|
||||
# Perform search using the facet, set count=0 to prevent Hansken from returning traces
|
||||
with context.search("browserHistory.accessedOn=2022", facets=facet, count=0) as searchResult:
|
||||
# Convert to dataframe
|
||||
dateFacetResult = searchResult.facets[0]
|
||||
df = pd.DataFrame([[counter.value, counter.count] for _, counter in searchResult.facets[0].items()], columns=['Day', 'Count'])
|
||||
with context.search("browserHistory.accessedOn=2022", facets=facet, count=0) as search_result:
|
||||
# Convert to dataframe
|
||||
dateFacetResult = search_result.facets[0]
|
||||
df = pd.DataFrame([[counter.value, counter.count] for _, counter in search_result.facets[0].items()],
|
||||
columns=['Day', 'Count'])
|
||||
# make sure pandas knows this is a timestamp
|
||||
df['Day'] = pd.to_datetime(df['Day'])
|
||||
df
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
# %% [markdown]
|
||||
# Plot searches over time
|
||||
|
||||
## Initialize Hansken connection
|
||||
import sys
|
||||
import pandas as pd
|
||||
|
||||
from types import SimpleNamespace
|
||||
from matplotlib import pyplot
|
||||
from matplotlib import pyplot as plt
|
||||
import seaborn as sns
|
||||
from matplotlib.colors import LogNorm, Normalize
|
||||
from matplotlib.colors import LogNorm
|
||||
|
||||
from hansken.connect import connect_project
|
||||
from hansken.query import RangeFacet
|
||||
|
||||
# %% [python]
|
||||
|
||||
# setup Hansken project context
|
||||
@@ -39,7 +39,7 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
|
||||
|
||||
start = '2022-7-1T00:00Z'
|
||||
end = '2022-7-31T23:59Z'
|
||||
#search_query = "type:chatMessage"
|
||||
# search_query = "type:chatMessage"
|
||||
search_query = "type:browserHistory"
|
||||
|
||||
# Group the number of matching traces by the `dates` property on a scale of an hour. A Facet on a date requires a min and max
|
||||
@@ -47,19 +47,19 @@ facet = RangeFacet('dates', scale='hour', min=start, max=end)
|
||||
|
||||
# Create a dataframe with entries per hour for the period indicated by start and end
|
||||
df = pd.DataFrame()
|
||||
df['Time'] = pd.date_range(start,end,freq='1H')
|
||||
df['Time'] = pd.date_range(start, end, freq='1H')
|
||||
df['Count'] = 0
|
||||
df.set_index('Time',inplace=True)
|
||||
df.set_index('Time', inplace=True)
|
||||
|
||||
# Perform search using the facet
|
||||
with context.search(search_query, facets=facet, count=0 ) as searchResult:
|
||||
for _, result in searchResult.facets[0].items():
|
||||
df.loc[pd.to_datetime(result.value),'Count']=result.count
|
||||
with context.search(search_query, facets=facet, count=0) as search_result:
|
||||
for _, result in search_result.facets[0].items():
|
||||
df.loc[pd.to_datetime(result.value), 'Count'] = result.count
|
||||
|
||||
# So that we can pivot and prepare a dataframe for our heatmap
|
||||
df_map = pd.pivot_table( df, fill_value=0.0, columns=df.index.date, index=df.index.hour, aggfunc="sum")['Count']
|
||||
|
||||
sns.heatmap(df_map, cmap="Greens",norm=LogNorm())
|
||||
df_map = pd.pivot_table(df, fill_value=0.0, columns=df.index.date, index=df.index.hour, aggfunc="sum")['Count']
|
||||
|
||||
sns.heatmap(df_map, cmap="Greens", norm=LogNorm())
|
||||
plt.show()
|
||||
|
||||
# %%
|
||||
|
||||
@@ -35,13 +35,13 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
|
||||
# %% [python]
|
||||
facet = TermFacet('type', size=40)
|
||||
# Perform search using the facet, set count=0 to prevent Hansken from returning traces
|
||||
with context.search("*", facets=facet, count=0) as searchResult:
|
||||
with context.search("*", facets=facet, count=0) as search_result:
|
||||
# ignore `origin` because it is a metatype, and ignore `compressed` to limit the total number of types
|
||||
ignoreable_types = {'origin', 'compressed'}
|
||||
typeFacet = [bucket for bucket in searchResult.facets[0].values()
|
||||
if bucket.value not in ignoreable_types]
|
||||
counts = [bucket.count for bucket in typeFacet]
|
||||
names = [bucket.value for bucket in typeFacet]
|
||||
type_facet = [bucket for bucket in search_result.facets[0].values()
|
||||
if bucket.value not in ignoreable_types]
|
||||
counts = [bucket.count for bucket in type_facet]
|
||||
names = [bucket.value for bucket in type_facet]
|
||||
|
||||
fig = px.pie(values=counts, names=names, title=f'Trace types found in project')
|
||||
fig.show()
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# %% [markdown]
|
||||
## Plot the distribution of senders of chat messages
|
||||
### Setup Hansken connection
|
||||
|
||||
# %% [python]
|
||||
import sys
|
||||
import squarify
|
||||
@@ -29,7 +30,8 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
|
||||
|
||||
# %% [markdown]
|
||||
### Retrieve all senders
|
||||
# The `unique_values` function returns all values for a given property within a project. In this case, we retrieve all values for `chatMessage.from`.
|
||||
# The `unique_values` function returns all values and the number of occurrences for a given property within a project.
|
||||
# In this case, we retrieve all values for `chatMessage.from`.
|
||||
|
||||
# %% [python]
|
||||
sizes = []
|
||||
@@ -42,7 +44,7 @@ for sender in context.unique_values("chatMessage.from"):
|
||||
### Use a treemap visualization to plot the distribution of senders.
|
||||
|
||||
# %% [python]
|
||||
fig = plt.figure(figsize=(12,6))
|
||||
fig = plt.figure(figsize=(12, 6))
|
||||
ax = fig.add_subplot(111)
|
||||
squarify.plot(sizes=sizes, label=labels, alpha=.6, ax=ax)
|
||||
plt.axis('off')
|
||||
|
||||
Reference in New Issue
Block a user