Reformat code files

This commit is contained in:
Mattijs Ugen
2023-07-08 17:17:32 +02:00
parent b46baf7541
commit 6f267feb12
5 changed files with 42 additions and 35 deletions

View File

@@ -11,32 +11,36 @@ from hansken.connect import connect_project
in_browser = 'js' in sys.modules
hansken_host = ''
context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
project='5ee273fd-0978-4a0a-b8b0-2af2f8479214',
keystore=f'http://{hansken_host}:9091/keystore/',
# Authentication is faked if we run in the browser,
# because an authenticated session should already be present
auth=SimpleNamespace() if in_browser else None,
interactive=True)
project='5ee273fd-0978-4a0a-b8b0-2af2f8479214',
keystore=f'http://{hansken_host}:9091/keystore/',
# Authentication is faked if we run in the browser,
# because an authenticated session should already be present
auth=SimpleNamespace() if in_browser else None,
interactive=True)
# Hansken SDK running on localhost
# context = connect_project(endpoint='http://localhost:9091/gatekeeper/',
# project='d42bd9c3-63db-474c-a36f-b87e1eb9e2d3',
# keystore='http://localhost:9090/keystore/')
# %% [markdown]
### Collect words
# The cell below searches for all `chatMessage` traces in the current project. The `chatMessage.message` property contains the actual message text. All messages found are concatenated into a single long string.
# %% [python]
words = ""
with context.search("type:chatMessage") as searchResult:
for result in searchResult:
message = result.get("chatMessage.message")
if message is not None:
words += " " + message
with context.search("type:chatMessage") as search_result:
for result in search_result:
message = result.get("chatMessage.message")
if message is not None:
words += " " + message
words
# %% [markdown]
### Draw Wordcloud
# The cell below draws a wordcloud using the words occurring in the messages. `STOPWORDS` is used to ignore common English words.
# %% [python]
# draw word cloud
wc = WordCloud(stopwords=STOPWORDS, width=600, height=400).generate(words)

View File

@@ -35,10 +35,11 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
# Group the number of searches by the `browserHistory.accessedOn` property on a scale of a day. A facet on a date requires both a min and a max.
facet = RangeFacet('browserHistory.accessedOn', scale='day', min="2022-01-01", max="2023-01-01")
# Perform the search using the facet; set count=0 to prevent Hansken from returning traces
with context.search("browserHistory.accessedOn=2022", facets=facet, count=0) as searchResult:
# Convert to dataframe
dateFacetResult = searchResult.facets[0]
df = pd.DataFrame([[counter.value, counter.count] for _, counter in searchResult.facets[0].items()], columns=['Day', 'Count'])
with context.search("browserHistory.accessedOn=2022", facets=facet, count=0) as search_result:
# Convert to dataframe
dateFacetResult = search_result.facets[0]
df = pd.DataFrame([[counter.value, counter.count] for _, counter in search_result.facets[0].items()],
columns=['Day', 'Count'])
# make sure pandas knows this is a timestamp
df['Day'] = pd.to_datetime(df['Day'])
df

View File

@@ -1,17 +1,17 @@
# %% [markdown]
# Plot searches over time
## Initialize Hansken connection
import sys
import pandas as pd
from types import SimpleNamespace
from matplotlib import pyplot
from matplotlib import pyplot as plt
import seaborn as sns
from matplotlib.colors import LogNorm, Normalize
from matplotlib.colors import LogNorm
from hansken.connect import connect_project
from hansken.query import RangeFacet
# %% [python]
# setup Hansken project context
@@ -39,7 +39,7 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
start = '2022-7-1T00:00Z'
end = '2022-7-31T23:59Z'
#search_query = "type:chatMessage"
# search_query = "type:chatMessage"
search_query = "type:browserHistory"
# Group the number of searches by the `dates` property on a scale of an hour. A facet on a date requires both a min and a max.
@@ -47,19 +47,19 @@ facet = RangeFacet('dates', scale='hour', min=start, max=end)
# Create a dataframe with entries per hour for the period indicated by start and end
df = pd.DataFrame()
df['Time'] = pd.date_range(start,end,freq='1H')
df['Time'] = pd.date_range(start, end, freq='1H')
df['Count'] = 0
df.set_index('Time',inplace=True)
df.set_index('Time', inplace=True)
# Perform search using the facet
with context.search(search_query, facets=facet, count=0 ) as searchResult:
for _, result in searchResult.facets[0].items():
df.loc[pd.to_datetime(result.value),'Count']=result.count
with context.search(search_query, facets=facet, count=0) as search_result:
for _, result in search_result.facets[0].items():
df.loc[pd.to_datetime(result.value), 'Count'] = result.count
# Pivot the dataframe into an hour-of-day by date table of counts to prepare it for the heatmap
df_map = pd.pivot_table( df, fill_value=0.0, columns=df.index.date, index=df.index.hour, aggfunc="sum")['Count']
sns.heatmap(df_map, cmap="Greens",norm=LogNorm())
df_map = pd.pivot_table(df, fill_value=0.0, columns=df.index.date, index=df.index.hour, aggfunc="sum")['Count']
sns.heatmap(df_map, cmap="Greens", norm=LogNorm())
plt.show()
# %%

View File

@@ -35,13 +35,13 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
# %% [python]
facet = TermFacet('type', size=40)
# Perform the search using the facet; set count=0 to prevent Hansken from returning traces
with context.search("*", facets=facet, count=0) as searchResult:
with context.search("*", facets=facet, count=0) as search_result:
# Ignore `origin` because it is a metatype, and ignore `compressed` to limit the total number of types
ignoreable_types = {'origin', 'compressed'}
typeFacet = [bucket for bucket in searchResult.facets[0].values()
if bucket.value not in ignoreable_types]
counts = [bucket.count for bucket in typeFacet]
names = [bucket.value for bucket in typeFacet]
type_facet = [bucket for bucket in search_result.facets[0].values()
if bucket.value not in ignoreable_types]
counts = [bucket.count for bucket in type_facet]
names = [bucket.value for bucket in type_facet]
fig = px.pie(values=counts, names=names, title=f'Trace types found in project')
fig.show()

View File

@@ -1,6 +1,7 @@
# %% [markdown]
## Plot the distribution of senders of chat messages
### Setup Hansken connection
# %% [python]
import sys
import squarify
@@ -29,7 +30,8 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
# %% [markdown]
### Retrieve all senders
# The `unique_values` function returns all values for a given property within a project. In this case, we retrieve all values for `chatMessage.from`.
# The `unique_values` function returns all values and the number of occurrences for a given property within a project.
# In this case, we retrieve all values for `chatMessage.from`.
# %% [python]
sizes = []
@@ -42,7 +44,7 @@ for sender in context.unique_values("chatMessage.from"):
### Use a treemap visualization to plot the distribution of senders.
# %% [python]
fig = plt.figure(figsize=(12,6))
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(111)
squarify.plot(sizes=sizes, label=labels, alpha=.6, ax=ax)
plt.axis('off')