Reformat code files

2026-02-14 13:39:50 +00:00 · 2023-07-08 17:17:32 +02:00
parent b46baf7541
commit 6f267feb12
5 changed files with 42 additions and 35 deletions
--- a/query_word_cloud.py
+++ b/query_word_cloud.py
@@ -11,32 +11,36 @@ from hansken.connect import connect_project
 in_browser = 'js' in sys.modules
 hansken_host = ''
 context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
-                           project='5ee273fd-0978-4a0a-b8b0-2af2f8479214',
-                           keystore=f'http://{hansken_host}:9091/keystore/',
-                           # Authentication is faked if we run in the browser,
-                           # because an authenticated session should already be present
-                           auth=SimpleNamespace() if in_browser else None,
-                           interactive=True)
-                         
+                          project='5ee273fd-0978-4a0a-b8b0-2af2f8479214',
+                          keystore=f'http://{hansken_host}:9091/keystore/',
+                          # Authentication is faked if we run in the browser,
+                          # because an authenticated session should already be present
+                          auth=SimpleNamespace() if in_browser else None,
+                          interactive=True)
+
 # Hansken SDK running on localhost

 # context = connect_project(endpoint='http://localhost:9091/gatekeeper/',
 #                           project='d42bd9c3-63db-474c-a36f-b87e1eb9e2d3',
 #                           keystore='http://localhost:9090/keystore/')
+
 # %% [markdown]
 ### Collect words
 # The cell below searches for all `chatMessage` traces in the current project. The `chatMessage.message` property contains the actual message. All found messages are concatenated in a single long string.
+
 # %% [python]
 words = ""
-with context.search("type:chatMessage") as searchResult:
-  for result in searchResult:
-    message = result.get("chatMessage.message")
-    if message is not None:
-      words += " " + message
+with context.search("type:chatMessage") as search_result:
+    for result in search_result:
+        message = result.get("chatMessage.message")
+        if message is not None:
+            words += " " + message
 words
+
 # %% [markdown]
 ### Draw Wordcloud
 # The cell below draws a wordcloud using the words occurring in the messages. `STOPWORDS` is used to ignore common english words.
+
 # %% [python]
 # draw word cloud
 wc = WordCloud(stopwords=STOPWORDS, width=600, height=400).generate(words)
--- a/searches_time.py
+++ b/searches_time.py
@@ -35,10 +35,11 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
 # Group the number of searches by the accessedOn property on a scale of a day. A Facet on a date requires a min and max
 facet = RangeFacet('browserHistory.accessedOn', scale='day', min="2022-01-01", max="2023-01-01")
 # Perform search using the facet, set count=0 to prevent hansken returning traces
-with context.search("browserHistory.accessedOn=2022", facets=facet, count=0) as searchResult:
-  # Convert to dataframe
-  dateFacetResult = searchResult.facets[0]
-  df = pd.DataFrame([[counter.value, counter.count] for _, counter in searchResult.facets[0].items()], columns=['Day', 'Count'])
+with context.search("browserHistory.accessedOn=2022", facets=facet, count=0) as search_result:
+    # Convert to dataframe
+    dateFacetResult = search_result.facets[0]
+    df = pd.DataFrame([[counter.value, counter.count] for _, counter in search_result.facets[0].items()],
+                      columns=['Day', 'Count'])
 # make sure pandas knows this is a timestamp
 df['Day'] = pd.to_datetime(df['Day'])
 df
--- a/hansken_facet_heatmap.py
+++ b/hansken_facet_heatmap.py
@@ -1,17 +1,17 @@
 # %% [markdown]
 # Plot searches over time

-## Initialize Hansken connection
 import sys
 import pandas as pd

 from types import SimpleNamespace
-from matplotlib import pyplot
+from matplotlib import pyplot as plt
 import seaborn as sns
-from matplotlib.colors import LogNorm, Normalize
+from matplotlib.colors import LogNorm

 from hansken.connect import connect_project
 from hansken.query import RangeFacet
+
 # %% [python]

 # setup Hansken project context 
@@ -39,7 +39,7 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',

 start = '2022-7-1T00:00Z'
 end = '2022-7-31T23:59Z'
-#search_query = "type:chatMessage"
+# search_query = "type:chatMessage"
 search_query = "type:browserHistory"

 # Group the number of searches by the accessedOn property on a scale of a day. A Facet on a date requires a min and max
@@ -47,19 +47,19 @@ facet = RangeFacet('dates', scale='hour', min=start, max=end)

 # Create a dataframe with entries per hour for the period indicated by start and end
 df = pd.DataFrame()
-df['Time'] = pd.date_range(start,end,freq='1H')
+df['Time'] = pd.date_range(start, end, freq='1H')
 df['Count'] = 0
-df.set_index('Time',inplace=True)
+df.set_index('Time', inplace=True)

 # Perform search using the facet
-with context.search(search_query, facets=facet, count=0 ) as searchResult:
-  for _, result in searchResult.facets[0].items():
-    df.loc[pd.to_datetime(result.value),'Count']=result.count
+with context.search(search_query, facets=facet, count=0) as search_result:
+    for _, result in search_result.facets[0].items():
+        df.loc[pd.to_datetime(result.value), 'Count'] = result.count

 # So that we can pivot and prepare a dataframe for our heatmap
-df_map = pd.pivot_table( df, fill_value=0.0, columns=df.index.date, index=df.index.hour, aggfunc="sum")['Count']
-
-sns.heatmap(df_map, cmap="Greens",norm=LogNorm())
+df_map = pd.pivot_table(df, fill_value=0.0, columns=df.index.date, index=df.index.hour, aggfunc="sum")['Count']

+sns.heatmap(df_map, cmap="Greens", norm=LogNorm())
+plt.show()

 # %%
--- a/types_in_piechart.py
+++ b/types_in_piechart.py
@@ -35,13 +35,13 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',
 # %% [python]
 facet = TermFacet('type', size=40)
 # Perform search using the facet, set count=0 to prevent hansken returning traces
-with context.search("*", facets=facet, count=0) as searchResult:
+with context.search("*", facets=facet, count=0) as search_result:
    # ignore origin because it is a metatype and compressed to limit the total number of types
    ignoreable_types = {'origin', 'compressed'}
-    typeFacet = [bucket for bucket  in searchResult.facets[0].values() 
-                 if bucket.value not in ignoreable_types]
-    counts = [bucket.count for bucket in typeFacet]
-    names = [bucket.value for bucket in typeFacet]
+    type_facet = [bucket for bucket in search_result.facets[0].values()
+                  if bucket.value not in ignoreable_types]
+    counts = [bucket.count for bucket in type_facet]
+    names = [bucket.value for bucket in type_facet]

 fig = px.pie(values=counts, names=names, title=f'Trace types found in project')
 fig.show()
--- a/unique_values_treemap_chatmessage.py
+++ b/unique_values_treemap_chatmessage.py
@@ -1,6 +1,7 @@
 # %% [markdown]
 ## Plot the distribution of senders of chat messages
 ### Setup Hansken connection
+
 # %% [python]
 import sys
 import squarify
@@ -29,7 +30,8 @@ context = connect_project(endpoint=f'http://{hansken_host}:9091/gatekeeper/',

 # %% [markdown]
 ### Retrieve all senders
-# The `unique_values` function returns all values for a given property within a project. In this case, we retrieve all values for `chatMessage.from`.
+# The `unique_values` function returns every distinct value of a property within a project, along with how often each
+# value occurs. In this case, we retrieve the values of `chatMessage.from`.

 # %% [python]
 sizes = []
@@ -42,7 +44,7 @@ for sender in context.unique_values("chatMessage.from"):
 ### Use a treemap visualization to plot the distribution of senders.

 # %% [python]
-fig = plt.figure(figsize=(12,6))
+fig = plt.figure(figsize=(12, 6))
 ax = fig.add_subplot(111)
 squarify.plot(sizes=sizes, label=labels, alpha=.6, ax=ax)
 plt.axis('off')