mirror of
https://github.com/jpetazzo/container.training.git
synced 2026-02-14 17:49:59 +00:00
For automatic transcription and chaptering, we need to know exactly at which slide each section starts. We already had the count-slides.py script to count how many slides each section has, and to count the number of slides per part. The new script does the same, but also accurately reports the first slide of each section.
76 lines
2.3 KiB
Python
Executable File
76 lines
2.3 KiB
Python
Executable File
#!/usr/bin/env python
|
|
import re
|
|
import sys
|
|
import yaml
|
|
|
|
# A slide containing this text is treated as the first slide of a new section.
FIRST_SLIDE_MARKER = "name: toc-"
# A "(#toc-...)" link target starting with this prefix identifies the part
# that the section belongs to.
PART_PREFIX = "part-"

# The single command-line argument may be either the YAML manifest or the
# generated HTML file; the name of the other one is derived from it.
filename = sys.argv[1]
if filename.endswith(".html"):
    html_file = filename
    yaml_file = filename[: -len(".html")]
else:
    html_file = filename + ".html"
    yaml_file = filename

# Slide classes listed under "exclude" in the manifest are not counted.
# The file is closed explicitly, and a missing or empty "exclude" entry
# is treated as "nothing excluded" instead of crashing.
with open(yaml_file) as manifest_file:
    manifest = yaml.safe_load(manifest_file) or {}
excluded_classes = manifest.get("exclude") or []
|
|
|
|
|
|
class State(object):
    """Running counters kept while walking through the slides of a deck.

    The caller increments ``current_slide`` and ``section_slides`` directly;
    this class only tracks the current section and accumulates the number
    of slides per part in ``parts``.
    """

    def __init__(self):
        # Slide counter maintained by the caller; starts at -1.
        self.current_slide = -1
        # Maps a part anchor (e.g. "part-1") to its accumulated slide count.
        self.parts = {}
        # Per-section fields; they are reset by new_section(). Giving them
        # defaults here makes end_section() safe to call at any time.
        self.section_title = None
        self.section_part = None
        self.section_start = self.current_slide
        self.section_slides = 0

    def end_section(self):
        """Report the section that just ended and add its size to its part.

        Prints one tab-separated line (start, number of slides, title) if
        the section has a title. If the section belongs to a part, its
        slide count is added to that part's total in ``self.parts``.
        """
        # Use self (not a module-level "state" global) so that the method
        # works on any instance.
        if self.section_title:
            print(
                "{0.section_start}\t{0.section_slides}\t{0.section_title}".format(self)
            )
        if self.section_part:
            self.parts[self.section_part] = (
                self.parts.get(self.section_part, 0) + self.section_slides
            )

    def new_section(self, slide):
        """Start a new section described by the text of ``slide``.

        Extracts the section title and the part it belongs to (if any),
        records the current slide counter as the start of the section,
        and resets the section's slide count to zero.
        """
        # Normally, the title should be prefixed by a space
        # (because section titles are first-level titles in markdown,
        # e.g. "# Introduction", and markmaker removes the # but leaves
        # the leading space).
        self.section_title = None
        if "\n " in slide:
            self.section_title = slide.split("\n ")[1].split("\n")[0]
        # Raw string avoids invalid-escape warnings; the non-greedy match
        # keeps each link separate even when several share one line.
        toc_links = re.findall(r"\(#toc-(.*?)\)", slide)
        self.section_part = None
        # If several links point to a part, the last one wins.
        for toc_link in toc_links:
            if toc_link.startswith(PART_PREFIX):
                self.section_part = toc_link
        self.section_start = self.current_slide
        self.section_slides = 0
|
|
|
|
|
|
state = State()
# Start with an empty, untitled section so that slides seen before the
# first section marker have somewhere to be counted.
state.new_section("")
print("{}\t{}\t{}".format("index", "size", "title"))

# Read the whole deck up front so the file is closed before processing.
with open(html_file) as deck:
    slides = deck.read().split("\n---\n")

for slide in slides:
    # A slide is skipped when one of its "class:" lines mentions any of
    # the excluded classes as a whitespace-separated word.
    excluded = any(
        klass in line.split()
        for line in slide.split("\n")
        if line.startswith("class:")
        for klass in excluded_classes
    )
    if excluded:
        continue
    if FIRST_SLIDE_MARKER in slide:
        # A new section starts. Show info about the part that just ended.
        state.end_section()
        state.new_section(slide)
    state.section_slides += 1
    # Each "\n--\n"-separated fragment of the slide advances the counter.
    state.current_slide += len(slide.split("\n--\n"))

# Report the last section, which no later section marker will close.
state.end_section()

# Parts are named "<prefix>-<number>"; list their totals in numeric order.
for part in sorted(state.parts, key=lambda f: int(f.split("-")[1])):
    print("{}\t{}\t{}".format(0, state.parts[part], "total size for " + part))
|