2019-12-06 14:34:36 +00:00
|
|
|
#+TITLE: Optimize the size of no js websites
|
|
|
|
#+AUTHOR: Yann Esposito
|
|
|
|
#+EMAIL: yann@esposito.host
|
|
|
|
#+DATE: [2019-12-06 Fri]
|
|
|
|
#+KEYWORDS: blog, shell, script
|
|
|
|
#+DESCRIPTION: How to optimize the size of a full website by using information in both HTML and CSS.
|
|
|
|
#+OPTIONS: auto-id:t toc:nil
|
|
|
|
|
|
|
|
One of the major problems with CSS and HTML is that they are highly
|
|
|
|
dependent on each other.
|
|
|
|
For example, if you want to minimize your CSS, you are still forced to use
|
|
|
|
the same class names even if they are long.
|
|
|
|
Because the HTML uses them.
|
|
|
|
And the same problem arises when you want to minimize the size of your HTML
|
|
|
|
files.
|
|
|
|
|
|
|
|
It means that if you want to minimize a full website you must take care at
|
|
|
|
the same time of HTML pages as well as CSS pages.
|
|
|
|
And this is totally impossible to achieve if JS is involved because there
|
|
|
|
is always the risk that the JS code generates class names to manipulate the DOM.
|
|
|
|
|
|
|
|
So here is a small script I have wanted to write for a long time that does the following:
|
|
|
|
|
|
|
|
1. retrieve all class names in the HTML and in the CSS
|
2019-12-06 18:05:06 +00:00
|
|
|
2. create a map from those long names to shorter names
|
2019-12-06 14:34:36 +00:00
|
|
|
3. replace the class names in the HTML and CSS files.
|
|
|
|
|
2019-12-06 18:05:06 +00:00
|
|
|
So if you have multiple HTML files with:
|
|
|
|
|
|
|
|
#+begin_src html
|
|
|
|
<div class="long-org-class-generated-by-org-mode">...</div>
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
and CSS files with:
|
|
|
|
|
|
|
|
#+begin_src css
|
|
|
|
pre .long-org-class-generated-by-org-mode { ... }
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
Those will be replaced by something like:
|
|
|
|
|
|
|
|
#+begin_src html
|
|
|
|
<div class="av">...</div>
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
and CSS files with:
|
|
|
|
|
|
|
|
#+begin_src css
|
|
|
|
pre .av { ... }
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
And thus removing many superfluous bytes.
|
|
|
|
|
2019-12-06 18:14:53 +00:00
|
|
|
In my personal website, I run this script after minifying my HTML and CSS
|
2019-12-06 18:05:06 +00:00
|
|
|
with classical tools.
|
|
|
|
And I still get up to 32% smaller HTML and 22% smaller CSS.
|
|
|
|
|
|
|
|
Typically 25% smaller HTML if there is a lot of code, because org-mode uses very
|
|
|
|
long class names when generating the code.
|
|
|
|
|
|
|
|
Not bad for a very basic solution.
|
|
|
|
|
|
|
|
If you want to try it, here is the quick and dirty script I use:
|
2019-12-06 14:34:36 +00:00
|
|
|
|
|
|
|
#+name: optim-classes.sh
|
|
|
|
#+begin_src bash
|
2019-12-06 18:05:06 +00:00
|
|
|
#!/usr/bin/env zsh
|
2019-12-06 14:34:36 +00:00
|
|
|
|
|
|
|
# Root directory of the generated website. Defaults to "_site"; pass another
# directory as the first argument to process a different build output.
webdir="${1:-_site}"
|
|
|
|
|
|
|
|
retrieve_classes_in_html () {
|
|
|
|
cat $webdir/**/*.html(N) | \
|
|
|
|
perl -pe 's/class="?([a-zA-Z0-9_-]*)/\nCLASS: $1\n/g'
|
|
|
|
}
|
|
|
|
|
|
|
|
retrieve_classes_in_css () {
|
|
|
|
cat $webdir/**/*.css(N) | \
|
|
|
|
perl -pe 's/\.([a-zA-Z-_][a-zA-Z0-9-_]*)/\nCLASS: $1\n/g'
|
|
|
|
}
|
|
|
|
|
|
|
|
# All class names longer than two characters found in the HTML and CSS files,
# without duplicates, ordered from the longest name to the shortest.
# Pipeline: keep the "CLASS: name" lines, deduplicate, prefix each name with
# its length (dropping names of 1-2 chars), sort by that length descending,
# then strip the length again.
# `grep -E` replaces the obsolescent `egrep` alias.
classes=( $( { retrieve_classes_in_html; retrieve_classes_in_css; } | \
                 grep -E "^CLASS: [^ ]*$" | \
                 sort -u | \
                 awk 'length($2)>2 {print length($2),$2}' | \
                 sort -rn | \
                 awk '{print $2}') )
|
|
|
|
|
|
|
|
# Print (without a trailing newline) the lowercase letter at index $1,
# where 0 is "a" and 25 is "z". Returns 1 and prints nothing when $1 is
# not below 26.
chr() {
    if ! [ "$1" -lt 26 ]; then
        return 1
    fi
    # Build the octal escape of the letter (97 is "a") and let printf
    # turn that escape into the actual character.
    local octal
    octal=$(printf '%03o' $(( 97 + $1 )))
    printf "\\${octal}"
}
|
|
|
|
|
|
|
|
shortName() {
|
|
|
|
if [ "$1" -gt 25 ]; then
|
|
|
|
print -- $(shortName $(( ( $1 / 26 ) - 1 )))$(shortName $(( $1 % 26 )))
|
|
|
|
else
|
|
|
|
chr $1
|
|
|
|
fi
|
|
|
|
}
|
|
|
|
|
|
|
|
# Map every long class name to a short one: the N-th entry of $classes
# receives the N-th name produced by shortName. Each pair is echoed as
# "long -> short" so the mapping can be inspected.
typeset -A assoc
for (( i = 0; i < ${#classes}; i++ )); do
    c=${classes[i + 1]}    # zsh arrays start at 1, short names at 0
    sn=$(shortName $i)
    print -- "$c -> $sn"
    assoc[$c]=$sn
done
|
|
|
|
|
|
|
|
htmlreplacer=''
|
|
|
|
cssreplacer=''
|
|
|
|
for long in $classes; do
|
|
|
|
htmlreplacer=$htmlreplacer's#class=("?)'${long}'#class=$1'${assoc[$long]}'#g;'
|
|
|
|
cssreplacer=$cssreplacer's#\.'${long}'#.'${assoc[$long]}'#g;'
|
|
|
|
done
|
|
|
|
|
2019-12-06 18:05:06 +00:00
|
|
|
sizeof() {
|
|
|
|
stat --format="%s" "$*"
|
|
|
|
}
|
2019-12-06 14:34:36 +00:00
|
|
|
|
|
|
|
# Run a perl program in place on each file and report the size change as
# "file (before => after [percent])".
#   $1    perl program to run with `perl -pi -e`
#   $2..  files to rewrite
shrink_files() {
    local script=$1 fic before after saved
    shift
    for fic in "$@"; do
        before=$(sizeof $fic)
        print -n -- "$fic ($before"
        # Quoted so that an empty program is still passed as the -e argument
        # instead of vanishing and letting perl run the file name as code.
        perl -pi -e "$script" -- $fic
        after=$(sizeof $fic)
        # An empty file has nothing to save; this also avoids dividing by zero.
        if (( before > 0 )); then
            saved=$(( ((before - after) * 100) / before ))
        else
            saved=0
        fi
        print -- " => $after [$saved])"
    done
}

# With an empty $webdir the globs below would be rooted at "/" and rewrite
# every matching file on the machine, so refuse to go on.
: ${webdir:?must name the website directory}

shrink_files "$htmlreplacer" $webdir/**/*.{html,xml}(N)
shrink_files "$cssreplacer" $webdir/**/*.css(N)
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
A few remarks:
|
|
|
|
|
|
|
|
- to prevent doing the work twice, the script only takes care of class
|
|
|
|
names longer or equal to 3 chars. (=awk 'length($2)>2 {print
|
|
|
|
length($2),$2}'=). As a consequence, take care that your website does not
|
|
|
|
use class names shorter than 3 chars, otherwise it could mess with your CSS.
|
|
|
|
- The script does not change ids because those can be used for anchors and
|
|
|
|
thus can be part of public URLs.
|
|
|
|
- The script replaces the classes with the longest name first to prevent
|
|
|
|
bugs if one class name is a prefix of another one.
|
|
|
|
- We generate a long perl script to launch perl just once, this makes the
|
|
|
|
full find and replace way faster.
|
|
|
|
|
|
|
|
Of course this could be improved by providing the shortest name to the most
|
|
|
|
used classes, and also by using a better =shortName= function that could
|
|
|
|
use more chars.
|
|
|
|
But just this quick and dirty script already does a better job than
|
|
|
|
existing methods that do not take into account all the CSS and HTML files.
|