Removed obsolete stuff and added some scripts
This commit is contained in:
parent
419cc6cc73
commit
855e889966
8 changed files with 4548 additions and 135 deletions
33
recup.pl
33
recup.pl
|
@ -1,33 +0,0 @@
|
|||
#!/usr/bin/perl
# Template for the per-language filter scripts: defines replaceminim(), which
# edits the current record held in $_.

BEGIN {
    # Paragraph mode: input records are separated by blank lines, so a single
    # record (and therefore a single begin..end section) may span several
    # lines but never crosses a blank line.
    $/ = "";
}

# replaceminim(BEGIN, END, KEEP)
#
# Processes every BEGIN..END section found in $_ (modified in place):
#   - KEEP true : the delimiters are dropped and the enclosed text is kept;
#   - KEEP false: the delimiters and the enclosed text are both removed.
# BEGIN and END are literal strings, not regular expressions.
# Delimiters that do not form a complete BEGIN..END pair are left untouched.
sub replaceminim {
    my ($begin, $end, $keep) = @_;

    # Protect the literal X and Y already present in the text, since X and Y
    # are used below as one-character stand-ins for the delimiters.
    s/X/_wasx_/g;
    s/Y/_wasy_/g;

    # Collapse each delimiter into a single character. \Q...\E makes the
    # delimiters match literally even if they contain regex metacharacters.
    s/\Q$begin\E/X/g;
    s/\Q$end\E/Y/g;

    # From here on every X is a BEGIN and every Y is an END. A section is an
    # X followed by anything that is neither X nor Y (newlines included), then Y.
    if ($keep) {
        s/X([^XY]*?)Y/$1/g;    # keep the content, drop the delimiters
    } else {
        s/X[^XY]*?Y//g;        # drop the whole section
    }

    # Restore unmatched delimiters, then the protected X and Y.
    s/X/$begin/g;
    s/Y/$end/g;
    s/_wasx_/X/g;
    s/_wasy_/Y/g;
}
|
22
recupall
22
recupall
|
@ -1,22 +0,0 @@
|
|||
#!/usr/bin/env zsh
|
||||
|
||||
root=/home/e640846/Sites/n3blog
|
||||
cd $root
|
||||
for langue in fr en; do
|
||||
\cp -r ../webroot/content/* content/html/$langue
|
||||
for fic in content/html/$langue/**/*.html; do
|
||||
mv -f $fic $(dirname $fic)/$(basename $fic .html).md
|
||||
done
|
||||
\rm -f content/html/$langue/**/*.xml
|
||||
cp recup.pl recup$langue.pl
|
||||
[[ $langue = "fr" ]] && other=en
|
||||
[[ $langue = "en" ]] && other=fr
|
||||
echo 'replaceminim( "'$langue':","::", 1);' >> recup$langue.pl
|
||||
echo 'replaceminim( "'$other':","::", 0);' >> recup$langue.pl
|
||||
echo 'replaceminim( "<'$langue'>", "</'$langue'>", 1);' >> recup$langue.pl
|
||||
echo 'replaceminim( "<'$other'>", "</'$other'>", 0);' >> recup$langue.pl
|
||||
echo 'replaceminim( "!!'$langue'!!", "!!!!", 1);' >> recup$langue.pl
|
||||
echo 'replaceminim( "!!'$other'!!", "!!!!", 0);' >> recup$langue.pl
|
||||
echo 's#date:#created_at:#' >> recup$langue.pl
|
||||
perl -pi recup$langue.pl content/html/$langue/**/*(.)
|
||||
done
|
40
recupen.pl
40
recupen.pl
|
@ -1,40 +0,0 @@
|
|||
#!/usr/bin/perl
# English filter: keeps the English sections of each record, removes the
# French ones, and renames the first "date:" of the record to "created_at:".

BEGIN {
    # Paragraph mode: input records are separated by blank lines, so a single
    # record (and therefore a single begin..end section) may span several
    # lines but never crosses a blank line.
    $/ = "";
}

# replaceminim(BEGIN, END, KEEP)
#
# Processes every BEGIN..END section found in $_ (modified in place):
#   - KEEP true : the delimiters are dropped and the enclosed text is kept;
#   - KEEP false: the delimiters and the enclosed text are both removed.
# BEGIN and END are literal strings, not regular expressions.
# Delimiters that do not form a complete BEGIN..END pair are left untouched.
sub replaceminim {
    my ($begin, $end, $keep) = @_;

    # Protect the literal X and Y already present in the text, since X and Y
    # are used below as one-character stand-ins for the delimiters.
    s/X/_wasx_/g;
    s/Y/_wasy_/g;

    # Collapse each delimiter into a single character. \Q...\E makes the
    # delimiters match literally even if they contain regex metacharacters.
    s/\Q$begin\E/X/g;
    s/\Q$end\E/Y/g;

    # From here on every X is a BEGIN and every Y is an END. A section is an
    # X followed by anything that is neither X nor Y (newlines included), then Y.
    if ($keep) {
        s/X([^XY]*?)Y/$1/g;    # keep the content, drop the delimiters
    } else {
        s/X[^XY]*?Y//g;        # drop the whole section
    }

    # Restore unmatched delimiters, then the protected X and Y.
    s/X/$begin/g;
    s/Y/$end/g;
    s/_wasx_/X/g;
    s/_wasy_/Y/g;
}

replaceminim( "en:","::", 1);
replaceminim( "fr:","::", 0);
replaceminim( "<en>", "</en>", 1);
replaceminim( "<fr>", "</fr>", 0);
replaceminim( "!!en!!", "!!!!", 1);
replaceminim( "!!fr!!", "!!!!", 0);
s#date:#created_at:#
|
40
recupfr.pl
40
recupfr.pl
|
@ -1,40 +0,0 @@
|
|||
#!/usr/bin/perl
# French filter: keeps the French sections of each record, removes the
# English ones, and renames the first "date:" of the record to "created_at:".

BEGIN {
    # Paragraph mode: input records are separated by blank lines, so a single
    # record (and therefore a single begin..end section) may span several
    # lines but never crosses a blank line.
    $/ = "";
}

# replaceminim(BEGIN, END, KEEP)
#
# Processes every BEGIN..END section found in $_ (modified in place):
#   - KEEP true : the delimiters are dropped and the enclosed text is kept;
#   - KEEP false: the delimiters and the enclosed text are both removed.
# BEGIN and END are literal strings, not regular expressions.
# Delimiters that do not form a complete BEGIN..END pair are left untouched.
sub replaceminim {
    my ($begin, $end, $keep) = @_;

    # Protect the literal X and Y already present in the text, since X and Y
    # are used below as one-character stand-ins for the delimiters.
    s/X/_wasx_/g;
    s/Y/_wasy_/g;

    # Collapse each delimiter into a single character. \Q...\E makes the
    # delimiters match literally even if they contain regex metacharacters.
    s/\Q$begin\E/X/g;
    s/\Q$end\E/Y/g;

    # From here on every X is a BEGIN and every Y is an END. A section is an
    # X followed by anything that is neither X nor Y (newlines included), then Y.
    if ($keep) {
        s/X([^XY]*?)Y/$1/g;    # keep the content, drop the delimiters
    } else {
        s/X[^XY]*?Y//g;        # drop the whole section
    }

    # Restore unmatched delimiters, then the protected X and Y.
    s/X/$begin/g;
    s/Y/$end/g;
    s/_wasx_/X/g;
    s/_wasy_/Y/g;
}

replaceminim( "fr:","::", 1);
replaceminim( "en:","::", 0);
replaceminim( "<fr>", "</fr>", 1);
replaceminim( "<en>", "</en>", 0);
replaceminim( "!!fr!!", "!!!!", 1);
replaceminim( "!!en!!", "!!!!", 0);
s#date:#created_at:#
|
57
tasks/auto_tags/all_frequencies.rb
Executable file
57
tasks/auto_tags/all_frequencies.rb
Executable file
|
@ -0,0 +1,57 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
# Accumulates raw word counts over any number of text fragments.
class Frequencies
  # Number of accepted words counted so far (repetitions included).
  attr_reader :nbwords

  def initialize
    @nbwords = 0
    # word => occurrences; unseen words start at 0 so they can be incremented
    # without a presence check.
    @tag = Hash.new(0)
  end

  # Tells whether +w+ must be ignored: shorter than 3 characters, or
  # containing anything other than word characters (\w already covers
  # digits). \z, unlike \Z, also rejects a token ending in a newline.
  #
  # @param w [String] candidate token
  # @return [Boolean] true when the token must not be counted
  def is_bad_word(w)
    w.length < 3 || w !~ /\A\w*\z/
  end

  # Splits +content+ on word boundaries and counts every accepted token.
  #
  # @param content [String] text to analyse
  def update_density(content)
    content.split(/\b/).each do |word|
      next if is_bad_word(word)

      @nbwords += 1
      @tag[word] += 1
    end
  end

  # Prints one "<count> <word>" line per word, least frequent first.
  def show_tags
    @tag.sort_by { |_word, count| count }.each do |word, count|
      puts "#{count} #{word}"
    end
  end
end
|
||||
|
||||
# Count the words of every file named on the command line, then print the
# cumulated counts.
f = Frequencies.new

ARGV.each do |file|
  puts "[#{file}]"
  # File.foreach closes the file itself, even if update_density raises.
  File.foreach(file) { |line| f.update_density(line) }
end

f.show_tags
|
69
tasks/auto_tags/density.rb
Executable file
69
tasks/auto_tags/density.rb
Executable file
|
@ -0,0 +1,69 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
# Ranks the words of a document by how much more frequent they are in that
# document than in a reference corpus.
class Density
  # Loads the reference counts.
  #
  # Each useful line has the form "<count> <word>". Lines that do not start
  # with a count followed by a space are skipped instead of being stored
  # under a nil key.
  #
  # @param path [String] reference file, by default 'frequencies.val' in the
  #   current directory
  def initialize(path = 'frequencies.val')
    @sum = 0
    @all_freq = {}
    # File.foreach closes the file itself, even on error.
    File.foreach(path) do |line|
      entry = /\A(\d+) (.*)$/.match(line)
      next unless entry

      count = entry[1].to_i
      @all_freq[entry[2]] = count
      @sum += count
    end
  end

  # Computes the relative frequency of each word of +content+.
  #
  # Tokens shorter than 3 characters or containing non-word characters are
  # ignored, and words occurring only once are left out of the result.
  #
  # @param content [String] text to analyse
  # @return [Hash{String => Float}] word => occurrences / accepted words
  def density(content)
    counts = Hash.new(0)
    total = 0
    content.split(/\b/).each do |word|
      next if word.length < 3 || word !~ /\A\w*\z/

      total += 1
      counts[word] += 1
    end

    total = total.to_f
    counts.each_with_object({}) do |(word, count), res|
      res[word] = count / total unless count == 1
    end
  end

  # Prints "<density>: <word>" for the words whose density is highest
  # relative to the reference corpus, best first.
  #
  # @param hash [Hash{String => Float}] result of #density
  # @param limit [Integer] number of words printed (11 keeps the historical
  #   [0..10] slice)
  def show_tags(hash, limit = 11)
    ranked = hash.keys.sort_by { |word| -hash[word] / reference_frequency(word) }
    ranked.first(limit).each do |word|
      puts "#{hash[word]}: #{word}"
    end
  end

  private

  # Share of +word+ in the reference corpus, as a Float so the ratio is not
  # truncated to 0 by integer division. Unknown words (or words recorded
  # with a count of 0) are treated as seen once, which avoids nil and zero
  # divisors. With an empty reference corpus every word gets the same
  # weight, so the ranking falls back to the plain density.
  def reference_frequency(word)
    return 1.0 if @sum.zero?

    [@all_freq.fetch(word, 0), 1].max / @sum.to_f
  end
end
|
||||
|
||||
# For every file named on the command line, print its name followed by its
# most characteristic words.
freq_for = {}
d = Density.new

ARGV.each do |file|
  puts "[#{file}]"
  # File.read opens, reads and closes the file in one call, so no handle is
  # left open if the analysis raises.
  freq_for[file] = d.density(File.read(file))
  d.show_tags(freq_for[file])
end
|
4350
tasks/auto_tags/frequencies.val
Normal file
4350
tasks/auto_tags/frequencies.val
Normal file
File diff suppressed because it is too large
Load diff
72
tasks/auto_tags/tf.rb
Executable file
72
tasks/auto_tags/tf.rb
Executable file
|
@ -0,0 +1,72 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
# Computes the term frequency of each word of +content+.
#
# The text is split on word boundaries; tokens shorter than 3 characters or
# containing anything other than word characters (\w already covers digits)
# are ignored. Words occurring only once are left out of the result.
#
# @param content [String] text to analyse
# @return [Hash{String => Float}] word => occurrences / accepted words
def tf(content)
  counts = Hash.new(0)
  total = 0
  content.split(/\b/).each do |word|
    next if word.length < 3 || word !~ /\A\w*\z/

    total += 1
    counts[word] += 1
  end

  # Float divisor so the ratios are not truncated by integer division.
  total = total.to_f
  counts.each_with_object({}) do |(word, count), res|
    res[word] = count / total unless count == 1
  end
end
|
||||
|
||||
# Prints "<score>: <word>" for the highest-scoring words, best first.
#
# @param hash [Hash{String => Numeric}] word => score
# @param limit [Integer] number of words printed (11 keeps the historical
#   [0..10] slice)
def show_tags(hash, limit = 11)
  ranked = hash.sort_by { |_word, score| -score }
  ranked.first(limit).each do |word, score|
    puts "#{score}: #{word}"
  end
end
|
||||
|
||||
# For every file named on the command line, rank its words by the ratio
# between their frequency in that file and their frequency in all the files
# taken together.
freq_for = {}
contents = []

ARGV.each do |file|
  puts "[#{file}]"
  # File.read opens, reads and closes the file in one call.
  content = File.read(file)
  freq_for[file] = tf(content)
  contents << content
end

# Join with a newline: without a separator the last word of a file lacking a
# trailing newline would fuse with the first word of the next file and
# distort the global counts.
all_freq = tf(contents.join("\n"))

res_for = {}
freq_for.each do |file, hash|
  puts %{==== Frequencie for #{file} =====}
  res_for[file] = {}
  hash.each do |word, freq|
    global = all_freq[word]
    # all_freq has no default value: a missing word yields nil, which must be
    # skipped just like a zero to avoid an invalid division.
    next if global.nil? || global.zero?

    res_for[file][word] = freq / global
  end
  show_tags(res_for[file])
end
|
Loading…
Reference in a new issue