Removed obsolete stuff and added some scripts

This commit is contained in:
Yann Esposito (Yogsototh) 2010-04-19 16:10:13 +02:00
parent 419cc6cc73
commit 855e889966
8 changed files with 4548 additions and 135 deletions

View file

@ -1,33 +0,0 @@
#!/usr/bin/perl
# Runs at compile time, before any input is read: sets the input record
# separator ($/) to the empty string, which is Perl's paragraph mode, so the
# substitutions in this script see blank-line-separated records rather than
# single lines.
BEGIN{
$/="";
}
# replaceminim(begin, end, keep)
# Rewrites the current record ($_) in place, handling every region delimited
# by `begin` ... `end`:
#   keep true  -> the delimiters are removed and the inner text is kept
#   keep false -> the whole region, delimiters included, is removed
# NOTE: `begin` and `end` are interpolated into the patterns as regular
# expressions, not as quoted literals.
sub replaceminim {
my $begin = $_[0] ;
my $end = $_[1] ;
my $keep = $_[2] ;
# Shield literal X and Y so they can be used as temporary markers
s/X/_wasx_/g;
s/Y/_wasy_/g;
# Collapse each delimiter string into a single character
s/$begin/X/g; # begin
s/$end/Y/g; # end
# From here on every X is a begin delimiter
# and every Y is an end delimiter;
# we want to get at what lies between them
if ($keep) {
s/X(([^XY]|\n)*?)Y/$1/gm; # strip the delimiters, keep the inner text
} else {
s/X([^XY]|\n)*?Y//gm; # remove the whole begin..end region
}
# Restore the shielded X and Y, and any begin/end left unprocessed
s/X/$begin/g;
s/Y/$end/g;
s/_wasx_/X/g;
s/_wasy_/Y/g;
}

View file

@ -1,22 +0,0 @@
#!/usr/bin/env zsh
# Rebuilds content/html/<lang> for each language (fr, en): copies the web root
# content, renames .html files to .md, deletes .xml files, then generates a
# per-language Perl filter from recup.pl and applies it in place.
root=/home/e640846/Sites/n3blog
# Stop immediately if the project root is unreachable: everything below uses
# relative paths, including a recursive `rm -f`, and must not run elsewhere.
cd $root || exit 1
for langue in fr en; do
    # Leading backslash bypasses any alias defined for cp / rm.
    \cp -r ../webroot/content/* content/html/$langue
    # Rename every .html file of the language tree to .md.
    for fic in content/html/$langue/**/*.html; do
        mv -f $fic $(dirname $fic)/$(basename $fic .html).md
    done
    \rm -f content/html/$langue/**/*.xml
    # Start the per-language filter from the shared replaceminim definition.
    cp recup.pl recup$langue.pl
    [[ $langue = "fr" ]] && other=en
    [[ $langue = "en" ]] && other=fr
    # For each markup style: keep the current language (1), drop the other (0).
    echo 'replaceminim( "'$langue':","::", 1);' >> recup$langue.pl
    echo 'replaceminim( "'$other':","::", 0);' >> recup$langue.pl
    echo 'replaceminim( "<'$langue'>", "</'$langue'>", 1);' >> recup$langue.pl
    echo 'replaceminim( "<'$other'>", "</'$other'>", 0);' >> recup$langue.pl
    echo 'replaceminim( "!!'$langue'!!", "!!!!", 1);' >> recup$langue.pl
    echo 'replaceminim( "!!'$other'!!", "!!!!", 0);' >> recup$langue.pl
    echo 's#date:#created_at:#' >> recup$langue.pl
    # Apply the generated filter in place; the (.) glob qualifier restricts
    # the match to plain files.
    perl -pi recup$langue.pl content/html/$langue/**/*(.)
done

View file

@ -1,40 +0,0 @@
#!/usr/bin/perl
# Runs at compile time, before any input is read: sets the input record
# separator ($/) to the empty string, which is Perl's paragraph mode, so the
# substitutions in this script see blank-line-separated records rather than
# single lines.
BEGIN{
$/="";
}
# replaceminim(begin, end, keep)
# Rewrites the current record ($_) in place, handling every region delimited
# by `begin` ... `end`:
#   keep true  -> the delimiters are removed and the inner text is kept
#   keep false -> the whole region, delimiters included, is removed
# NOTE: `begin` and `end` are interpolated into the patterns as regular
# expressions, not as quoted literals.
sub replaceminim {
my $begin = $_[0] ;
my $end = $_[1] ;
my $keep = $_[2] ;
# Shield literal X and Y so they can be used as temporary markers
s/X/_wasx_/g;
s/Y/_wasy_/g;
# Collapse each delimiter string into a single character
s/$begin/X/g; # begin
s/$end/Y/g; # end
# From here on every X is a begin delimiter
# and every Y is an end delimiter;
# we want to get at what lies between them
if ($keep) {
s/X(([^XY]|\n)*?)Y/$1/gm; # strip the delimiters, keep the inner text
} else {
s/X([^XY]|\n)*?Y//gm; # remove the whole begin..end region
}
# Restore the shielded X and Y, and any begin/end left unprocessed
s/X/$begin/g;
s/Y/$end/g;
s/_wasx_/X/g;
s/_wasy_/Y/g;
}
# English output: for each of the three markup styles, keep the text of the
# "en" regions (third argument 1) and remove the "fr" regions (third argument 0).
replaceminim( "en:","::", 1);
replaceminim( "fr:","::", 0);
replaceminim( "<en>", "</en>", 1);
replaceminim( "<fr>", "</fr>", 0);
replaceminim( "!!en!!", "!!!!", 1);
replaceminim( "!!fr!!", "!!!!", 0);
# Rename "date:" to "created_at:"; without /g only the first occurrence in
# each record is replaced.
s#date:#created_at:#

View file

@ -1,40 +0,0 @@
#!/usr/bin/perl
# Runs at compile time, before any input is read: sets the input record
# separator ($/) to the empty string, which is Perl's paragraph mode, so the
# substitutions in this script see blank-line-separated records rather than
# single lines.
BEGIN{
$/="";
}
# replaceminim(begin, end, keep)
# Rewrites the current record ($_) in place, handling every region delimited
# by `begin` ... `end`:
#   keep true  -> the delimiters are removed and the inner text is kept
#   keep false -> the whole region, delimiters included, is removed
# NOTE: `begin` and `end` are interpolated into the patterns as regular
# expressions, not as quoted literals.
sub replaceminim {
my $begin = $_[0] ;
my $end = $_[1] ;
my $keep = $_[2] ;
# Shield literal X and Y so they can be used as temporary markers
s/X/_wasx_/g;
s/Y/_wasy_/g;
# Collapse each delimiter string into a single character
s/$begin/X/g; # begin
s/$end/Y/g; # end
# From here on every X is a begin delimiter
# and every Y is an end delimiter;
# we want to get at what lies between them
if ($keep) {
s/X(([^XY]|\n)*?)Y/$1/gm; # strip the delimiters, keep the inner text
} else {
s/X([^XY]|\n)*?Y//gm; # remove the whole begin..end region
}
# Restore the shielded X and Y, and any begin/end left unprocessed
s/X/$begin/g;
s/Y/$end/g;
s/_wasx_/X/g;
s/_wasy_/Y/g;
}
# French output: for each of the three markup styles, keep the text of the
# "fr" regions (third argument 1) and remove the "en" regions (third argument 0).
replaceminim( "fr:","::", 1);
replaceminim( "en:","::", 0);
replaceminim( "<fr>", "</fr>", 1);
replaceminim( "<en>", "</en>", 0);
replaceminim( "!!fr!!", "!!!!", 1);
replaceminim( "!!en!!", "!!!!", 0);
# Rename "date:" to "created_at:"; without /g only the first occurrence in
# each record is replaced.
s#date:#created_at:#

View file

@ -0,0 +1,57 @@
#!/usr/bin/env ruby
# Accumulates raw occurrence counts of words over any number of text
# fragments and prints them in ascending order of frequency.
class Frequencies
  # Number of accepted words seen so far (repetitions included).
  attr_reader :nbwords

  # Starts with an empty tally.
  def initialize
    @nbwords = 0
    @tag = Hash.new(0)
  end

  # Tells whether a token must be ignored.
  #
  # @param w [String] a token produced by splitting on word boundaries
  # @return [Boolean] true when the token is shorter than three characters or
  #   contains anything other than word characters
  def is_bad_word(w)
    w.length < 3 || w !~ /\A(\w|\d)*\Z/
  end

  # Splits the text on word boundaries and counts every accepted token.
  #
  # @param content [String] text fragment to analyse
  def update_density(content)
    content.split(/\b/).each do |token|
      next if is_bad_word(token)

      @nbwords += 1
      @tag[token] += 1
    end
  end

  # Prints one "<count> <word>" line per word, least frequent first.
  def show_tags
    ascending = @tag.keys.sort_by { |word| @tag[word] }
    ascending.each { |word| puts %(#{@tag[word]} #{word}) }
  end
end
# Tallies word occurrences over every file named on the command line, echoing
# each file name as it is processed, then prints the combined counts.
f = Frequencies.new
ARGV.each do |file|
  puts '[' + file + ']'
  # File.foreach closes the handle itself, even if counting raises midway.
  File.foreach(file) { |line| f.update_density(line) }
end
f.show_tags

69
tasks/auto_tags/density.rb Executable file
View file

@ -0,0 +1,69 @@
#!/usr/bin/env ruby
# Ranks the words of a document by how over-represented they are compared
# with reference occurrence counts loaded from a file.
class Density
  # Loads the reference counts.
  #
  # @param path [String] file holding one "<count> <word>" entry per line;
  #   defaults to 'frequencies.val' in the current directory
  # @raise [SystemCallError] when the file cannot be opened
  def initialize(path = 'frequencies.val')
    @sum = 0
    @all_freq = {}
    # File.foreach closes the handle itself, even if parsing raises.
    File.foreach(path) do |line|
      # Lines that are not "<count> <word>" are skipped instead of being
      # recorded as a bogus nil => 0 entry.
      next unless line =~ /([^ ]*) (.*)/

      nb = Regexp.last_match(1).to_i
      @all_freq[Regexp.last_match(2)] = nb
      @sum += nb
    end
  end

  # Computes the relative frequency of each word of the text.
  #
  # @param content [String] text to analyse
  # @return [Hash{String => Float}] occurrences / accepted word count, for
  #   words of at least three word characters that occur more than once
  def density(content)
    tag = Hash.new(0)
    nbwords = 0
    content.split(/\b/).each do |m|
      next if m.length < 3 || m !~ /\A(\w|\d)*\Z/

      nbwords += 1
      tag[m] += 1
    end
    nbwords = nbwords.to_f
    res = {}
    tag.each do |w, v|
      # Words seen only once are not reported.
      next if v == 1

      res[w] = v / nbwords
    end
    res
  end

  # Prints up to eleven "<density>: <word>" lines, ordered by decreasing
  # ratio of document density to reference frequency.
  #
  # @param hash [Hash{String => Float}] densities as returned by #density
  def show_tags(hash)
    # Float division: with integers count / sum truncates to 0 and every
    # score degenerates to -Infinity.
    total = @sum.to_f
    ranked = hash.keys.sort_by do |k|
      # A word absent from the reference counts (or listed with a count of 0)
      # is treated as having one occurrence: it ranks as rare instead of
      # raising on nil.
      reference = [@all_freq.fetch(k, 1), 1].max
      -hash[k] / (reference / total)
    end
    ranked.first(11).each { |k| puts %(#{hash[k]}: #{k}) }
  end
end
# For every file named on the command line: echo its name, compute the word
# densities of its content and print its top-ranked words.
freq_for = {}
d = Density.new
ARGV.each do |file|
  puts '[' + file + ']'
  # File.read opens, reads and closes the file in one call, so no handle is
  # left open if a later step raises.
  freq_for[file] = d.density(File.read(file))
  d.show_tags(freq_for[file])
end

File diff suppressed because it is too large Load diff

72
tasks/auto_tags/tf.rb Executable file
View file

@ -0,0 +1,72 @@
#!/usr/bin/env ruby
# Computes the relative frequency of each word of a text.
#
# Tokens come from splitting on word boundaries; only tokens of at least three
# characters made solely of word characters are counted.
#
# @param content [String] text to analyse
# @return [Hash{String => Float}] occurrences / accepted word count, for words
#   that occur more than once
def tf(content)
  counts = Hash.new(0)
  content.split(/\b/).each do |token|
    next if token.length < 3 || token !~ /\A(\w|\d)*\Z/

    counts[token] += 1
  end
  # The number of accepted words is the sum of the individual counts.
  total = counts.values.inject(0) { |acc, occurrences| acc + occurrences }.to_f
  counts.each_with_object({}) do |(word, occurrences), ratios|
    ratios[word] = occurrences / total unless occurrences == 1
  end
end
# Prints up to eleven "<value>: <key>" lines, highest value first.
#
# @param hash [Hash{String => Numeric}] scores indexed by word
def show_tags(hash)
  descending = hash.keys.sort_by { |word| -hash[word] }
  descending.first(11).each { |word| puts %(#{hash[word]}: #{word}) }
end
# Compares the word frequencies of each file named on the command line with
# the frequencies of all those files concatenated, and prints for each file
# the words with the highest file-to-global ratio.
freq_for = {}
contents = []
ARGV.each do |file|
  puts '[' + file + ']'
  # File.read opens, reads and closes the file in one call, so no handle is
  # left open if a later step raises.
  content = File.read(file)
  freq_for[file] = tf(content)
  contents << content
end
# Frequencies over the plain concatenation of every file.
all_freq = tf(contents.join)
res_for = {}
freq_for.each do |file, hash|
  puts %{==== Frequencie for #{file} =====}
  res_for[file] = {}
  hash.each do |k, v|
    global = all_freq[k]
    # tf never yields 0, but it can omit a word (nil) when tokens merge at a
    # file boundary in the concatenation; skip such words instead of raising
    # on v / nil.
    next if global.nil? || global == 0

    res_for[file][k] = v / global
  end
  show_tags(res_for[file])
end