Removed obsolete stuff and added some scripts

2010-04-19 16:10:13 +02:00 · 2010-04-19 16:10:13 +02:00 · 855e889966
commit 855e889966
parent 419cc6cc73
8 changed files with 4548 additions and 135 deletions
--- a/recup.pl
+++ b/recup.pl
@ -1,33 +0,0 @@
 #!/usr/bin/perl
 # Set the input record separator to the empty string before any input is
 # read: Perl then reads in paragraph mode, so each record is a whole
 # blank-line-delimited paragraph rather than a single line.
 BEGIN{
    $/="";
 }
 # replaceminim(BEGIN, END, KEEP)
 #
 # Rewrites $_ in place. Every BEGIN ... END pair that has no other BEGIN or
 # END between its two delimiters is processed:
 #   KEEP true  : the delimiters are dropped, the enclosed text is kept.
 #   KEEP false : the delimiters and the enclosed text are both removed.
 # BEGIN and END are literal strings, not regexes. Delimiters that end up
 # unpaired are restored unchanged.
 sub replaceminim {
    my ($begin, $end, $keep) = @_;
    # An empty pattern makes Perl reuse the last successful regex, so an
    # empty delimiter cannot be processed meaningfully: leave $_ untouched.
    return unless length $begin and length $end;
    # Protect literal X and Y, which serve as one-character markers below.
    s/X/_wasx_/g;
    s/Y/_wasy_/g;
    # Collapse each delimiter into a single character. \Q...\E makes the
    # delimiters match literally even if they contain regex metacharacters,
    # consistent with the literal restoration at the end of this sub.
    s/\Q$begin\E/X/g;
    s/\Q$end\E/Y/g;
    # From here on every X is a BEGIN and every Y is an END.
    # [^XY] already matches newlines, so spans may cover several lines.
    if ($keep) {
        s/X([^XY]*?)Y/$1/g;    # drop the delimiters, keep the content
    } else {
        s/X[^XY]*?Y//g;        # drop the delimiters and the content
    }
    # Restore unpaired delimiters, then the protected X and Y.
    s/X/$begin/g;
    s/Y/$end/g;
    s/_wasx_/X/g;
    s/_wasy_/Y/g;
 }
--- a/22
+++ b/22
@ -1,22 +0,0 @@
 #!/usr/bin/env zsh
 # For each language (fr, en): copy ../webroot/content into
 # content/html/<lang>, rename every *.html to *.md, delete every *.xml,
 # generate recup<lang>.pl from recup.pl plus language-specific calls, and
 # run that Perl filter in place over every regular file of the tree.
 root=/home/e640846/Sites/n3blog
 # Everything below uses relative paths (including rm -f), so stop right
 # away if the project root cannot be entered.
 cd $root || exit 1
 for langue in fr en; do
    # The leading backslash bypasses any alias defined for cp / rm.
    \cp -r ../webroot/content/* content/html/$langue
    for fic in content/html/$langue/**/*.html; do
        mv -f $fic $(dirname $fic)/$(basename $fic .html).md
    done
    \rm -f content/html/$langue/**/*.xml
    # Start from the shared sub definition, then append the calls.
    cp recup.pl recup$langue.pl
    [[ $langue = "fr" ]] && other=en
    [[ $langue = "en" ]] && other=fr
    # Third argument 1 keeps the enclosed text, 0 removes it.
    echo 'replaceminim( "'$langue':","::", 1);' >> recup$langue.pl
    echo 'replaceminim( "'$other':","::", 0);' >> recup$langue.pl
    echo 'replaceminim( "<'$langue'>", "</'$langue'>", 1);' >> recup$langue.pl
    echo 'replaceminim( "<'$other'>", "</'$other'>", 0);' >> recup$langue.pl
    echo 'replaceminim( "!!'$langue'!!", "!!!!", 1);' >> recup$langue.pl
    echo 'replaceminim( "!!'$other'!!", "!!!!", 0);' >> recup$langue.pl
    echo 's#date:#created_at:#' >> recup$langue.pl
    # *(.) is the zsh glob qualifier selecting regular files only.
    perl -pi recup$langue.pl content/html/$langue/**/*(.)
 done
--- a/recupen.pl
+++ b/recupen.pl
@ -1,40 +0,0 @@
 #!/usr/bin/perl
 # Set the input record separator to the empty string before any input is
 # read: Perl then reads in paragraph mode, so each record is a whole
 # blank-line-delimited paragraph rather than a single line.
 BEGIN{
    $/="";
 }
 # replaceminim(BEGIN, END, KEEP)
 #
 # Rewrites $_ in place. Every BEGIN ... END pair that has no other BEGIN or
 # END between its two delimiters is processed:
 #   KEEP true  : the delimiters are dropped, the enclosed text is kept.
 #   KEEP false : the delimiters and the enclosed text are both removed.
 # BEGIN and END are literal strings, not regexes. Delimiters that end up
 # unpaired are restored unchanged.
 sub replaceminim {
    my ($begin, $end, $keep) = @_;
    # An empty pattern makes Perl reuse the last successful regex, so an
    # empty delimiter cannot be processed meaningfully: leave $_ untouched.
    return unless length $begin and length $end;
    # Protect literal X and Y, which serve as one-character markers below.
    s/X/_wasx_/g;
    s/Y/_wasy_/g;
    # Collapse each delimiter into a single character. \Q...\E makes the
    # delimiters match literally even if they contain regex metacharacters,
    # consistent with the literal restoration at the end of this sub.
    s/\Q$begin\E/X/g;
    s/\Q$end\E/Y/g;
    # From here on every X is a BEGIN and every Y is an END.
    # [^XY] already matches newlines, so spans may cover several lines.
    if ($keep) {
        s/X([^XY]*?)Y/$1/g;    # drop the delimiters, keep the content
    } else {
        s/X[^XY]*?Y//g;        # drop the delimiters and the content
    }
    # Restore unpaired delimiters, then the protected X and Y.
    s/X/$begin/g;
    s/Y/$end/g;
    s/_wasx_/X/g;
    s/_wasy_/Y/g;
 }
 # Applied to the current record in $_. For each of the three markup styles
 # ("xx:...::", "<xx>...</xx>", "!!xx!!...!!!!") the English span is
 # unwrapped (third argument 1) and the French span is removed entirely
 # (third argument 0). The calls are order-sensitive because several styles
 # share the same closing delimiter.
 replaceminim( "en:","::", 1);
 replaceminim( "fr:","::", 0);
 replaceminim( "<en>", "</en>", 1);
 replaceminim( "<fr>", "</fr>", 0);
 replaceminim( "!!en!!", "!!!!", 1);
 replaceminim( "!!fr!!", "!!!!", 0);
 # Rename the first "date:" found in $_ to "created_at:" (no /g flag).
 s#date:#created_at:#
--- a/recupfr.pl
+++ b/recupfr.pl
@ -1,40 +0,0 @@
 #!/usr/bin/perl
 # Set the input record separator to the empty string before any input is
 # read: Perl then reads in paragraph mode, so each record is a whole
 # blank-line-delimited paragraph rather than a single line.
 BEGIN{
    $/="";
 }
 # replaceminim(BEGIN, END, KEEP)
 #
 # Rewrites $_ in place. Every BEGIN ... END pair that has no other BEGIN or
 # END between its two delimiters is processed:
 #   KEEP true  : the delimiters are dropped, the enclosed text is kept.
 #   KEEP false : the delimiters and the enclosed text are both removed.
 # BEGIN and END are literal strings, not regexes. Delimiters that end up
 # unpaired are restored unchanged.
 sub replaceminim {
    my ($begin, $end, $keep) = @_;
    # An empty pattern makes Perl reuse the last successful regex, so an
    # empty delimiter cannot be processed meaningfully: leave $_ untouched.
    return unless length $begin and length $end;
    # Protect literal X and Y, which serve as one-character markers below.
    s/X/_wasx_/g;
    s/Y/_wasy_/g;
    # Collapse each delimiter into a single character. \Q...\E makes the
    # delimiters match literally even if they contain regex metacharacters,
    # consistent with the literal restoration at the end of this sub.
    s/\Q$begin\E/X/g;
    s/\Q$end\E/Y/g;
    # From here on every X is a BEGIN and every Y is an END.
    # [^XY] already matches newlines, so spans may cover several lines.
    if ($keep) {
        s/X([^XY]*?)Y/$1/g;    # drop the delimiters, keep the content
    } else {
        s/X[^XY]*?Y//g;        # drop the delimiters and the content
    }
    # Restore unpaired delimiters, then the protected X and Y.
    s/X/$begin/g;
    s/Y/$end/g;
    s/_wasx_/X/g;
    s/_wasy_/Y/g;
 }
 # Applied to the current record in $_. For each of the three markup styles
 # ("xx:...::", "<xx>...</xx>", "!!xx!!...!!!!") the French span is
 # unwrapped (third argument 1) and the English span is removed entirely
 # (third argument 0). The calls are order-sensitive because several styles
 # share the same closing delimiter.
 replaceminim( "fr:","::", 1);
 replaceminim( "en:","::", 0);
 replaceminim( "<fr>", "</fr>", 1);
 replaceminim( "<en>", "</en>", 0);
 replaceminim( "!!fr!!", "!!!!", 1);
 replaceminim( "!!en!!", "!!!!", 0);
 # Rename the first "date:" found in $_ to "created_at:" (no /g flag).
 s#date:#created_at:#
--- a/tasks/auto_tags/all_frequencies.rb
+++ b/tasks/auto_tags/all_frequencies.rb
@ -0,0 +1,57 @@
 #!/usr/bin/env ruby
 # Running word counter: text is fed in piece by piece and the occurrence
 # count of every accepted word is accumulated across all pieces.
 class Frequencies
    # Total number of accepted word occurrences seen so far.
    attr_reader :nbwords
    def initialize
        @nbwords = 0
        @tag = {}
    end
    # True when w must be ignored: shorter than 3 characters, or containing
    # anything other than \w characters.
    def is_bad_word(w)
        w.length < 3 || w !~ /\A(\w|\d)*\Z/
    end
    # Splits content on word boundaries and counts every accepted token.
    def update_density(content)
        content.split(/\b/).each do |token|
            next if is_bad_word(token)
            @nbwords += 1
            @tag[token] = @tag.fetch(token, 0) + 1
        end
    end
    # Prints one "<count> <word>" line per word, least frequent first.
    def show_tags
        ordered = @tag.keys.sort_by { |word| @tag[word] }
        ordered.each { |word| puts "#{@tag[word]} #{word}" }
    end
 end
 # Feed every file named on the command line, line by line, into a single
 # Frequencies counter, then print the combined table.
 f=Frequencies.new
 ARGV.each do |file|
    puts '['+file+']'
    # File.foreach closes the file itself, even if counting raises.
    File.foreach(file) do |l|
        f.update_density(l)
    end
 end
 f.show_tags
--- a/tasks/auto_tags/density.rb
+++ b/tasks/auto_tags/density.rb
@ -0,0 +1,69 @@
 #!/usr/bin/env ruby
 # Ranks the words of a document by comparing their density in the document
 # with their relative frequency in a reference table of "count word" lines.
 class Density
    # Returns a Hash mapping each word that occurs at least twice in content
    # to (occurrences / number of accepted words). A token is accepted when
    # it is at least 3 characters long and made only of \w characters.
    def density(content)
        tag=Hash.new(0)
        nbwords=0
        content.split(/\b/).each do |m|
            next if m.length < 3 || m !~ /\A(\w|\d)*\Z/
            nbwords += 1
            tag[m]+=1
        end
        nbwords=nbwords.to_f
        res=Hash.new
        tag.each do |w,v|
            res[w]=v/nbwords unless v == 1
        end
        res
    end
    # Loads the reference table.
    # freq_file: path of a text file holding one "<count> <word>" entry per
    # line (defaults to 'frequencies.val' in the current directory). Lines
    # that contain no space are skipped.
    def initialize(freq_file='frequencies.val')
        @sum=0
        @all_freq=Hash.new
        # File.foreach closes the file itself, even if parsing raises.
        File.foreach(freq_file) do |l|
            m=l.match(/([^ ]*) (.*)/)
            next unless m
            nb=m[1].to_i
            @all_freq[m[2]]=nb
            @sum+=nb
        end
    end
    # Prints at most 11 "<density>: <word>" lines for hash (as returned by
    # #density), ordered by decreasing density / reference-frequency ratio.
    def show_tags(hash)
        hash.keys.sort_by do |k|
            -hash[k]/corpus_ratio(k)
        end[0..10].each do |k|
            puts %{#{hash[k]}: #{k}}
        end
    end
    private
    # Relative frequency of word in the reference table, as a Float.
    # Computed in floating point: integer division would truncate to 0.
    # A word missing from the table (or listed with a count below 1) is
    # treated as seen once, and an empty table as a total of 1, so the
    # ratio is always finite and strictly positive.
    def corpus_ratio(word)
        count=@all_freq.fetch(word, 0)
        count=1 if count < 1
        total=@sum < 1 ? 1 : @sum
        count.to_f/total
    end
 end
 # For every file named on the command line, compute its word densities
 # and print its best-ranked words.
 freq_for={}
 d=Density.new
 ARGV.each do |file|
    puts '['+file+']'
    # File.read opens, reads and closes the file in a single call, so no
    # handle is left open if a later step raises.
    freq_for[file]=d.density(File.read(file))
    d.show_tags(freq_for[file])
 end
--- a/tasks/auto_tags/frequencies.val
+++ b/tasks/auto_tags/frequencies.val
--- a/tasks/auto_tags/tf.rb
+++ b/tasks/auto_tags/tf.rb
@ -0,0 +1,72 @@
 #!/usr/bin/env ruby
 # Term frequencies of content.
 # Tokens come from splitting on word boundaries; a token counts as a word
 # when it has at least 3 characters, all of them \w characters. Returns a
 # Hash word => occurrences / total words, restricted to words that occur
 # more than once.
 def tf(content)
    words = content.split(/\b/).reject do |token|
        token.length < 3 || token !~ /\A(\w|\d)*\Z/
    end
    total = words.length.to_f
    counts = Hash.new(0)
    words.each { |word| counts[word] += 1 }
    res = {}
    counts.each do |word, seen|
        res[word] = seen / total unless seen == 1
    end
    res
 end
 # Prints the (at most) 11 entries of hash with the largest values, one
 # "<value>: <key>" line each, largest value first.
 def show_tags(hash)
    ranked = hash.keys.sort_by { |word| -hash[word] }
    ranked.first(11).each do |word|
        puts "#{hash[word]}: #{word}"
    end
 end
 # For every file named on the command line, compute its term frequencies,
 # then rank each file's words by (frequency in the file) / (frequency in
 # all files taken together).
 freq_for={}
 allcontent=""
 ARGV.each do |file|
    puts '['+file+']'
    # File.read opens, reads and closes the file in a single call.
    content=File.read(file)
    freq_for[file]=tf(content)
    # Separate files with a newline so that the last word of one file can
    # never fuse with the first word of the next one.
    allcontent << content << "\n"
 end
 all_freq=tf(allcontent)
 res_for={}
 freq_for.each do |file,hash|
    puts %{==== Frequencie for #{file} =====}
    res_for[file]=Hash.new
    hash.each do |k,v|
        global=all_freq[k]
        # tf omits words seen only once, so a lookup may return nil;
        # skip such words instead of dividing by nil.
        next if global.nil? || global == 0
        res_for[file][k] = v/global
    end
    show_tags(res_for[file])
 end