Commit bdfa81e3 authored by Nicolas Sannier's avatar Nicolas Sannier
Browse files

Pushing ARMLET for legislative text conversion

parent 8cbef046
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>English</string>
<key>CFBundleExecutable</key>
<string>GATE</string>
<key>CFBundleGetInfoString</key>
<string>GATE 8.1 - http://gate.ac.uk/</string>
<key>CFBundleIconFile</key>
<string>gate-icon.icns</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>8.0</string>
<key>CFBundleName</key>
<string>GATE</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleVersion</key>
<string>8.0</string>
</dict>
</plist>
#!/bin/sh
# Main.java
#
# Copyright (c) 1995-2012, The University of Sheffield. See the file
# COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
#
# This file is part of GATE (see http://gate.ac.uk/), and is free
# software, licenced under the GNU Library General Public License,
# Version 2, June 1991 (in the distribution as file licence.html,
# and also available at http://gate.ac.uk/gate/licence.html).
#
# Hamish Cunningham, 1/Nov/00
#
# $Id: Main.java 15843 2012-05-25 09:50:41Z markagreenwood $
# Resolve $0 through any chain of symbolic links so that $progdir ends up
# being the directory that really contains this launcher.
PRG=$0
while [ -h "$PRG" ]; do
  ls=`ls -ld "$PRG"`
  # The link target is whatever follows "-> " in the long listing.
  link=`expr "$ls" : '^.*-> \(.*\)$' 2>/dev/null`
  if expr "$link" : '^/' 2> /dev/null >/dev/null; then
    # Absolute target: use it as is.
    PRG="$link"
  else
    # Relative target: interpret it relative to the link's own directory.
    PRG="`dirname "$PRG"`/$link"
  fi
done
progdir=`dirname "$PRG"`
# Hand over to the generic launcher script, adding the Mac-specific options.
# Every argument that embeds $progdir is quoted so that a path containing
# spaces is still passed as a single argument.
exec "$progdir/../../../bin/gate.sh" \
  -Dapple.laf.useScreenMenuBar=true \
  -Dcom.apple.mrj.application.growbox.intrudes=true \
  -Dapple.awt.antialiasing=true \
  -Dcom.apple.mrj.application.live-resize=true \
  -Dsun.java2d.opengl=true \
  -Xdock:name=GATE \
  "-Xdock:icon=$progdir/../Resources/gate-icon.icns"
#! /bin/sh
# Print the text content of the GATE XML document given as $1.
# Steps:
#   - join the whole file into a single line (newlines and carriage returns
#     become spaces) so the sed expressions can match across line breaks
#   - keep only the document content part (inside <TextWithNodes>)
#   - remove XML tags
#   - remove empty lines
#   - remove spaces starting a line
#   - remove spaces ending a line
# Every continuation backslash is preceded by a space: a backslash glued to
# the closing quote would join the next "-e" onto the previous expression.
cat "$1" |
  tr '\n\r' ' ' |
  sed -r \
    -e 's/^.*<TextWithNodes>//g' \
    -e 's/<\/TextWithNodes>.*$//g' |
  sed -r \
    -e 's/<[^>]+>//g' \
    -e '/^\s*$/d' \
    -e 's/^\s+//g' \
    -e 's/\s+$//g'
#! /bin/sh
# Print the usage text and stop when a mandatory argument is missing or when
# help is requested. Separate tests joined with || are used instead of the
# "-o" operator, which is obsolescent and ambiguous for some operand values.
if [ -z "$1" ] || [ -z "$2" ] || [ "$1" = "--help" ] || [ "$2" = "--help" ]; then
  cat << eof
Extract statistics from a Gate XML document file.
Usage: $0 gate_document.xml annotation_set_name [feature_name]
To run on a directory use:
for f in \`ls directory\`; do
sh $0 directory/\$f annotation_set_name;
done > results.txt
eof
  exit
fi
# Name used to prefix every result line; "$1" is quoted so that a path
# containing spaces is handled as a single argument.
file=`basename "$1"`
# keep only the document content part
# remove XML tags
# remove empty lines
# remove spaces starting a line
# remove spaces ending a line
# calculate number of lines, words and characters
# format the result
# "$file" is spliced into the final sed program inside double quotes so the
# program stays one argument whatever the file name contains. Each
# continuation backslash is preceded by a space so that consecutive -e
# expressions are not glued together.
cat "$1" |
  tr '\n\r' ' ' |
  sed -r \
    -e 's/^.*<TextWithNodes>//g' \
    -e 's/<\/TextWithNodes>.*$//g' |
  sed -r \
    -e 's/<[^>]+>//g' \
    -e '/^\s*$/d' \
    -e 's/^\s+//g' \
    -e 's/\s+$//g' |
  wc --lines --words --chars |
  sed -r -e 's/^\s*([0-9]+)\s+([0-9]+)\s+([0-9]+)$/'"$file"' _Lines_ \1\n'"$file"' _Words_ \2\n'"$file"' _Characters_ \3/'
echo "$file" _AnnotationSet_ "$2"
# keep only the annotation set given in second parameter
# put one annotation type per line
# optionally get the name of the feature given in third parameter
# remove all other XML tags
# sort lines
# count each annotation type [and feature]
# format the result
# $2, $3 and $file are spliced into the sed programs inside double quotes so
# that each program remains a single argument even when a value contains
# whitespace. If the requested annotation set is not found, the XML prolog is
# still present and is replaced by an explanatory message.
cat "$1" |
  tr '\n\r' ' ' |
  sed -r \
    -e 's/^.*<AnnotationSet Name="'"$2"'">//g' \
    -e 's/<\/AnnotationSet>.*$//g' \
    -e 's/<Annotation [^>]+Type="([^"]+)" [^>]+>/\n\1/g' |
  sed -r \
    -e 's/\s+<Feature>.+<Name [^>]+>('"$3"')<\/Name>\s+<Value [^>]+>([^<]+).+$/ \1=\2/g' \
    -e 's/\s+<Feature>.+$//g' \
    -e 's/\s+<\/Annotation>\s+//g' \
    -e 's/^<\?xml .+$/This annotation set do not exist !!!/g' \
    -e '/^\s*$/d' |
  sort --field-separator=' ' --key=1,1 --key=2,2 |
  uniq --count |
  sort --reverse --numeric-sort |
  sed -r -e 's/^\s*([0-9]+)\s+(.+)$/'"$file"' \2 \1/g'
@echo off
rem Start gate.exe from the parent of the directory containing this script.
rem %~dp0 expands to the drive and path of this script; the quotes keep the
rem command intact when that path contains spaces.
"%~dp0..\gate.exe"
#!/bin/bash
# Launcher for GATE: works out GATE_HOME from the location of this script,
# reads the JVM options from gate.l4j.ini and prepares the defaults used by
# the option handling further down.
#
# Parameters passed to the GATE process
# This array gets populated from the command line parameters given to the
# script. If required, you can set the initial list of parameters here.
gateparams=()
############################################################################
# USERS SHOULD NOT NEED TO MAKE ANY CHANGES BELOW THIS LINE #
############################################################################
#set -x
PRG="$0"
# Remember the directory the script was started from; the config and session
# file options below are resolved relative to it.
CURDIR="`pwd`"
# need this for relative symlinks
# Follow the chain of symbolic links until PRG names a real file.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
# The link target is whatever follows "-> " in the long listing.
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
# Absolute target: use it as is.
PRG="$link"
else
# Relative target: interpret it relative to the link's own directory.
PRG=`dirname "$PRG"`"/$link"
fi
done
# GATE_HOME is the parent of the directory containing this script.
GATE_HOME=`dirname "$PRG"`/..
# make it fully qualified
# When CDPATH is set, the cd command prints out the dir name. Because of this
# we need to execute the cd command separately, and only then get the value
# via `pwd`
cd "$GATE_HOME"
export GATE_HOME="`pwd`"
export ANT_HOME=$GATE_HOME
cd "$CURDIR"
# pull in JVM settings from the Launch4J ini file so we have a single place
# where these things can be set across operating systems which makes things
# so much easier to document/explain/teach
# Split the file on carriage returns and line feeds only, so each line of the
# ini file becomes one element of vmparams.
IFS=$'\r\n'
vmparams=($(cat $GATE_HOME/gate.l4j.ini))
unset IFS
vmparams=( "${vmparams[@]}" "-splash:$GATE_HOME/bin/splash.png" )
# Directory to copy default.xml / default.session from; set by the -dc option.
DEFAULTSDIR=
# Base name of the temporary config files; replaced by the -tmp option. The
# default is a name that is not expected to exist, so the clean-up at the end
# of the script only removes files when -tmp was given.
tmpbase="config_files_with_this_base_should_never_really_exist_so_we_can_test_for_existence_later"
## function that copies over any config files that do not already exist
## from the DEFAULTSDIR, if the DEFAULTSDIR has been specified
## Expected parms: $1 is base name of config files to create
## Reads the globals CURDIR (target directory) and DEFAULTSDIR (source
## directory). For each of "$base.session" and "$base.xml": an existing file
## in CURDIR is left untouched; otherwise the matching default.* file is
## copied from DEFAULTSDIR when it exists there.
copy_default_files() {
  local base=$1
  local ext label
  echo "copying default files base=$base"
  for ext in session xml; do
    # Wording used in the progress message for each kind of file.
    case "$ext" in
      session) label=session ;;
      xml)     label=config ;;
    esac
    if [ -f "$CURDIR/$base.$ext" ]; then
      echo using existing "$CURDIR/$base.$ext"
    # Only look for defaults when a defaults directory was actually given:
    # with an empty DEFAULTSDIR the path would otherwise be "/default.$ext",
    # i.e. a file in the filesystem root.
    elif [ -n "$DEFAULTSDIR" ] && [ -f "$DEFAULTSDIR/default.$ext" ]; then
      echo copying default "$label" from "$DEFAULTSDIR/default.$ext" to "$CURDIR/$base.$ext"
      cp "$DEFAULTSDIR/default.$ext" "$CURDIR/$base.$ext"
    fi
  done
}
## function that returns, for a root directory and an absolute or
## relative path, the absolute path.
## Expected parms: $1 is the root directory, $2 is the path.
## The result is written to stdout: $2 unchanged when it starts with "/",
## otherwise "$1/$2". The working variables are local so that calling the
## function does not overwrite globals named "rootdir" or "path".
function abs_path() {
  local rootdir="$1"
  local path="$2"
  case "$path" in
    /*)
      ## this is already an absolute path, use it
      ;;
    *)
      path="${rootdir}/${path}"
      ;;
  esac
  # printf prints the value verbatim, whatever characters it starts with.
  printf '%s\n' "$path"
}
# Process the command line. Options known to this script are consumed here;
# everything else is appended to vmparams and so ends up on the java command
# line. The loop stops at the first empty argument.
while test "$1" != "";
do
  case "$1" in
    -h)
      cat <<EOF
Run GATE Developer
The following options can be passed immediately after the command name:
-ld ... create or use the GATE default configuration and session files
in the current directory
-ln name ... create or use a config file name.xml and session file name.session
in the current directory
-ll ... if the current directory contains a file log4j.properties use
this file to configure the logging
-rh path ... set the resources home path, this is a shortcut for
-Druntime.gate.user.resourceshome=path
-d URL ... register the plugin at URL. Can be used multiple times.
-i path ... use the file at path as the site configuration file
-dc dir ... copy default.xml and/or default.session from this dir when
creating a new config or session file
(must come before -ld,-ln,-tmp)
-tmp ... use temporary config and session files (-dc option works)
-h ... show this help
All other options will be passed on to the "java" command, for example:
-Djava.io.tmpdir=<somedir>
-Xmx<memorysize>
For more information see the user manual in your GATE distribution or at
http://gate.ac.uk/userguide/
EOF
      exit 0
      ;;
    -ld)
      shift
      # Arguments to abs_path are quoted so that a current directory
      # containing spaces is passed as one argument.
      vmparams+=( "-Dgate.user.config=$(abs_path "$CURDIR" .gate.xml)" )
      vmparams+=( "-Dgate.user.session=$(abs_path "$CURDIR" .gate.session)" )
      vmparams+=( "-Dgate.user.filechooser.defaultdir=$CURDIR" )
      copy_default_files ".gate"
      ;;
    -ln)
      shift
      base=$1
      shift
      vmparams+=( "-Dgate.user.config=$(abs_path "$CURDIR" "$base.xml")" )
      vmparams+=( "-Dgate.user.session=$(abs_path "$CURDIR" "$base.session")" )
      vmparams+=( "-Dgate.user.filechooser.defaultdir=$CURDIR" )
      copy_default_files "$base"
      ;;
    -ll)
      shift
      # Only configure logging when the properties file is really there.
      if [ -f "$CURDIR/log4j.properties" ]
      then
        vmparams+=( "-Dlog4j.configuration=file://$CURDIR/log4j.properties" )
      fi
      ;;
    -rh)
      shift
      # Resolve the directory to its physical absolute path. cd output is
      # discarded because cd prints the directory name when CDPATH is set.
      # A directory that cannot be entered is reported instead of silently
      # falling back to the current directory.
      resourceshome=$(cd "$1" > /dev/null && pwd -P) || {
        echo "Cannot access resources home directory: $1" >&2
        exit 1
      }
      shift
      vmparams+=( "-Dgate.user.resourceshome=$resourceshome" )
      ;;
    -d)
      shift
      gateparams+=( "-d" "$1" )
      shift
      ;;
    -i)
      shift
      gateparams+=( "-i" "$1" )
      shift
      ;;
    -tmp)
      shift
      # Use the process id to get a base name for this run's temporary files.
      tmpbase=GATE$$
      vmparams+=( "-Dgate.user.config=$CURDIR/$tmpbase.xml" )
      vmparams+=( "-Dgate.user.session=$CURDIR/$tmpbase.session" )
      vmparams+=( "-Dgate.user.filechooser.defaultdir=$CURDIR" )
      copy_default_files "$tmpbase"
      ;;
    -dc)
      shift
      DEFAULTSDIR=$1
      shift
      ;;
    *)
      # Anything not recognised above is passed to the java command.
      vmparams+=( "$1" )
      shift
      ;;
  esac
done
# Locate JAVA
# Order of preference: the java binary inside JAVA_HOME, then java found on
# the PATH, then the Mac OS X java_home helper.
if [ -n "$JAVA_HOME" ]; then
  if [ -x "$JAVA_HOME/bin/java" ]; then
    JAVACMD="$JAVA_HOME/bin/java"
  elif [ -x "$JAVA_HOME/jre/bin/java" ]; then
    JAVACMD="$JAVA_HOME/jre/bin/java"
  fi
# Both stdout and stderr of the lookup are discarded; the redirections must
# be in this order for stderr to follow stdout to /dev/null.
elif command -v java > /dev/null 2>&1; then
  JAVACMD="`command -v java`"
elif [ -x /usr/libexec/java_home ]; then
  # Mac OS X - use /usr/libexec/java_home -R --exec java ...
  JAVACMD=/usr/libexec/java_home
  vmparams=( "-R" "--exec" "java" "${vmparams[@]}" )
else
  echo "Couldn't find java, please set JAVA_HOME"
  exit 1
fi
# JAVA_HOME may be set without containing an executable java; stop with a
# clear message rather than trying to run an empty command.
if [ -z "$JAVACMD" ]; then
  echo "Couldn't find java in JAVA_HOME ($JAVA_HOME), please check JAVA_HOME"
  exit 1
fi
echo "Running GATE using Java at $JAVACMD"
# Show the full command line, then run it.
echo "$JAVACMD" "${vmparams[@]}" -jar "$GATE_HOME/bin/gateLauncher.jar" "${gateparams[@]}"
"$JAVACMD" "${vmparams[@]}" -jar "$GATE_HOME/bin/gateLauncher.jar" "${gateparams[@]}"
## clean up temporary config files if -tmp had been specified
## (first the .xml config file, then the .session file)
for suffix in xml session
do
  leftover="$CURDIR/$tmpbase.$suffix"
  if [ -f "$leftover" ]
  then
    rm "$leftover"
  fi
done
# Log entries on console and into a file.
log4j.rootLogger=DEBUG, generalfile, stdout
# Console appender that outputs messages of INFO level or higher to stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Threshold=INFO
log4j.appender.stdout.Follow=true
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
# we print message type, message and new line.
# The message itself would have information such as
# benchmark ID, code for CheckPoint or ProcessFinished, time stamp, and any features
# provided within the message
log4j.appender.stdout.layout.ConversionPattern=%m%n
# File appender that outputs to a file called logfile.txt
log4j.appender.generalfile=org.apache.log4j.RollingFileAppender
log4j.appender.generalfile.Threshold=DEBUG
log4j.appender.generalfile.File=${user.home}/.gate/logfile.txt
log4j.appender.generalfile.MaxFileSize=5MB
log4j.appender.generalfile.MaxBackupIndex=1
log4j.appender.generalfile.layout=org.apache.log4j.PatternLayout
log4j.appender.generalfile.layout.ConversionPattern=%d{dd HH:mm:ss} %-5.5p %-30.30c{5}: %m%n
# This is another option for appender output. Provides much more detail when needed.
# Comment out the previous line and then uncomment the following one to use it.
#log4j.appender.generalfile.layout.ConversionPattern=%d{dd HH:mm:ss.SSS} %-4.4p [%8.8t] %-30.30c{5}.%15.15M[%4L]: %m%n
# File appender that outputs only benchmark messages
log4j.appender.benchmarklog=org.apache.log4j.RollingFileAppender
log4j.appender.benchmarklog.Threshold=DEBUG
log4j.appender.benchmarklog.File=${user.home}/.gate/benchmark.txt
log4j.appender.benchmarklog.MaxFileSize=5MB
log4j.appender.benchmarklog.MaxBackupIndex=1
log4j.appender.benchmarklog.layout=org.apache.log4j.PatternLayout
log4j.appender.benchmarklog.layout.ConversionPattern=%m%n
# Configure the Benchmark logger so that it only goes to the benchmark log file
log4j.logger.gate.util.Benchmark=DEBUG, benchmarklog
log4j.additivity.gate.util.Benchmark=false
# File appender that outputs only OntoRootGaz messages
log4j.appender.ontorootgazlog=org.apache.log4j.RollingFileAppender
log4j.appender.ontorootgazlog.Threshold=DEBUG
log4j.appender.ontorootgazlog.File=${user.home}/.gate/ontoRootGaz.txt
log4j.appender.ontorootgazlog.MaxFileSize=5MB
log4j.appender.ontorootgazlog.MaxBackupIndex=1
log4j.appender.ontorootgazlog.layout=org.apache.log4j.PatternLayout
log4j.appender.ontorootgazlog.layout.ConversionPattern=%m%n
# Configure the OntoRootGaz logger so that it only goes to the ontoRootGaz log file
log4j.logger.gate.clone.ql.OntoRootGaz=DEBUG, ontorootgazlog
log4j.additivity.gate.clone.ql.OntoRootGaz=false
###############################################################################
# Logger configs - most classes stay at INFO level by default so the
# log isn't flooded. Add on individual classes or packages as needed.
###############################################################################
log4j.logger.gate=INFO
log4j.logger.gate.jape=DEBUG
#log4j.logger.gate.creole.SerialController=DEBUG
#log4j.logger.gate.util.profile.Profiler=DEBUG
# For reference - here are the primary options for the PatternLayout
# %c category name; {n} only uses rightmost n components
# %C fully qualified class. SLOW!
# %d date
# %F file. SLOW!
# %l JVM specific location info. SLOW!
# %L line number. SLOW!
# %p priority
# %m message
# %M method name. SLOW!
# %n newline
# %p priority
# %r program's running time in milliseconds
# %t thread
# %x nested diagnostic context
# %-20.30m will output the message right-padded to 20 spaces, truncated to a max of 30.
#!/usr/bin/perl
# Script to read all the creole.xml files for every plugin
# to produce a summary HTML page (GATE/doc/plugins.html)
# by Andrew Golightly
#
# DO NOT RUN THIS SCRIPT FROM THE COMMAND LINE, use "ant plugins.html" in the
# top-level GATE directory instead.
use strict;
use warnings;
use XML::Simple;
use XML::XPath;
use XML::XPath::XMLParser;
use File::Find;
# Refuse to run unless the first argument is the marker "runningFromAnt";
# otherwise explain how the script is meant to be started and exit with
# status 1.
unless (@ARGV && $ARGV[0] eq "runningFromAnt") {
    print "This script should not be run directly. Instead, you should do\n",
          "\"ant plugins.html\" in the top-level GATE directory.\n";
    exit 1;
}
# ********** Some constants **********
# Section headings, used both in the link list at the top of the page and
# for the sections themselves.
my $internalPluginsTitle = "Plugins included in the GATE distribution";
my $externalPluginsTitle = "Other contributed plugins";
# Walk ../build/plugins and remember the path of every file that is named
# exactly creole.xml.
my @creoleFileList;
my $rememberCreoleFile = sub {
    return unless $_ eq 'creole.xml';
    push @creoleFileList, $File::Find::name;
};
File::Find::find($rememberCreoleFile, '../build/plugins');
# Order the paths alphabetically, ignoring case.
@creoleFileList = sort { uc($a) cmp uc($b) } @creoleFileList;
# **************************************************
print "Extracting information on GATE plugins\n";
print "--------------------------------------\n\n";
# ********** Write HTML for the top of the plugins page **********
# Open file handle to the HTML file we are creating. The file is written as
# UTF-8; on failure the system error ($!) is included in the message so the
# cause is visible.
my $htmlFilename = '../doc/plugins.html';
open(HTMLFILE, ">:utf8", $htmlFilename)
    or die("Cannot Open File $htmlFilename: $!");
# Emit the fixed top part of the page in a single interpolating here-document:
# document head, banner, the two-entry link list (whose labels are the section
# titles) and the introductory paragraphs.
print HTMLFILE <<ENDHTML;
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<!--
****** This page is generated automatically by plugins-info-to-HTML.pl. ******
****** Do not edit it manually. To modify the external plugins list at ******
****** the bottom of this page edit external-plugins.html and run ******
****** "ant plugins.html" from the top-level GATE directory. ******
-->
<html>
<head>
<title>List of plugins available to GATE</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="stylesheet" type="text/css" href="gate.css">
<style type="text/css">
a img {border: none;}
th {background-color: #A0D0F0;}
</style>
</head>
<body>
<center>
<a href="http://gate.ac.uk/"><img src="http://www.gate.ac.uk/gateHeader.gif" alt="GATE" height="76" width="356"/></a>
</center>
<br/>
<!-- top banner ****************************************************** -->
<div class="banner">
<p>Plugins for GATE</p>
</div>
<p>This page lists some of the plugins that are currently available with GATE:</p>
<ul>
<li><a href="#internal-plugins">
${internalPluginsTitle}</a></li>
<li><a href="#external-plugins">
${externalPluginsTitle}</a></li>
</ul>
<p>For more information on how the plugins work, see the online user guide "<a href="http://gate.ac.uk/sale/tao/#sec:howto:plugins">Developing Language Processing Components with GATE</a>".</p>
<p>To submit a plugin, please contact us via the <a
href="http://www.gate.ac.uk/mail/index.html">gate-users mailing
list</a>.</p>
<hr/>
ENDHTML
# **************************************************
# ********** Write internal plugin information to the HTML file **********
print "Extracting internal plugins information..\n";
print HTMLFILE "<a name='internal-plugins'></a>\n",
    "<h2>$internalPluginsTitle</h2>\n",
    "<ul type='circle'>";
# One entry per plugin that defines at least one RESOURCE:
#   NAME  => plugin name taken from the path of its creole.xml
#   DATA  => node set of its RESOURCE elements
#   XPATH => the XML::XPath object the node set belongs to
my @creoleFileData = ();
foreach my $creoleFileName (@creoleFileList)
{
    # The plugin name is the part of the path between "plugins/" and
    # "/creole.xml". It is captured into its own variable straight away
    # rather than read from $1 later, and a path that does not have the
    # expected shape is skipped instead of yielding a stale or undefined name.
    my ($pluginName) = $creoleFileName =~ m{plugins/(.+)/creole\.xml};
    if (!defined $pluginName) {
        warn "Skipping $creoleFileName: cannot determine the plugin name\n";
        next;
    }
    my $xp = XML::XPath->new(filename => $creoleFileName); # parse the XML file
    my $nodeset = $xp->find('//RESOURCE'); # find all resources in this creole.xml file..
    my @nodes = $nodeset->get_nodelist;
    # Ignore plugins that do not define any RESOURCEs
    if(@nodes) {
        print HTMLFILE "<li><a href='#$pluginName'>$pluginName</a></li>\n";
        push @creoleFileData, { NAME  => $pluginName,
                                DATA  => $nodeset,
                                XPATH => $xp };
    }
}
print HTMLFILE "</ul>\n",
    "<table border='1'>\n";
# foreach plugin creole.xml file...
# Writes one heading row per plugin followed by one row per RESOURCE with
# three cells: NAME, COMMENT (plus a "docs" link when HELPURL is present)
# and CLASS.
foreach my $creoleFile (@creoleFileData)
{
    my $creoleFileName = $creoleFile->{NAME};
    # XPath object used to evaluate expressions relative to each node.
    my $xp = $creoleFile->{XPATH};
    print "$creoleFileName\n";
    print HTMLFILE "\t<tr>\n\t\t<th colspan='3'><a name='$creoleFileName'>$creoleFileName</a></th>\n\t</tr>\n";
    foreach my $node ($creoleFile->{DATA}->get_nodelist)
    {
        print HTMLFILE "\t<tr>\n";
        # NAME
        print HTMLFILE "\t\t<td>", $xp->findvalue('NAME', $node), "&nbsp;</td>\n";
        # COMMENT and HELPURL
        print HTMLFILE "\t\t<td>", $xp->findvalue('COMMENT', $node);
        if($xp->exists('HELPURL', $node)) {
            print HTMLFILE " (<a href=\"", $xp->findvalue('HELPURL', $node), "\">docs</a>)";
        }
        print HTMLFILE "&nbsp;</td>\n";
        # CLASS
        print HTMLFILE "\t\t<td>", $xp->findvalue('CLASS', $node), "</td>\n";
        print HTMLFILE "\t</tr>\n";
    }
}