M
OK, ich habs jetzt:
// Lizenz: GPL
// Aufruf: javac Indexer.java && java -Dorg.xml.sax.driver=org.apache.crimson.parser.XMLReaderImpl -DentityExpansionLimit=512000 Indexer wiktionary.xml > wiktionary.idx
import java.io.*;
import org.xml.sax.*;
import org.xml.sax.ext.*;
import org.xml.sax.helpers.*;
/**
 * Builds a title/offset index for an XML dump (e.g. a Wiktionary dump).
 *
 * <p>The dump is parsed with SAX. For every {@code <title>} element one record
 * of the form
 * <pre>
 * &lt;page&gt;
 * &lt;title&gt;TITLE&lt;/title&gt;
 * &lt;pos&gt;OFFSET&lt;/pos&gt;
 * &lt;/page&gt;
 * </pre>
 * is written to standard output. OFFSET is the number of bytes that precede the
 * line on which the most recent {@code <page>} start tag was reported by the
 * parser's {@link Locator}.
 *
 * <p>Because SAX only reports line numbers, the byte offset is obtained by
 * reading the same file a second time through {@link #offsetIn} and counting
 * bytes until the required number of line feeds has been consumed.
 *
 * <p>Usage: {@code java Indexer dump.xml > dump.idx}
 *
 * <p>All state is static, so only one indexing run may be active at a time.
 */
public class Indexer extends DefaultHandler
{
    /** Dump file named on the command line. */
    static File f;
    /** Stream over {@link #f} that is handed to the SAX parser. */
    static FileInputStream fis;
    /** SAX input source wrapping {@link #fis}. */
    static InputSource is;
    /**
     * Second, independent stream over {@link #f}. It is read byte by byte
     * (not through a character decoder) so that {@link #bytesRead} really is
     * a byte count, independent of the platform default charset.
     */
    static InputStream offsetIn;
    /** True while the parser is inside a {@code <title>} element. */
    static boolean openTag = false;
    /** Byte offset recorded for the most recent {@code <page>} element. */
    static long pos = 0;
    /** Locator supplied by the parser; queried for the current line number. */
    static Locator lastPos;
    /** Number of line feeds consumed from {@link #offsetIn} so far. */
    static long counter = 0;
    /** Number of bytes consumed from {@link #offsetIn} so far. */
    static long bytesRead = 0;

    /** Creates a handler; all indexing state lives in static fields. */
    public Indexer()
    {
        super();
    }

    /**
     * Indexes the dump file named by {@code args[0]} and writes the index to
     * standard output.
     *
     * @param args command line; {@code args[0]} is the path of the XML dump
     * @throws IllegalArgumentException if no file name was given
     * @throws Exception if the file cannot be opened or the parser fails
     */
    public static void main(String args[]) throws Exception
    {
        if (args == null || args.length < 1)
        {
            throw new IllegalArgumentException("Usage: java Indexer <dump.xml>");
        }

        // Start from a clean slate so that a second run in the same JVM
        // does not inherit counters from the previous one.
        openTag = false;
        pos = 0;
        counter = 0;
        bytesRead = 0;
        lastPos = null;
        fis = null;
        offsetIn = null;

        f = new File(args[0]);

        XMLReader xr = XMLReaderFactory.createXMLReader();
        Indexer handler = new Indexer();
        xr.setContentHandler(handler);
        xr.setErrorHandler(handler);

        try
        {
            offsetIn = new BufferedInputStream(new FileInputStream(f));
            fis = new FileInputStream(f);
            is = new InputSource(fis);
            xr.parse(is);
        }
        finally
        {
            closeQuietly(fis);
            closeQuietly(offsetIn);
            System.out.flush();
        }
    }

    /**
     * Remembers the parser's locator so that element callbacks can ask for
     * the current line number.
     *
     * @param locator locator provided by the SAX parser
     */
    public void setDocumentLocator(Locator locator)
    {
        lastPos = locator;
    }

    /**
     * On {@code <page>}: records the byte offset of the current line in
     * {@link #pos}. On {@code <title>}: starts a new index record and enables
     * copying of character data.
     *
     * @param uri   namespace URI (unused)
     * @param name  local name; may be empty when namespaces are not processed
     * @param qName qualified name, used when {@code name} is empty
     * @param atts  attributes of the element (unused)
     */
    public void startElement (String uri, String name, String qName, Attributes atts)
    {
        String tag = tagName(name, qName);
        if ("page".equals(tag))
        {
            advanceToCurrentLine();
        }
        else if ("title".equals(tag))
        {
            System.out.print("<page>\n<title>");
            openTag = true;
        }
    }

    /**
     * On {@code </title>}: closes the title and completes the index record
     * with the offset stored in {@link #pos}.
     *
     * @param uri   namespace URI (unused)
     * @param name  local name; may be empty when namespaces are not processed
     * @param qName qualified name, used when {@code name} is empty
     */
    public void endElement (String uri, String name, String qName)
    {
        if ("title".equals(tagName(name, qName)))
        {
            openTag = false;
            System.out.print("</title>\n");
            System.out.print("<pos>" + pos + "</pos>\n</page>\n");
        }
    }

    /**
     * Copies character data to standard output while inside a title.
     *
     * <p>NOTE(review): the text is written exactly as delivered by the parser,
     * i.e. without re-escaping {@code &} or {@code <}. This is kept unchanged
     * so that existing consumers of the index see the same output.
     *
     * @param ch     buffer holding the characters
     * @param start  index of the first character
     * @param length number of characters
     */
    public void characters (char ch[], int start, int length)
    {
        if (openTag)
        {
            System.out.print(new String(ch, start, length));
        }
    }

    /** Returns the local name, or the qualified name when the local name is empty or null. */
    private static String tagName(String name, String qName)
    {
        return (name == null || "".equals(name)) ? qName : name;
    }

    /**
     * Consumes bytes from {@link #offsetIn} until as many line feeds have been
     * read as precede the locator's current line, then stores the resulting
     * byte count in {@link #pos}. Lines are delimited by {@code '\n'} only.
     *
     * <p>Nothing is read when the stream is already at (or past) that line,
     * so several pages on one line share the same offset instead of drifting.
     * Problems are reported on standard error and indexing continues.
     */
    private static void advanceToCurrentLine()
    {
        if (lastPos == null || offsetIn == null)
        {
            System.err.println("Indexer: cannot determine <page> offset: "
                + "no document locator or offset stream available");
            return;
        }

        int line = lastPos.getLineNumber();
        long wantedLineFeeds = line - 1;
        try
        {
            while (counter < wantedLineFeeds)
            {
                int b = offsetIn.read();
                if (b < 0)
                {
                    // Without this check a short file would loop forever.
                    System.err.println("Indexer: end of file reached before line " + line);
                    break;
                }
                if (b == '\n')
                {
                    counter++;
                }
                bytesRead++;
            }
            pos = bytesRead;
        }
        catch (IOException e)
        {
            System.err.println("Indexer: I/O error while locating line " + line + ": " + e);
        }
    }

    /** Closes a stream if it was opened; a failure to close a read-only stream is ignored. */
    private static void closeQuietly(Closeable c)
    {
        if (c == null)
        {
            return;
        }
        try
        {
            c.close();
        }
        catch (IOException ignored)
        {
            // Nothing useful can be done: the stream was only read from.
        }
    }
}
Vielen Dank für deine Hilfe, Roar!