Tags

,

My Solr client talks to a proxy application, which in turn talks to a remote Solr server to get data.
In the previous post, Solr: Use JSON(GSon) Streaming to Reduce Memory Usage,

I described the problem we faced, how to use JSON (Gson) streaming, and some other approaches to reduce memory usage. In this post I will use an XML SAX parser to iterate over the XML response stream. In the next post I will show how to use a StAX parser to parse the XML response.
Implementation
The code below uses SAX to read documents one by one from the HTTP stream:
— Use a SAX parser and Java Executor Futures to wait until all threads have finished, i.e. all docs have been imported.

/**
 * Streams a Solr XML response through a SAX parser so documents are handed
 * to the importer one at a time instead of materialising the whole response,
 * then blocks until every submitted import task has completed.
 *
 * @param request   the Solr request passed on to each import task
 * @param in        the XML response stream to parse
 * @param fetchSize not used by this method; kept so existing callers compile
 * @return a result carrying the number of fetched docs, whether more docs
 *         remain on the server ({@code fetched + start < numFound}) and
 *         whether anything was imported at all
 * @throws IOException if reading the stream fails, if an import task fails
 *         (the task's failure is attached as the cause), or if the calling
 *         thread is interrupted while waiting for the imports
 * @throws ParserConfigurationException if the SAX parser cannot be created
 * @throws SAXException if the response is not well-formed XML
 */
private static ImportedResult handleXMLResponseViaSax(
      SolrQueryRequest request, InputStream in, int fetchSize)
      throws IOException, ParserConfigurationException, SAXException {
    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    // The XML comes from a remote server: reject DOCTYPE declarations so that
    // external entities cannot be resolved (XXE).
    parserFactory.setFeature(
        "http://apache.org/xml/features/disallow-doctype-decl", true);
    SAXParser parser = parserFactory.newSAXParser();
    SolrResponseHandler handler = new SolrResponseHandler(request);
    parser.parse(in, handler);

    // The handler only submits import tasks; wait for all of them here so
    // the returned result really means "all docs imported".
    for (Future<Void> future : handler.futures) {
      if (future == null) {
        // NOTE(review): guards against importData returning null - confirm
        // whether that can actually happen.
        continue;
      }
      try {
        future.get();
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new IOException(
            "Interrupted while waiting for document imports to finish", e);
      } catch (java.util.concurrent.ExecutionException e) {
        throw new IOException("Importing a fetched document failed",
            e.getCause());
      }
    }

    ImportedResult importedResult = new ImportedResult();
    importedResult.setFetched(handler.fetchedSize);
    importedResult
        .setHasMore((handler.fetchedSize + handler.start) < handler.numFound);
    importedResult.setImportedData((handler.fetchedSize != 0));

    System.out.println("fetchedSize: " + handler.fetchedSize);
    return importedResult;
  }
  
  /**
   * SAX handler for a Solr XML response. It records {@code numFound} and
   * {@code start} from the {@code <result>} element, collects the
   * {@code contentid} and {@code bindoc} string fields of each {@code <doc>},
   * and submits one import task per document as soon as the document closes.
   * The futures of the submitted tasks are kept in {@link #futures}; this
   * class does not wait on them itself.
   */
  private static class SolrResponseHandler extends DefaultHandler {
    protected int fetchedSize = 0;
    protected int numFound = -1, start = -1;
    protected String contentid, bindoc = null;
    protected List<Future<Void>> futures = new ArrayList<Future<Void>>();
    
    String curName, curValue;
    // Accumulates the text of the <str> currently being read; SAX may deliver
    // a single text node through several characters() callbacks.
    private final StringBuilder text = new StringBuilder();
    private SolrQueryRequest request;
    
    /**
     * @param request the Solr request handed to every import task
     */
    public SolrResponseHandler(SolrQueryRequest request) {
      this.request = request;
    }
    
    /**
     * Reads paging attributes from {@code <result>}, clears the per-document
     * fields at {@code <doc>}, and starts capturing text for the
     * {@code <str>} fields this handler cares about.
     *
     * @throws NumberFormatException if {@code <result>} lacks a numeric
     *         {@code numFound} or {@code start} attribute
     */
    @Override
    public void startElement(String uri, String localName, String qName,
        Attributes attributes) throws SAXException {
      
      switch (qName) {
        case "result": {
          numFound = Integer.parseInt(attributes.getValue("numFound"));
          start = Integer.parseInt(attributes.getValue("start"));
          break;
        }
        case "doc": {
          // Start each document clean so a missing field is not silently
          // filled with the previous document's value.
          contentid = null;
          bindoc = null;
          break;
        }
        case "str": {
          String name = attributes.getValue("name");
          // Track only the two fields of interest; any other <str> resets
          // curName so its text cannot overwrite a captured field.
          if ("contentid".equals(name) || "bindoc".equals(name)) {
            curName = name;
          } else {
            curName = null;
          }
          text.setLength(0);
          break;
        }
        default:
          break;
      }
    }
    
    /**
     * Stores the completed text of a tracked {@code <str>} field, and on
     * {@code </doc>} counts the document and submits its import task.
     */
    @Override
    public void endElement(String uri, String localName, String qName)
        throws SAXException {
      switch (qName) {
        case "str": {
          if (curName != null) {
            curValue = text.toString().trim();
            if ("contentid".equals(curName)) {
              contentid = curValue;
            } else if ("bindoc".equals(curName)) {
              bindoc = curValue;
            }
          }
          // The field is finished; stop capturing until the next tracked <str>.
          curName = null;
          text.setLength(0);
          break;
        }
        case "doc": {
          ++fetchedSize;
          futures.add(CVSyncDataImporter.getInstance().importData(request,
              contentid, bindoc));
          break;
        }
        default:
          break;
      }
    }

    /**
     * Appends a chunk of character data to the field currently being read.
     * Text outside a tracked {@code <str>} element is ignored.
     */
    @Override
    public void characters(char[] ch, int start, int length)
        throws SAXException {
      if (curName != null) {
        text.append(ch, start, length);
      }
    }
  }

Resources
Parsing XML using DOM, SAX and StAX Parser in Java
Java SAX vs. StAX

via Blogger http://lifelongprogrammer.blogspot.com/2013/10/solr-use-sax-parser-to-reduce-memory-usage.html

Advertisements