001package votorola.a; // Copyright 2012, Michael Allan.  Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Votorola Software"), to deal in the Votorola Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicence, and/or sell copies of the Votorola Software, and to permit persons to whom the Votorola Software is furnished to do so, subject to the following conditions: The preceding copyright notice and this permission notice shall be included in all copies or substantial portions of the Votorola Software. THE VOTOROLA SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE VOTOROLA SOFTWARE OR THE USE OR OTHER DEALINGS IN THE VOTOROLA SOFTWARE.
002
003import com.google.gson.stream.*;
004import java.io.*;
005import java.net.*;
006import java.util.*;
007import votorola.g.lang.*;
008
009
010/** A reader of Semantic MediaWiki properties for a cached pollwiki page.  It reads the
011  * ordinary properties of the page, as well as the special "URL" property (undocumented
012  * as of Semantic MediaWiki 1.7.1).
013  */
014public final class PagePropertyReader implements Closeable
015{
016
017    // FIX add constructor for cacheless query via SMW 1.7 query API, per WikiCache.churn
018
019
020    /** Creates a PagePropertyReader.
021      *
022      *     @param _fullPageName the full name of the page, including any namespace.
023      *
024      *     @see #properties()
025      */
026    public PagePropertyReader( final WikiCache wikiCache, String _fullPageName,
027      final PageProperty... _properties ) throws IOException
028    {
029        fullPageName = _fullPageName;
030        properties = _properties;
031        if( fullPageName == null ) throw new NullPointerException(); // fail fast
032
033        in = new JsonReader( new BufferedReader( new InputStreamReader(
034          wikiCache.openRDF_JSON(fullPageName), "UTF-8" )));
035        in.beginObject();
036        init: while( in.hasNext() )
037        {
038            String name = in.nextName();
039            if( "results".equals( name ))
040            {
041                in.beginObject();
042                while( in.hasNext() )
043                {
044                    name = in.nextName();
045                    if( "bindings".equals( name ))
046                    {
047                        in.beginArray();
048                        next();
049                        break init;
050                    }
051                    else in.skipValue();
052                }
053            }
054            else in.skipValue();
055        }
056    }
057
058
059
060   // ------------------------------------------------------------------------------------
061
062
063    /** Answers whether a property remains to be read.  Returns true if the page has an
064      * unread property that matches one of {@linkplain #properties() those requested}.
065      */
066    public boolean hasNext() { return hasNext; }
067
068
069        private boolean hasNext;
070
071
072
073    /** The properties to read.
074      */
075    private PageProperty[] properties() { return properties; }
076
077
078        private final PageProperty[] properties;
079
080
081
082    /** Reads the next property from the page that matches one of {@linkplain
083      * #properties() those requested}.  Returns the property with the value filled in
084      * from the page.
085      *
086      *     @throws NoSuchElementException if no property remains to be read.
087      */
088    public PageProperty read() throws IOException
089    {
090        if( !hasNext ) throw new NoSuchElementException();
091
092        final PageProperty p = nextProperty;
093        p.setValue( nextValue );
094        next();
095        return p;
096    }
097
098
099
100    /** Reads the page properties until either each of {@linkplain #properties() those
101      * requested} has a value, or no more properties remain in the page.  Use this method
102      * when all requested properties are single valued, or when you intend to ignore any
103      * additional values they might have.  In the latter case, which of the multiple
104      * values is filled in by this method is undefined; viz. it is not guaranteed to be
105      * the first value.
106      */
107    public void readAllRequested() throws IOException
108    {
109        if( !hasNext ) return;
110
111        readAll: for( ;; )
112        {
113            int valuelessCount = 0;
114            for( PageProperty p: properties ) if( p.getValue() == null ) ++valuelessCount;
115            if( valuelessCount == 0 ) break readAll; // all have values filled in
116
117            readMin: while( valuelessCount > 0 ) // read the minimum that *might* fill all values
118            {
119                read();
120                if( !hasNext ) break readAll; // no more properties in page
121
122                --valuelessCount;
123            }
124            // loop back to recount; a property may have been read twice, leaving an empty value
125        }
126    }
127
128
129
130   // - C l o s e a b l e ----------------------------------------------------------------
131
132
133    /** Does nothing but close the underlying reader.  This reader itself need not be
134      * closed.
135      */
136    public void close() throws IOException { in.close(); }
137
138
139
140//// P r i v a t e ///////////////////////////////////////////////////////////////////////
141
142
143    /** Attempts to decode a Semantic MediaWiki URIResolver subject value in the form
144      * Special:URIResolver/SMW_ENCODED_FULLPAGE_NAME.  The formatting and encoding is
145      * apparently done in <code>includes/export/SMWExporter</code>.
146      *
147      *     @return the full page name, or null if none can be decoded.
148      */
149    private static String decodedFullPageName( final String subjectValue )
150    {
151        return decodedPageName( subjectValue, "/Special:URIResolver/" );
152    }
153
154
155
156    private static String decodedPageName( final String value, final String marker )
157    {
158        int c = value.indexOf( marker );
159        if( c == -1 ) return null; // not so encoded
160
161        c += marker.length();
162        final int cN = value.length();
163        final StringBuilder b = new StringBuilder( cN - c );
164        for(; c < cN; ++c )
165        {
166            char ch = value.charAt( c );
167            if( ch == '-' ) ch = '%';
168            else if( ch == '_' ) ch = ' ';
169            b.append( ch );
170        }
171        try{ return URLDecoder.decode( b.toString(), "UTF-8" ); }
172        catch( UnsupportedEncodingException x ) { throw new RuntimeException( x ); }
173    }
174
175
176
177    /** Attempts to decode a Semantic MediaWiki URIResolver predicate value in the form
178      * Special:URIResolver/SMW_ENCODED_FULLPAGE_NAME.  The formatting and encoding is
179      * apparently done in <code>includes/export/SMWExporter</code>.
180      *
181      *     @return the short property name, or null if none can be decoded.
182      */
183    private static String decodedPropertyName( final String predicateValue )
184    {
185        final String name;
186        if( predicateValue.endsWith( "#specialProperty_uri" ))
187        {
188            // ensure this robust, short encoding^ maps 1:1 with full encoding:
189            assert predicateValue.equals( "http://semantic-mediawiki.org/swivt/1.0#specialProperty_uri" );
190
191            name = "URL"; /* for some undocumented reason, what was formerly an ordinary
192              property "URL" received a special encoding at or before SMW 1.7.1 */
193        }
194        else name = decodedPageName( predicateValue, "/Special:URIResolver/Property-3A" );
195        return name;
196    }
197
198
199
200    private String findValue() throws IOException
201    {
202        while( in.hasNext() )
203        {
204            final String name = in.nextName();
205            if( "value".equals( name )) return in.nextString();
206
207            in.skipValue();
208        }
209        throw new IllegalStateException();
210    }
211
212
213
214    private final String fullPageName;
215
216
217
218    private final JsonReader in;
219
220
221
222    private void next() throws IOException
223    {
224        while( in.hasNext() )
225        {
226            in.beginObject(); // single binding
227            try
228            {
229                String name;
230                String value;
231
232              // Subject.
233              // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
234                name = in.nextName();
235                assert "s".equals( name );
236                in.beginObject();
237                value = decodedFullPageName( findValue() );
238                skipAndEndObject(); // s
239                if( fullPageName.equals( value ))
240                {
241                  // Predicate.
242                  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
243                    name = in.nextName();
244                    assert "p".equals( name );
245                    in.beginObject();
246                    value = decodedPropertyName( findValue() );
247                    skipAndEndObject(); // p
248                    for( PageProperty property: properties ) if( property.name().equals( value ))
249                    {
250                        nextProperty = property;
251
252                      // Object.
253                      // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
254                        name = in.nextName();
255                        assert "o".equals( name );
256                        in.beginObject();
257                        name = in.nextName();
258                        assert "type".equals( name );
259                        final String type = in.nextString();
260                        value = findValue();
261                        skipAndEndObject(); // o
262                        if( "uri".equals( type ))
263                        {
264                            nextValue = decodedFullPageName( value );
265                            if( nextValue == null ) nextValue = value; // undecodeable, leave as such
266                        }
267                        else nextValue = value;
268                        hasNext = true;
269                        return;
270                    }
271                }
272            }
273            finally{ skipAndEndObject(); } // single binding
274        }
275        hasNext = false;
276    }
277
278
279
280    private PageProperty nextProperty;
281
282
283
284    private String nextValue;
285
286
287
288    private void skipAndEndObject() throws IOException
289    {
290        while( in.hasNext() ) in.skipValue(); in.endObject();
291    }
292
293
294}