001package votorola.a; // Copyright 2012, Michael Allan. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Votorola Software"), to deal in the Votorola Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicence, and/or sell copies of the Votorola Software, and to permit persons to whom the Votorola Software is furnished to do so, subject to the following conditions: The preceding copyright notice and this permission notice shall be included in all copies or substantial portions of the Votorola Software. THE VOTOROLA SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE VOTOROLA SOFTWARE OR THE USE OR OTHER DEALINGS IN THE VOTOROLA SOFTWARE. 002 003import com.google.gson.stream.*; 004import java.io.*; 005import java.net.*; 006import java.util.*; 007import votorola.g.lang.*; 008 009 010/** A reader of Semantic MediaWiki properties for a cached pollwiki page. It reads the 011 * ordinary properties of the page, as well as the special "URL" property (undocumented 012 * as of Semantic MediaWiki 1.7.1). 013 */ 014public final class PagePropertyReader implements Closeable 015{ 016 017 // FIX add constructor for cacheless query via SMW 1.7 query API, per WikiCache.churn 018 019 020 /** Creates a PagePropertyReader. 021 * 022 * @param _fullPageName the full name of the page, including any namespace. 023 * 024 * @see #properties() 025 */ 026 public PagePropertyReader( final WikiCache wikiCache, String _fullPageName, 027 final PageProperty... _properties ) throws IOException 028 { 029 fullPageName = _fullPageName; 030 properties = _properties; 031 if( fullPageName == null ) throw new NullPointerException(); // fail fast 032 033 in = new JsonReader( new BufferedReader( new InputStreamReader( 034 wikiCache.openRDF_JSON(fullPageName), "UTF-8" ))); 035 in.beginObject(); 036 init: while( in.hasNext() ) 037 { 038 String name = in.nextName(); 039 if( "results".equals( name )) 040 { 041 in.beginObject(); 042 while( in.hasNext() ) 043 { 044 name = in.nextName(); 045 if( "bindings".equals( name )) 046 { 047 in.beginArray(); 048 next(); 049 break init; 050 } 051 else in.skipValue(); 052 } 053 } 054 else in.skipValue(); 055 } 056 } 057 058 059 060 // ------------------------------------------------------------------------------------ 061 062 063 /** Answers whether a property remains to be read. Returns true if the page has an 064 * unread property that matches one of {@linkplain #properties() those requested}. 065 */ 066 public boolean hasNext() { return hasNext; } 067 068 069 private boolean hasNext; 070 071 072 073 /** The properties to read. 074 */ 075 private PageProperty[] properties() { return properties; } 076 077 078 private final PageProperty[] properties; 079 080 081 082 /** Reads the next property from the page that matches one of {@linkplain 083 * #properties() those requested}. Returns the property with the value filled in 084 * from the page. 085 * 086 * @throws NoSuchElementException if no property remains to be read. 087 */ 088 public PageProperty read() throws IOException 089 { 090 if( !hasNext ) throw new NoSuchElementException(); 091 092 final PageProperty p = nextProperty; 093 p.setValue( nextValue ); 094 next(); 095 return p; 096 } 097 098 099 100 /** Reads the page properties until either each of {@linkplain #properties() those 101 * requested} has a value, or no more properties remain in the page. Use this method 102 * when all requested properties are single valued, or when you intend to ignore any 103 * additional values they might have. In the latter case, which of the multiple 104 * values is filled in by this method is undefined; viz. it is not guaranteed to be 105 * the first value. 106 */ 107 public void readAllRequested() throws IOException 108 { 109 if( !hasNext ) return; 110 111 readAll: for( ;; ) 112 { 113 int valuelessCount = 0; 114 for( PageProperty p: properties ) if( p.getValue() == null ) ++valuelessCount; 115 if( valuelessCount == 0 ) break readAll; // all have values filled in 116 117 readMin: while( valuelessCount > 0 ) // read the minimum that *might* fill all values 118 { 119 read(); 120 if( !hasNext ) break readAll; // no more properties in page 121 122 --valuelessCount; 123 } 124 // loop back to recount; a property may have been read twice, leaving an empty value 125 } 126 } 127 128 129 130 // - C l o s e a b l e ---------------------------------------------------------------- 131 132 133 /** Does nothing but close the underlying reader. This reader itself need not be 134 * closed. 135 */ 136 public void close() throws IOException { in.close(); } 137 138 139 140//// P r i v a t e /////////////////////////////////////////////////////////////////////// 141 142 143 /** Attempts to decode a Semantic MediaWiki URIResolver subject value in the form 144 * Special:URIResolver/SMW_ENCODED_FULLPAGE_NAME. The formatting and encoding is 145 * apparently done in <code>includes/export/SMWExporter</code>. 146 * 147 * @return the full page name, or null if none can be decoded. 148 */ 149 private static String decodedFullPageName( final String subjectValue ) 150 { 151 return decodedPageName( subjectValue, "/Special:URIResolver/" ); 152 } 153 154 155 156 private static String decodedPageName( final String value, final String marker ) 157 { 158 int c = value.indexOf( marker ); 159 if( c == -1 ) return null; // not so encoded 160 161 c += marker.length(); 162 final int cN = value.length(); 163 final StringBuilder b = new StringBuilder( cN - c ); 164 for(; c < cN; ++c ) 165 { 166 char ch = value.charAt( c ); 167 if( ch == '-' ) ch = '%'; 168 else if( ch == '_' ) ch = ' '; 169 b.append( ch ); 170 } 171 try{ return URLDecoder.decode( b.toString(), "UTF-8" ); } 172 catch( UnsupportedEncodingException x ) { throw new RuntimeException( x ); } 173 } 174 175 176 177 /** Attempts to decode a Semantic MediaWiki URIResolver predicate value in the form 178 * Special:URIResolver/SMW_ENCODED_FULLPAGE_NAME. The formatting and encoding is 179 * apparently done in <code>includes/export/SMWExporter</code>. 180 * 181 * @return the short property name, or null if none can be decoded. 182 */ 183 private static String decodedPropertyName( final String predicateValue ) 184 { 185 final String name; 186 if( predicateValue.endsWith( "#specialProperty_uri" )) 187 { 188 // ensure this robust, short encoding^ maps 1:1 with full encoding: 189 assert predicateValue.equals( "http://semantic-mediawiki.org/swivt/1.0#specialProperty_uri" ); 190 191 name = "URL"; /* for some undocumented reason, what was formerly an ordinary 192 property "URL" received a special encoding at or before SMW 1.7.1 */ 193 } 194 else name = decodedPageName( predicateValue, "/Special:URIResolver/Property-3A" ); 195 return name; 196 } 197 198 199 200 private String findValue() throws IOException 201 { 202 while( in.hasNext() ) 203 { 204 final String name = in.nextName(); 205 if( "value".equals( name )) return in.nextString(); 206 207 in.skipValue(); 208 } 209 throw new IllegalStateException(); 210 } 211 212 213 214 private final String fullPageName; 215 216 217 218 private final JsonReader in; 219 220 221 222 private void next() throws IOException 223 { 224 while( in.hasNext() ) 225 { 226 in.beginObject(); // single binding 227 try 228 { 229 String name; 230 String value; 231 232 // Subject. 233 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 234 name = in.nextName(); 235 assert "s".equals( name ); 236 in.beginObject(); 237 value = decodedFullPageName( findValue() ); 238 skipAndEndObject(); // s 239 if( fullPageName.equals( value )) 240 { 241 // Predicate. 242 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 243 name = in.nextName(); 244 assert "p".equals( name ); 245 in.beginObject(); 246 value = decodedPropertyName( findValue() ); 247 skipAndEndObject(); // p 248 for( PageProperty property: properties ) if( property.name().equals( value )) 249 { 250 nextProperty = property; 251 252 // Object. 253 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 254 name = in.nextName(); 255 assert "o".equals( name ); 256 in.beginObject(); 257 name = in.nextName(); 258 assert "type".equals( name ); 259 final String type = in.nextString(); 260 value = findValue(); 261 skipAndEndObject(); // o 262 if( "uri".equals( type )) 263 { 264 nextValue = decodedFullPageName( value ); 265 if( nextValue == null ) nextValue = value; // undecodeable, leave as such 266 } 267 else nextValue = value; 268 hasNext = true; 269 return; 270 } 271 } 272 } 273 finally{ skipAndEndObject(); } // single binding 274 } 275 hasNext = false; 276 } 277 278 279 280 private PageProperty nextProperty; 281 282 283 284 private String nextValue; 285 286 287 288 private void skipAndEndObject() throws IOException 289 { 290 while( in.hasNext() ) in.skipValue(); in.endObject(); 291 } 292 293 294}