class MediaWiki::Gateway
Constants
- USER_AGENT
Attributes
Public Class Methods
Set up a MediaWiki::Gateway
for a given MediaWiki
installation
- url
-
Path to API of target
MediaWiki
(e.g. 'en.wikipedia.org/w/api.php')
- options
-
Hash of options
- http_options
-
Hash of options for RestClient::Request (via
http_send
)
Options:
- :bot
-
When set to true, executes API queries with the bot parameter (see www.mediawiki.org/wiki/API:Edit#Parameters). Defaults to false.
- :ignorewarnings
-
Log API warnings and invalid page titles instead of throwing
MediaWiki::APIError
- :limit
-
Maximum number of results returned per search (see www.mediawiki.org/wiki/API:Query_-_Lists#Limits), defaults to the
MediaWiki
default of 500.
- :logdevice
-
Log device to use. Defaults to STDERR
- :loglevel
-
Log level to use, defaults to Logger::WARN. Set to Logger::DEBUG to dump every request and response to the log.
- :maxlag
-
Maximum allowed server lag (see www.mediawiki.org/wiki/Manual:Maxlag_parameter), defaults to 5 seconds.
- :retry_count
-
Number of times to try before giving up if
MediaWiki
returns 503 Service Unavailable, defaults to 3 (original request plus two retries).
- :retry_count applies to the whole request, including the first attempt.
- :retry_delay
-
Seconds to wait before retry if
MediaWiki
returns 503 Service Unavailable, defaults to 10 seconds.
- :user_agent
-
User-Agent header to send with requests, defaults to
::default_user_agent
or nil.
# File lib/media_wiki/gateway.rb, line 34
#
# Set up a gateway for the MediaWiki API at +url+.
#
# [url] Path to the API of the target wiki; stored in @wiki_url.
# [options] Hash of gateway options, merged over the defaults listed below.
# [http_options] Hash of extra request options; stored in @http_options.
#
# Also creates the logger (from :logdevice and :loglevel), an empty cookie
# jar and the default request headers.
def initialize(url, options = {}, http_options = {})
  defaults = {
    bot:         false,
    limit:       500,
    logdevice:   STDERR,
    loglevel:    Logger::WARN,
    max_results: 500,
    maxlag:      5,
    retry_count: 3,
    retry_delay: 10,
    user_agent:  self.class.default_user_agent
  }
  @options = defaults.merge(options)

  @log = Logger.new(@options[:logdevice])
  @log.level = @options[:loglevel]

  @http_options = http_options
  @wiki_url = url
  @cookies = {}

  # A nil :user_agent is dropped by compact, leaving only USER_AGENT.
  agent = [@options[:user_agent], USER_AGENT].compact.join(' ')
  @headers = {
    'User-Agent'      => agent,
    'Accept-Encoding' => 'gzip'
  }
end
Public Instance Methods
Make generic request to API
- form_data
-
hash of attributes to post
- continue_xpath
-
XPath selector for query continue parameter
Returns XML document
# File lib/media_wiki/gateway.rb, line 64
#
# Make generic request to API.
#
# [form_data] hash of attributes to post
# [continue_xpath] XPath selector for query continue parameter
#
# Returns the first element of make_api_request's result (the XML document);
# the query continue value is discarded.
def send_request(form_data, continue_xpath = nil)
  result = make_api_request(form_data, continue_xpath)
  result.first
end
Private Instance Methods
Get API XML response. If there are errors or warnings, raise APIError.
Otherwise return XML root.
# File lib/media_wiki/gateway.rb, line 202
#
# Get API XML response.
#
# [res] Response body; re-tagged as UTF-8 when it responds to force_encoding.
#
# Returns the XML root element.
#
# Raises MediaWiki::Exception when the body cannot be parsed as XML, has no
# root element, or has a root element other than <api> or <mediawiki>.
# Raises APIError when the document contains an <error> element. A
# <warnings> element is passed to #warning, which raises or logs.
def get_response(res)
  begin
    res = res.force_encoding('UTF-8') if res.respond_to?(:force_encoding)
    doc = REXML::Document.new(res).root
  rescue REXML::ParseException
    raise MediaWiki::Exception.new('Response is not XML. Are you sure you are pointing to api.php?')
  end

  log.debug("RES: #{doc}")

  # The root is nil when the body holds no element at all (e.g. an empty
  # body); report that as a library error rather than failing on nil.name.
  unless doc && %w[api mediawiki].include?(doc.name)
    raise MediaWiki::Exception.new("Response does not contain Mediawiki API XML: #{res}")
  end

  if error = doc.elements['error']
    raise APIError.new(*error.attributes.values_at(*%w[code info]))
  end

  if warnings = doc.elements['warnings']
    warning("API warning: #{warnings.children.map(&:text).join(', ')}")
  end

  doc
end
Fetch token (type 'delete', 'edit', 'email', 'import', 'move', 'protect')
# File lib/media_wiki/gateway.rb, line 71
#
# Fetch token (type 'delete', 'edit', 'email', 'import', 'move', 'protect').
#
# [type] Token type, as a String or Symbol
# [page_titles] Value sent as the 'titles' parameter
#
# Returns the value of the "<type>token" attribute of the first page element
# in the response.
#
# Raises Unauthorized when the response has no page element or the page
# element carries no such token attribute.
def get_token(type, page_titles)
  type = type.to_s # accept symbols as well as strings

  res = send_request(
    'action'  => 'query',
    'prop'    => 'info',
    'intoken' => type,
    'titles'  => page_titles
  )

  # The page element may be absent; treat that like a missing token instead
  # of calling #attributes on nil.
  page  = res.elements['query/pages/page']
  token = page && page.attributes["#{type}token"]

  unless token
    raise Unauthorized.new "User is not permitted to perform this operation: #{type}"
  end

  token
end
Execute the HTTP request using either GET or POST as appropriate. @yieldparam response
# File lib/media_wiki/gateway.rb, line 176
#
# Execute the HTTP request using either GET or POST as appropriate.
#
# [url] URL to request
# [form_data] Request parameters; an 'action' of 'query' selects GET,
#             anything else POST
# [headers] Header hash; for GET requests the parameters are stored in it
#           under :params (the hash is modified in place)
#
# @yieldparam response
def http_send(url, form_data, headers, &block)
  opts = @http_options.merge(url: url, headers: headers)

  if form_data['action'] == 'query'
    opts[:method] = :get
    headers[:params] = form_data
  else
    opts[:method] = :post
    opts[:payload] = form_data
  end

  log.debug("#{opts[:method].upcase}: #{form_data.inspect}, #{@cookies.inspect}")

  RestClient::Request.execute(opts) do |response, request, result|
    # When a block is passed to RestClient::Request.execute, response codes
    # must be handled manually. HTTP 503 is checked by the caller (see
    # #make_api_request); redirects of GET requests are followed here.
    redirected = [301, 302, 307].include?(response.code) && request.method == :get
    response = response.follow_redirection(request, result) if redirected

    block.call(response)
  end
end
Iterate over query results
- list
-
list name to query
- res_xpath
-
XPath selector for results
- attr
-
attribute name to extract, if any
- param
-
parameter name to continue query
- options
-
additional query options
Yields each attribute value, or, if attr
is nil, each REXML::Element.
# File lib/media_wiki/gateway.rb, line 95
#
# Iterate over query results.
#
# [list] list name to query
# [res_xpath] XPath selector for results; prefixed with "//query/<list>/"
#             unless it starts with '/'
# [attr] attribute name to extract, if any
# [param] parameter name to continue query; its first two characters form
#         the prefix of the continue attributes that are looked up
# [options] additional query options (not modified; a merged copy is used)
#
# Yields each attribute value, or, if +attr+ is nil, each REXML::Element.
# Without a block the yielded values are collected and returned as an
# array; with a block the return value is nil.
def iterate_query(list, res_xpath, attr, param, options, &block)
  items = nil

  unless block
    items = []
    block = lambda { |item| items << item }
  end

  prefix    = param[0, 2]
  predicate = %w[from continue].map { |suffix| "name()='#{prefix}#{suffix}'" }.join(' or ')

  req_xpath = "//query-continue/#{list}/@*[#{predicate}]"
  res_xpath = "//query/#{list}/#{res_xpath}" unless res_xpath.start_with?('/')

  query = options.merge('action' => 'query', 'list' => list)

  loop do
    res, continue = make_api_request(query, req_xpath)

    REXML::XPath.match(res, res_xpath).each do |element|
      block.call(attr ? element.attributes[attr] : element)
    end

    break unless continue

    query[param] = continue
  end

  items
end
Make generic request to API
- form_data
-
hash of attributes to post
- continue_xpath
-
XPath selector for query continue parameter
- retry_count
-
Counter for retries
Returns array of XML document and query continue parameter.
# File lib/media_wiki/gateway.rb, line 127
#
# Make generic request to API.
#
# [form_data] hash of attributes to post; updated in place with the
#             'format' and 'maxlag' parameters
# [continue_xpath] XPath selector for query continue parameter
# [retry_count] number of the current attempt (1 for the original request)
#
# Returns array of XML document and query continue parameter. For a
# successful login or createaccount action the second element is false.
#
# Raises MediaWiki::Exception on a non-2xx response (including a 503 once
# the configured :retry_count is used up) and Unauthorized when a login or
# createaccount action reports a result other than success or needtoken.
def make_api_request(form_data, continue_xpath = nil, retry_count = 1)
  form_data.update('format' => 'xml', 'maxlag' => @options[:maxlag])

  http_send(@wiki_url, form_data, @headers.merge(cookies: @cookies)) do |response|
    if response.code == 503 && retry_count < @options[:retry_count]
      log.warn("503 Service Unavailable: #{response.body}.  Retry in #{@options[:retry_delay]} seconds.")
      sleep(@options[:retry_delay])

      # Hand back the outcome of the retry. Without the return, execution
      # falls through to the status check below, which still sees the 503
      # of this attempt and raises even if the retry succeeded.
      return make_api_request(form_data, continue_xpath, retry_count + 1)
    end

    # Check response for errors and return XML
    unless response.code >= 200 && response.code < 300
      raise MediaWiki::Exception.new("Bad response: #{response}")
    end

    doc = get_response(response.dup)

    # login and createaccount actions require a second request with a token
    # received on the first request
    if %w[login createaccount].include?(action = form_data['action'])
      action_result = doc.elements[action].attributes['result']
      @cookies.update(response.cookies)

      case action_result.downcase
      when 'success'
        return [doc, false]
      when 'needtoken'
        token = doc.elements[action].attributes['token']

        if action == 'login'
          return make_api_request(form_data.merge('lgtoken' => token))
        elsif action == 'createaccount'
          return make_api_request(form_data.merge('token' => token))
        end
      else
        if action == 'login'
          raise Unauthorized.new("Login failed: #{action_result}")
        elsif action == 'createaccount'
          raise Unauthorized.new("Account creation failed: #{action_result}")
        end
      end
    end

    return [doc, (continue_xpath && doc.elements['query-continue']) ?
      REXML::XPath.first(doc, continue_xpath) : nil]
  end
end
# File lib/media_wiki/gateway.rb, line 235
#
# Tells whether +page+ is usable.
#
# [page] Page element (or nil/false)
#
# Returns +page+ itself when it is nil or false, false when the page has a
# 'missing' attribute, and true when it has no 'invalid' attribute. For a
# page with an 'invalid' attribute the result is that of #warning, which
# either raises or returns false.
def valid_page?(page)
  return page unless page

  attrs = page.attributes
  return false if attrs['missing']
  return true unless attrs['invalid']

  warning("Invalid title '#{attrs['title']}'")
end
# File lib/media_wiki/gateway.rb, line 227
#
# Checks that every key of +options+ is a known option.
#
# [options] Hash whose keys are checked
# [valid_options] Collection of allowed option names, compared against the
#                 string form of each key
#
# Returns +options+. Raises ArgumentError for the first unknown key.
def validate_options(options, valid_options)
  options.each_key do |opt|
    next if valid_options.include?(opt.to_s)

    raise ArgumentError, "Unknown option '#{opt}'", caller(1)
  end
end
# File lib/media_wiki/gateway.rb, line 240
#
# Reports a warning.
#
# [msg] Warning text
#
# When the :ignorewarnings option is set, logs +msg+ at warn level and
# returns false; otherwise raises APIError with code 'warning'.
def warning(msg)
  if @options[:ignorewarnings]
    log.warn(msg)
    false
  else
    raise APIError.new('warning', msg)
  end
end