001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.data.cache;
003
004import java.io.FileNotFoundException;
005import java.io.IOException;
006import java.net.HttpURLConnection;
007import java.net.URL;
008import java.security.SecureRandom;
009import java.util.HashSet;
010import java.util.List;
011import java.util.Map;
012import java.util.Set;
013import java.util.concurrent.ConcurrentHashMap;
014import java.util.concurrent.ConcurrentMap;
015import java.util.concurrent.LinkedBlockingDeque;
016import java.util.concurrent.ThreadPoolExecutor;
017import java.util.concurrent.TimeUnit;
018import java.util.regex.Matcher;
019
020import org.apache.commons.jcs.access.behavior.ICacheAccess;
021import org.apache.commons.jcs.engine.behavior.ICacheElement;
022import org.openstreetmap.josm.data.cache.ICachedLoaderListener.LoadResult;
023import org.openstreetmap.josm.data.preferences.IntegerProperty;
024import org.openstreetmap.josm.tools.CheckParameterUtil;
025import org.openstreetmap.josm.tools.HttpClient;
026import org.openstreetmap.josm.tools.Logging;
027import org.openstreetmap.josm.tools.Utils;
028
029/**
030 * Generic loader for HTTP based tiles. Uses custom attribute, to check, if entry has expired
031 * according to HTTP headers sent with tile. If so, it tries to verify using Etags
032 * or If-Modified-Since / Last-Modified.
033 *
034 * If the tile is not valid, it will try to download it from remote service and put it
035 * to cache. If remote server will fail it will try to use stale entry.
036 *
037 * This class will keep only one Job running for specified tile. All others will just finish, but
038 * listeners will be gathered and notified, once download job will be finished
039 *
040 * @author Wiktor Niesiobędzki
041 * @param <K> cache entry key type
042 * @param <V> cache value type
043 * @since 8168
044 */
045public abstract class JCSCachedTileLoaderJob<K, V extends CacheEntry> implements ICachedLoaderJob<K> {
046    protected static final long DEFAULT_EXPIRE_TIME = TimeUnit.DAYS.toMillis(7);
047    // Limit for the max-age value send by the server.
048    protected static final long EXPIRE_TIME_SERVER_LIMIT = TimeUnit.DAYS.toMillis(28);
049    // Absolute expire time limit. Cached tiles that are older will not be used,
050    // even if the refresh from the server fails.
051    protected static final long ABSOLUTE_EXPIRE_TIME_LIMIT = TimeUnit.DAYS.toMillis(365);
052
053    /**
054     * maximum download threads that will be started
055     */
056    public static final IntegerProperty THREAD_LIMIT = new IntegerProperty("cache.jcs.max_threads", 10);
057
058    /*
059     * ThreadPoolExecutor starts new threads, until THREAD_LIMIT is reached. Then it puts tasks into LinkedBlockingDeque.
060     *
061     * The queue works FIFO, so one needs to take care about ordering of the entries submitted
062     *
063     * There is no point in canceling tasks, that are already taken by worker threads (if we made so much effort, we can at least cache
064     * the response, so later it could be used). We could actually cancel what is in LIFOQueue, but this is a tradeoff between simplicity
065     * and performance (we do want to have something to offer to worker threads before tasks will be resubmitted by class consumer)
066     */
067
068    private static final ThreadPoolExecutor DEFAULT_DOWNLOAD_JOB_DISPATCHER = new ThreadPoolExecutor(
069            1, // we have a small queue, so threads will be quickly started (threads are started only, when queue is full)
070            THREAD_LIMIT.get(), // do not this number of threads
071            30, // keepalive for thread
072            TimeUnit.SECONDS,
073            // make queue of LIFO type - so recently requested tiles will be loaded first (assuming that these are which user is waiting to see)
074            new LinkedBlockingDeque<Runnable>(),
075            Utils.newThreadFactory("JCS-downloader-%d", Thread.NORM_PRIORITY)
076            );
077
078    private static final ConcurrentMap<String, Set<ICachedLoaderListener>> inProgress = new ConcurrentHashMap<>();
079    private static final ConcurrentMap<String, Boolean> useHead = new ConcurrentHashMap<>();
080
081    protected final long now; // when the job started
082
083    private final ICacheAccess<K, V> cache;
084    private ICacheElement<K, V> cacheElement;
085    protected V cacheData;
086    protected CacheEntryAttributes attributes;
087
088    // HTTP connection parameters
089    private final int connectTimeout;
090    private final int readTimeout;
091    private final Map<String, String> headers;
092    private final ThreadPoolExecutor downloadJobExecutor;
093    private Runnable finishTask;
094    private boolean force;
095
096    /**
097     * @param cache cache instance that we will work on
098     * @param headers HTTP headers to be sent together with request
099     * @param readTimeout when connecting to remote resource
100     * @param connectTimeout when connecting to remote resource
101     * @param downloadJobExecutor that will be executing the jobs
102     */
103    public JCSCachedTileLoaderJob(ICacheAccess<K, V> cache,
104            int connectTimeout, int readTimeout,
105            Map<String, String> headers,
106            ThreadPoolExecutor downloadJobExecutor) {
107        CheckParameterUtil.ensureParameterNotNull(cache, "cache");
108        this.cache = cache;
109        this.now = System.currentTimeMillis();
110        this.connectTimeout = connectTimeout;
111        this.readTimeout = readTimeout;
112        this.headers = headers;
113        this.downloadJobExecutor = downloadJobExecutor;
114    }
115
116    /**
117     * @param cache cache instance that we will work on
118     * @param headers HTTP headers to be sent together with request
119     * @param readTimeout when connecting to remote resource
120     * @param connectTimeout when connecting to remote resource
121     */
122    public JCSCachedTileLoaderJob(ICacheAccess<K, V> cache,
123            int connectTimeout, int readTimeout,
124            Map<String, String> headers) {
125        this(cache, connectTimeout, readTimeout,
126                headers, DEFAULT_DOWNLOAD_JOB_DISPATCHER);
127    }
128
129    private void ensureCacheElement() {
130        if (cacheElement == null && getCacheKey() != null) {
131            cacheElement = cache.getCacheElement(getCacheKey());
132            if (cacheElement != null) {
133                attributes = (CacheEntryAttributes) cacheElement.getElementAttributes();
134                cacheData = cacheElement.getVal();
135            }
136        }
137    }
138
139    @Override
140    public V get() {
141        ensureCacheElement();
142        return cacheData;
143    }
144
145    @Override
146    public void submit(ICachedLoaderListener listener, boolean force) throws IOException {
147        this.force = force;
148        boolean first = false;
149        URL url = getUrl();
150        String deduplicationKey = null;
151        if (url != null) {
152            // url might be null, for example when Bing Attribution is not loaded yet
153            deduplicationKey = url.toString();
154        }
155        if (deduplicationKey == null) {
156            Logging.warn("No url returned for: {0}, skipping", getCacheKey());
157            throw new IllegalArgumentException("No url returned");
158        }
159        synchronized (inProgress) {
160            Set<ICachedLoaderListener> newListeners = inProgress.get(deduplicationKey);
161            if (newListeners == null) {
162                newListeners = new HashSet<>();
163                inProgress.put(deduplicationKey, newListeners);
164                first = true;
165            }
166            newListeners.add(listener);
167        }
168
169        if (first || force) {
170            // submit all jobs to separate thread, so calling thread is not blocked with IO when loading from disk
171            Logging.debug("JCS - Submitting job for execution for url: {0}", getUrlNoException());
172            downloadJobExecutor.execute(this);
173        }
174    }
175
176    /**
177     * This method is run when job has finished
178     */
179    protected void executionFinished() {
180        if (finishTask != null) {
181            finishTask.run();
182        }
183    }
184
185    /**
186     *
187     * @return checks if object from cache has sufficient data to be returned
188     */
189    protected boolean isObjectLoadable() {
190        if (cacheData == null) {
191            return false;
192        }
193        return cacheData.getContent().length > 0;
194    }
195
196    /**
197     * Simple implementation. All errors should be cached as empty. Though some JDK (JDK8 on Windows for example)
198     * doesn't return 4xx error codes, instead they do throw an FileNotFoundException or IOException
199     *
200     * @return true if we should put empty object into cache, regardless of what remote resource has returned
201     */
202    protected boolean cacheAsEmpty() {
203        return attributes.getResponseCode() < 500;
204    }
205
206    /**
207     * @return key under which discovered server settings will be kept
208     */
209    protected String getServerKey() {
210        try {
211            return getUrl().getHost();
212        } catch (IOException e) {
213            Logging.trace(e);
214            return null;
215        }
216    }
217
218    @Override
219    public void run() {
220        final Thread currentThread = Thread.currentThread();
221        final String oldName = currentThread.getName();
222        currentThread.setName("JCS Downloading: " + getUrlNoException());
223        Logging.debug("JCS - starting fetch of url: {0} ", getUrlNoException());
224        ensureCacheElement();
225        try {
226            // try to fetch from cache
227            if (!force && cacheElement != null && isCacheElementValid() && isObjectLoadable()) {
228                // we got something in cache, and it's valid, so lets return it
229                Logging.debug("JCS - Returning object from cache: {0}", getCacheKey());
230                finishLoading(LoadResult.SUCCESS);
231                return;
232            }
233
234            // try to load object from remote resource
235            if (loadObject()) {
236                finishLoading(LoadResult.SUCCESS);
237            } else {
238                // if loading failed - check if we can return stale entry
239                if (isObjectLoadable()) {
240                    // try to get stale entry in cache
241                    finishLoading(LoadResult.SUCCESS);
242                    Logging.debug("JCS - found stale object in cache: {0}", getUrlNoException());
243                } else {
244                    // failed completely
245                    finishLoading(LoadResult.FAILURE);
246                }
247            }
248        } finally {
249            executionFinished();
250            currentThread.setName(oldName);
251        }
252    }
253
254    private void finishLoading(LoadResult result) {
255        Set<ICachedLoaderListener> listeners;
256        synchronized (inProgress) {
257            try {
258                listeners = inProgress.remove(getUrl().toString());
259            } catch (IOException e) {
260                listeners = null;
261                Logging.trace(e);
262            }
263        }
264        if (listeners == null) {
265            Logging.warn("Listener not found for URL: {0}. Listener not notified!", getUrlNoException());
266            return;
267        }
268        for (ICachedLoaderListener l: listeners) {
269            l.loadingFinished(cacheData, attributes, result);
270        }
271    }
272
273    protected boolean isCacheElementValid() {
274        long expires = attributes.getExpirationTime();
275
276        // check by expire date set by server
277        if (expires != 0L) {
278            // put a limit to the expire time (some servers send a value
279            // that is too large)
280            expires = Math.min(expires, attributes.getCreateTime() + EXPIRE_TIME_SERVER_LIMIT);
281            if (now > expires) {
282                Logging.debug("JCS - Object {0} has expired -> valid to {1}, now is: {2}",
283                        new Object[]{getUrlNoException(), Long.toString(expires), Long.toString(now)});
284                return false;
285            }
286        } else if (attributes.getLastModification() > 0 &&
287                now - attributes.getLastModification() > DEFAULT_EXPIRE_TIME) {
288            // check by file modification date
289            Logging.debug("JCS - Object has expired, maximum file age reached {0}", getUrlNoException());
290            return false;
291        } else if (now - attributes.getCreateTime() > DEFAULT_EXPIRE_TIME) {
292            Logging.debug("JCS - Object has expired, maximum time since object creation reached {0}", getUrlNoException());
293            return false;
294        }
295        return true;
296    }
297
298    /**
299     * @return true if object was successfully downloaded, false, if there was a loading failure
300     */
301    private boolean loadObject() {
302        if (attributes == null) {
303            attributes = new CacheEntryAttributes();
304        }
305        try {
306            // if we have object in cache, and host doesn't support If-Modified-Since nor If-None-Match
307            // then just use HEAD request and check returned values
308            if (isObjectLoadable() &&
309                    Boolean.TRUE.equals(useHead.get(getServerKey())) &&
310                    isCacheValidUsingHead()) {
311                Logging.debug("JCS - cache entry verified using HEAD request: {0}", getUrl());
312                return true;
313            }
314
315            Logging.debug("JCS - starting HttpClient GET request for URL: {0}", getUrl());
316            final HttpClient request = getRequest("GET", true);
317
318            if (isObjectLoadable() &&
319                    (now - attributes.getLastModification()) <= ABSOLUTE_EXPIRE_TIME_LIMIT) {
320                request.setIfModifiedSince(attributes.getLastModification());
321            }
322            if (isObjectLoadable() && attributes.getEtag() != null) {
323                request.setHeader("If-None-Match", attributes.getEtag());
324            }
325
326            final HttpClient.Response urlConn = request.connect();
327
328            if (urlConn.getResponseCode() == 304) {
329                // If isModifiedSince or If-None-Match has been set
330                // and the server answers with a HTTP 304 = "Not Modified"
331                Logging.debug("JCS - If-Modified-Since/ETag test: local version is up to date: {0}", getUrl());
332                return true;
333            } else if (isObjectLoadable() // we have an object in cache, but we haven't received 304 response code
334                    && (
335                            (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) ||
336                            attributes.getLastModification() == urlConn.getLastModified())
337                    ) {
338                // we sent ETag or If-Modified-Since, but didn't get 304 response code
339                // for further requests - use HEAD
340                String serverKey = getServerKey();
341                Logging.info("JCS - Host: {0} found not to return 304 codes for If-Modified-Since or If-None-Match headers",
342                        serverKey);
343                useHead.put(serverKey, Boolean.TRUE);
344            }
345
346            attributes = parseHeaders(urlConn);
347
348            for (int i = 0; i < 5; ++i) {
349                if (urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
350                    Thread.sleep(5000L+new SecureRandom().nextInt(5000));
351                    continue;
352                }
353
354                attributes.setResponseCode(urlConn.getResponseCode());
355                byte[] raw;
356                if (urlConn.getResponseCode() == HttpURLConnection.HTTP_OK) {
357                    raw = Utils.readBytesFromStream(urlConn.getContent());
358                } else {
359                    raw = new byte[]{};
360                    try {
361                        String data = urlConn.fetchContent();
362                        if (!data.isEmpty()) {
363                            Matcher m = HttpClient.getTomcatErrorMatcher(data);
364                            if (m.matches()) {
365                                attributes.setErrorMessage(m.group(1).replace("'", "''"));
366                            }
367                        }
368                    } catch (IOException e) {
369                        Logging.warn(e);
370                    }
371                }
372
373                if (isResponseLoadable(urlConn.getHeaderFields(), urlConn.getResponseCode(), raw)) {
374                    // we need to check cacheEmpty, so for cases, when data is returned, but we want to store
375                    // as empty (eg. empty tile images) to save some space
376                    cacheData = createCacheEntry(raw);
377                    cache.put(getCacheKey(), cacheData, attributes);
378                    Logging.debug("JCS - downloaded key: {0}, length: {1}, url: {2}",
379                            new Object[] {getCacheKey(), raw.length, getUrl()});
380                    return true;
381                } else if (cacheAsEmpty()) {
382                    cacheData = createCacheEntry(new byte[]{});
383                    cache.put(getCacheKey(), cacheData, attributes);
384                    Logging.debug("JCS - Caching empty object {0}", getUrl());
385                    return true;
386                } else {
387                    Logging.debug("JCS - failure during load - reponse is not loadable nor cached as empty");
388                    return false;
389                }
390            }
391        } catch (FileNotFoundException e) {
392            Logging.debug("JCS - Caching empty object as server returned 404 for: {0}", getUrlNoException());
393            attributes.setResponseCode(404);
394            attributes.setError(e);
395            boolean doCache = isResponseLoadable(null, 404, null) || cacheAsEmpty();
396            if (doCache) {
397                cacheData = createCacheEntry(new byte[]{});
398                cache.put(getCacheKey(), cacheData, attributes);
399            }
400            return doCache;
401        } catch (IOException e) {
402            Logging.debug("JCS - IOException during communication with server for: {0}", getUrlNoException());
403            if (isObjectLoadable()) {
404                return true;
405            } else {
406                attributes.setError(e);
407                attributes.setResponseCode(599); // set dummy error code, greater than 500 so it will be not cached
408                return false;
409            }
410
411        } catch (InterruptedException e) {
412            attributes.setError(e);
413            Logging.logWithStackTrace(Logging.LEVEL_WARN, e, "JCS - Exception during download {0}", getUrlNoException());
414            Thread.currentThread().interrupt();
415        }
416        Logging.warn("JCS - Silent failure during download: {0}", getUrlNoException());
417        return false;
418    }
419
420    /**
421     * Check if the object is loadable. This means, if the data will be parsed, and if this response
422     * will finish as successful retrieve.
423     *
424     * This simple implementation doesn't load empty response, nor client (4xx) and server (5xx) errors
425     *
426     * @param headerFields headers sent by server
427     * @param responseCode http status code
428     * @param raw data read from server
429     * @return true if object should be cached and returned to listener
430     */
431    protected boolean isResponseLoadable(Map<String, List<String>> headerFields, int responseCode, byte[] raw) {
432        return raw != null && raw.length != 0 && responseCode < 400;
433    }
434
435    protected abstract V createCacheEntry(byte[] content);
436
437    protected CacheEntryAttributes parseHeaders(HttpClient.Response urlConn) {
438        CacheEntryAttributes ret = new CacheEntryAttributes();
439
440        Long lng = urlConn.getExpiration();
441        if (lng.equals(0L)) {
442            try {
443                String str = urlConn.getHeaderField("Cache-Control");
444                if (str != null) {
445                    for (String token: str.split(",")) {
446                        if (token.startsWith("max-age=")) {
447                            lng = TimeUnit.SECONDS.toMillis(Long.parseLong(token.substring(8))) + System.currentTimeMillis();
448                        }
449                    }
450                }
451            } catch (NumberFormatException e) {
452                // ignore malformed Cache-Control headers
453                Logging.trace(e);
454            }
455        }
456
457        ret.setExpirationTime(lng);
458        ret.setLastModification(now);
459        ret.setEtag(urlConn.getHeaderField("ETag"));
460
461        return ret;
462    }
463
464    private HttpClient getRequest(String requestMethod, boolean noCache) throws IOException {
465        final HttpClient urlConn = HttpClient.create(getUrl(), requestMethod);
466        urlConn.setAccept("text/html, image/png, image/jpeg, image/gif, */*");
467        urlConn.setReadTimeout(readTimeout); // 30 seconds read timeout
468        urlConn.setConnectTimeout(connectTimeout);
469        if (headers != null) {
470            urlConn.setHeaders(headers);
471        }
472
473        if (force || noCache) {
474            urlConn.useCache(false);
475        }
476        return urlConn;
477    }
478
479    private boolean isCacheValidUsingHead() throws IOException {
480        final HttpClient.Response urlConn = getRequest("HEAD", false).connect();
481        long lastModified = urlConn.getLastModified();
482        return (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) ||
483                (lastModified != 0 && lastModified <= attributes.getLastModification());
484    }
485
486    /**
487     * TODO: move to JobFactory
488     * cancels all outstanding tasks in the queue.
489     */
490    public void cancelOutstandingTasks() {
491        for (Runnable r: downloadJobExecutor.getQueue()) {
492            if (downloadJobExecutor.remove(r) && r instanceof JCSCachedTileLoaderJob) {
493                ((JCSCachedTileLoaderJob<?, ?>) r).handleJobCancellation();
494            }
495        }
496    }
497
498    /**
499     * Sets a job, that will be run, when job will finish execution
500     * @param runnable that will be executed
501     */
502    public void setFinishedTask(Runnable runnable) {
503        this.finishTask = runnable;
504
505    }
506
507    /**
508     * Marks this job as canceled
509     */
510    public void handleJobCancellation() {
511        finishLoading(LoadResult.CANCELED);
512    }
513
514    private URL getUrlNoException() {
515        try {
516            return getUrl();
517        } catch (IOException e) {
518            return null;
519        }
520    }
521}