providers_skohub-provider.js
import BaseProvider from "./base-provider.js"
import * as _ from "../utils/lodash.js"
import * as errors from "../errors/index.js"
import jskos from "jskos-tools"
import FlexSearch from "flexsearch"
// from https://stackoverflow.com/a/22021709
/**
 * Decodes literal JSON-style unicode escapes (a backslash, a lowercase "u" and
 * exactly four hex digits, e.g. the six characters `\u00e4`) into the
 * corresponding UTF-16 code units.
 *
 * Only the lowercase `\u` prefix is treated as an escape; the hex digits may be
 * in either case. Other text (including `\U....` sequences) is left untouched.
 *
 * @param {string} text text that may contain literal `\uXXXX` sequences
 * @returns {string} text with all such sequences replaced by their characters
 */
function decodeUnicode(text) {
  // The capture group holds just the four hex digits, so no second replace is
  // needed to strip the prefix before parsing.
  return text.replace(/\\u([\dA-Fa-f]{4})/g, (_match, hex) => String.fromCharCode(parseInt(hex, 16)))
}
// Data by registry URI.
// Module-level store keyed by a provider's `uri`. Each entry is created in
// `_prepare` with the shape `{ index, conceptCache, schemeCache }` and is read
// through the `_index`, `_conceptCache` and `_schemeCache` getters.
const data = {}
/**
* SkoHub Vocabs
*
* [SkoHub Vocabs](https://blog.lobid.org/2019/09/27/presenting-skohub-vocabs.html) is a static site generator that provides HTML/JSON versions of vocabularies as static files.
*
* Example vocabulary (configured below): https://w3id.org/class/esc/scheme
*
* It is important that a dereferenceable scheme URI is used. When configured via [BARTOC](https://bartoc.org/) in the `API` field, "http://bartoc.org/api-type/skohub" needs to be given as the type and the aforementioned dereferenceable scheme URI needs to be given as the URL.
*
* See also: https://github.com/gbv/cocoda-sdk/issues/29
*
* ```json
* {
* "uri": "http://coli-conc.gbv.de/registry/skohub.io",
* "provider": "Skohub",
* "schemes": [
* {
* "uri": "https://w3id.org/class/esc/scheme"
* }
* ]
* }
* ```
*/
export default class SkohubProvider extends BaseProvider {

  // Flags for the request types implemented by this provider
  // (presumably evaluated by BaseProvider / the SDK; not visible in this file).
  static supports = {
    schemes: true,
    top: true,
    data: true,
    concepts: true,
    narrower: true,
    ancestors: true,
    suggest: true,
    search: true,
  }

  /**
   * Creates (or resets) the module-level storage entry for this registry:
   * search indexes, cached concepts and cached schemes.
   */
  _prepare() {
    data[this.uri] = {
      index: {},
      conceptCache: {},
      schemeCache: {},
    }
  }

  // Search indexes for this registry: scheme URI -> language -> index (or `null` after a failed load)
  get _index() {
    return data[this.uri] && data[this.uri].index
  }

  // Loaded concepts for this registry, keyed by concept URI
  get _conceptCache() {
    return data[this.uri] && data[this.uri].conceptCache
  }

  // Loaded schemes for this registry, keyed by every known URI of the scheme
  get _schemeCache() {
    return data[this.uri] && data[this.uri].schemeCache
  }

  /**
   * Used by `registryForScheme` (see src/lib/CocodaSDK.js) to determine a provider config for a concept scheme.
   *
   * @param {Object} options
   * @param {Object} options.url API URL for BARTOC instance
   * @param {Object} options.scheme scheme for which the config is requested
   * @returns {Object} provider configuration, or `null` if `url` or `scheme` is missing
   */
  static _registryConfigForBartocApiConfig({ url, scheme } = {}) {
    if (!url || !scheme) {
      return null
    }
    // Save scheme with "url" as main URI, add other identifier
    const newScheme = { uri: url, identifier: jskos.getAllUris(scheme).filter(uri => uri !== url) }
    return { schemes: [newScheme] }
  }

  /**
   * Returns a copy of a language map in which every value has its unicode
   * escapes decoded. The given map is not modified.
   *
   * @param {Object} languageMap object mapping language tags to strings
   * @param {boolean} [wrapInArray=false] whether each decoded value is wrapped in a one-element array
   * @returns {Object} new language map
   */
  static _decodeLanguageMap(languageMap, wrapInArray = false) {
    const decoded = {}
    for (const key of Object.keys(languageMap)) {
      const value = decodeUnicode(languageMap[key])
      decoded[key] = wrapInArray ? [value] : value
    }
    return decoded
  }

  /**
   * Loads a concept scheme from its SkoHub JSON file and converts it to JSKOS.
   * Results are cached under all URIs of the scheme.
   *
   * @param {Object} config remaining properties are passed on to the request
   * @param {Object} config.scheme scheme to load; must match one of `this.schemes`
   * @returns {Object} JSKOS concept scheme
   * @throws {InvalidRequestError} if the scheme is not configured or the response has a different `id`
   */
  async _loadScheme({ scheme, ...config }) {
    let uris = jskos.getAllUris(scheme)
    // Use cache first (any of the scheme's URIs may have been cached)
    for (const knownUri of uris) {
      if (this._schemeCache[knownUri]) {
        return this._schemeCache[knownUri]
      }
    }
    // Find main URI from this.schemes
    const schemeFromList = this.schemes.find(s => jskos.compare(s, scheme))
    if (!schemeFromList || !schemeFromList.uri) {
      throw new errors.InvalidRequestError({ message: `Tried to load unsupported scheme (${scheme && scheme.uri})` })
    }
    const uri = schemeFromList.uri
    uris = _.uniq(uris.concat(jskos.getAllUris(schemeFromList)))
    // A URI ending in "/" is requested as ".../index.json", any other as "<uri>.json"
    const postfix = uri.endsWith("/") ? "index.json" : ".json"
    // Errors for this request will trickle upwards of the call chain
    const schemeData = await this.axios({ ...config, url: `${uri}${postfix}`, _skipAdditionalParameters: true })
    if (schemeData.id !== uri) {
      throw new errors.InvalidRequestError({ message: "Skohub URL did not return expected concept scheme" })
    }
    const { title, preferredNamespaceUri, hasTopConcept, description } = schemeData
    const loadedScheme = { uri, identifier: uris.filter(u => u !== uri) }
    // Labels are copied so that the response object is not modified
    loadedScheme.prefLabel = title && SkohubProvider._decodeLanguageMap(title)
    loadedScheme.namespace = preferredNamespaceUri
    loadedScheme.topConcepts = (hasTopConcept || []).map(c => this._toJskosConcept(c))
    loadedScheme.concepts = [null]
    if (description) {
      // definition values in JSKOS are arrays
      loadedScheme.definition = SkohubProvider._decodeLanguageMap(description, true)
    }
    // Remove fields without value
    for (const key of Object.keys(loadedScheme).filter(k => !loadedScheme[k])) {
      delete loadedScheme[key]
    }
    // Add to cache
    for (const cacheUri of uris) {
      this._schemeCache[cacheUri] = loadedScheme
    }
    return loadedScheme
  }

  /**
   * Loads a single concept from `<uri>.json` and converts it to JSKOS.
   * Successfully loaded concepts are cached by URI.
   *
   * @param {Object} config remaining properties are passed on to the request
   * @param {string} config.uri concept URI
   * @returns {Object|null} JSKOS concept, or `null` if loading failed for any reason
   */
  async _loadConcept({ uri, ...config }) {
    // Use cache first
    if (this._conceptCache[uri]) {
      return this._conceptCache[uri]
    }
    try {
      const conceptData = await this.axios({ ...config, url: `${uri}.json`, _skipAdditionalParameters: true })
      if (conceptData.id !== uri) {
        throw new errors.InvalidRequestError({ message: "Skohub URL did not return expected concept URI" })
      }
      const concept = this._toJskosConcept(conceptData)
      this._conceptCache[uri] = concept
      return concept
    } catch (error) {
      // Deliberately best-effort: return null on any error (including the URI mismatch above)
      return null
    }
  }

  /**
   * Converts a SkoHub concept object into a JSKOS concept.
   * The input object is not modified; narrower concepts are converted recursively.
   *
   * @param {Object} data SkoHub concept data (`id`, `prefLabel`, `narrower`, `notation`, `broader`, `inScheme`, `scopeNote`)
   * @returns {Object} JSKOS concept
   */
  _toJskosConcept(data) {
    const concept = { uri: data.id }
    concept.prefLabel = data.prefLabel && SkohubProvider._decodeLanguageMap(data.prefLabel)
    concept.narrower = (data.narrower || []).map(c => this._toJskosConcept(c))
    concept.notation = data.notation || []
    if (data.broader && data.broader.id) {
      concept.broader = [{ uri: data.broader.id }]
    }
    if (data.inScheme && data.inScheme.id) {
      concept.inScheme = [{ uri: data.inScheme.id }]
    }
    if (data.scopeNote) {
      // scopeNote values in JSKOS are arrays
      concept.scopeNote = SkohubProvider._decodeLanguageMap(data.scopeNote, true)
    }
    return concept
  }

  /**
   * Returns all configured concept schemes, loading each one if necessary.
   *
   * @param {Object} [config] passed on to the requests
   * @returns {Object[]} array of JSKOS concept schemes
   */
  async getSchemes({ ...config } = {}) {
    return Promise.all(this.schemes.map(scheme => this._loadScheme({ ...config, scheme })))
  }

  /**
   * Returns the top concepts of a concept scheme.
   *
   * @param {Object} config remaining properties are passed on to the request
   * @param {Object} config.scheme concept scheme (needs a `uri`)
   * @returns {Object[]} array of JSKOS concepts
   * @throws {InvalidOrMissingParameterError} if no scheme URI is given
   */
  async getTop({ scheme, ...config } = {}) {
    if (!scheme || !scheme.uri) {
      throw new errors.InvalidOrMissingParameterError({ parameter: "scheme", message: "Missing scheme URI" })
    }
    const loadedScheme = await this._loadScheme({ scheme, ...config })
    return loadedScheme.topConcepts || []
  }

  /**
   * Returns the concepts for the given concept objects. Concepts that cannot
   * be loaded are omitted from the result.
   *
   * @param {Object} config remaining properties are passed on to the requests
   * @param {Object|Object[]} config.concepts concept or array of concepts (each with a `uri`)
   * @returns {Object[]} array of JSKOS concepts
   * @throws {InvalidOrMissingParameterError} if `concepts` is missing
   */
  async getConcepts({ concepts, ...config } = {}) {
    if (!concepts) {
      throw new errors.InvalidOrMissingParameterError({ parameter: "concepts" })
    }
    if (!_.isArray(concepts)) {
      concepts = [concepts]
    }
    // Entries without a URI cannot be loaded, so they are skipped up front
    const requested = concepts.filter(concept => concept && concept.uri)
    // Concepts have to be loaded separately, so we parallelize it with Promise.all
    const loaded = await Promise.all(requested.map(({ uri }) => this._loadConcept({ ...config, uri })))
    return loaded.filter(Boolean)
  }

  /**
   * Returns the ancestors of a concept by following the first `broader` entry upwards.
   *
   * @param {Object} config remaining properties are passed on to the requests
   * @param {Object} config.concept concept (needs a `uri`)
   * @returns {Object[]} ancestors, nearest first; `concept.ancestors` is returned as is when already present
   * @throws {InvalidOrMissingParameterError} if no concept URI is given
   */
  async getAncestors({ concept, ...config } = {}) {
    if (!concept || !concept.uri) {
      throw new errors.InvalidOrMissingParameterError({ parameter: "concept" })
    }
    // A first element of `null` marks the list as not loaded yet
    if (concept.ancestors && concept.ancestors[0] !== null) {
      return concept.ancestors
    }
    const loaded = await this._loadConcept({ ...config, uri: concept.uri })
    if (!loaded || !loaded.broader || !loaded.broader.length) {
      return []
    }
    const broader = loaded.broader[0]
    const further = await this.getAncestors({ concept: broader, ...config })
    return [broader].concat(further).map(c => ({ uri: c.uri }))
  }

  /**
   * Returns the narrower concepts of a concept.
   *
   * @param {Object} config remaining properties are passed on to the request
   * @param {Object} config.concept concept (needs a `uri`)
   * @returns {Object[]} narrower concepts; empty if the concept could not be loaded
   * @throws {InvalidOrMissingParameterError} if no concept URI is given
   */
  async getNarrower({ concept, ...config } = {}) {
    if (!concept || !concept.uri) {
      throw new errors.InvalidOrMissingParameterError({ parameter: "concept" })
    }
    // A first element of `null` marks the list as not loaded yet
    if (concept.narrower && concept.narrower[0] !== null) {
      return concept.narrower
    }
    // `_loadConcept` returns null on failure, which must not be dereferenced
    const loaded = await this._loadConcept({ ...config, uri: concept.uri })
    return (loaded && loaded.narrower) || []
  }

  /**
   * Returns concept search results.
   *
   * @param {Object} config
   * @param {string} config.search search string
   * @param {Object} [config.scheme] concept scheme to search in
   * @param {number} [config.limit=100] maximum number of search results
   * @returns {Array} array of JSKOS concept objects
   * @throws {InvalidRequestError} if the scheme is unsupported or no search index could be loaded
   * @throws {InvalidOrMissingParameterError} if `search` is missing
   */
  async search({ search, scheme, limit = 100 } = {}) {
    scheme = await this._loadScheme({ scheme })
    if (!scheme || !scheme.uri) {
      throw new errors.InvalidOrMissingParameterError({ parameter: "scheme" })
    }
    if (!search) {
      throw new errors.InvalidOrMissingParameterError({ parameter: "search" })
    }
    // 1. Load index file if necessary
    let index
    if (!this._index[scheme.uri]) {
      this._index[scheme.uri] = {}
    }
    const indexesByLanguage = this._index[scheme.uri]
    // Iterate over languages and use the first one that has an index
    // ("" stands for the index file without a language part in its name)
    for (const lang of [""].concat(this.languages)) {
      if (indexesByLanguage[lang]) {
        index = indexesByLanguage[lang]
        break
      }
      // `null` means the request failed before, so we won't try again
      if (indexesByLanguage[lang] === null) {
        continue
      }
      try {
        let postfix = lang ? `.${lang}.index` : ".index"
        if (scheme.uri.endsWith("/")) {
          postfix = `index${postfix}`
        }
        const indexData = await this.axios({ url: `${scheme.uri}${postfix}`, _skipAdditionalParameters: true })
        if (indexData.length < 100) {
          // Assume the index is empty and skip it
          indexesByLanguage[lang] = null
          continue
        }
        index = FlexSearch.create()
        index.import(indexData)
        indexesByLanguage[lang] = index
        break
      } catch (error) {
        // ? Can we differentiate between errors? error.response is undefined for some reason.
        indexesByLanguage[lang] = null
      }
    }
    if (!index) {
      throw new errors.InvalidRequestError({ message: "Could not find search index for any of the available languages " + this.languages.join(",") })
    }
    // 2. Use Flexsearch to get result URIs from index
    const result = index.search(search)
    // 3. Load concept data for results
    const concepts = await this.getConcepts({ concepts: result.map(uri => ({ uri })) })
    return concepts.slice(0, limit)
  }

  /**
   * Returns suggestion result in OpenSearch Suggest Format.
   *
   * @param {Object} config
   * @param {string} config.search search string
   * @param {Object} [config.scheme] concept scheme to search in
   * @param {number} [config.limit=100] maximum number of search results
   * @returns {Array} result in OpenSearch Suggest Format
   */
  async suggest(config = {}) {
    // Pass `_raw` on a copy so that the caller's config object is not modified
    const concepts = await this.search({ ...config, _raw: true })
    const result = [config.search, [], [], []]
    for (const concept of concepts) {
      const notation = jskos.notation(concept)
      const label = jskos.prefLabel(concept)
      result[1].push((notation ? notation + " " : "") + label)
      result[2].push("")
      result[3].push(concept.uri)
    }
    // Prefer a total count attached to the search result, otherwise count the concepts
    result._totalCount = concepts._totalCount != null ? concepts._totalCount : concepts.length
    return result
  }

}
// Static identification of this provider: the name used as "provider" in a
// registry configuration and the BARTOC API type URI for SkoHub.
Object.assign(SkohubProvider, {
  providerName: "Skohub",
  providerType: "http://bartoc.org/api-type/skohub",
})