SPARQLthon/SPARQL samples
提供:TogoWiki
目次 |
SPARQL Queries
たまに、SNS等で、具体的なSPARQL問い合わせに関する質問がされて、誰かが答えるような事があります。具体的な例は貴重なので、とりあえず記録として残すために、ページを作りました。
生物種一覧
# Answer by Chiba-san.
# Run against the MBGD SPARQL endpoint. [1]
#
# Selects taxa of rank Genus that are (transitive) subclasses of taxonomy ID
# 1117, builds a DBpedia resource IRI from each scientific name, and asks the
# DBpedia endpoint for that resource's English label, English abstract and
# Wikipedia page.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxid: <http://identifiers.org/taxonomy/>
PREFIX taxont: <http://ddbj.nig.ac.jp/ontologies/taxonomy/>
PREFIX dbo: <http://dbpedia.org/ontology/>

SELECT ?wiki_page ?label ?abst
WHERE {
  ?tax_id rdfs:subClassOf* taxid:1117 ;
          taxont:scientificName ?tax_name ;
          taxont:rank taxont:Genus .

  # Spaces in the scientific name are replaced by underscores to form the
  # DBpedia resource IRI.
  BIND(URI(CONCAT("http://dbpedia.org/resource/", REPLACE(?tax_name, " ", "_"))) AS ?dbpedia)

  # Federated part: evaluated by the remote DBpedia endpoint.
  SERVICE <http://dbpedia.org/sparql> {
    SELECT ?wiki_page ?label ?abst ?dbpedia
    WHERE {
      ?dbpedia rdfs:label ?label ;
               dbo:abstract ?abst ;
               <http://xmlns.com/foaf/0.1/isPrimaryTopicOf> ?wiki_page .
      # Keep English label/abstract only.
      FILTER(lang(?label) = "en" && lang(?abst) = "en")
    }
  }
}
- run sparql-support
# By OKBP-san.
# Run against Wikidata (query.wikidata.org).
#
# Finds the taxon whose wdt:P685 value is "1117", walks wdt:P171 transitively
# to collect its descendants, keeps those whose rank is labelled "species"@en,
# and optionally returns each descendant's own P685 value and English
# Wikipedia page.
# NOTE(review): P685 / P171 / P105 are presumably "NCBI taxonomy ID" /
# "parent taxon" / "taxon rank" — confirm on Wikidata.
#
# rdfs: and schema: are predefined on query.wikidata.org; they are declared
# explicitly so the query also parses on endpoints without built-in prefixes.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <http://schema.org/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>

SELECT ?descendant ?label ?tax_id ?wikipage
WHERE {
  ?taxon wdt:P685 "1117" .

  ?descendant wdt:P171* ?taxon ;
              rdfs:label ?label ;
              wdt:P105 ?rank .
  FILTER(LANG(?label) = "en")

  ?rank rdfs:label "species"@en .

  OPTIONAL { ?descendant wdt:P685 ?tax_id . }
  OPTIONAL {
    ?wikipage schema:about ?descendant ;
              schema:isPartOf <https://en.wikipedia.org/> .
  }
}
# By Yamamoto-san.
# Run against Wikidata (query.wikidata.org).
#
# Returns every item ?s that reaches wd:Q764 through one or more wdt:P171
# links, together with its English Wikipedia article ?w.
PREFIX schema: <http://schema.org/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>

# Explicit projection instead of SELECT *: the pattern binds exactly ?s and ?w.
SELECT ?s ?w
WHERE {
  ?s wdt:P171+ wd:Q764 .
  ?w schema:about ?s ;
     schema:isPartOf <https://en.wikipedia.org/> .
}
# By jgoamakf-san.
# Run against DBpedia: https://dbpedia.org/sparql
#
# Lists up to 100 distinct Wikipedia page URLs of resources whose
# dbpedia-owl "family" is the Canidae resource.
SELECT DISTINCT ?url
WHERE {
  ?resource <http://dbpedia.org/ontology/family> <http://dbpedia.org/resource/Canidae> ;
            <http://xmlns.com/foaf/0.1/isPrimaryTopicOf> ?url .
}
LIMIT 100
- run sparql-support
wwPDB RDF
- 二次構造でβシート→ループ→βシートを持ち、かつヘアピン構造を形成しているPDBエントリー一覧
# PDB entries containing a pair of anti-parallel sheet ranges that are close in
# sequence: the gap between the end of one range and the start of the other
# (in auth_seq_id numbering) is 2 to 5.
#
# dcterms: and xsd: were used without being declared; they are declared here so
# the query does not depend on endpoint-predefined prefixes.
# NOTE(review): this query uses the pdbx-v40 schema namespace (http), while
# other wwPDB queries may target pdbx-v50 (https) — confirm which one the
# target dataset uses.
PREFIX PDBo: <http://rdf.wwpdb.org/schema/pdbx-v40.owl#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?pdb ?pdb_id ?title ?sheet
       ?sheet_range_1 ?sheet_range_1_beg ?sheet_range_1_end
       ?sheet_range_2 ?sheet_range_2_beg ?sheet_range_2_end
       ?dist1 ?dist2
FROM <http://rdf.integbio.jp/dataset/pdbj>
WHERE {
  ?pdb dcterms:identifier ?pdb_id ;
       dc:title ?title ;
       PDBo:has_struct_sheetCategory ?sheet_category .

  ?sheet_category a PDBo:struct_sheetCategory ;
                  PDBo:has_struct_sheet ?sheet .

  ?sheet a PDBo:struct_sheet ;
         PDBo:struct_sheet.id ?sheet_id ;
         PDBo:referenced_by_struct_sheet_order ?sheet_order ;
         PDBo:referenced_by_struct_sheet_range ?sheet_range_1 ;
         PDBo:referenced_by_struct_sheet_range ?sheet_range_2 .

  # The sheet_order row names the two ranges (strands) it relates and their sense.
  ?sheet_order a PDBo:struct_sheet_order ;
               PDBo:struct_sheet_order.sense "anti-parallel" ;
               PDBo:struct_sheet_order.range_id_1 ?sheet_range_1_id ;
               PDBo:struct_sheet_order.range_id_2 ?sheet_range_2_id .

  ?sheet_range_1 PDBo:struct_sheet_range.id ?sheet_range_1_id ;
                 PDBo:struct_sheet_range.beg_auth_seq_id ?sheet_range_1_beg ;
                 PDBo:struct_sheet_range.end_auth_seq_id ?sheet_range_1_end .

  ?sheet_range_2 PDBo:struct_sheet_range.id ?sheet_range_2_id ;
                 PDBo:struct_sheet_range.beg_auth_seq_id ?sheet_range_2_beg ;
                 PDBo:struct_sheet_range.end_auth_seq_id ?sheet_range_2_end .

  # Gap in either direction: range 2 following range 1, or range 1 following range 2.
  BIND((xsd:integer(?sheet_range_2_beg) - xsd:integer(?sheet_range_1_end)) AS ?dist1)
  BIND((xsd:integer(?sheet_range_1_beg) - xsd:integer(?sheet_range_2_end)) AS ?dist2)
  FILTER((?dist1 < 6 && ?dist1 > 1) || (?dist2 < 6 && ?dist2 > 1))
}
LIMIT 1000
タンパク・リガンド相互作用
- ChEMBL RDF に対して、UniProt URI - Ligand - 生物種 - 文献 を取得
# ChEMBL RDF: UniProt IRI - ligand (PubChem compound) - taxon - PubMed ID.
#
# The molecule is taken from the activity measured in the assay, so each
# returned (?up, ?compound) pair comes from one concrete activity record.
# Previously the molecule was attached to the document, which paired every
# molecule of a paper with every target of that paper and left ?activity unused.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bibo: <http://purl.org/ontology/bibo/>
PREFIX cco: <http://rdf.ebi.ac.uk/terms/chembl#>

SELECT DISTINCT ?up ?compound ?taxon ?pmid
WHERE {
  ?doc bibo:pmid ?pmid ;
       cco:hasAssay ?assay .

  ?assay cco:hasTarget ?target ;
         cco:hasActivity ?activity .

  ?target cco:hasTargetComponent ?component ;
          cco:taxonomy ?taxon .
  ?component cco:targetCmptXref ?up .

  ?activity cco:hasMolecule ?molecule .
  ?molecule cco:moleculeXref ?compound .

  # Keep only the cross-references of interest among all xrefs.
  FILTER(STRSTARTS(STR(?compound), "http://pubchem.ncbi.nlm.nih.gov/compound/"))
  FILTER(STRSTARTS(STR(?up), "http://purl.uniprot.org/"))
  FILTER(STRSTARTS(STR(?taxon), "http://identifiers.org/taxonomy"))
}
LIMIT 100
- run sparql-support
- wwPDB RDF に対して、UniProt URI - Ligand - 生物種 - 文献 を取得
# wwPDB RDF: UniProt IRI - ligand (chem_comp) - taxon - literature reference.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX PDBo: <https://rdf.wwpdb.org/schema/pdbx-v50.owl#>
PREFIX dcterms: <http://purl.org/dc/terms/>

SELECT DISTINCT ?up ?compound ?taxon ?pubmed
FROM <http://rdf.integbio.jp/dataset/pdbj>
WHERE {
  ?pdb a PDBo:datablock ;
       dcterms:identifier ?pdb_id ;
       PDBo:has_entityCategory ?entity_category ;
       PDBo:has_citationCategory ?citation_category .

  # Polymer entity -> UniProt cross-reference and source organism.
  ?entity_category PDBo:has_entity ?entity .
  ?entity PDBo:referenced_by_struct_ref ?struct_ref ;
          PDBo:referenced_by_entity_src_gen ?entity_src_gen .
  ?struct_ref PDBo:link_to_uniprot ?up .
  ?entity_src_gen PDBo:entity_src_gen.pdbx_gene_src_ncbi_taxonomy_id ?ncbi_taxon ;
                  rdfs:seeAlso ?taxon .

  # Keep only the seeAlso link that ends with the taxonomy ID.
  # STR() is required because REGEX is defined on string literals, not IRIs.
  # The pattern is anchored at the end and requires a non-digit before the ID,
  # so that e.g. "96" no longer matches ".../9606".
  FILTER(REGEX(STR(?taxon), CONCAT("[^0-9]", STR(?ncbi_taxon), "$")))

  # Non-polymer entities (ligands) in the same datablock.
  ?entity_nonpoly PDBo:of_datablock ?pdb ;
                  PDBo:pdbx_entity_nonpoly.comp_id ?comp_id .
  # Yields https://rdf.wwpdb.org/pdb/<pdb_id>/chem_comp/<comp_id> as a plain string.
  BIND(CONCAT("https://rdf.wwpdb.org/pdb/", ?pdb_id, "/chem_comp/", ?comp_id) AS ?compound)

  ?citation_category PDBo:has_citation ?citation .
  ?citation dcterms:references ?pubmed .
}
LIMIT 100
- run sparql-support
- 数値を一定の階級幅(bins)で数え上げるクエリ
- UniProt を質量で分類して数えるクエリ
## endpoint https://integbio.jp/rdf/mirror/uniprot/sparql
# Histogram of protein masses: counts proteins of organism taxon:9606 whose
# proteome IRI contains "UP000005640", in bins of 10000 mass units
# (labelled as 10 kDa, i.e. up:mass is presumably in Da — confirm).
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?mass_2 ?label (COUNT(?protein) AS ?EntryNumber)
WHERE {
  {
    SELECT ?protein ?mass
    WHERE {
      ?protein a up:Protein ;
               up:organism taxon:9606 ;
               up:proteome ?proteome ;
               up:sequence ?sequence .
      ?sequence up:mass ?mass .
      # Plain substring test; no regular expression is needed.
      FILTER(CONTAINS(STR(?proteome), "UP000005640"))
    }
  }
  # Alternative binning with half-open intervals (a <= ?x && ?x < b):
  #   BIND(xsd:integer(FLOOR(xsd:float(?mass) / 10000)) AS ?mass_2)
  #   BIND(CONCAT(STR(?mass_2 * 10), " - ", STR((?mass_2 + 1) * 10), " kDa") AS ?label)

  # Bin index with intervals (a < ?x && ?x <= b). Cast to integer so the label
  # is rendered as "10 - 20 kDa" and not with a float lexical form.
  BIND(xsd:integer(CEIL(xsd:float(?mass) / 10000)) AS ?mass_2)
  # CONCAT takes string arguments, so numeric values are converted with STR().
  BIND(CONCAT(STR((?mass_2 - 1) * 10), " - ", STR(?mass_2 * 10), " kDa") AS ?label)
}
# Every projected non-aggregate variable must be a grouping key.
GROUP BY ?mass_2 ?label
ORDER BY ?mass_2
- run sparql-support
タンパク・遺伝子相互作用
- REACTOME RDF に対して、BiochemicalReaction・遺伝子・タンパク(遺伝子に作用するもの)を取得
# Reactome RDF: biochemical reactions that have a DNA participant on the left
# side and a protein participant on the right side, returned with the entity
# references of both participants.
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX dbpedia2: <http://dbpedia.org/property/>
PREFIX dbpedia: <http://dbpedia.org/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX biopax3: <http://www.biopax.org/release/biopax-level3.owl#>

SELECT DISTINCT ?reaction ?ensembl ?uniprot
FROM <http://rdf.ebi.ac.uk/dataset/reactome>
WHERE {
  ?reaction a biopax3:BiochemicalReaction ;
            biopax3:left ?gene ;
            biopax3:right ?protein .

  ?gene a biopax3:Dna ;
        biopax3:entityReference ?ensembl .

  ?protein a biopax3:Protein ;
           biopax3:entityReference ?uniprot .
}
- run sparql-support
Ortholog Cluster
オーソログDB (KEGG OC) のクラスタサイズの分布
## endpoint https://www.genome.jp/oc/proxy/sparql
# Distribution of ortholog-cluster sizes: for each cluster size (number of
# genes reachable through orth:hasHomologous+), the number of clusters of that
# size, within the dataset labelled "Cellular_organisms".
BASE <http://rdf.genome.jp/keggoc/2017-10-10/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX orth: <http://purl.org/net/orth#>
PREFIX void: <http://rdfs.org/ns/void#>

SELECT ?size (COUNT(?size) AS ?num)
WHERE {
  {
    # Number of genes per cluster.
    SELECT ?oc (COUNT(?genes) AS ?size)
    WHERE {
      ?oc void:inDataset/rdfs:label "Cellular_organisms" ;
          orth:hasHomologous+ ?genes .
      ?genes a orth:Gene .
    }
    # SPARQL 1.1 requires projected non-aggregate variables to be grouping keys.
    GROUP BY ?oc
  }
}
GROUP BY ?size
ORDER BY DESC(?num)
- run sparql-support
種間共通クラスタ(KEGG OC)の数(ベン図)
## endpoint https://www.genome.jp/oc/proxy/sparql
# Venn-diagram counts: for each combination of the four listed taxa, the number
# of ortholog clusters that contain genes from exactly that combination.
# ?orgs is the space-separated list of taxonomy IDs present in a cluster,
# ?c the number of clusters with that list, ?name one sample organism label.
BASE <http://rdf.genome.jp/keggoc/2017-10-10/>
PREFIX orth: <http://purl.org/net/orth#>
PREFIX tax: <http://identifiers.org/taxonomy/>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX void: <http://rdfs.org/ns/void#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

# ?name is not a grouping key, so it must be aggregated; SAMPLE keeps one value.
SELECT ?orgs (COUNT(?orgs) AS ?c) (SAMPLE(?org_name) AS ?name)
WHERE {
  {
    # One row per cluster: concatenated taxonomy IDs plus one organism label.
    # STRAFTER strips the namespace; the namespace IRI is converted with STR()
    # because string functions do not accept an IRI argument.
    SELECT ?oc
           (GROUP_CONCAT(STRAFTER(STR(?tax), STR(tax:)) ; separator=' ') AS ?orgs)
           (SAMPLE(?org) AS ?org_name)
    WHERE {
      {
        # Distinct (cluster, taxon, label) triples for the taxa of interest.
        # NOTE(review): the ORDER BY is meant to give GROUP_CONCAT a stable
        # ID order; the SPARQL spec does not guarantee that aggregation
        # preserves it — confirm on the target endpoint.
        SELECT DISTINCT ?oc ?tax ?org
        WHERE {
          VALUES ?tax { tax:9606 tax:9598 tax:10090 tax:10116 }
          ?oc orth:hasHomologous+/obo:RO_0002162 ?tax ;
              void:inDataset/rdfs:label "Cellular_organisms" ;
              a orth:OrthologsCluster .
          ?tax ^rdfs:seeAlso/skos:altLabel ?org .
        }
        ORDER BY ?tax
      }
    }
    GROUP BY ?oc
  }
}
GROUP BY ?orgs
- run sparql-support