

2013年8月21日 (水) 10:45時点におけるTfuji (トーク | 投稿記録)による版
(差分) ←前の版 | 最新版 (差分) | 次の版→ (差分)
移動: 案内, 検索




% wget

% rapper -i rdfxml -o turtle P16033.rdf > P16033.ttl 
rapper: Parsing URI file:///Users/ktym/P16033.rdf with parser rdfxml
rapper: Serializing with serializer turtle
rapper: Parsing returned 702 triples

% sparql.rb query '
prefix up: <>   
prefix tax: <>
select *
where {                      
  ?s up:locusName "slr1311" .
  ?s ?p ?o .
}'

s	p	o
_5031363033330011	<>	<>
_5031363033330011	<>	slr1311
_5031363033330011	<>	psbA2
_5031363033330011	<>	psbA-2


LOCUS       NC_000911            3573470 bp    DNA     circular BCT 19-JAN-2012
DEFINITION  Synechocystis sp. PCC 6803 chromosome, complete genome.
VERSION     NC_000911.1  GI:16329170
DBLINK      Project: 57659
SOURCE      Synechocystis sp. PCC 6803
  ORGANISM  Synechocystis sp. PCC 6803
            Bacteria; Cyanobacteria; Chroococcales; Synechocystis.
REFERENCE   1  (bases 1 to 3573470)
  AUTHORS   Thelwell,C., Robinson,N.J. and Turner-Cavet,J.S.
  TITLE     An SmtB-like repressor from Synechocystis PCC 6803 regulates a zinc
  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 95 (18), 10728-10733 (1998)
   PUBMED   9724772
REFERENCE   2  (bases 1 to 3573470)
  AUTHORS   Kaneko,T., Sato,S., Kotani,H., Tanaka,A., Asamizu,E., Nakamura,Y.,
            Miyajima,N., Hirosawa,M., Sugiura,M., Sasamoto,S., Kimura,T.,
            Hosouchi,T., Matsuno,A., Muraki,A., Nakazaki,N., Naruo,K.,
            Okumura,S., Shimpo,S., Takeuchi,C., Wada,T., Watanabe,A.,
            Yamada,M., Yasuda,M. and Tabata,S.
  TITLE     Sequence analysis of the genome of the unicellular cyanobacterium
            Synechocystis sp. strain PCC6803. II. Sequence determination of the
            entire genome and assignment of potential protein-coding regions
  JOURNAL   DNA Res. 3 (3), 109-136 (1996)
   PUBMED   8905231
REFERENCE   3  (bases 1 to 3573470)
  AUTHORS   Kaneko,T., Tanaka,A., Sato,S., Kotani,H., Sazuka,T., Miyajima,N.,
            Sugiura,M. and Tabata,S.
  TITLE     Sequence analysis of the genome of the unicellular cyanobacterium
            Synechocystis sp. strain PCC6803. I. Sequence features in the 1 Mb
            region from map positions 64% to 92% of the genome
  JOURNAL   DNA Res. 2 (4), 153-166 (1995)
   PUBMED   8590279
REFERENCE   4  (bases 1 to 3573470)
  CONSRTM   NCBI Genome Project
  TITLE     Direct Submission
  JOURNAL   Submitted (10-SEP-2004) National Center for Biotechnology
            Information, NIH, Bethesda, MD 20894, USA
REFERENCE   5  (bases 1 to 3573470)
  AUTHORS   Tabata,S.
  TITLE     Direct Submission
  JOURNAL   Submitted (28-JUN-1996) Kazusa DNA Research Institute, The First
            Laboratory for Plant Gene Research, Yana 1532-3, Kisarazu, Chiba
            292-0812, Japan
COMMENT     PROVISIONAL REFSEQ: This record has not yet been subject to final
            NCBI review. The reference sequence was derived from BA000022.
            COMPLETENESS: full length.
FEATURES             Location/Qualifiers
     source          1..3573470
                     /organism="Synechocystis sp. PCC 6803"
                     /mol_type="genomic DNA"
                     /strain="PCC 6803"


     gene            7229..8311
     CDS             7229..8311
                     /product="photosystem II D1 protein"


% less wget_prokaryotes/PRJNA57659/NC_000911.1

% less wget_prokaryotes.v5/PRJNA57659/NC_000911.1.ttl

in Virtuoso (w/o predicate paths)

% sparql.rb query '
prefix obo: <>
prefix faldo: <>
prefix insdc: <>
prefix up: <>
prefix tax: <>
select *
from <http://v5.genome.db/>
where {
  ?seq rdfs:seeAlso <> .
  ?gene obo:so_part_of ?seq .
  ?cds obo:so_part_of ?gene .
  ?cds a obo:SO_0000316 .     # CDS
  ?cds rdfs:label "slr1311" .
#  ?cds ?p ?o .
  ?gene ?p2 ?o2 .
} limit 100'

in OWLIM (w/ predicate paths, i.e. SPARQL 1.1 property paths such as the "+" in obo:so_part_of+)


% sparql.rb query '
prefix obo: <>
prefix faldo: <>
prefix insdc: <>
select *
where {
  ?seq rdfs:seeAlso <> .
  ?s obo:so_part_of+ ?seq .
  ?s a obo:SO_0000316 .
  ?s rdfs:label "slr1311" .
  ?s ?p ?o .
}'
seq	s	p	o
<urn:uuid:182f171a-7928-4324-8d41-f3e820a872fd>	<urn:uuid:aaf399d2-f84a-4feb-a689-966311a3b116>	<>	<>
<urn:uuid:182f171a-7928-4324-8d41-f3e820a872fd>	<urn:uuid:aaf399d2-f84a-4feb-a689-966311a3b116>	<>	<urn:uuid:3114165b-ffee-4816-b9bf-811dbbcb9b06>
<urn:uuid:182f171a-7928-4324-8d41-f3e820a872fd>	<urn:uuid:aaf399d2-f84a-4feb-a689-966311a3b116>	<>	<>
<urn:uuid:182f171a-7928-4324-8d41-f3e820a872fd>	<urn:uuid:aaf399d2-f84a-4feb-a689-966311a3b116>	<>	<>
<urn:uuid:182f171a-7928-4324-8d41-f3e820a872fd>	<urn:uuid:aaf399d2-f84a-4feb-a689-966311a3b116>	<>	<>
<urn:uuid:182f171a-7928-4324-8d41-f3e820a872fd>	<urn:uuid:aaf399d2-f84a-4feb-a689-966311a3b116>	<>	slr1311
<urn:uuid:182f171a-7928-4324-8d41-f3e820a872fd>	<urn:uuid:aaf399d2-f84a-4feb-a689-966311a3b116>	<>	psbA2
<urn:uuid:182f171a-7928-4324-8d41-f3e820a872fd>	<urn:uuid:aaf399d2-f84a-4feb-a689-966311a3b116>	<>	slr1311
<urn:uuid:182f171a-7928-4324-8d41-f3e820a872fd>	<urn:uuid:aaf399d2-f84a-4feb-a689-966311a3b116>	<>	<urn:uuid:8683a33d-e496-43da-a4ce-a454faeb228c>
<urn:uuid:182f171a-7928-4324-8d41-f3e820a872fd>	<urn:uuid:aaf399d2-f84a-4feb-a689-966311a3b116>	<>	node9
<urn:uuid:182f171a-7928-4324-8d41-f3e820a872fd>	<urn:uuid:aaf399d2-f84a-4feb-a689-966311a3b116>	<>	1
<urn:uuid:182f171a-7928-4324-8d41-f3e820a872fd>	<urn:uuid:aaf399d2-f84a-4feb-a689-966311a3b116>	<>	11
<urn:uuid:182f171a-7928-4324-8d41-f3e820a872fd>	<urn:uuid:aaf399d2-f84a-4feb-a689-966311a3b116>	<>	photosystem II D1 protein
% sparql.rb query '
select *
where {
  <> ?p ?o .
}'
p	o
<>	<>
<>	Protein:NP_439906.1

in UniProt (P16033.ttl)

    :database <> ;
    a :Resource ;
    rdfs:comment "NC_000911.1" .
    :alternativeName <>, <
A8655BADDABAED4C601851DBB74C938735F74B87EF124B1360F> ;
    :annotation <>, <
6B44C0E58BA2D1BAB3AFA7B90690213AD739E0333AEA99>, <>, <
7B9329143014B76A05C3B0ECE448D7BA2C7C58144C48C4C1C0B95B4B110272E5D0473DD9F0C506B>, <>, <http://purl.unip>, <
B99B212016071>, <>, <
7AC4685BCE363F295FE2A7465ACBD490842C4E796DD070>, <>, <
A451EAD2C53B87C9F6EACCC8FAB4B4A394CD0BA8E7415680D5EBA00C9187BAB64E35B9632DEE4F7>, <>, <http://purl.unip>, <
274BC5DB8F146>, <>, <
1604269DC66845DAE6896B16890981E0D063B6B0790018>, <>, <
E232288948A178235B0A6BC4CB7321156210ABA4DDB093524BC71D117B96A46A49FD7B2533BA13C>, <>, <http://purl.unip>, <
07065325213B7>, <>, <
B96E45ABD9DC6D80FAD0498772BAE34F6D2ED257C12D93>, <>, <
>, <> ;
    :attribution <file:///Users/ktym/P16033.rdf#_5031363033330015>, <file:///Users/ktym/P16033.rdf#_503136303333004F>, <file:///Users/ktym/P16033.rdf#_5031363033330051>, <file:///Users/ktym/P16033.rdf#_5031363033330053>, <file:///U
sers/ktym/P16033.rdf#_5031363033330054>, <file:///Users/ktym/P16033.rdf#_5031363033330056>, <file:///Users/ktym/P16033.rdf#_5031363033330058>, <file:///Users/ktym/P16033.rdf#_503136303333005A>, <file:///Users/ktym/P16033.rdf#_50313
6303333005C>, <file:///Users/ktym/P16033.rdf#_503136303333005E>, <file:///Users/ktym/P16033.rdf#_5031363033330060> ;
    :citation <>, <>, <>, <>, <
05231>, <> ;
    :classifiedWith <>, <>, <>, <>, <>, <http://purl.uniprot.
org/go/0030096>, <>, <>, <>, <>, <>, <http://purl.unipr>, <>, <>, <>, <>, <>, <http://p> ;
    :created "1990-04-01"^^<> ;
    :encodedBy <file:///Users/ktym/P16033.rdf#_5031363033330011>, <file:///Users/ktym/P16033.rdf#_5031363033330012> ;
    :enzyme <> ;
    :existence :Evidence_at_Protein_Level_Existence ;
    :mnemonic "PSBA2_SYNY3" ;
    :modified "2012-11-28"^^<> ;
    :oldMnemonic "PSB2_SYNY3" ;
    :organism <> ;
    :recommendedName <> ;
    :reviewed true ;
    :sequence <> ;
    :version "111"^^<> ;
    a :Protein ;
    rdfs:seeAlso <>, <>, <>, <>, <
cds/CAA39472.1>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <h
ttp://>, <>, <>, <>, <>, <ht
tp://>, <>, <>, <>, <>, <
56>, <>, <>, <>, <>, <
_441550.1>, <>, <>, <>, <>, <>,
 <>, <> .

add some links between NCBI Protein and UniProt

% cut -f 3 wget_prokaryotes.v5.ttl | grep ncbiprotein | perl -pe 's/ .*//' | perl -pe 's|(.._.*)>|$1> rdfs:seeAlso <$1> .|' >
<> rdfs:seeAlso <> .
% wc
  6485705  25942820 738098573


% sparql.rb query '
prefix obo: <>
prefix faldo: <>
prefix insdc: <>
prefix up: <>
prefix tax: <>
select count(?cds) as ?num_cds count(?gene) as ?num_genes
from <http://v5.genome.db/>
where {
  ?seq rdfs:seeAlso <> .
  ?gene obo:so_part_of ?seq .
  ?cds obo:so_part_of ?gene .
  ?cds a obo:SO_0000316 .     # CDS
}'
num_cds num_genes
6745    6745
% sparql.rb query '
prefix obo: <>
prefix faldo: <>
prefix insdc: <>
prefix up: <>
prefix tax: <>
select ?cds ?label
from <http://v5.genome.db/>
where {
  ?seq rdfs:seeAlso <> .
  ?gene obo:so_part_of ?seq .
  ?cds obo:so_part_of ?gene .
  ?cds a obo:SO_0000316 .     # CDS
  ?cds rdfs:label ?label .
} limit 10'
cds     label
<urn:uuid:c73b48d2-e65a-43c5-86e1-26945386b6b4> sll5004
<urn:uuid:24ecccd5-bf11-48d0-948f-9bd89f370a91> slr5005
<urn:uuid:52ca1690-60ce-477f-bfe2-022de04c167a> sll5006
<urn:uuid:e551fa78-b90c-435f-b528-9a8b5bac8296> ssl5007
<urn:uuid:fa2a53e8-a500-4b8b-b7ab-5e0623cd72e5> ssl5008
<urn:uuid:125a0827-d81d-47c7-b13b-506cd49e2989> ssr5009
<urn:uuid:ce6e08ac-adbf-429b-a057-2a4284d3e1cd> ssr5011
<urn:uuid:c24d24cd-5a0d-499f-b899-a31415dc9d6b> sll5014
<urn:uuid:e03852ac-57d9-4d6b-b995-0b0ffa33e163> ssl5015
<urn:uuid:6cc65822-e4b1-40a4-82ac-df2d9cc24775> ssr5019
% sparql.rb query '
prefix obo: <>
prefix faldo: <>
prefix insdc: <>
prefix up: <>
prefix tax: <>
select ?cds ?l ?b ?e
from <http://v5.genome.db/>
where {
  ?seq rdfs:seeAlso <> .
  ?gene obo:so_part_of ?seq .
  ?cds obo:so_part_of ?gene .
  ?cds a obo:SO_0000316 .     # CDS
  ?cds rdfs:label "slr1311" .
  ?cds faldo:location ?location .
  ?location insdc:location_string ?l .
  ?location faldo:begin ?begin .
  ?location faldo:end ?end .
  ?begin faldo:position ?b .
  ?end faldo:position ?e .
} limit 100'
cds     l       b       e
<urn:uuid:aaf399d2-f84a-4feb-a689-966311a3b116> 7229..8311      7229    8311
% sparql.rb query '
prefix obo: <>
prefix faldo: <>
prefix insdc: <>
prefix up: <>
prefix tax: <>
select ?label ?l ?b ?e
from <http://v5.genome.db/>
where {
  ?seq rdfs:seeAlso <> .
  ?gene obo:so_part_of ?seq .
  ?cds obo:so_part_of ?gene .
  ?cds a obo:SO_0000316 .     # CDS
  ?cds rdfs:label ?label .
  ?cds faldo:location ?location .
  ?location insdc:location_string ?l .
  ?location faldo:begin ?begin .
  ?location faldo:end ?end .
  ?begin faldo:position ?b .
  ?end faldo:position ?e .
} limit 10'
label   l       b       e
ssl5001 complement(195..374)    195     374
sll5002 complement(574..921)    574     921
sll5003 complement(1136..1540)  1136    1540
slr5005 2528..13921     2528    13921
sll5006 complement(14579..15133)        14579   15133
ssl5008 complement(15639..15812)        15639   15812
slr5012 17880..18800    17880   18800
slr5013 18822..20513    18822   20513
sll5014 complement(20523..21479)        20523   21479
ssr5020 24181..24462    24181   24462
% sparql.rb query '
prefix obo: <>
prefix faldo: <>
prefix insdc: <>
prefix up: <>
prefix tax: <>
select ?label ?l ?b ?e
from <http://v5.genome.db/>
where {
  ?seq rdfs:seeAlso <> .
  ?gene obo:so_part_of ?seq .
  ?cds obo:so_part_of ?gene .
  ?cds a obo:SO_0000316 .     # CDS
  ?cds rdfs:label ?label .
  ?cds faldo:location ?location .
  ?location insdc:location_string ?l .
  ?location faldo:begin ?begin .
  ?location faldo:end ?end .
  ?begin faldo:position ?b .
  ?end faldo:position ?e .
  filter (?b > 10000 && ?e < 20000)
} limit 100'
label   l       b       e
sll5006 complement(14579..15133)        14579   15133
ssl5008 complement(15639..15812)        15639   15812
slr5012 17880..18800    17880   18800
slr7012 11524..12513    11524   12513
slr7013 12674..13438    12674   13438
ssl7021 complement(19594..19779)        19594   19779
slr8016 10998..11867    10998   11867
sll8017 complement(12618..13151)        12618   13151
sll8019 complement(15590..17317)        15590   17317
slr8021 18529..19311    18529   19311
slr8022 19356..19691    19356   19691