CREATE OR REPLACE VIEW itim_search_v AS
SELECT IG.pseq_id,IG.start as ig_start,IG.stop as ig_stop,IG.score,IG.eval,
TM.start as tm_start,TM.stop as tm_stop,
ITIM.start as itim_start,ITIM.stop as itim_stop
FROM pahmm_current_pfam_v IG
JOIN pftmhmm_tms_v TM ON IG.pseq_id=TM.pseq_id AND IG.stop<TM.start
JOIN pfregexp_v ITIM ON TM.pseq_id=ITIM.pseq_id AND TM.stop<ITIM.start
WHERE IG.name='ig'
AND IG.eval<1e-2
AND ITIM.acc='MOD_TYR_ITIM';
The models in the above query are:
Identifying ITIM proteins is now a matter of selecting from that view:
eg=> SELECT DISTINCT ON (pseq_id) *,best_annotation(pseq_id)
FROM itim_search_v WHERE eval<1e-2 ORDER BY pseq_id;
pseq_id | ig_start | ig_stop | score | eval | tm_start | tm_stop | itim_start | itim_stop |
---------+----------+---------+-------+---------+----------+---------+------------+-----------+--------------------------------------------------------------------------------------------------
234 | 157 | 215 | 24 | 0.00068 | 440 | 462 | 518 | 523 | UniProtKB/Swiss-Prot:SIGL5_HUMAN (RecName: Full=Sialic acid-binding Ig-like lectin 5; Short=Sigle
254 | 158 | 213 | 36 | 1.9e-07 | 284 | 306 | 386 | 391 | UniProtKB/Swiss-Prot:VSIG4_HUMAN (RecName: Full=V-set and immunoglobulin domain-containing protei
368 | 49 | 119 | 32 | 3.3e-06 | 450 | 472 | 632 | 637 | RefSeq:NP_068713.2 (AXL receptor tyrosine kinase isoform 1 [Homo sapiens].)
...
itim_search_v are
non-specific, the conjunction of all three is much rarer. Nonetheless, we
can improve our confidence in the human candidates by corroborating them
with chimp or mouse orthologs from HomoloGene that have similar domain
composition.
For speed, we'll first make a temporary materialized table of orthologs using the existing homologene_orthologs_v view.
-- build a table of homologene ortholog pairs from -- the existing view CREATE TABLE homologene_orthologs_mv AS SELECT * FROM homologene_orthologs_v ; CREATE INDEX homologene_orthologs_mv_q ON homologene_orthologs_mv (q_pseq_id ); CREATE INDEX homologene_orthologs_mv_qt ON homologene_orthologs_mv (q_tax_id ); CREATE INDEX homologene_orthologs_mv_t ON homologene_orthologs_mv (t_pseq_id ); CREATE INDEX homologene_orthologs_mv_tt ON homologene_orthologs_mv (t_tax_id ); ANALYZE homologene_orthologs_mv ;
Now, we can use the materialized ortholog data and our canonical ITIM query from the previous section to define a new view to search for Human ITIM proteins and orthologs.
CREATE OR REPLACE VIEW itim_orthologs_search_v AS
SELECT DISTINCT ON (OP.q_pseq_id,OP.t_pseq_id,OP.t_tax_id)
OP.q_pseq_id as h_pseq_id,
HI.ig_start,HI.ig_stop,HI.score,HI.eval,HI.tm_start,HI.tm_stop,HI.itim_start,HI.itim_stop,
OP.t_pseq_id as os_pseq_id,OP.t_tax_id as os_tax_id,tax_id2gs(OP.t_tax_id) as "os_gs"
-- OP: HomoloGene ortholog pairs
FROM homologene_orthologs_mv OP
-- HI & HA: Human ITIM and Human Annotation
JOIN itim_search_v HI ON OP.q_pseq_id=HI.pseq_id
-- OI & OA: Ortholog ITIM and Human Annotation
LEFT JOIN itim_search_v OI ON OP.t_pseq_id=OI.pseq_id
WHERE OP.q_tax_id=gs2tax_id('HUMAN')
ORDER BY OP.q_pseq_id,OP.t_pseq_id,OP.t_tax_id
;
eg=> create table itim_ortho_search as select * from itim_orthologs_search_v where eval<1;
eg=> select * from itim_ortho_search ;
h_pseq_id | ig_start | ig_stop | score | eval | tm_start | tm_stop | itim_start | itim_stop | os_pseq_id | os_tax_id | os_gs
-----------+----------+---------+-------+---------+----------+---------+------------+-----------+------------+-----------+-------
234 | 157 | 215 | 24 | 0.00068 | 440 | 462 | 518 | 523 | 1243401 | 10090 | MOUSE
234 | 262 | 316 | 30 | 7.4e-06 | 440 | 462 | 518 | 523 | 5079208 | 9598 | PANTR
234 | 262 | 316 | 30 | 7.4e-06 | 440 | 462 | 518 | 523 | 5605704 | 10116 | RAT
234 | 157 | 215 | 24 | 0.00068 | 440 | 462 | 518 | 523 | 7203647 | 9615 | CANFA
254 | 158 | 213 | 36 | 1.9e-07 | 284 | 306 | 386 | 391 | 5049591 | 9031 | CHICK
254 | 158 | 213 | 36 | 1.9e-07 | 284 | 306 | 386 | 391 | 5081221 | 9598 | PANTR
[ 151 rows total ]
eg=> CREATE OR REPLACE VIEW itim_ecds_v AS
SELECT count(*),ITIM.acc,ITIM.name,ITIM.descr
FROM pahmm_current_pfam_v IG
JOIN pftmhmm_tms_v TM ON IG.pseq_id=TM.pseq_id AND IG.stop<TM.start
JOIN pfregexp_v ITIM ON TM.pseq_id=ITIM.pseq_id AND TM.stop<ITIM.start
WHERE IG.eval<1e-2 AND ITIM.acc='MOD_TYR_ITIM'
GROUP BY ITIM.acc,ITIM.name,ITIM.descr;
eg=> select * from itim_ecds_v ;