src/share/vm/opto/compile.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File 6706829 Sdiff src/share/vm/opto

src/share/vm/opto/compile.cpp

Print this page




1950     fpu.inc_java_call_count(); // Count java call site;
1951   case Op_CallRuntime:
1952   case Op_CallLeaf:
1953   case Op_CallLeafNoFP: {
1954     assert( n->is_Call(), "" );
1955     CallNode *call = n->as_Call();
1956     // Count call sites where the FP mode bit would have to be flipped.
1957     // Do not count uncommon runtime calls:
1958     // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,
1959     // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
1960     if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {
1961       fpu.inc_call_count();   // Count the call site
1962     } else {                  // See if uncommon argument is shared
1963       Node *n = call->in(TypeFunc::Parms);
1964       int nop = n->Opcode();
1965       // Clone shared simple arguments to uncommon calls, item (1).
1966       if( n->outcnt() > 1 &&
1967           !n->is_Proj() &&
1968           nop != Op_CreateEx &&
1969           nop != Op_CheckCastPP &&

1970           !n->is_Mem() ) {
1971         Node *x = n->clone();
1972         call->set_req( TypeFunc::Parms, x );
1973       }
1974     }
1975     break;
1976   }
1977 
1978   case Op_StoreD:
1979   case Op_LoadD:
1980   case Op_LoadD_unaligned:
1981     fpu.inc_double_count();
1982     goto handle_mem;
1983   case Op_StoreF:
1984   case Op_LoadF:
1985     fpu.inc_float_count();
1986     goto handle_mem;
1987 
1988   case Op_StoreB:
1989   case Op_StoreC:


2058         if (nn != NULL) {
2059           // Decode a narrow oop to match address
2060           // [R12 + narrow_oop_reg<<3 + offset]
2061           nn = new (C,  2) DecodeNNode(nn, t);
2062           n->set_req(AddPNode::Base, nn);
2063           n->set_req(AddPNode::Address, nn);
2064           if (addp->outcnt() == 0) {
2065             addp->disconnect_inputs(NULL);
2066           }
2067         }
2068       }
2069     }
2070 #endif
2071     break;
2072   }
2073 
2074 #ifdef _LP64
2075   case Op_CmpP:
2076     // Do this transformation here to preserve CmpPNode::sub() and
2077     // other TypePtr related Ideal optimizations (for example, ptr nullness).
2078     if( n->in(1)->is_DecodeN() ) {








2079       Compile* C = Compile::current();
2080       Node* in2 = NULL;
2081       if( n->in(2)->is_DecodeN() ) {
2082         in2 = n->in(2)->in(1);
2083       } else if ( n->in(2)->Opcode() == Op_ConP ) {
2084         const Type* t = n->in(2)->bottom_type();
2085         if (t == TypePtr::NULL_PTR) {
2086           Node *in1 = n->in(1);
2087           if (Matcher::clone_shift_expressions) {
2088             // x86, ARM and friends can handle 2 adds in addressing mode.
2089             // Decode a narrow oop and do implicit NULL check in address
2090             // [R12 + narrow_oop_reg<<3 + offset]
2091             in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
2092           } else {
2093             // Don't replace CmpP(o ,null) if 'o' is used in AddP
2094             // to generate implicit NULL check on Sparc where
2095             // narrow oops can't be used in address.
2096             uint i = 0;
2097             for (; i < in1->outcnt(); i++) {
2098               if (in1->raw_out(i)->is_AddP())
2099                 break;
2100             }
2101             if (i >= in1->outcnt()) {
2102               in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
2103             }
2104           }
2105         } else if (t->isa_oopptr()) {
2106           in2 = ConNode::make(C, t->make_narrowoop());
2107         }
2108       }
2109       if( in2 != NULL ) {
2110         Node* cmpN = new (C, 3) CmpNNode(n->in(1)->in(1), in2);
2111         n->subsume_by( cmpN );


2112       }


2113     }


2114     break;
2115 #endif
2116 
2117   case Op_ModI:
2118     if (UseDivMod) {
2119       // Check if a%b and a/b both exist
2120       Node* d = n->find_similar(Op_DivI);
2121       if (d) {
2122         // Replace them with a fused divmod if supported
2123         Compile* C = Compile::current();
2124         if (Matcher::has_match_rule(Op_DivModI)) {
2125           DivModINode* divmod = DivModINode::make(C, n);
2126           d->subsume_by(divmod->div_proj());
2127           n->subsume_by(divmod->mod_proj());
2128         } else {
2129           // replace a%b with a-((a/b)*b)
2130           Node* mult = new (C, 3) MulINode(d, d->in(2));
2131           Node* sub  = new (C, 3) SubINode(d->in(1), mult);
2132           n->subsume_by( sub );
2133         }


2197       PackNode* p = (PackNode*) n;
2198       Node* btp = p->binaryTreePack(Compile::current(), 1, n->req());
2199       n->subsume_by(btp);
2200     }
2201     break;
2202   default:
2203     assert( !n->is_Call(), "" );
2204     assert( !n->is_Mem(), "" );
2205     break;
2206   }
2207 
2208   // Collect CFG split points
2209   if (n->is_MultiBranch())
2210     fpu._tests.push(n);
2211 }
2212 
2213 //------------------------------final_graph_reshaping_walk---------------------
2214 // Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
2215 // requires that the walk visits a node's inputs before visiting the node.
     //
     // Iterative depth-first walk over the inputs reachable from 'root', driven by
     // an explicit stack instead of recursion.  A node is handed to
     // final_graph_reshaping_impl() only after the walk has descended into every
     // non-null input of it that was not already marked visited.
     //   nstack - work stack of (parent node, index of next input to examine);
     //            the walk ends when it is empty, so it is presumably expected to
     //            be empty on entry -- TODO confirm against the caller
     //   root   - node at which the walk starts
     //   fpu    - supplies the _visited set and is forwarded unchanged to
     //            final_graph_reshaping_impl()
2216 static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &fpu ) {



2217   fpu._visited.set(root->_idx); // first, mark node as visited
       // Walk state: 'n' is the node being expanded, 'i' the next input index of
       // 'n' to look at, and 'cnt' caches n->req().
2218   uint cnt = root->req();
2219   Node *n = root;
2220   uint  i = 0;
2221   while (true) {
2222     if (i < cnt) {
2223       // Place all non-visited non-null inputs onto stack
2224       Node* m = n->in(i);
2225       ++i;
           // test_set() presumably returns the previous visited state and marks m
           // as visited, so each node is descended into at most once.
2226       if (m != NULL && !fpu._visited.test_set(m->_idx)) {


             // Descend into m; remember where to resume in the parent.
2227         cnt = m->req();
2228         nstack.push(n, i); // put on stack parent and next input's index
2229         n = m;
2230         i = 0;
2231       }
2232     } else {
2233       // Now do post-visit work
2234       final_graph_reshaping_impl( n, fpu );
2235       if (nstack.is_empty())
2236         break;             // finished
           // Resume the parent at the input index saved when we descended.
2237       n = nstack.node();   // Get node from stack
2238       cnt = n->req();
2239       i = nstack.index();
2240       nstack.pop();        // Shift to the next node on stack
2241     }
2242   }



































2243 }
2244 
2245 //------------------------------final_graph_reshaping--------------------------
2246 // Final Graph Reshaping.
2247 //
2248 // (1) Clone simple inputs to uncommon calls, so they can be scheduled late
2249 //     and not commoned up and forced early.  Must come after regular
2250 //     optimizations to avoid GVN undoing the cloning.  Clone constant
2251 //     inputs to Loop Phis; these will be split by the allocator anyways.
2252 //     Remove Opaque nodes.
2253 // (2) Move last-uses by commutative operations to the left input to encourage
2254 //     Intel update-in-place two-address operations and better register usage
2255 //     on RISCs.  Must come after regular optimizations to avoid GVN Ideal
2256 //     calls canonicalizing them back.
2257 // (3) Count the number of double-precision FP ops, single-precision FP ops
2258 //     and call sites.  On Intel, we can get correct rounding either by
2259 //     forcing singles to memory (requires extra stores and loads after each
2260 //     FP bytecode) or we can set a rounding mode bit (requires setting and
2261 //     clearing the mode bit around call sites).  The mode bit is only used
2262 //     if the relative frequency of single FP ops to calls is low enough.




1950     fpu.inc_java_call_count(); // Count java call site;
1951   case Op_CallRuntime:
1952   case Op_CallLeaf:
1953   case Op_CallLeafNoFP: {
1954     assert( n->is_Call(), "" );
1955     CallNode *call = n->as_Call();
1956     // Count call sites where the FP mode bit would have to be flipped.
1957     // Do not count uncommon runtime calls:
1958     // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,
1959     // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
1960     if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {
1961       fpu.inc_call_count();   // Count the call site
1962     } else {                  // See if uncommon argument is shared
1963       Node *n = call->in(TypeFunc::Parms);
1964       int nop = n->Opcode();
1965       // Clone shared simple arguments to uncommon calls, item (1).
1966       if( n->outcnt() > 1 &&
1967           !n->is_Proj() &&
1968           nop != Op_CreateEx &&
1969           nop != Op_CheckCastPP &&
1970           nop != Op_DecodeN &&
1971           !n->is_Mem() ) {
1972         Node *x = n->clone();
1973         call->set_req( TypeFunc::Parms, x );
1974       }
1975     }
1976     break;
1977   }
1978 
1979   case Op_StoreD:
1980   case Op_LoadD:
1981   case Op_LoadD_unaligned:
1982     fpu.inc_double_count();
1983     goto handle_mem;
1984   case Op_StoreF:
1985   case Op_LoadF:
1986     fpu.inc_float_count();
1987     goto handle_mem;
1988 
1989   case Op_StoreB:
1990   case Op_StoreC:


2059         if (nn != NULL) {
2060           // Decode a narrow oop to match address
2061           // [R12 + narrow_oop_reg<<3 + offset]
2062           nn = new (C,  2) DecodeNNode(nn, t);
2063           n->set_req(AddPNode::Base, nn);
2064           n->set_req(AddPNode::Address, nn);
2065           if (addp->outcnt() == 0) {
2066             addp->disconnect_inputs(NULL);
2067           }
2068         }
2069       }
2070     }
2071 #endif
2072     break;
2073   }
2074 
2075 #ifdef _LP64
2076   case Op_CmpP:
2077     // Do this transformation here to preserve CmpPNode::sub() and
2078     // other TypePtr related Ideal optimizations (for example, ptr nullness).
2079     if (n->in(1)->is_DecodeN() || n->in(2)->is_DecodeN()) {
2080       Node* in1 = n->in(1);
2081       Node* in2 = n->in(2);
2082       if (!in1->is_DecodeN()) {
2083         in2 = in1;
2084         in1 = n->in(2);
2085       }
2086       assert(in1->is_DecodeN(), "sanity");
2087 
2088       Compile* C = Compile::current();
2089       Node* new_in2 = NULL;
2090       if (in2->is_DecodeN()) {
2091         new_in2 = in2->in(1);
2092       } else if (in2->Opcode() == Op_ConP) {
2093         const Type* t = in2->bottom_type();
2094         if (t == TypePtr::NULL_PTR) {

2095           if (Matcher::clone_shift_expressions) {
2096             // x86, ARM and friends can handle 2 adds in addressing mode.
2097             // Decode a narrow oop and do implicit NULL check in address
2098             // [R12 + narrow_oop_reg<<3 + offset]
2099             new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
2100           } else {
2101             // Don't replace CmpP(o ,null) if 'o' is used in AddP
2102             // to generate implicit NULL check on Sparc where
2103             // narrow oops can't be used in address.
2104             uint i = 0;
2105             for (; i < in1->outcnt(); i++) {
2106               if (in1->raw_out(i)->is_AddP())
2107                 break;
2108             }
2109             if (i >= in1->outcnt()) {
2110               new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
2111             }
2112           }
2113         } else if (t->isa_oopptr()) {
2114           new_in2 = ConNode::make(C, t->make_narrowoop());
2115         }
2116       }
2117       if (new_in2 != NULL) {
2118         Node* cmpN = new (C, 3) CmpNNode(in1->in(1), new_in2);
2119         n->subsume_by( cmpN );
2120         if (in1->outcnt() == 0) {
2121           in1->disconnect_inputs(NULL);
2122         }
2123         if (in2->outcnt() == 0) {
2124           in2->disconnect_inputs(NULL);
2125         }
2126       }
2127     }
2128     break;
2129 #endif
2130 
2131   case Op_ModI:
2132     if (UseDivMod) {
2133       // Check if a%b and a/b both exist
2134       Node* d = n->find_similar(Op_DivI);
2135       if (d) {
2136         // Replace them with a fused divmod if supported
2137         Compile* C = Compile::current();
2138         if (Matcher::has_match_rule(Op_DivModI)) {
2139           DivModINode* divmod = DivModINode::make(C, n);
2140           d->subsume_by(divmod->div_proj());
2141           n->subsume_by(divmod->mod_proj());
2142         } else {
2143           // replace a%b with a-((a/b)*b)
2144           Node* mult = new (C, 3) MulINode(d, d->in(2));
2145           Node* sub  = new (C, 3) SubINode(d->in(1), mult);
2146           n->subsume_by( sub );
2147         }


2211       PackNode* p = (PackNode*) n;
2212       Node* btp = p->binaryTreePack(Compile::current(), 1, n->req());
2213       n->subsume_by(btp);
2214     }
2215     break;
2216   default:
2217     assert( !n->is_Call(), "" );
2218     assert( !n->is_Mem(), "" );
2219     break;
2220   }
2221 
2222   // Collect CFG split points
2223   if (n->is_MultiBranch())
2224     fpu._tests.push(n);
2225 }
2226 
2227 //------------------------------final_graph_reshaping_walk---------------------
2228 // Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
2229 // requires that the walk visits a node's inputs before visiting the node.
     //
     // Two phases:
     //  1. Iterative depth-first walk over the inputs reachable from 'root', driven
     //     by an explicit stack.  A node is handed to final_graph_reshaping_impl()
     //     only after the walk has descended into every non-null input of it that
     //     was not already marked visited.  SafePoint nodes that carry a JVMState
     //     are collected in 'sfpt' as they are first reached.
     //  2. For each collected safepoint, debug-info inputs that are DecodeN nodes
     //     are replaced by the DecodeN's own input when that is judged safe.
     //   nstack - work stack of (parent node, index of next input to examine);
     //            phase 1 ends when it is empty, so it is presumably expected to
     //            be empty on entry -- TODO confirm against the caller
     //   root   - node at which the walk starts
     //   fpu    - supplies the _visited set and is forwarded unchanged to
     //            final_graph_reshaping_impl()
2230 static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &fpu ) {
2231   ResourceArea *area = Thread::current()->resource_area();
       // Safepoints found during the walk, processed after the walk completes.
2232   Unique_Node_List sfpt(area);
2233 
2234   fpu._visited.set(root->_idx); // first, mark node as visited
       // Walk state: 'n' is the node being expanded, 'i' the next input index of
       // 'n' to look at, and 'cnt' caches n->req().
2235   uint cnt = root->req();
2236   Node *n = root;
2237   uint  i = 0;
2238   while (true) {
2239     if (i < cnt) {
2240       // Place all non-visited non-null inputs onto stack
2241       Node* m = n->in(i);
2242       ++i;
           // test_set() presumably returns the previous visited state and marks m
           // as visited, so each node is descended into at most once.
2243       if (m != NULL && !fpu._visited.test_set(m->_idx)) {
             // Remember safepoints that have debug info for the second phase.
2244         if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL)
2245           sfpt.push(m);
             // Descend into m; remember where to resume in the parent.
2246         cnt = m->req();
2247         nstack.push(n, i); // put on stack parent and next input's index
2248         n = m;
2249         i = 0;
2250       }
2251     } else {
2252       // Now do post-visit work
2253       final_graph_reshaping_impl( n, fpu );
2254       if (nstack.is_empty())
2255         break;             // finished
           // Resume the parent at the input index saved when we descended.
2256       n = nstack.node();   // Get node from stack
2257       cnt = n->req();
2258       i = nstack.index();
2259       nstack.pop();        // Shift to the next node on stack
2260     }
2261   }
2262 
2263   // Go over the safepoint nodes and bypass DecodeN nodes on debug edges.
2264   // This can be done for uncommon traps unconditionally, or for any other
2265   // safepoint/call if the DecodeN node is referenced only in debug info.
2266   while (sfpt.size() > 0) {
2267     n = sfpt.pop();
2268     JVMState *jvms = n->as_SafePoint()->jvms();
2269     assert(jvms != NULL, "sanity");
         // Inputs in [debug_start(), req()) are the ones scanned for DecodeN.
2270     int start = jvms->debug_start();
2271     int end   = n->req();
         // A static Java call with a non-zero uncommon_trap_request() is treated
         // as an uncommon trap; for those the per-use check below is skipped.
2272     bool is_uncommon = (n->is_CallStaticJava() &&
2273                         n->as_CallStaticJava()->uncommon_trap_request() != 0);
2274     for (int j = start; j < end; j++) {
2275       Node* in = n->in(j);
2276       if (in->is_DecodeN()) {
2277         bool safe_to_skip = true;
2278         if (!is_uncommon ) {
2279           // Is it safe to skip?
               // Not safe if any user of the DecodeN is not a safepoint, or is a
               // call for which has_non_debug_use() reports true.  (This 'i'
               // shadows the walk index declared above, which is no longer used.)
               // NOTE(review): the argument passed is the safepoint 'n', not the
               // DecodeN 'in' -- confirm this matches what
               // CallNode::has_non_debug_use() expects.
2280           for (uint i = 0; i < in->outcnt(); i++) {
2281             Node* u = in->raw_out(i);
2282             if (!u->is_SafePoint() ||
2283                  u->is_Call() && u->as_Call()->has_non_debug_use(n)) {
2284               safe_to_skip = false;
2285             }
2286           }
2287         }
2288         if (safe_to_skip) {
               // Reference the DecodeN's input (in(1)) directly from the debug edge.
2289           n->set_req(j, in->in(1));
2290         }
             // If the DecodeN has no users left, detach it from its inputs.
2291         if (in->outcnt() == 0) {
2292           in->disconnect_inputs(NULL);
2293         }
2294       }
2295     }
2296   }
2297 }
2298 
2299 //------------------------------final_graph_reshaping--------------------------
2300 // Final Graph Reshaping.
2301 //
2302 // (1) Clone simple inputs to uncommon calls, so they can be scheduled late
2303 //     and not commoned up and forced early.  Must come after regular
2304 //     optimizations to avoid GVN undoing the cloning.  Clone constant
2305 //     inputs to Loop Phis; these will be split by the allocator anyways.
2306 //     Remove Opaque nodes.
2307 // (2) Move last-uses by commutative operations to the left input to encourage
2308 //     Intel update-in-place two-address operations and better register usage
2309 //     on RISCs.  Must come after regular optimizations to avoid GVN Ideal
2310 //     calls canonicalizing them back.
2311 // (3) Count the number of double-precision FP ops, single-precision FP ops
2312 //     and call sites.  On Intel, we can get correct rounding either by
2313 //     forcing singles to memory (requires extra stores and loads after each
2314 //     FP bytecode) or we can set a rounding mode bit (requires setting and
2315 //     clearing the mode bit around call sites).  The mode bit is only used
2316 //     if the relative frequency of single FP ops to calls is low enough.


src/share/vm/opto/compile.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File