1950 fpu.inc_java_call_count(); // Count java call site;
1951 case Op_CallRuntime:
1952 case Op_CallLeaf:
1953 case Op_CallLeafNoFP: {
1954 assert( n->is_Call(), "" );
1955 CallNode *call = n->as_Call();
1956 // Count call sites where the FP mode bit would have to be flipped.
1957 // Do not count uncommon runtime calls:
1958 // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,
1959 // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
1960 if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {
1961 fpu.inc_call_count(); // Count the call site
1962 } else { // See if uncommon argument is shared
1963 Node *n = call->in(TypeFunc::Parms);
1964 int nop = n->Opcode();
1965 // Clone shared simple arguments to uncommon calls, item (1).
1966 if( n->outcnt() > 1 &&
1967 !n->is_Proj() &&
1968 nop != Op_CreateEx &&
1969 nop != Op_CheckCastPP &&
1970 !n->is_Mem() ) {
1971 Node *x = n->clone();
1972 call->set_req( TypeFunc::Parms, x );
1973 }
1974 }
1975 break;
1976 }
1977
1978 case Op_StoreD:
1979 case Op_LoadD:
1980 case Op_LoadD_unaligned:
1981 fpu.inc_double_count();
1982 goto handle_mem;
1983 case Op_StoreF:
1984 case Op_LoadF:
1985 fpu.inc_float_count();
1986 goto handle_mem;
1987
1988 case Op_StoreB:
1989 case Op_StoreC:
2058 if (nn != NULL) {
2059 // Decode a narrow oop to match address
2060 // [R12 + narrow_oop_reg<<3 + offset]
2061 nn = new (C, 2) DecodeNNode(nn, t);
2062 n->set_req(AddPNode::Base, nn);
2063 n->set_req(AddPNode::Address, nn);
2064 if (addp->outcnt() == 0) {
2065 addp->disconnect_inputs(NULL);
2066 }
2067 }
2068 }
2069 }
2070 #endif
2071 break;
2072 }
2073
2074 #ifdef _LP64
2075 case Op_CmpP:
2076 // Do this transformation here to preserve CmpPNode::sub() and
2077 // other TypePtr related Ideal optimizations (for example, ptr nullness).
2078 if( n->in(1)->is_DecodeN() ) {
2079 Compile* C = Compile::current();
2080 Node* in2 = NULL;
2081 if( n->in(2)->is_DecodeN() ) {
2082 in2 = n->in(2)->in(1);
2083 } else if ( n->in(2)->Opcode() == Op_ConP ) {
2084 const Type* t = n->in(2)->bottom_type();
2085 if (t == TypePtr::NULL_PTR) {
2086 Node *in1 = n->in(1);
2087 if (Matcher::clone_shift_expressions) {
2088 // x86, ARM and friends can handle 2 adds in addressing mode.
2089 // Decode a narrow oop and do implicit NULL check in address
2090 // [R12 + narrow_oop_reg<<3 + offset]
2091 in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
2092 } else {
2093 // Don't replace CmpP(o ,null) if 'o' is used in AddP
2094 // to generate implicit NULL check on Sparc where
2095 // narrow oops can't be used in address.
2096 uint i = 0;
2097 for (; i < in1->outcnt(); i++) {
2098 if (in1->raw_out(i)->is_AddP())
2099 break;
2100 }
2101 if (i >= in1->outcnt()) {
2102 in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
2103 }
2104 }
2105 } else if (t->isa_oopptr()) {
2106 in2 = ConNode::make(C, t->make_narrowoop());
2107 }
2108 }
2109 if( in2 != NULL ) {
2110 Node* cmpN = new (C, 3) CmpNNode(n->in(1)->in(1), in2);
2111 n->subsume_by( cmpN );
2112 }
2113 }
2114 break;
2115 #endif
2116
2117 case Op_ModI:
2118 if (UseDivMod) {
2119 // Check if a%b and a/b both exist
2120 Node* d = n->find_similar(Op_DivI);
2121 if (d) {
2122 // Replace them with a fused divmod if supported
2123 Compile* C = Compile::current();
2124 if (Matcher::has_match_rule(Op_DivModI)) {
2125 DivModINode* divmod = DivModINode::make(C, n);
2126 d->subsume_by(divmod->div_proj());
2127 n->subsume_by(divmod->mod_proj());
2128 } else {
2129 // replace a%b with a-((a/b)*b)
2130 Node* mult = new (C, 3) MulINode(d, d->in(2));
2131 Node* sub = new (C, 3) SubINode(d->in(1), mult);
2132 n->subsume_by( sub );
2133 }
2197 PackNode* p = (PackNode*) n;
2198 Node* btp = p->binaryTreePack(Compile::current(), 1, n->req());
2199 n->subsume_by(btp);
2200 }
2201 break;
2202 default:
2203 assert( !n->is_Call(), "" );
2204 assert( !n->is_Mem(), "" );
2205 break;
2206 }
2207
2208 // Collect CFG split points
2209 if (n->is_MultiBranch())
2210 fpu._tests.push(n);
2211 }
2212
2213 //------------------------------final_graph_reshaping_walk---------------------
2214 // Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
2215 // requires that the walk visits a node's inputs before visiting the node.
2216 static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &fpu ) {
2217 fpu._visited.set(root->_idx); // first, mark node as visited
2218 uint cnt = root->req();
2219 Node *n = root;
2220 uint i = 0;
2221 while (true) {
2222 if (i < cnt) {
2223 // Place all non-visited non-null inputs onto stack
2224 Node* m = n->in(i);
2225 ++i;
2226 if (m != NULL && !fpu._visited.test_set(m->_idx)) {
2227 cnt = m->req();
2228 nstack.push(n, i); // put on stack parent and next input's index
2229 n = m;
2230 i = 0;
2231 }
2232 } else {
2233 // Now do post-visit work
2234 final_graph_reshaping_impl( n, fpu );
2235 if (nstack.is_empty())
2236 break; // finished
2237 n = nstack.node(); // Get node from stack
2238 cnt = n->req();
2239 i = nstack.index();
2240 nstack.pop(); // Shift to the next node on stack
2241 }
2242 }
2243 }
2244
2245 //------------------------------final_graph_reshaping--------------------------
2246 // Final Graph Reshaping.
2247 //
2248 // (1) Clone simple inputs to uncommon calls, so they can be scheduled late
2249 // and not commoned up and forced early. Must come after regular
2250 // optimizations to avoid GVN undoing the cloning. Clone constant
2251 // inputs to Loop Phis; these will be split by the allocator anyways.
2252 // Remove Opaque nodes.
2253 // (2) Move last-uses by commutative operations to the left input to encourage
2254 // Intel update-in-place two-address operations and better register usage
2255 // on RISCs. Must come after regular optimizations to avoid GVN Ideal
2256 // calls canonicalizing them back.
2257 // (3) Count the number of double-precision FP ops, single-precision FP ops
2258 // and call sites. On Intel, we can get correct rounding either by
2259 // forcing singles to memory (requires extra stores and loads after each
2260 // FP bytecode) or we can set a rounding mode bit (requires setting and
2261 // clearing the mode bit around call sites). The mode bit is only used
2262 // if the relative frequency of single FP ops to calls is low enough.
|
1950 fpu.inc_java_call_count(); // Count java call site;
1951 case Op_CallRuntime:
1952 case Op_CallLeaf:
1953 case Op_CallLeafNoFP: {
1954 assert( n->is_Call(), "" );
1955 CallNode *call = n->as_Call();
1956 // Count call sites where the FP mode bit would have to be flipped.
1957 // Do not count uncommon runtime calls:
1958 // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,
1959 // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
1960 if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {
1961 fpu.inc_call_count(); // Count the call site
1962 } else { // See if uncommon argument is shared
1963 Node *n = call->in(TypeFunc::Parms);
1964 int nop = n->Opcode();
1965 // Clone shared simple arguments to uncommon calls, item (1).
1966 if( n->outcnt() > 1 &&
1967 !n->is_Proj() &&
1968 nop != Op_CreateEx &&
1969 nop != Op_CheckCastPP &&
1970 nop != Op_DecodeN &&
1971 !n->is_Mem() ) {
1972 Node *x = n->clone();
1973 call->set_req( TypeFunc::Parms, x );
1974 }
1975 }
1976 break;
1977 }
1978
1979 case Op_StoreD:
1980 case Op_LoadD:
1981 case Op_LoadD_unaligned:
1982 fpu.inc_double_count();
1983 goto handle_mem;
1984 case Op_StoreF:
1985 case Op_LoadF:
1986 fpu.inc_float_count();
1987 goto handle_mem;
1988
1989 case Op_StoreB:
1990 case Op_StoreC:
2059 if (nn != NULL) {
2060 // Decode a narrow oop to match address
2061 // [R12 + narrow_oop_reg<<3 + offset]
2062 nn = new (C, 2) DecodeNNode(nn, t);
2063 n->set_req(AddPNode::Base, nn);
2064 n->set_req(AddPNode::Address, nn);
2065 if (addp->outcnt() == 0) {
2066 addp->disconnect_inputs(NULL);
2067 }
2068 }
2069 }
2070 }
2071 #endif
2072 break;
2073 }
2074
2075 #ifdef _LP64
2076 case Op_CmpP:
2077 // Do this transformation here to preserve CmpPNode::sub() and
2078 // other TypePtr related Ideal optimizations (for example, ptr nullness).
2079 if (n->in(1)->is_DecodeN() || n->in(2)->is_DecodeN()) {
2080 Node* in1 = n->in(1);
2081 Node* in2 = n->in(2);
2082 if (!in1->is_DecodeN()) {
2083 in2 = in1;
2084 in1 = n->in(2);
2085 }
2086 assert(in1->is_DecodeN(), "sanity");
2087
2088 Compile* C = Compile::current();
2089 Node* new_in2 = NULL;
2090 if (in2->is_DecodeN()) {
2091 new_in2 = in2->in(1);
2092 } else if (in2->Opcode() == Op_ConP) {
2093 const Type* t = in2->bottom_type();
2094 if (t == TypePtr::NULL_PTR) {
2095 if (Matcher::clone_shift_expressions) {
2096 // x86, ARM and friends can handle 2 adds in addressing mode.
2097 // Decode a narrow oop and do implicit NULL check in address
2098 // [R12 + narrow_oop_reg<<3 + offset]
2099 new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
2100 } else {
2101 // Don't replace CmpP(o ,null) if 'o' is used in AddP
2102 // to generate implicit NULL check on Sparc where
2103 // narrow oops can't be used in address.
2104 uint i = 0;
2105 for (; i < in1->outcnt(); i++) {
2106 if (in1->raw_out(i)->is_AddP())
2107 break;
2108 }
2109 if (i >= in1->outcnt()) {
2110 new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
2111 }
2112 }
2113 } else if (t->isa_oopptr()) {
2114 new_in2 = ConNode::make(C, t->make_narrowoop());
2115 }
2116 }
2117 if (new_in2 != NULL) {
2118 Node* cmpN = new (C, 3) CmpNNode(in1->in(1), new_in2);
2119 n->subsume_by( cmpN );
2120 if (in1->outcnt() == 0) {
2121 in1->disconnect_inputs(NULL);
2122 }
2123 if (in2->outcnt() == 0) {
2124 in2->disconnect_inputs(NULL);
2125 }
2126 }
2127 }
2128 break;
2129 #endif
2130
2131 case Op_ModI:
2132 if (UseDivMod) {
2133 // Check if a%b and a/b both exist
2134 Node* d = n->find_similar(Op_DivI);
2135 if (d) {
2136 // Replace them with a fused divmod if supported
2137 Compile* C = Compile::current();
2138 if (Matcher::has_match_rule(Op_DivModI)) {
2139 DivModINode* divmod = DivModINode::make(C, n);
2140 d->subsume_by(divmod->div_proj());
2141 n->subsume_by(divmod->mod_proj());
2142 } else {
2143 // replace a%b with a-((a/b)*b)
2144 Node* mult = new (C, 3) MulINode(d, d->in(2));
2145 Node* sub = new (C, 3) SubINode(d->in(1), mult);
2146 n->subsume_by( sub );
2147 }
2211 PackNode* p = (PackNode*) n;
2212 Node* btp = p->binaryTreePack(Compile::current(), 1, n->req());
2213 n->subsume_by(btp);
2214 }
2215 break;
2216 default:
2217 assert( !n->is_Call(), "" );
2218 assert( !n->is_Mem(), "" );
2219 break;
2220 }
2221
2222 // Collect CFG split points
2223 if (n->is_MultiBranch())
2224 fpu._tests.push(n);
2225 }
2226
2227 //------------------------------final_graph_reshaping_walk---------------------
2228 // Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
2229 // requires that the walk visits a node's inputs before visiting the node.
2230 static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &fpu ) {
2231 ResourceArea *area = Thread::current()->resource_area();
2232 Unique_Node_List sfpt(area);
2233
2234 fpu._visited.set(root->_idx); // first, mark node as visited
2235 uint cnt = root->req();
2236 Node *n = root;
2237 uint i = 0;
2238 while (true) {
2239 if (i < cnt) {
2240 // Place all non-visited non-null inputs onto stack
2241 Node* m = n->in(i);
2242 ++i;
2243 if (m != NULL && !fpu._visited.test_set(m->_idx)) {
2244 if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL)
2245 sfpt.push(m);
2246 cnt = m->req();
2247 nstack.push(n, i); // put on stack parent and next input's index
2248 n = m;
2249 i = 0;
2250 }
2251 } else {
2252 // Now do post-visit work
2253 final_graph_reshaping_impl( n, fpu );
2254 if (nstack.is_empty())
2255 break; // finished
2256 n = nstack.node(); // Get node from stack
2257 cnt = n->req();
2258 i = nstack.index();
2259 nstack.pop(); // Shift to the next node on stack
2260 }
2261 }
2262
2263 // Go over safepoints nodes to skip DecodeN nodes for debug edges.
2264 // It could be done for an uncommon traps or any safepoints/calls
2265 // if the DecodeN node is referenced only in a debug info.
2266 while (sfpt.size() > 0) {
2267 n = sfpt.pop();
2268 JVMState *jvms = n->as_SafePoint()->jvms();
2269 assert(jvms != NULL, "sanity");
2270 int start = jvms->debug_start();
2271 int end = n->req();
2272 bool is_uncommon = (n->is_CallStaticJava() &&
2273 n->as_CallStaticJava()->uncommon_trap_request() != 0);
2274 for (int j = start; j < end; j++) {
2275 Node* in = n->in(j);
2276 if (in->is_DecodeN()) {
2277 bool safe_to_skip = true;
2278 if (!is_uncommon ) {
2279 // Is it safe to skip?
2280 for (uint i = 0; i < in->outcnt(); i++) {
2281 Node* u = in->raw_out(i);
2282 if (!u->is_SafePoint() ||
2283 u->is_Call() && u->as_Call()->has_non_debug_use(n)) {
2284 safe_to_skip = false;
2285 }
2286 }
2287 }
2288 if (safe_to_skip) {
2289 n->set_req(j, in->in(1));
2290 }
2291 if (in->outcnt() == 0) {
2292 in->disconnect_inputs(NULL);
2293 }
2294 }
2295 }
2296 }
2297 }
2298
2299 //------------------------------final_graph_reshaping--------------------------
2300 // Final Graph Reshaping.
2301 //
2302 // (1) Clone simple inputs to uncommon calls, so they can be scheduled late
2303 // and not commoned up and forced early. Must come after regular
2304 // optimizations to avoid GVN undoing the cloning. Clone constant
2305 // inputs to Loop Phis; these will be split by the allocator anyways.
2306 // Remove Opaque nodes.
2307 // (2) Move last-uses by commutative operations to the left input to encourage
2308 // Intel update-in-place two-address operations and better register usage
2309 // on RISCs. Must come after regular optimizations to avoid GVN Ideal
2310 // calls canonicalizing them back.
2311 // (3) Count the number of double-precision FP ops, single-precision FP ops
2312 // and call sites. On Intel, we can get correct rounding either by
2313 // forcing singles to memory (requires extra stores and loads after each
2314 // FP bytecode) or we can set a rounding mode bit (requires setting and
2315 // clearing the mode bit around call sites). The mode bit is only used
2316 // if the relative frequency of single FP ops to calls is low enough.
|