 queue_ack:
        spin_lock_irqsave(&qp->s_lock, flags);
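+       /* Stats: count ACKs that could not be sent immediately and were queued. */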
+       dev->n_rc_qacks++;
        qp->s_flags |= IPATH_S_ACK_PENDING;
        qp->s_nak_state = qp->r_nak_state;
        qp->s_ack_psn = qp->r_ack_psn;
 
 static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
 {
-       if (qp->s_wait_credit) {
-               qp->s_wait_credit = 0;
-               tasklet_hi_schedule(&qp->s_task);
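+       /*
+        * Only update state when the PSN actually advances; duplicate
+        * ACKs should not clear the credit wait or wake the send tasklet.
+        */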
+       if (qp->s_last_psn != psn) {
+               qp->s_last_psn = psn;
+               if (qp->s_wait_credit) {
+                       qp->s_wait_credit = 0;
+                       tasklet_hi_schedule(&qp->s_task);
+               }
        }
-       qp->s_last_psn = psn;
 }
 
 /**
        case OP(SEND_FIRST):
                if (!ipath_get_rwqe(qp, 0)) {
                rnr_nak:
-                       /*
-                        * A RNR NAK will ACK earlier sends and RDMA writes.
-                        * Don't queue the NAK if a RDMA read or atomic
-                        * is pending though.
-                        */
-                       if (qp->r_nak_state)
-                               goto done;
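+                       /* Record an RNR NAK; the low five bits hold the min RNR timer. */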
                        qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
                        qp->r_ack_psn = qp->r_psn;
                        goto send_ack;
 
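+               /*
+                * The rnrwait list is delta encoded: each entry's s_rnr_timeout
+                * is relative to the entry before it.
+                */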
                while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
                        qp->s_rnr_timeout -= nqp->s_rnr_timeout;
                        l = l->next;
-                       if (l->next == &dev->rnrwait)
+                       if (l->next == &dev->rnrwait) {
+                               nqp = NULL;
                                break;
+                       }
                        nqp = list_entry(l->next, struct ipath_qp,
                                         timerwait);
                }
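+               /*
+                * qp is inserted before nqp (if any), so charge qp's remaining
+                * timeout to nqp to keep the relative encoding consistent.
+                */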
+               if (nqp)
+                       nqp->s_rnr_timeout -= qp->s_rnr_timeout;
                list_add(&qp->timerwait, l);
        }
        spin_unlock_irqrestore(&dev->pending_lock, flags);