[PATCH-RESEND 06/23] staging/lustre: Revert "LU-2139 osc: Track and limit "unstable" pages"

Mon Jun 3 13:40:43 UTC 2013

From: Oleg Drokin <green at whamcloud.com>

This seems to be causing multiple issues: LU-3274, LU-3277

[The original commit was folded into the large Lustre patch, so we don't
have an exact commit to revert for the kernel client -- Peng Tao]
Signed-off-by: Peng Tao <tao.peng at emc.com>
Signed-off-by: Andreas Dilger <andreas.dilger at intel.com>
---
 drivers/staging/lustre/lustre/include/lclient.h    |    8 +-
 drivers/staging/lustre/lustre/include/lustre_net.h |    4 +-
 drivers/staging/lustre/lustre/include/obd.h        |    2 +-
 .../staging/lustre/lustre/include/obd_support.h    |    1 -
 .../staging/lustre/lustre/llite/llite_internal.h   |    4 -
 drivers/staging/lustre/lustre/llite/llite_lib.c    |   20 +----
 drivers/staging/lustre/lustre/llite/lproc_llite.c  |   18 ----
 drivers/staging/lustre/lustre/lov/lov_obd.c        |    2 +-
 drivers/staging/lustre/lustre/obdclass/class_obd.c |    2 -
 drivers/staging/lustre/lustre/osc/osc_cache.c      |   94 ++------------------
 drivers/staging/lustre/lustre/osc/osc_internal.h   |    2 -
 drivers/staging/lustre/lustre/osc/osc_request.c    |   23 +----
 12 files changed, 14 insertions(+), 166 deletions(-)

diff --git a/drivers/staging/lustre/lustre/include/lclient.h b/drivers/staging/lustre/lustre/include/lclient.h
index d00600c..9d4011f 100644
--- a/drivers/staging/lustre/lustre/include/lclient.h
+++ b/drivers/staging/lustre/lustre/include/lclient.h
@@ -422,10 +422,8 @@ struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode);
 void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm);
 
 /**
- * Data structure managing a client's cached pages. A count of
- * "unstable" pages is maintained, and an LRU of clean pages is
- * maintained. "unstable" pages are pages pinned by the ptlrpc
- * layer for recovery purposes.
+ * Data structure managing a client's cached clean pages. An LRU of
+ * pages is maintained, along with other statistics.
  */
 struct cl_client_cache {
 	atomic_t	ccc_users;    /* # of users (OSCs) of this data */
@@ -434,8 +432,6 @@ struct cl_client_cache {
 	atomic_t	ccc_lru_left; /* # of LRU entries available */
 	unsigned long	ccc_lru_max;  /* Max # of LRU entries possible */
 	unsigned int	ccc_lru_shrinkers; /* # of threads reclaiming */
-	atomic_t	ccc_unstable_nr;    /* # of unstable pages pinned */
-	wait_queue_head_t	ccc_unstable_waitq; /* Signaled on BRW commit */
 };
 
 #endif /*LCLIENT_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
index 874412e..293dd90 100644
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -1838,9 +1838,7 @@ struct ptlrpc_request {
 		rq_no_retry_einprogress:1,
 		/* allow the req to be sent if the import is in recovery
 		 * status */
-		rq_allow_replay:1,
-		/* bulk request, sent to server, but uncommitted */
-		rq_unstable:1;
+		rq_allow_replay:1;
 
 	unsigned int rq_nr_resend;
 
diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
index 98fdb32..d2923b1 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -678,7 +678,7 @@ struct lov_obd {
 	proc_dir_entry_t   *lov_pool_proc_entry;
 	enum lustre_sec_part    lov_sp_me;
 
-	/* Cached LRU and unstable data from upper layer */
+	/* Cached LRU pages from upper layer */
 	void		       *lov_cache;
 
 	struct rw_semaphore     lov_notify_lock;
diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h
index 5f2b4e8..ee70867 100644
--- a/drivers/staging/lustre/lustre/include/obd_support.h
+++ b/drivers/staging/lustre/lustre/include/obd_support.h
@@ -67,7 +67,6 @@ extern int at_early_margin;
 extern int at_extra;
 extern unsigned int obd_sync_filter;
 extern unsigned int obd_max_dirty_pages;
-extern atomic_t obd_unstable_pages;
 extern atomic_t obd_dirty_pages;
 extern atomic_t obd_dirty_transit_pages;
 extern unsigned int obd_alloc_fail_rate;
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index d8e43bb..e972a82 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -474,10 +474,6 @@ struct ll_sb_info {
 
 	struct lprocfs_stats     *ll_stats; /* lprocfs stats counter */
 
-	/* Used to track "unstable" pages on a client, and maintain a
-	 * LRU list of clean pages. An "unstable" page is defined as
-	 * any page which is sent to a server as part of a bulk request,
-	 * but is uncommitted to stable storage. */
 	struct cl_client_cache    ll_cache;
 
 	struct lprocfs_stats     *ll_ra_stats;
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 2a4a87d..dbcb7bd 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -99,16 +99,13 @@ static struct ll_sb_info *ll_init_sbi(void)
 		lru_page_max = (pages / 4) * 3;
 	}
 
-	/* initialize ll_cache data */
+	/* initialize lru data */
 	atomic_set(&sbi->ll_cache.ccc_users, 0);
 	sbi->ll_cache.ccc_lru_max = lru_page_max;
 	atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max);
 	spin_lock_init(&sbi->ll_cache.ccc_lru_lock);
 	INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru);
 
-	atomic_set(&sbi->ll_cache.ccc_unstable_nr, 0);
-	init_waitqueue_head(&sbi->ll_cache.ccc_unstable_waitq);
-
 	sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
 					   SBI_DEFAULT_READAHEAD_MAX);
 	sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
@@ -1074,7 +1071,7 @@ void ll_put_super(struct super_block *sb)
 	struct lustre_sb_info *lsi = s2lsi(sb);
 	struct ll_sb_info *sbi = ll_s2sbi(sb);
 	char *profilenm = get_profile_name(sb);
-	int ccc_count, next, force = 1, rc = 0;
+	int next, force = 1;
 	ENTRY;
 
 	CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
@@ -1090,19 +1087,6 @@ void ll_put_super(struct super_block *sb)
 			force = obd->obd_force;
 	}
 
-	/* Wait for unstable pages to be committed to stable storage */
-	if (force == 0) {
-		struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
-		rc = l_wait_event(sbi->ll_cache.ccc_unstable_waitq,
-			atomic_read(&sbi->ll_cache.ccc_unstable_nr) == 0,
-			&lwi);
-	}
-
-	ccc_count = atomic_read(&sbi->ll_cache.ccc_unstable_nr);
-	if (force == 0 && rc != -EINTR)
-		LASSERTF(ccc_count == 0, "count: %i\n", ccc_count);
-
-
 	/* We need to set force before the lov_disconnect in
 	   lustre_common_put_super, since l_d cleans up osc's as well. */
 	if (force) {
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
index f71b15b..6a82505 100644
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c
@@ -725,23 +725,6 @@ static int ll_sbi_flags_seq_show(struct seq_file *m, void *v)
 }
 LPROC_SEQ_FOPS_RO(ll_sbi_flags);
 
-static int ll_unstable_stats_seq_show(struct seq_file *m, void *v)
-{
-	struct super_block	*sb    = m->private;
-	struct ll_sb_info	*sbi   = ll_s2sbi(sb);
-	struct cl_client_cache	*cache = &sbi->ll_cache;
-	int pages, mb, rc;
-
-	pages = atomic_read(&cache->ccc_unstable_nr);
-	mb    = (pages * PAGE_CACHE_SIZE) >> 20;
-
-	rc = seq_printf(m, "unstable_pages: %8d\n"
-			   "unstable_mb:    %8d\n", pages, mb);
-
-	return rc;
-}
-LPROC_SEQ_FOPS_RO(ll_unstable_stats);
-
 static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
 	{ "uuid",	  &ll_sb_uuid_fops,	  0, 0 },
 	//{ "mntpt_path",   ll_rd_path,	     0, 0 },
@@ -770,7 +753,6 @@ static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
 	{ "lazystatfs",       &ll_lazystatfs_fops, 0 },
 	{ "max_easize",       &ll_maxea_size_fops, 0, 0 },
 	{ "sbi_flags",	      &ll_sbi_flags_fops, 0, 0 },
-	{ "unstable_stats",   &ll_unstable_stats_fops, 0, 0},
 	{ 0 }
 };
 
diff --git a/drivers/staging/lustre/lustre/lov/lov_obd.c b/drivers/staging/lustre/lustre/lov/lov_obd.c
index e5369b1..ef7ff09 100644
--- a/drivers/staging/lustre/lustre/lov/lov_obd.c
+++ b/drivers/staging/lustre/lustre/lov/lov_obd.c
@@ -57,7 +57,7 @@
 #include <lprocfs_status.h>
 #include <lustre_param.h>
 #include <cl_object.h>
-#include <lclient.h>
+#include <lclient.h> /* for cl_client_lru */
 #include <lustre/ll_fiemap.h>
 #include <lustre_log.h>
 #include <lustre_fid.h>
diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c
index 20d9eaf..af1c2d0 100644
--- a/drivers/staging/lustre/lustre/obdclass/class_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c
@@ -68,8 +68,6 @@ unsigned int obd_dump_on_eviction;
 EXPORT_SYMBOL(obd_dump_on_eviction);
 unsigned int obd_max_dirty_pages = 256;
 EXPORT_SYMBOL(obd_max_dirty_pages);
-atomic_t obd_unstable_pages;
-EXPORT_SYMBOL(obd_unstable_pages);
 atomic_t obd_dirty_pages;
 EXPORT_SYMBOL(obd_dirty_pages);
 unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT;   /* seconds */
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
index 2df2810..54770f9 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -1310,12 +1310,10 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
 #define OSC_DUMP_GRANT(cli, fmt, args...) do {				      \
 	struct client_obd *__tmp = (cli);				      \
 	CDEBUG(D_CACHE, "%s: { dirty: %ld/%ld dirty_pages: %d/%d "	      \
-	       "unstable_pages: %d/%d dropped: %ld avail: %ld, "	      \
-	       "reserved: %ld, flight: %d } " fmt,			      \
+	       "dropped: %ld avail: %ld, reserved: %ld, flight: %d } " fmt,   \
 	       __tmp->cl_import->imp_obd->obd_name,			      \
 	       __tmp->cl_dirty, __tmp->cl_dirty_max,			      \
 	       atomic_read(&obd_dirty_pages), obd_max_dirty_pages,	      \
-	       atomic_read(&obd_unstable_pages), obd_max_dirty_pages,     \
 	       __tmp->cl_lost_grant, __tmp->cl_avail_grant,		      \
 	       __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, ##args);      \
 } while (0)
@@ -1465,8 +1463,7 @@ static int osc_enter_cache_try(struct client_obd *cli,
 		return 0;
 
 	if (cli->cl_dirty + PAGE_CACHE_SIZE <= cli->cl_dirty_max &&
-	    atomic_read(&obd_unstable_pages) + 1 +
-	    atomic_read(&obd_dirty_pages) <= obd_max_dirty_pages) {
+	    atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
 		osc_consume_write_grant(cli, &oap->oap_brw_page);
 		if (transient) {
 			cli->cl_dirty_transit += PAGE_CACHE_SIZE;
@@ -1579,9 +1576,9 @@ void osc_wake_cache_waiters(struct client_obd *cli)
 
 		ocw->ocw_rc = -EDQUOT;
 		/* we can't dirty more */
-		if (cli->cl_dirty + PAGE_CACHE_SIZE > cli->cl_dirty_max ||
-		    atomic_read(&obd_unstable_pages) + 1 +
-		    atomic_read(&obd_dirty_pages) > obd_max_dirty_pages) {
+		if ((cli->cl_dirty + PAGE_CACHE_SIZE > cli->cl_dirty_max) ||
+		    (atomic_read(&obd_dirty_pages) + 1 >
+		     obd_max_dirty_pages)) {
 			CDEBUG(D_CACHE, "no dirty room: dirty: %ld "
 			       "osc max %ld, sys max %d\n", cli->cl_dirty,
 			       cli->cl_dirty_max, obd_max_dirty_pages);
@@ -1749,84 +1746,6 @@ static void osc_process_ar(struct osc_async_rc *ar, __u64 xid,
 		ar->ar_force_sync = 0;
 }
 
-/* Performs "unstable" page accounting. This function balances the
- * increment operations performed in osc_inc_unstable_pages. It is
- * registered as the RPC request callback, and is executed when the
- * bulk RPC is committed on the server. Thus at this point, the pages
- * involved in the bulk transfer are no longer considered unstable. */
-void osc_dec_unstable_pages(struct ptlrpc_request *req)
-{
-	struct ptlrpc_bulk_desc *desc       = req->rq_bulk;
-	struct client_obd       *cli	= &req->rq_import->imp_obd->u.cli;
-	obd_count		page_count = desc->bd_iov_count;
-	int i;
-
-	/* No unstable page tracking */
-	if (cli->cl_cache == NULL)
-		return;
-
-	LASSERT(page_count >= 0);
-
-	for (i = 0; i < page_count; i++)
-		dec_zone_page_state(desc->bd_iov[i].kiov_page, NR_UNSTABLE_NFS);
-
-	atomic_sub(page_count, &cli->cl_cache->ccc_unstable_nr);
-	LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
-
-	atomic_sub(page_count, &obd_unstable_pages);
-	LASSERT(atomic_read(&obd_unstable_pages) >= 0);
-
-	spin_lock(&req->rq_lock);
-	req->rq_committed = 1;
-	req->rq_unstable  = 0;
-	spin_unlock(&req->rq_lock);
-
-	wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
-}
-
-/* "unstable" page accounting. See: osc_dec_unstable_pages. */
-void osc_inc_unstable_pages(struct ptlrpc_request *req)
-{
-	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
-	struct client_obd       *cli  = &req->rq_import->imp_obd->u.cli;
-	obd_count		page_count = desc->bd_iov_count;
-	int i;
-
-	/* No unstable page tracking */
-	if (cli->cl_cache == NULL)
-		return;
-
-	LASSERT(page_count >= 0);
-
-	for (i = 0; i < page_count; i++)
-		inc_zone_page_state(desc->bd_iov[i].kiov_page, NR_UNSTABLE_NFS);
-
-	LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
-	atomic_add(page_count, &cli->cl_cache->ccc_unstable_nr);
-
-	LASSERT(atomic_read(&obd_unstable_pages) >= 0);
-	atomic_add(page_count, &obd_unstable_pages);
-
-	spin_lock(&req->rq_lock);
-
-	/* If the request has already been committed (i.e. brw_commit
-	 * called via rq_commit_cb), we need to undo the unstable page
-	 * increments we just performed because rq_commit_cb wont be
-	 * called again. Otherwise, just set the commit callback so the
-	 * unstable page accounting is properly updated when the request
-	 * is committed */
-	if (req->rq_committed) {
-		/* Drop lock before calling osc_dec_unstable_pages */
-		spin_unlock(&req->rq_lock);
-		osc_dec_unstable_pages(req);
-		spin_lock(&req->rq_lock);
-	} else {
-		req->rq_unstable  = 1;
-		req->rq_commit_cb = osc_dec_unstable_pages;
-	}
-
-	spin_unlock(&req->rq_lock);
-}
 
 /* this must be called holding the loi list lock to give coverage to exit_cache,
  * async_flag maintenance, and oap_request */
@@ -1839,9 +1758,6 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
 
 	ENTRY;
 	if (oap->oap_request != NULL) {
-		if (rc == 0)
-			osc_inc_unstable_pages(oap->oap_request);
-
 		xid = ptlrpc_req_xid(oap->oap_request);
 		ptlrpc_req_finished(oap->oap_request);
 		oap->oap_request = NULL;
diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h
index 5343da2..efc5db4 100644
--- a/drivers/staging/lustre/lustre/osc/osc_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_internal.h
@@ -205,6 +205,4 @@ int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
 		   struct obd_quotactl *oqctl);
 int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
 
-void osc_inc_unstable_pages(struct ptlrpc_request *req);
-void osc_dec_unstable_pages(struct ptlrpc_request *req);
 #endif /* OSC_INTERNAL_H */
diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c
index 3062e47..d2811d4 100644
--- a/drivers/staging/lustre/lustre/osc/osc_request.c
+++ b/drivers/staging/lustre/lustre/osc/osc_request.c
@@ -826,16 +826,13 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
 		CERROR("dirty %lu - %lu > dirty_max %lu\n",
 		       cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max);
 		oa->o_undirty = 0;
-	} else if (unlikely(atomic_read(&obd_unstable_pages) +
-			    atomic_read(&obd_dirty_pages) -
+	} else if (unlikely(atomic_read(&obd_dirty_pages) -
 			    atomic_read(&obd_dirty_transit_pages) >
 			    (long)(obd_max_dirty_pages + 1))) {
 		/* The atomic_read() allowing the atomic_inc() are
 		 * not covered by a lock thus they may safely race and trip
 		 * this CERROR() unless we add in a small fudge factor (+1). */
-		CERROR("%s: dirty %d + %d - %d > system dirty_max %d\n",
-		       cli->cl_import->imp_obd->obd_name,
-		       atomic_read(&obd_unstable_pages),
+		CERROR("dirty %d - %d > system dirty_max %d\n",
 		       atomic_read(&obd_dirty_pages),
 		       atomic_read(&obd_dirty_transit_pages),
 		       obd_max_dirty_pages);
@@ -1743,7 +1740,6 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
 	aa->aa_resends++;
 	new_req->rq_interpret_reply = request->rq_interpret_reply;
 	new_req->rq_async_args = request->rq_async_args;
-	new_req->rq_commit_cb = request->rq_commit_cb;
 	/* cap resend delay to the current request timeout, this is similar to
 	 * what ptlrpc does (see after_reply()) */
 	if (aa->aa_resends > new_req->rq_timeout)
@@ -2037,20 +2033,6 @@ static int brw_interpret(const struct lu_env *env,
 	RETURN(rc);
 }
 
-static void brw_commit(struct ptlrpc_request *req)
-{
-	spin_lock(&req->rq_lock);
-	/* If osc_inc_unstable_pages (via osc_extent_finish) races with
-	 * this called via the rq_commit_cb, I need to ensure
-	 * osc_dec_unstable_pages is still called. Otherwise unstable
-	 * pages may be leaked. */
-	if (req->rq_unstable)
-		osc_dec_unstable_pages(req);
-	else
-		req->rq_committed = 1;
-	spin_unlock(&req->rq_lock);
-}
-
 /**
  * Build an RPC by the list of extent @ext_list. The caller must ensure
  * that the total pages in this list are NOT over max pages per RPC.
@@ -2156,7 +2138,6 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 		GOTO(out, rc);
 	}
 
-	req->rq_commit_cb = brw_commit;
 	req->rq_interpret_reply = brw_interpret;
 
 	if (mem_tight != 0)
-- 
1.7.9.5