I can't come up with a satisfying reason why we still need the memory
segment list. We used to represent in the list:
- boot memory
- standby memory added via add_memory()
- loaded dcss segments
When loading/unloading dcss segments, we already track them in a
separate list and check for overlaps
(arch/s390/mm/extmem.c:segment_overlaps_others()) when loading segments.
The overlap check was introduced for some segments in
commit b2300b9efe1b ("[S390] dcssblk: add >2G DCSSs support and stacked
contiguous DCSSs support.")
and was extended to cover all dcss segments in
commit ca57114609d1 ("s390/extmem: remove code for 31 bit addressing
mode").
Although I doubt that overlaps with boot memory and standby memory
are relevant, let's reshuffle the checks in load_segment() to request
the resource first. This will bail out in case we have overlaps with
other resources (esp. boot memory and standby memory). The order
is now different compared to segment_unload() and segment_unload(), but
that should not matter.
This smells like a leftover from ancient times, let's get rid of it. We
can now convert vmem_remove_mapping() into a void function - everybody
ignored the return value already.
Cc: Heiko Carstens <[email protected]>
Cc: Vasily Gorbik <[email protected]>
Cc: Christian Borntraeger <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Gerald Schaefer <[email protected]>
Signed-off-by: David Hildenbrand <[email protected]>
---
arch/s390/include/asm/pgtable.h | 2 +-
arch/s390/mm/extmem.c | 25 +++----
arch/s390/mm/vmem.c | 115 ++------------------------------
3 files changed, 21 insertions(+), 121 deletions(-)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 19d603bd1f36e..7eb01a5459cdf 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1669,7 +1669,7 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
#define kern_addr_valid(addr) (1)
extern int vmem_add_mapping(unsigned long start, unsigned long size);
-extern int vmem_remove_mapping(unsigned long start, unsigned long size);
+extern void vmem_remove_mapping(unsigned long start, unsigned long size);
extern int s390_enable_sie(void);
extern int s390_enable_skey(void);
extern void s390_reset_cmma(struct mm_struct *mm);
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 9e0aa7aa03ba4..105c09282f8c5 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -313,15 +313,10 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
goto out_free;
}
- rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
-
- if (rc)
- goto out_free;
-
seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL);
if (seg->res == NULL) {
rc = -ENOMEM;
- goto out_shared;
+ goto out_free;
}
seg->res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
seg->res->start = seg->start_addr;
@@ -335,12 +330,17 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
if (rc == SEG_TYPE_SC ||
((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared))
seg->res->flags |= IORESOURCE_READONLY;
+
+ /* Check for overlapping resources before adding the mapping. */
if (request_resource(&iomem_resource, seg->res)) {
rc = -EBUSY;
- kfree(seg->res);
- goto out_shared;
+ goto out_free_resource;
}
+ rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+ if (rc)
+ goto out_resource;
+
if (do_nonshared)
diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
&start_addr, &end_addr);
@@ -351,14 +351,14 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
dcss_diag(&purgeseg_scode, seg->dcss_name,
&dummy, &dummy);
rc = diag_cc;
- goto out_resource;
+ goto out_mapping;
}
if (diag_cc > 1) {
pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr);
rc = dcss_diag_translate_rc(end_addr);
dcss_diag(&purgeseg_scode, seg->dcss_name,
&dummy, &dummy);
- goto out_resource;
+ goto out_mapping;
}
seg->start_addr = start_addr;
seg->end = end_addr;
@@ -377,11 +377,12 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
(void*) seg->end, segtype_string[seg->vm_segtype]);
}
goto out;
+ out_mapping:
+ vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
out_resource:
release_resource(seg->res);
+ out_free_resource:
kfree(seg->res);
- out_shared:
- vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
out_free:
kfree(seg);
out:
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 8b6282cf7d139..3b9e71654c37b 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -20,14 +20,6 @@
static DEFINE_MUTEX(vmem_mutex);
-struct memory_segment {
- struct list_head list;
- unsigned long start;
- unsigned long size;
-};
-
-static LIST_HEAD(mem_segs);
-
static void __ref *vmem_alloc_pages(unsigned int order)
{
unsigned long size = PAGE_SIZE << order;
@@ -300,94 +292,25 @@ void vmemmap_free(unsigned long start, unsigned long end,
{
}
-/*
- * Add memory segment to the segment list if it doesn't overlap with
- * an already present segment.
- */
-static int insert_memory_segment(struct memory_segment *seg)
-{
- struct memory_segment *tmp;
-
- if (seg->start + seg->size > VMEM_MAX_PHYS ||
- seg->start + seg->size < seg->start)
- return -ERANGE;
-
- list_for_each_entry(tmp, &mem_segs, list) {
- if (seg->start >= tmp->start + tmp->size)
- continue;
- if (seg->start + seg->size <= tmp->start)
- continue;
- return -ENOSPC;
- }
- list_add(&seg->list, &mem_segs);
- return 0;
-}
-
-/*
- * Remove memory segment from the segment list.
- */
-static void remove_memory_segment(struct memory_segment *seg)
-{
- list_del(&seg->list);
-}
-
-static void __remove_shared_memory(struct memory_segment *seg)
+void vmem_remove_mapping(unsigned long start, unsigned long size)
{
- remove_memory_segment(seg);
- vmem_remove_range(seg->start, seg->size);
-}
-
-int vmem_remove_mapping(unsigned long start, unsigned long size)
-{
- struct memory_segment *seg;
- int ret;
-
mutex_lock(&vmem_mutex);
-
- ret = -ENOENT;
- list_for_each_entry(seg, &mem_segs, list) {
- if (seg->start == start && seg->size == size)
- break;
- }
-
- if (seg->start != start || seg->size != size)
- goto out;
-
- ret = 0;
- __remove_shared_memory(seg);
- kfree(seg);
-out:
+ vmem_remove_range(start, size);
mutex_unlock(&vmem_mutex);
- return ret;
}
int vmem_add_mapping(unsigned long start, unsigned long size)
{
- struct memory_segment *seg;
int ret;
- mutex_lock(&vmem_mutex);
- ret = -ENOMEM;
- seg = kzalloc(sizeof(*seg), GFP_KERNEL);
- if (!seg)
- goto out;
- seg->start = start;
- seg->size = size;
-
- ret = insert_memory_segment(seg);
- if (ret)
- goto out_free;
+ if (start + size > VMEM_MAX_PHYS ||
+ start + size < start)
+ return -ERANGE;
+ mutex_lock(&vmem_mutex);
ret = vmem_add_mem(start, size);
if (ret)
- goto out_remove;
- goto out;
-
-out_remove:
- __remove_shared_memory(seg);
-out_free:
- kfree(seg);
-out:
+ vmem_remove_range(start, size);
mutex_unlock(&vmem_mutex);
return ret;
}
@@ -421,27 +344,3 @@ void __init vmem_map_init(void)
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}
-
-/*
- * Convert memblock.memory to a memory segment list so there is a single
- * list that contains all memory segments.
- */
-static int __init vmem_convert_memory_chunk(void)
-{
- struct memblock_region *reg;
- struct memory_segment *seg;
-
- mutex_lock(&vmem_mutex);
- for_each_memblock(memory, reg) {
- seg = kzalloc(sizeof(*seg), GFP_KERNEL);
- if (!seg)
- panic("Out of memory...\n");
- seg->start = reg->base;
- seg->size = reg->size;
- insert_memory_segment(seg);
- }
- mutex_unlock(&vmem_mutex);
- return 0;
-}
-
-core_initcall(vmem_convert_memory_chunk);
--
2.26.2
On Thu, Jun 25, 2020 at 05:00:29PM +0200, David Hildenbrand wrote:
> This smells like a leftover from ancient times, let's get rid of it. We
> can now convert vmem_remove_mapping() into a void function - everybody
> ignored the return value already.
This buys us what? Except that we get rid of a bit of code?
> Am 25.06.2020 um 21:38 schrieb Heiko Carstens <[email protected]>:
>
> On Thu, Jun 25, 2020 at 05:00:29PM +0200, David Hildenbrand wrote:
>> This smells like a leftover from ancient times, let's get rid of it. We
>> can now convert vmem_remove_mapping() into a void function - everybody
>> ignored the return value already.
>
> This buys us what? Except that we get rid of a bit of code?
I‘m looking into virtio-mem support for s390x, including vmemmap/vmem optimizations. Virtio-mem adds/removes memory in memory block granularity, which results in one list entry for essentially each memory section. That seems to be easy to avoid.
Thanks
On Thu, 25 Jun 2020 17:00:29 +0200
David Hildenbrand <[email protected]> wrote:
> I can't come up with a satisfying reason why we still need the memory
> segment list. We used to represent in the list:
> - boot memory
> - standby memory added via add_memory()
> - loaded dcss segments
>
> When loading/unloading dcss segments, we already track them in a
> separate list and check for overlaps
> (arch/s390/mm/extmem.c:segment_overlaps_others()) when loading segments.
>
> The overlap check was introduced for some segments in
> commit b2300b9efe1b ("[S390] dcssblk: add >2G DCSSs support and stacked
> contiguous DCSSs support.")
> and was extended to cover all dcss segments in
> commit ca57114609d1 ("s390/extmem: remove code for 31 bit addressing
> mode").
>
> Although I doubt that overlaps with boot memory and standby memory
> are relevant, let's reshuffle the checks in load_segment() to request
> the resource first. This will bail out in case we have overlaps with
> other resources (esp. boot memory and standby memory). The order
> is now different compared to segment_unload() and segment_unload(), but
> that should not matter.
You are right that this is ancient, but I think "overlaps with boot
memory and standby memory" were very relevant, that might actually
have been the initial reason for this in ancient times (but I do not
really remember).
With DCSS you can define a fixed start address where the segment will
be loaded into guest address space. The current code queries this address
and directly gives it to vmem_add_mapping(), at least if there is no
overlap with other DCSS segments. If there would be an overlap with
boot memory, and not checked / rejected in vmem_add_mapping(), the
following dcss_diag() would probably fail because AFAIR z/VM will
not allow loading a DCSS with guest memory overlap. So far, so good,
but the vmem_remove_mapping() on the exit path would then remove
(part of) boot memory.
That being said, I think your patch prevents this by moving
request_resource() up, so we should not call vmem_add_mapping()
for such overlaps. I still want to give it some test, but need
to find / setup systems with that ancient technology first :-)
One change introduced by this patch is that we no longer
see -ENOSPC and the corresponding error message from extmem code:
"DCSS %s overlaps with used storage and cannot be loaded"
Instead, now we would see -EBUSY and this message:
"%s needs used memory resources and cannot be loaded or queried"
That should be OK, as it is also the same message that we already
see for overlaps with other DCSSs. But you probably also should
remove that case from the segment_warning() function for
completeness.
Regards,
Gerald
On Fri, 26 Jun 2020 19:22:53 +0200
Gerald Schaefer <[email protected]> wrote:
[...]
>
> That should be OK, as it is also the same message that we already
> see for overlaps with other DCSSs. But you probably also should
> remove that case from the segment_warning() function for
> completeness.
... and also from the comment of segment_load()
On Fri, 26 Jun 2020 19:22:53 +0200
Gerald Schaefer <[email protected]> wrote:
> On Thu, 25 Jun 2020 17:00:29 +0200
> David Hildenbrand <[email protected]> wrote:
>
> > I can't come up with a satisfying reason why we still need the memory
> > segment list. We used to represent in the list:
> > - boot memory
> > - standby memory added via add_memory()
> > - loaded dcss segments
> >
> > When loading/unloading dcss segments, we already track them in a
> > separate list and check for overlaps
> > (arch/s390/mm/extmem.c:segment_overlaps_others()) when loading segments.
> >
> > The overlap check was introduced for some segments in
> > commit b2300b9efe1b ("[S390] dcssblk: add >2G DCSSs support and stacked
> > contiguous DCSSs support.")
> > and was extended to cover all dcss segments in
> > commit ca57114609d1 ("s390/extmem: remove code for 31 bit addressing
> > mode").
> >
> > Although I doubt that overlaps with boot memory and standby memory
> > are relevant, let's reshuffle the checks in load_segment() to request
> > the resource first. This will bail out in case we have overlaps with
> > other resources (esp. boot memory and standby memory). The order
> > is now different compared to segment_unload() and segment_unload(), but
> > that should not matter.
>
> You are right that this is ancient, but I think "overlaps with boot
> memory and standby memory" were very relevant, that might actually
> have been the initial reason for this in ancient times (but I do not
> really remember).
>
> With DCSS you can define a fixed start address where the segment will
> be loaded into guest address space. The current code queries this address
> and directly gives it to vmem_add_mapping(), at least if there is no
> overlap with other DCSS segments. If there would be an overlap with
> boot memory, and not checked / rejected in vmem_add_mapping(), the
> following dcss_diag() would probably fail because AFAIR z/VM will
> not allow loading a DCSS with guest memory overlap. So far, so good,
> but the vmem_remove_mapping() on the exit path would then remove
> (part of) boot memory.
>
> That being said, I think your patch prevents this by moving
> request_resource() up, so we should not call vmem_add_mapping()
> for such overlaps. I still want to give it some test, but need
> to find / setup systems with that ancient technology first :-)
>
Verified with DCSS overlapping boot and standby memory, works fine.
As expected, the error message changes, but I don't think that is a
problem, as long as you also remove the old -ENOSPC case / comment
in arch/s390/mm/extmem.c. It is actually more correct now I guess,
-ENOSPC doesn't look like the correct return value anyway.
Thanks for cleaning up! Looks good to me, and removes > 100 LOC,
unless Heiko remembers some other issues from ancient times.
Reviewed-by: Gerald Schaefer <[email protected]>
Tested-by: Gerald Schaefer <[email protected]> [DCSS]
On Mon, Jun 29, 2020 at 02:01:22PM +0200, David Hildenbrand wrote:
> On 29.06.20 13:55, Heiko Carstens wrote:
> > On Fri, Jun 26, 2020 at 08:46:21PM +0200, Gerald Schaefer wrote:
> >> Verified with DCSS overlapping boot and standby memory, works fine.
> >> As expected, the error message changes, but I don't think that is a
> >> problem, as long as you also remove the old -ENOSPC case / comment
> >> in arch/s390/mm/extmem.c. It is actually more correct now I guess,
> >> -ENOSPC doesn't look like the correct return value anyway.
> >>
> >> Thanks for cleaning up! Looks good to me, and removes > 100 LOC,
> >> unless Heiko remembers some other issues from ancient times.
> >>
> >> Reviewed-by: Gerald Schaefer <[email protected]>
> >> Tested-by: Gerald Schaefer <[email protected]> [DCSS]
> >
> > Looks good to me too. Gerald, thanks for looking and verifying this,
> > and David, thanks for providing the patch.
> >
> > Applied.
> >
>
> Thanks Gerald and Heiko! Should I send an addon patch to tweak the
> documentation or resend this patch?
Please send an addon patch. I will merge it.
On 29.06.20 13:55, Heiko Carstens wrote:
> On Fri, Jun 26, 2020 at 08:46:21PM +0200, Gerald Schaefer wrote:
>> Verified with DCSS overlapping boot and standby memory, works fine.
>> As expected, the error message changes, but I don't think that is a
>> problem, as long as you also remove the old -ENOSPC case / comment
>> in arch/s390/mm/extmem.c. It is actually more correct now I guess,
>> -ENOSPC doesn't look like the correct return value anyway.
>>
>> Thanks for cleaning up! Looks good to me, and removes > 100 LOC,
>> unless Heiko remembers some other issues from ancient times.
>>
>> Reviewed-by: Gerald Schaefer <[email protected]>
>> Tested-by: Gerald Schaefer <[email protected]> [DCSS]
>
> Looks good to me too. Gerald, thanks for looking and verifying this,
> and David, thanks for providing the patch.
>
> Applied.
>
Thanks Gerald and Heiko! Should I send an addon patch to tweak the
documentation or resend this patch?
--
Thanks,
David / dhildenb
On Fri, Jun 26, 2020 at 08:46:21PM +0200, Gerald Schaefer wrote:
> Verified with DCSS overlapping boot and standby memory, works fine.
> As expected, the error message changes, but I don't think that is a
> problem, as long as you also remove the old -ENOSPC case / comment
> in arch/s390/mm/extmem.c. It is actually more correct now I guess,
> -ENOSPC doesn't look like the correct return value anyway.
>
> Thanks for cleaning up! Looks good to me, and removes > 100 LOC,
> unless Heiko remembers some other issues from ancient times.
>
> Reviewed-by: Gerald Schaefer <[email protected]>
> Tested-by: Gerald Schaefer <[email protected]> [DCSS]
Looks good to me too. Gerald, thanks for looking and verifying this,
and David, thanks for providing the patch.
Applied.
segment_load() will no longer return -ENOSPC. If a segment overlaps with
storage, we now also return -EBUSY. Remove the stale comment from
__segment_load() and the stale handling from segment_warning().
Cc: Heiko Carstens <[email protected]>
Cc: Vasily Gorbik <[email protected]>
Cc: Christian Borntraeger <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Gerald Schaefer <[email protected]>
Signed-off-by: David Hildenbrand <[email protected]>
---
arch/s390/mm/extmem.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 105c09282f8c5..5060956b8e7d6 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -401,8 +401,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
* -EIO : could not perform query or load diagnose
* -ENOENT : no such segment
* -EOPNOTSUPP: multi-part segment cannot be used with linux
- * -ENOSPC : segment cannot be used (overlaps with storage)
- * -EBUSY : segment can temporarily not be used (overlaps with dcss)
+ * -EBUSY : segment cannot be used (overlaps with dcss or storage)
* -ERANGE : segment cannot be used (exceeds kernel mapping range)
* -EPERM : segment is currently loaded with incompatible permissions
* -ENOMEM : out of memory
@@ -627,10 +626,6 @@ void segment_warning(int rc, char *seg_name)
pr_err("DCSS %s has multiple page ranges and cannot be "
"loaded or queried\n", seg_name);
break;
- case -ENOSPC:
- pr_err("DCSS %s overlaps with used storage and cannot "
- "be loaded\n", seg_name);
- break;
case -EBUSY:
pr_err("%s needs used memory resources and cannot be "
"loaded or queried\n", seg_name);
--
2.26.2
On Tue, Jun 30, 2020 at 10:42:40AM +0200, David Hildenbrand wrote:
> segment_load() will no longer return -ENOSPC. If a segment overlaps with
> storage, we now also return -EBUSY. Remove the stale comment from
> __segment_load() and the stale handling from segment_warning().
>
> Cc: Heiko Carstens <[email protected]>
> Cc: Vasily Gorbik <[email protected]>
> Cc: Christian Borntraeger <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Gerald Schaefer <[email protected]>
> Signed-off-by: David Hildenbrand <[email protected]>
> ---
> arch/s390/mm/extmem.c | 7 +------
> 1 file changed, 1 insertion(+), 6 deletions(-)
Applied, thanks!