1
CREATE TABLE t_log(f_time DATE, f_value int)
insert into T_LOG (F_TIME, F_VALUE)values ('2016-10-01', 100);
insert into T_LOG (F_TIME, F_VALUE)values ('2016-10-02', 101);
insert into T_LOG (F_TIME, F_VALUE)values ('2016-10-03', 102);
insert into T_LOG (F_TIME, F_VALUE)values ('2016-10-04', 99);
insert into T_LOG (F_TIME, F_VALUE)values ('2016-10-05', 105);
insert into T_LOG (F_TIME, F_VALUE)values ('2016-10-06', 108);
insert into T_LOG (F_TIME, F_VALUE)values ('2016-10-07', 105);
insert into T_LOG (F_TIME, F_VALUE)values ('2016-10-08', 199);
insert into T_LOG (F_TIME, F_VALUE)values ('2016-10-09', 108);
with a as(
select f_group,lead(f_group,1,'9999-01-01') over (order by f_group) as f_grouptwo from
(select F_TIME, F_VALUE,case when lag(f_value,1,0)over (order by f_time)<100 then f_time else null end f_group from t_log)t
where f_group is not null)
, b as(
select * from a
where datediff(dd,f_group,f_grouptwo)>=3)
select f_group,max(f_time) as maxtime
from b
inner join t_log c
on b.f_group<=c.f_time and b.f_grouptwo>c.f_time
where c.f_value>=100
group by f_group

现在的输出:
f_group maxtime
2016-10-01 2016-10-03
2016-10-05 2016-10-09

目标输出
2016-10-01
2016-10-02
2016-10-03
2016-10-05
2016-10-06
2016-10-07
2016-10-08
2016-10-09

chanqin 23
2017-08-06 提问
4 个回答
2

已采纳

思路1

某日期(*)要想入选,则其前后的日期(-)也需按如下情况入选:

  1. * - -

  2. - * -

  3. - - *

因此有(MSSQL):

with t as (select f_time from t_log where f_value > 100)
select * from t
where 
    (dateadd(day, 1, f_time) in (select * from t) and
    dateadd(day, 2, f_time) in (select * from t))
    or
    (dateadd(day, 1, f_time) in (select * from t) and
    dateadd(day, -1, f_time) in (select * from t))
    or
    (dateadd(day, -1, f_time) in (select * from t) and
    dateadd(day, -2, f_time) in (select * from t))
order by f_time

思路2

找出所有满足条件的(d, d+1, d+2)三元组,然后求union:

with
    log as (
        select * from t_log where f_value > 100),
    dates as (
        select g1.f_time date1, g2.f_time date2, g3.f_time date3 from
            log g1 inner join log g2 on dateadd(day, 1, g1.f_time) = g2.f_time
            inner join log g3 on dateadd(day, 1, g2.f_time) = g3.f_time)
select * from
    (select date1 from dates union select date2 from dates union select date3 from dates) foo
order by 1
2

这种需求实际上应该采用定期统计整理的方法,一条sql跑出来结果简直是灾难,以下sql只为实现你的需求,线上数据量较大时慎用!!

select aa.f_time,aa.f_value 
from (
    select a.*,datediff(f_time,b.fmin) dif,@rn:=@rn+1 rn
    from t_log a,
         (select min(f_time) fmin from t_log) b,
         (select @rn:=0) c 
    where f_value>=100) aa
where rn-dif in 
(select rn-dif dev 
 from (
    select a.*,datediff(f_time,b.fmin) dif,@rn:=@rn+1 rn
    from t_log a,
         (select min(f_time) fmin from t_log) b,
         (select @rn:=0) c 
    where f_value>=100) aa
 group by dev
 having count(1)>=3);
0

@clcx_1315 @萝卜 的方法都不错,很受启发。
我写个用窗口函数的版本,同 @萝卜 的第一种思路,只是更易懂一些;另外用窗口函数避免了表多次扫描,性能表现会好一些。

oracle sql的写法,nvl的函数可以改成mysql中的ifnull
第一句t_n的写法,原因是担心log表中日期不连续,用oracle专用的connect by构建了一个日期连续的结果集,如果log表能保证连续的话就不需要这一段了。

WITH t_n AS (
  SELECT 
    to_char(to_date(t1.min_f_time, 'yyyy-mm-dd') + (LEVEL - 1), 'yyyy-mm-dd') AS f_time_n, 0 AS f_value
  FROM dual,
    (SELECT MIN(f_time) AS min_f_time FROM t_log_test) t1,
    (SELECT MAX(f_time) AS max_f_time FROM t_log_test) t2
  CONNECT BY to_date(t1.min_f_time, 'yyyy-mm-dd') + (LEVEL - 1) <= to_date(t2.max_f_time, 'yyyy-mm-dd')
)
SELECT *
FROM (
  SELECT
    f_time2,
    LAG(f_value2, 2, 0) OVER(ORDER BY f_time2) pre_2_value,  
    LAG(f_value2, 1, 0) OVER(ORDER BY f_time2) pre_1_value,
    f_value2 AS f_value,
    LEAD(f_value2, 1, 0) OVER(ORDER BY f_time2) next_1_value,  
    LEAD(f_value2, 2, 0) OVER(ORDER BY f_time2) next_2_value
  FROM (
    SELECT 
      t_n.f_time_n AS f_time2,
      NVL(t_log_test.f_value, t_n.f_value) AS f_value2
    FROM t_n LEFT JOIN t_log_test ON t_n.f_time_n = t_log_test.f_time
    ORDER BY F_TIME
  ) t3
) t4
WHERE (pre_2_value >= 100 AND pre_2_value >= 100 AND f_value >=100) OR
      (pre_1_value >= 100 AND f_value >= 100 AND next_1_value >=100) OR 
      (f_value >= 100 AND next_1_value >= 100 AND next_2_value >=100)
0

感谢各路英雄,特奉上python写法
imput.txt
id date visits
1 2017-07-01 100
2 2017-07-02 109
3 2017-07-03 150
4 2017-07-04 99
5 2017-07-05 145
6 2017-07-06 1455
7 2017-07-07 19
8 2017-07-08 188

import re
f=open("input.txt","r")
line=f.readline()
lines=f.readlines()
print ('date')
i=0
while i<len(lines):
    count=0
    while (i+count)<len(lines):
        line=re.split("\\s+",lines[i+count].strip())
        if int(line[2])>=100:
            count=count+1
        else:
            break
    if count>2:
        for j in range(count):
            print (re.split("\\s+",lines[i+j].strip())[1])
        i=i+count
    else:
        i=i+1
f.close()

撰写答案

推广链接