Ver código fonte

fix 新增爬取行政规划的接口

tjf 3 anos atrás
pai
commit
a6f8d7a86a

+ 7 - 0
boman-web-core/pom.xml

@@ -13,6 +13,13 @@
 
     <dependencies>
 
+        <!-- jsoup: HTML fetching/parsing used by the administrative-region crawler.
+             1.15.3 includes the upstream security fixes published after 1.12.1. -->
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.15.3</version>
+        </dependency>
+
         <!-- SpringCloud Ailibaba Nacos -->
         <dependency>
             <groupId>com.alibaba.cloud</groupId>

+ 241 - 0
boman-web-core/src/main/java/com/boman/web/core/controller/SysRegionController.java

@@ -0,0 +1,241 @@
+package com.boman.web.core.controller;
+
+import com.boman.common.core.utils.StringUtils;
+import com.boman.web.core.domain.SysRegion;
+import com.boman.web.core.mapper.SysRegionMapper;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+
+/**
+ * Crawls administrative-division data for China from the National Bureau of
+ * Statistics listing and stores it through {@link SysRegionMapper}.
+ *
+ * <p>The crawl is restricted by hard-coded name filters: only the province
+ * "福建省" and five of its cities are followed down to village level.
+ *
+ * @author tjf
+ * @Date: 2022/01/05/14:45
+ */
+@RestController
+@RequestMapping("/SysRegion")
+public class SysRegionController {
+    // Index page of the 2021 listing; every relative href found while crawling is resolved against it.
+    private static final String link = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/";
+
+    @Autowired
+    private SysRegionMapper sysRegionMapper;
+
+    /**
+     * Crawls the province index, follows "福建省" down to village level and
+     * writes every collected region through the mapper.
+     *
+     * <p>Results are collected in a list local to this call, so a later or
+     * concurrent request never re-inserts rows gathered by another request.
+     *
+     * @throws IOException if any page cannot be fetched
+     */
+    @PostMapping("/insert")
+    public void insertSysRegion() throws IOException {
+
+        System.out.println("开始爬取");
+        // Fetch the province index.
+        Document document = Jsoup.connect(link).get();
+        /*
+        <tr class="provincetr">
+            <td><a href="11.html">北京市<br></a></td>
+        </tr>
+        */
+        Elements provincetrAll = document.select(".provincetr");
+        for (Element provincetr : provincetrAll) {
+            for (Element td : provincetr.children()) {
+                Elements a = td.select("a");
+                String href = a.attr("href");     // e.g. 11.html
+                String name = a.text();           // e.g. 浙江省
+
+                if (!"福建省".equals(name)) {
+                    continue;
+                }
+
+                // Per-call result list (previously a static field shared by all requests).
+                List<SysRegion> regions = new ArrayList<>();
+
+                Long id = 13L;
+                SysRegion region = new SysRegion();
+                region.setId(id);
+                region.setPid(0L);
+                region.setName(name);
+                region.setTreeLevel(0);
+                region.setLeaf(1);
+                region.setSort(0L);
+                regions.add(region);
+                System.out.println("开始爬取" + name);
+                getShi(href, id, regions);
+                System.out.println(name + "爬取结束");
+                // The whole subtree has been crawled; persist it.
+                for (SysRegion region1 : regions) {
+                    sysRegionMapper.insertSysRegion(region1);
+                    System.out.println("插入数据" + region1.toString());
+                }
+            }
+        }
+        System.out.println("结束");
+    }
+
+    /**
+     * Returns the text of the cell's link, or the cell's own text when the
+     * row is rendered without an {@code <a>} element.
+     */
+    private static String cellText(Element td) {
+        String linked = td.select("a").text();
+        return StringUtils.isNotBlank(linked) ? linked : td.text();
+    }
+
+    /**
+     * Crawls the city level of one province page.
+     *
+     * @param shiHref href of the province page, relative to the index
+     * @param i       id of the parent province
+     * @param regions list that collected regions are appended to
+     * @throws IOException if a page cannot be fetched
+     */
+    private static void getShi(String shiHref, long i, List<SysRegion> regions) throws IOException {
+
+        Document document = Jsoup.connect(link + shiHref).get();
+
+        /*
+        <tr class="citytr">
+             <td><a href="33/3309.html">330900000000</a></td>
+             <td><a href="33/3309.html">舟山市</a></td>
+        </tr>
+        */
+        Elements citytrAll = document.select(".citytr");
+        long sort = 0;
+        for (Element citytr : citytrAll) {
+            Element codetd = citytr.child(0);
+            Element nametd = citytr.child(1);
+
+            String href = codetd.select("a").attr("href");  // e.g. 33/3308.html
+            String code = codetd.select("a").text();        // e.g. 330800000000
+            String name = nametd.select("a").text();        // e.g. 衢州市
+            System.out.println("开始爬取" + name);
+
+            if ("福州市".equals(name) || "厦门市".equals(name) || "莆田市".equals(name)
+                    || "三明市".equals(name) || "泉州市".equals(name)) {
+                SysRegion region = new SysRegion();
+                region.setId(Long.valueOf(code));
+                region.setPid(i);
+                region.setName(name);
+                // Original author's note: the sort value is deliberately fixed here and the
+                // tree level set to 1, because only a single city was needed at the time.
+                // If per-city ordering is required, add a counter outside the loop the way
+                // the county and town levels below do.
+                region.setTreeLevel(1);
+                region.setSort(sort);
+                regions.add(region);
+                getQu(href, code, regions);
+            }
+        }
+    }
+
+    /**
+     * Crawls the county/district level of one city page.
+     *
+     * @param countryHref href of the city page, relative to the index
+     * @param cityCode    code of the parent city, used as the parent id
+     * @param regions     list that collected regions are appended to
+     * @throws IOException if a page cannot be fetched
+     */
+    private static void getQu(String countryHref, String cityCode, List<SysRegion> regions) throws IOException {
+        Document document = Jsoup.connect(link + countryHref).get();
+        /*
+        <tr class="countytr">
+             <td><a href="02/330203.html">330203000000</a></td>
+             <td><a href="02/330203.html">海曙区</a></td>
+        </tr>
+        */
+        Elements countryAll = document.select(".countytr");
+
+        // Original author's note: the counter starts at -1 so that the first row (the
+        // "municipal district" placeholder, which has no children) is skipped.
+        long sort = -1;
+
+        for (Element countrytr : countryAll) {
+            sort++;
+            if (sort <= 0) {
+                continue;
+            }
+
+            Element codetd = countrytr.child(0);
+            Element nametd = countrytr.child(1);
+
+            String href = codetd.select("a").attr("href");  // e.g. 02/330281.html
+            // A row without a link yields blank anchor text, so fall back to the plain cell text.
+            // The previous fallback, Long.valueOf(cityCode + UUID), could never parse.
+            String code = cellText(codetd);                 // e.g. 330281000000
+            String name = cellText(nametd);                 // e.g. 余姚市
+            System.out.println("开始爬取" + name);
+
+            if (!StringUtils.isNotBlank(code)) {
+                // No usable code means no id can be formed; skip the row instead of failing the crawl.
+                continue;
+            }
+
+            SysRegion region = new SysRegion();
+            region.setId(Long.valueOf(code));
+            region.setPid(Long.valueOf(cityCode));
+            region.setName(name);
+            region.setTreeLevel(2);
+            region.setLeaf(1);
+            region.setSort(sort);
+            regions.add(region);
+
+            // Only rows that link to a sub-page have towns to crawl.
+            if (StringUtils.isNotBlank(href)) {
+                getJiedao(href, code, regions);
+            }
+        }
+    }
+
+    /**
+     * Crawls the town/street level of one county page.
+     *
+     * @param jiedaoHref  href of the county page, relative to its city directory
+     * @param countryCode code of the parent county; its first two characters form the URL directory
+     * @param regions     list that collected regions are appended to
+     * @throws IOException if a page cannot be fetched
+     */
+    private static void getJiedao(String jiedaoHref, String countryCode, List<SysRegion> regions) throws IOException {
+
+        Document document = Jsoup.connect(link + countryCode.substring(0, 2) + "/" + jiedaoHref).get();
+
+        /*
+        <tr class="towntr">
+             <td><a href="12/330212001.html">330212001000</a></td>
+             <td><a href="12/330212001.html">下应街道</a></td>
+        </tr>
+         */
+        Elements townAll = document.select(".towntr");
+
+        long sort = 0;
+
+        for (Element towntr : townAll) {
+            Element codetd = towntr.child(0);
+            Element nametd = towntr.child(1);
+
+            String href = codetd.select("a").attr("href");  // e.g. 12/330212001.html
+            String code = cellText(codetd);                 // e.g. 330212001000
+            String name = cellText(nametd);                 // e.g. 下应街道
+            System.out.println("开始爬取" + name);
+
+            if (!StringUtils.isNotBlank(code)) {
+                // No usable code means no id can be formed; skip the row.
+                continue;
+            }
+
+            SysRegion region = new SysRegion();
+            region.setId(Long.valueOf(code));
+            region.setPid(Long.valueOf(countryCode));
+            region.setName(name);
+            region.setTreeLevel(3);
+            region.setLeaf(1);
+            sort++;
+            region.setSort(sort);
+            regions.add(region);
+
+            // Only rows that link to a sub-page have villages to crawl.
+            if (StringUtils.isNotBlank(href)) {
+                getShequ(href, code, regions);
+            }
+        }
+    }
+
+    /**
+     * Crawls the village/community level of one town page.
+     *
+     * @param shequHref href of the town page, relative to its county directory
+     * @param townCode  code of the parent town; characters 0-2 and 2-4 form the URL directories
+     * @param regions   list that collected regions are appended to
+     * @throws IOException if a page cannot be fetched
+     */
+    private static void getShequ(String shequHref, String townCode, List<SysRegion> regions) throws IOException {
+        String a = townCode.substring(0, 2);
+        String b = townCode.substring(2, 4);
+        Document document = Jsoup.connect(link + a + "/" + b + "/" + shequHref).get();
+         /*
+        <tr class="villagetr">
+             <td>330212001209</td>
+             <td>112</td>
+             <td>胜利村村委会</td>
+        </tr>
+         */
+        Elements villagetrAll = document.select(".villagetr");
+
+        long sort = 0;
+
+        for (Element villagetr : villagetrAll) {
+            Element codetd = villagetr.child(0);
+            Element nametd = villagetr.child(2);
+
+            String code = codetd.text();        // e.g. 330212001005
+            String name = nametd.text();        // e.g. 东兴社区居委会
+            System.out.println("开始爬取" + name);
+
+            SysRegion region = new SysRegion();
+            region.setId(Long.valueOf(code));
+            region.setPid(Long.valueOf(townCode));
+            region.setName(name);
+            region.setTreeLevel(4);
+            region.setLeaf(1);
+            sort++;
+            region.setSort(sort);
+            regions.add(region);
+        }
+    }
+}

+ 111 - 0
boman-web-core/src/main/java/com/boman/web/core/domain/SysRegion.java

@@ -0,0 +1,111 @@
+package com.boman.web.core.domain;
+
+import java.io.Serializable;
+
+/**
+ * @author tjf
+ * @Date: 2022/01/05/14:46
+ */
+public class SysRegion implements Serializable {
+    private static final long serialVersionUID=1L;
+
+    /**
+     * id
+     */
+
+    private Long id;
+
+    /**
+     * 上级ID,一级为0
+     */
+
+    private Long pid;
+
+    /**
+     * 名称
+     */
+
+    private String name;
+
+    /**
+     * 层级
+     */
+
+    private Integer treeLevel;
+
+    /**
+     * 是否叶子节点  0:否   1:是
+     */
+
+    private Integer leaf;
+
+    /**
+     * 排序
+     */
+
+    private Long sort;
+
+    public static long getSerialVersionUID() {
+        return serialVersionUID;
+    }
+
+    public Long getId() {
+        return id;
+    }
+
+    public void setId(Long id) {
+        this.id = id;
+    }
+
+    public Long getPid() {
+        return pid;
+    }
+
+    public void setPid(Long pid) {
+        this.pid = pid;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public Integer getTreeLevel() {
+        return treeLevel;
+    }
+
+    public void setTreeLevel(Integer treeLevel) {
+        this.treeLevel = treeLevel;
+    }
+
+    public Integer getLeaf() {
+        return leaf;
+    }
+
+    public void setLeaf(Integer leaf) {
+        this.leaf = leaf;
+    }
+
+    public Long getSort() {
+        return sort;
+    }
+
+    public void setSort(Long sort) {
+        this.sort = sort;
+    }
+
+    @Override
+    public String toString() {
+        return "SysRegion{" +
+                "id=" + id +
+                ", pid=" + pid +
+                ", name='" + name + '\'' +
+                ", treeLevel=" + treeLevel +
+                ", leaf=" + leaf +
+                ", sort=" + sort +
+                '}';
+    }
+}

+ 2 - 2
boman-web-core/src/main/java/com/boman/web/core/mapper/StandardlyMapper.java

@@ -241,8 +241,8 @@ public interface StandardlyMapper {
             "LEFT JOIN sys_dept d ON d.id = t.dept_id \n" +
             "<where>" +
             "1=1 " +
-            "<if test=\"name != null and name != ''\">and ( d.dept_name like concat('%', #{name}, '%') or t.attendance_table_username like concat('%', #{name}, '%'))</if>"+
-            "<if test='date!=null'>and DATE_FORMAT(t.create_time,'%Y-%m') = #{date} </if>"+
+            // MyBatis <if> evaluates its "test" attribute; it must not be renamed.
+            "<if test=\"name != null and name != ''\">and ( d.dept_name like concat('%', #{name}, '%') or t.attendance_table_username like concat('%', #{name}, '%'))</if>"+
+            "<if test='date!=null'>and DATE_FORMAT(t.create_time,'%Y-%m') = #{date} </if>"+
             "</where>" +
             "GROUP BY\n" +
             " d.dept_name,t.attendance_table_username,t.user_id,DATE_FORMAT( t.create_time, '%Y-%m' )\n" +

+ 19 - 0
boman-web-core/src/main/java/com/boman/web/core/mapper/SysRegionMapper.java

@@ -0,0 +1,19 @@
+package com.boman.web.core.mapper;
+
+import com.boman.domain.VaccineInfoUser;
+import com.boman.web.core.domain.SysRegion;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * MyBatis mapper for administrative-region rows.
+ *
+ * @author tjf
+ * @Date: 2022/01/05/15:19
+ */
+@Mapper
+public interface SysRegionMapper {
+
+    /**
+     * Inserts one administrative region of China.
+     *
+     * @param sysRegion the region to store
+     * @return the int result of the mapped statement (presumably the affected-row count; confirm against the mapper XML)
+     */
+    int insertSysRegion(SysRegion sysRegion);
+}

+ 12 - 0
boman-web-core/src/main/resources/mapper/SysRegionMapper.xml

@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE mapper
+PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
+"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
+<mapper namespace="com.boman.web.core.mapper.SysRegionMapper">
+
+    <!-- Inserts one row (area_id, name, pid, sort) into china_area; when the row collides
+         with an existing key, only the sort column is updated.
+         NOTE(review): useGeneratedKeys/keyProperty are enabled although the id is supplied
+         explicitly by the caller; confirm they are actually needed.
+         NOTE(review): the schema name is hard-coded in the statement; confirm this is intended. -->
+    <insert id="insertSysRegion"  useGeneratedKeys="true"
+            keyProperty="id">
+        INSERT INTO  `qianshan_data_bureau`.`china_area`(`area_id`, `name`, `pid`, `sort`) VALUES (#{id}, #{name}, #{pid}, #{sort}) on duplicate key update sort = #{sort}
+    </insert>
+
+</mapper>

+ 1 - 1
boman-web-core/src/main/resources/mapper/VaccineInfoMapper.xml

@@ -312,7 +312,7 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
             <if test="region != null and region != ''">region = #{region},</if>
             <if test="userName != null">user_name = #{userName},</if>
             <if test="gender != null">gender = #{gender},</if>
-<!--            <if test="idCard != null">id_card = #{idCard},</if>-->
+<!--            <if test="idCard != null">id_card = #{idCard},</if>-->
             <if test="phoneNum != null">phone_num = #{phoneNum},</if>
             <if test="keyIndustries != null">key_industries = #{keyIndustries},</if>
             <if test="isVaccination != null">is_vaccination = #{isVaccination},</if>