|
@@ -0,0 +1,241 @@
|
|
|
|
+package com.boman.web.core.controller;
|
|
|
|
+
|
|
|
|
+import com.boman.common.core.utils.StringUtils;
|
|
|
|
+import com.boman.web.core.domain.SysRegion;
|
|
|
|
+import com.boman.web.core.mapper.SysRegionMapper;
|
|
|
|
+import org.jsoup.Jsoup;
|
|
|
|
+import org.jsoup.nodes.Document;
|
|
|
|
+import org.jsoup.nodes.Element;
|
|
|
|
+import org.jsoup.select.Elements;
|
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
|
+import org.springframework.web.bind.annotation.PostMapping;
|
|
|
|
+import org.springframework.web.bind.annotation.RequestMapping;
|
|
|
|
+import org.springframework.web.bind.annotation.RestController;
|
|
|
|
+
|
|
|
|
+import java.io.IOException;
|
|
|
|
+import java.util.ArrayList;
|
|
|
|
+import java.util.List;
|
|
|
|
+import java.util.UUID;
|
|
|
|
+
|
|
|
|
+/**
|
|
|
|
+ * 生成中国行政区域划分数据
|
|
|
|
+ *
|
|
|
|
+ * @author tjf
|
|
|
|
+ * @Date: 2022/01/05/14:45
|
|
|
|
+ */
|
|
|
|
+@RestController
|
|
|
|
+@RequestMapping("/SysRegion")
|
|
|
|
+public class SysRegionController {
|
|
|
|
+ // 固定写法:国家统计局的首页链接
|
|
|
|
+ private static final String link = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/";
|
|
|
|
+ // 保存结果集的集合
|
|
|
|
+ private static List<SysRegion> regions = new ArrayList<>();
|
|
|
|
+
|
|
|
|
+ @Autowired
|
|
|
|
+ private SysRegionMapper sysRegionMapper;
|
|
|
|
+
|
|
|
|
+ @PostMapping("/insert")
|
|
|
|
+ public void insertSysRegion() throws IOException {
|
|
|
|
+
|
|
|
|
+ System.out.println("开始爬取");
|
|
|
|
+ // 爬取省份
|
|
|
|
+ Document document = null;
|
|
|
|
+ document = Jsoup.connect(link).get();
|
|
|
|
+ /*
|
|
|
|
+ <tr class="provincetr">
|
|
|
|
+ <td><a href="11.html">北京市<br></a></td>
|
|
|
|
+ </tr>
|
|
|
|
+ */
|
|
|
|
+ Elements provincetrAll = document.select(".provincetr");
|
|
|
|
+ for (Element provincetr : provincetrAll) {
|
|
|
|
+ for (Element td : provincetr.children()) {
|
|
|
|
+ Elements a = td.select("a");
|
|
|
|
+ String href = a.attr("href"); // 11.html
|
|
|
|
+ String name = a.text(); // 浙江省
|
|
|
|
+
|
|
|
|
+ Long id = 13L;
|
|
|
|
+ if ("福建省".equals(name)) {
|
|
|
|
+ SysRegion region = new SysRegion();
|
|
|
|
+ region.setId(id);
|
|
|
|
+ region.setPid(0L);
|
|
|
|
+ region.setName(name);
|
|
|
|
+ region.setTreeLevel(0);
|
|
|
|
+ region.setLeaf(1);
|
|
|
|
+ region.setSort(0L);
|
|
|
|
+ regions.add(region);
|
|
|
|
+ System.out.println("开始爬取" + name);
|
|
|
|
+ getShi(href, id);
|
|
|
|
+ System.out.println(name + "爬取结束");
|
|
|
|
+ //内循环结束,可以插入数据
|
|
|
|
+ if (regions.size() > 0) {
|
|
|
|
+ for (SysRegion region1 : regions) {
|
|
|
|
+ sysRegionMapper.insertSysRegion(region1);
|
|
|
|
+ System.out.println("插入数据" + region1.toString());
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ System.out.println("结束");
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // 爬取市
|
|
|
|
+ private static void getShi(String shiHref, long i) throws IOException {
|
|
|
|
+
|
|
|
|
+ Document document = Jsoup.connect(link + shiHref).get();
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ <tr class="citytr">
|
|
|
|
+ <td><a href="33/3309.html">330900000000</a></td>
|
|
|
|
+ <td><a href="33/3309.html">舟山市</a></td>
|
|
|
|
+ </tr>
|
|
|
|
+ */
|
|
|
|
+ Elements citytrAll = document.select(".citytr");
|
|
|
|
+ long sort = 0;
|
|
|
|
+ for (Element citytr : citytrAll) {
|
|
|
|
+ Element codetd = citytr.child(0);
|
|
|
|
+ Element nametd = citytr.child(1);
|
|
|
|
+
|
|
|
|
+ String href = codetd.select("a").attr("href"); // 33/3308.html
|
|
|
|
+ String code = codetd.select("a").text(); // 330800000000
|
|
|
|
+ String name = nametd.select("a").text(); // 衢州市
|
|
|
|
+ System.out.println("开始爬取" + name);
|
|
|
|
+
|
|
|
|
+ if ("福州市".equals(name) || "厦门市".equals(name)||"莆田市".equals(name)||"三明市".equals(name)||"泉州市".equals(name)) {
|
|
|
|
+ SysRegion region = new SysRegion();
|
|
|
|
+ region.setId(Long.valueOf(code));
|
|
|
|
+ region.setPid(i);
|
|
|
|
+ region.setName(name);
|
|
|
|
+ // 这里的排序写死,层级树设置为第一层。
|
|
|
|
+ // 因为我的业务逻辑只拿宁波市的乡镇信息。
|
|
|
|
+ // 如果你有其他需求,请在循环外部设置计数器。(例如:下面的区和街道逻辑)
|
|
|
|
+ region.setTreeLevel(1);
|
|
|
|
+ region.setSort(sort);
|
|
|
|
+ regions.add(region);
|
|
|
|
+ getQu(href, code);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // 爬取区
|
|
|
|
+ private static void getQu(String countryHref, String cityCode) throws IOException {
|
|
|
|
+ Document document = Jsoup.connect(link + countryHref).get();
|
|
|
|
+ /*
|
|
|
|
+ <tr class="countytr">
|
|
|
|
+ <td><a href="02/330203.html">330203000000</a></td>
|
|
|
|
+ <td><a href="02/330203.html">海曙区</a></td>
|
|
|
|
+ </tr>
|
|
|
|
+ */
|
|
|
|
+ Elements countryAll = document.select(".countytr");
|
|
|
|
+
|
|
|
|
+ // 这里的sort没有从0开始增加,是因为市下的第一级为市辖区,该级下无子级,用不着采集。所以直接跳过。
|
|
|
|
+ long sort = -1;
|
|
|
|
+
|
|
|
|
+ for (Element countrytr : countryAll) {
|
|
|
|
+ sort++;
|
|
|
|
+
|
|
|
|
+ if (sort > 0) {
|
|
|
|
+ Element codetd = countrytr.child(0);
|
|
|
|
+ Element nametd = countrytr.child(1);
|
|
|
|
+
|
|
|
|
+ String href = codetd.select("a").attr("href"); // 02/330281.html
|
|
|
|
+ String code = codetd.select("a").text(); // 330281000000
|
|
|
|
+ String name = nametd.select("a").text(); // 余姚市
|
|
|
|
+ System.out.println("开始爬取" + name);
|
|
|
|
+
|
|
|
|
+ SysRegion region = new SysRegion();
|
|
|
|
+ if (StringUtils.isNotBlank(code)){
|
|
|
|
+ region.setId(Long.valueOf(code));
|
|
|
|
+ }else {
|
|
|
|
+ region.setId(Long.valueOf( cityCode+ UUID.randomUUID()));
|
|
|
|
+ }
|
|
|
|
+ region.setPid(Long.valueOf(cityCode));
|
|
|
|
+ region.setName(name);
|
|
|
|
+ region.setTreeLevel(2);
|
|
|
|
+ region.setLeaf(1);
|
|
|
|
+ region.setSort(sort);
|
|
|
|
+ regions.add(region);
|
|
|
|
+ getJiedao(href, code);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // 爬取乡镇(街道)
|
|
|
|
+ private static void getJiedao(String jiedaoHref, String countryCode) throws IOException {
|
|
|
|
+
|
|
|
|
+ Document document = Jsoup.connect(link + countryCode.substring(0, 2) + "/" + jiedaoHref).get();
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ <tr class="towntr">
|
|
|
|
+ <td><a href="12/330212001.html">330212001000</a></td>
|
|
|
|
+ <td><a href="12/330212001.html">下应街道</a></td>
|
|
|
|
+ </tr>
|
|
|
|
+ */
|
|
|
|
+ Elements townAll = document.select(".towntr");
|
|
|
|
+
|
|
|
|
+ long sort = 0;
|
|
|
|
+
|
|
|
|
+ for (Element towntr : townAll) {
|
|
|
|
+ Element codetd = towntr.child(0);
|
|
|
|
+ Element nametd = towntr.child(1);
|
|
|
|
+
|
|
|
|
+ String href = codetd.select("a").attr("href"); // 12/330212001.html
|
|
|
|
+ String code = codetd.select("a").text(); // 330212001000
|
|
|
|
+ String name = nametd.select("a").text(); // 下应街道
|
|
|
|
+ System.out.println("开始爬取" + name);
|
|
|
|
+
|
|
|
|
+ SysRegion region = new SysRegion();
|
|
|
|
+ region.setId(Long.valueOf(code));
|
|
|
|
+ region.setPid(Long.valueOf(countryCode));
|
|
|
|
+ region.setName(name);
|
|
|
|
+ region.setTreeLevel(3);
|
|
|
|
+ region.setLeaf(1);
|
|
|
|
+ sort++;
|
|
|
|
+ region.setSort(sort);
|
|
|
|
+ regions.add(region);
|
|
|
|
+
|
|
|
|
+ getShequ(href, code);
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // 爬取村(社区)
|
|
|
|
+ private static void getShequ(String shequHref, String townCode) throws IOException {
|
|
|
|
+ String a = townCode.substring(0, 2);
|
|
|
|
+ String b = townCode.substring(2, 4);
|
|
|
|
+ Document document = Jsoup.connect(link + a + "/" + b + "/" + shequHref).get();
|
|
|
|
+ /*
|
|
|
|
+ <tr class="villagetr">
|
|
|
|
+ <td>330212001209</td>
|
|
|
|
+ <td>112</td>
|
|
|
|
+ <td>胜利村村委会</td>
|
|
|
|
+ </tr>
|
|
|
|
+ */
|
|
|
|
+ Elements villagetrAll = document.select(".villagetr");
|
|
|
|
+
|
|
|
|
+ long sort = 0;
|
|
|
|
+
|
|
|
|
+ for (Element villagetr : villagetrAll) {
|
|
|
|
+ Element codetd = villagetr.child(0);
|
|
|
|
+ Element nametd = villagetr.child(2);
|
|
|
|
+
|
|
|
|
+ String code = codetd.text(); // 330212001005
|
|
|
|
+ String name = nametd.text(); // 东兴社区居委会
|
|
|
|
+ System.out.println("开始爬取" + name);
|
|
|
|
+
|
|
|
|
+ SysRegion region = new SysRegion();
|
|
|
|
+ region.setId(Long.valueOf(code));
|
|
|
|
+ region.setPid(Long.valueOf(townCode));
|
|
|
|
+ region.setName(name);
|
|
|
|
+ region.setTreeLevel(4);
|
|
|
|
+ region.setLeaf(1);
|
|
|
|
+ sort++;
|
|
|
|
+ region.setSort(sort);
|
|
|
|
+ regions.add(region);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+}
|