graph LR A[用户上传文档/URL抓取] --> B[API服务接收请求] B --> C[文件存入MinIO + 元数据写入PostgreSQL] C --> D[Controller创建解析任务,推入Redis任务队列] D --> E[Worker节点获取任务,调用DeepDoc引擎] E --> F[文档深度解析与结构化] F --> G[智能语义分块] G --> H[调用嵌入模型,文本块向量化] H --> I[向量+原文+元数据写入向量/全文数据库] I --> J[索引构建完成,等待检索调用]```
#### 2\. 查询问答(用户对话)流程
```mermaid
graph LR
    A[用户输入问题] --> B[API服务接收请求]
    B --> C[查询预处理与增强]
    C --> D[调用同一嵌入模型,问题向量化]
    D --> E[多路混合检索召回]
    E --> F[检索结果融合]
    F --> G[重排序Rerank精排]
    G --> H[Prompt工程与上下文组装]
    H --> I[调用LLM大模型执行推理生成]
    I --> J[答案校验与后处理]
    J --> K[返回最终答案给用户]
```
```mermaid
graph TD
    A[输入文档/PDF/图片] --> B[页面渲染与统一预处理]
    B --> C[视觉版面分析]
    C --> C1[基于YOLOv8+LayoutLM的区域定位]
    C --> C2[区域分类:标题/正文/表格/图片/公式]
    C --> C3[阅读顺序还原:多栏排版/跨页内容逻辑拼接]
    C --> D[分区域专项解析]
    D --> D1[文本区域:原生文本提取+OCR兜底]
    D --> D2[表格区域:Table-Transformer结构识别]
    D --> D3[公式区域:公式定位+LaTeX语法生成]
    D --> D4[图片区域:OCR提取+多模态图文描述生成]
    D --> E[结构化内容融合]
    E --> F[输出带层级结构、空间位置、语义关联的高保真文本]
```
-- 1. Equality lookups: match rows whose column equals a single value.
SELECT *
FROM sys_user
WHERE id = 1;

SELECT *
FROM sys_user
WHERE username = 'zhangsan';
-- 2. Range lookups.
-- BETWEEN is inclusive on both ends: ages 20 and 30 are both returned.
SELECT *
FROM sys_user
WHERE age BETWEEN 20 AND 30;

-- IN matches any value from an explicit list.
SELECT *
FROM sys_user
WHERE id IN (1, 3, 5);
-- 3. Pattern matching (% matches any run of characters, _ matches exactly one character).
-- Username starts with "zhang".
SELECT *
FROM sys_user
WHERE username LIKE 'zhang%';

-- Phone ends with "8000".
SELECT *
FROM sys_user
WHERE phone LIKE '%8000';

-- Username is any single character followed by "hang", then anything.
SELECT *
FROM sys_user
WHERE username LIKE '_hang%';
-- 4. NULL checks: use IS NULL / IS NOT NULL, since "= NULL" never evaluates to true.
SELECT *
FROM sys_user
WHERE phone IS NULL;

SELECT *
FROM sys_user
WHERE phone IS NOT NULL;
-- 5. Logical operators (AND / OR / NOT).
-- All three conditions must hold.
SELECT *
FROM sys_user
WHERE age > 25
  AND gender = '男'
  AND balance > 0;

-- Either condition may hold.
SELECT *
FROM sys_user
WHERE age < 20
   OR age > 35;

-- Negation; the parentheses keep NOT applied to the whole comparison
-- regardless of the server's NOT-precedence setting.
SELECT *
FROM sys_user
WHERE NOT (gender = '未知');
-- 6. Comparison operators.
SELECT *
FROM sys_user
WHERE age >= 25;

SELECT *
FROM sys_user
WHERE balance != 0;
4.4.3 排序与分页
1 2 3 4 5 6 7 8 9 10 11 12 13
-- 1. Sorting (ASC = ascending, the default; DESC = descending).
-- Single-column sort.
SELECT *
FROM sys_user
ORDER BY age DESC;

-- Multi-column sort: by balance descending, ties broken by age ascending.
SELECT *
FROM sys_user
ORDER BY balance DESC, age ASC;
-- 1. INNER JOIN: returns only the rows that satisfy the join condition in both tables.
-- Users that have orders, together with their order details.
SELECT
    u.id,
    u.username,
    o.order_no,
    o.order_amount,
    o.order_status
FROM sys_user u
INNER JOIN sys_order o
    ON u.id = o.user_id;
-- 2. LEFT JOIN: returns every row of the left table; right-table columns are NULL when nothing matches.
-- All users with their order details (users without orders are still listed).
SELECT
    u.id,
    u.username,
    o.order_no,
    o.order_amount
FROM sys_user u
LEFT JOIN sys_order o
    ON u.id = o.user_id;
-- 3. RIGHT JOIN: returns every row of the right table; left-table columns are NULL when nothing matches.
SELECT
    u.id,
    u.username,
    o.order_no,
    o.order_amount
FROM sys_user u
RIGHT JOIN sys_order o
    ON u.id = o.user_id;
-- 4. Joining several tables: user -> order -> order item -> product.
-- Every join is a LEFT JOIN, so users without orders (and orders without items) are kept.
SELECT
    u.id,
    u.username,
    o.order_no,
    o.order_amount,
    p.product_name,
    p.price
FROM sys_user u
LEFT JOIN sys_order o
    ON u.id = o.user_id
LEFT JOIN order_item i
    ON o.order_id = i.order_id
LEFT JOIN product p
    ON i.product_id = p.id;
-- 1. Scalar subquery (the subquery returns a single value).
-- Users whose balance is above the average balance.
SELECT *
FROM sys_user
WHERE balance > (
    SELECT AVG(balance)
    FROM sys_user
);
-- 2. Column subquery (the subquery returns one column); used with IN / ANY / ALL.
-- Users that have placed at least one order.
SELECT *
FROM sys_user
WHERE id IN (
    SELECT DISTINCT user_id
    FROM sys_order
);
-- 3. Row subquery (the subquery returns a single row).
-- Users whose age and gender both equal those of the user with id = 1.
SELECT *
FROM sys_user
WHERE (age, gender) = (
    SELECT age, gender
    FROM sys_user
    WHERE id = 1
);
-- 4. Table subquery (the subquery returns multiple rows and columns), used as a derived table.
-- Orders with an amount above 100, together with the owning user's name.
SELECT
    u.username,
    t.order_no,
    t.order_amount
FROM sys_user u
INNER JOIN (
    SELECT *
    FROM sys_order
    WHERE order_amount > 100
) t
    ON u.id = t.user_id;
-- 5. EXISTS subquery (true as soon as the subquery yields at least one row).
-- Users that have orders (often recommended over IN for large data sets).
SELECT *
FROM sys_user u
WHERE EXISTS (
    SELECT 1
    FROM sys_order o
    WHERE o.user_id = u.id
);
5.4 常用内置函数
5.4.1 字符串函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
-- 1. String concatenation.
SELECT CONCAT(username, '-', phone)
FROM sys_user;

-- Concatenation with a separator (the first argument is the separator).
SELECT CONCAT_WS('-', username, gender, age)
FROM sys_user;
-- 3. CASE WHEN (multi-branch conditional).
-- Buckets each user into an age band; branches are evaluated top to bottom.
-- NOTE: a NULL age matches none of the WHEN branches and therefore falls through to ELSE.
SELECT
    username,
    age,
    CASE
        WHEN age < 18 THEN '未成年'
        WHEN age BETWEEN 18 AND 30 THEN '青年'
        WHEN age BETWEEN 31 AND 60 THEN '中年'
        ELSE '老年'
    END AS 年龄分段
FROM sys_user;
5.5 合并查询
1 2 3 4 5 6 7 8 9
-- UNION ALL: appends the two result sets without removing duplicates (faster).
-- A male user older than 30 appears twice in the output.
SELECT id, username, phone
FROM sys_user
WHERE gender = '男'
UNION ALL
SELECT id, username, phone
FROM sys_user
WHERE age > 30;
-- UNION: combines the two result sets and removes duplicate rows (slightly slower).
SELECT id, username, phone
FROM sys_user
WHERE gender = '男'
UNION
SELECT id, username, phone
FROM sys_user
WHERE age > 30;
-- 1. Create a view exposing each user alongside their orders.
-- LEFT JOIN keeps users without orders; their order columns are NULL.
CREATE VIEW user_order_view AS
SELECT
    u.id,
    u.username,
    u.phone,
    o.order_no,
    o.order_amount,
    o.create_time AS order_time
FROM sys_user u
LEFT JOIN sys_order o
    ON u.id = o.user_id;
-- 2. Query the view (same syntax as querying an ordinary table).
SELECT *
FROM user_order_view
WHERE order_amount > 100;
-- 3. Redefine the view in place; CREATE OR REPLACE overwrites the existing definition.
CREATE OR REPLACE VIEW user_order_view AS
SELECT
    u.id,
    u.username,
    o.order_no,
    o.order_amount,
    o.order_status
FROM sys_user u
LEFT JOIN sys_order o
    ON u.id = o.user_id;
-- 1. Create a stored procedure without parameters.
-- Temporarily switch the client statement delimiter to // so the semicolons
-- inside the procedure body do not end CREATE PROCEDURE early. The comments are
-- kept off the DELIMITER lines so no trailing text can be read as part of the delimiter.
DELIMITER //
CREATE PROCEDURE get_user_count()
BEGIN
    -- Returns a single row holding the number of rows in sys_user.
    SELECT COUNT(*) AS 用户总数
    FROM sys_user;
END//
-- Restore the default delimiter.
DELIMITER ;
-- Invoke the stored procedure.
CALL get_user_count();
-- 2. Create a stored procedure with parameters (IN = input, OUT = output, INOUT = both).
-- Looks up a user's name, adds the supplied amount to the user's balance, and
-- hands the resulting balance back through the INOUT parameter.
DELIMITER //
CREATE PROCEDURE get_user_by_id(
    IN in_user_id BIGINT,               -- input: user id
    OUT out_username VARCHAR(50),       -- output: username
    INOUT inout_balance DECIMAL(10,2)   -- in: amount to add; out: balance after the update
)
BEGIN
    -- Copy the username into the OUT parameter.
    SELECT username
    INTO out_username
    FROM sys_user
    WHERE id = in_user_id;

    -- Add the supplied amount to the stored balance.
    UPDATE sys_user
    SET balance = balance + inout_balance
    WHERE id = in_user_id;

    -- Read the updated balance back into the INOUT parameter.
    SELECT balance
    INTO inout_balance
    FROM sys_user
    WHERE id = in_user_id;
END//
DELIMITER ;
```text
Author identity unknown

*** Please tell me who you are.

Run

  git config --global user.email "you@example.com"
  git config --global user.name "Your Name"

to set your account's default identity.
```