MariaK committed on
Commit
641bde8
·
verified ·
1 Parent(s): 97c9cf7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import os
4
+
5
# Load the connector catalogs from JSON files in the current working directory.
# generate_code() looks entries up by key and reads the 'imports', 'configs'
# and 'docs' fields of each entry (plus 'source_connector' /
# 'destination_connector' for the display name).
# The encoding is pinned to UTF-8 so parsing does not depend on the host
# locale's default text encoding.
with open('source_connectors.json', 'r', encoding='utf-8') as f:
    source_connectors = json.load(f)

with open('destination_connectors.json', 'r', encoding='utf-8') as f:
    destination_connectors = json.load(f)
11
+
12
def generate_documentation_link(source, destination):
    """Return one Markdown line linking to both connectors' documentation.

    Args:
        source: Mapping that provides the keys ``'source_connector'``
            (display name) and ``'docs'`` (link target).
        destination: Mapping that provides the keys
            ``'destination_connector'`` (display name) and ``'docs'``.

    Returns:
        Two Markdown links, source first, separated by ``" | "``.
    """
    source_link = "[{} documentation]({})".format(
        source['source_connector'], source['docs'])
    destination_link = "[{} documentation]({})".format(
        destination['destination_connector'], destination['docs'])
    return " | ".join((source_link, destination_link))
14
+
15
def generate_code(source, destination, chunking, embedding):
    """Build an unstructured-ingest pipeline script for the chosen connectors.

    Args:
        source: Key into the module-level ``source_connectors`` mapping.
        destination: Key into the module-level ``destination_connectors``
            mapping.
        chunking: Truthy to emit a ``chunker_config`` argument; otherwise a
            "disabled" comment is emitted in its place.
        embedding: Embedding provider name placed in the emitted
            ``embedder_config`` argument; a falsy value emits a "disabled"
            comment instead.

    Returns:
        A ``(code, doc_link)`` tuple: the generated Python source text and
        the Markdown documentation links for the two connectors.

    Raises:
        KeyError: If ``source`` or ``destination`` is not a key of the
            corresponding connector mapping.
    """
    source_connector = source_connectors[source]
    destination_connector = destination_connectors[destination]

    # The connector 'configs' snippets are spliced in as keyword arguments of
    # Pipeline.from_configs(...), so every line is indented to argument depth.
    argument_indent = ' ' * 8
    indented_source_configs = '\n'.join(
        argument_indent + line
        for line in source_connector['configs'].strip().split('\n'))
    indented_destination_configs = '\n'.join(
        argument_indent + line
        for line in destination_connector['configs'].strip().split('\n'))

    if chunking:
        chunker_line = 'chunker_config=ChunkerConfig(chunking_strategy="by_title"),'
    else:
        chunker_line = '# Chunking is disabled'

    if embedding:
        # The trailing comma is required: the destination config arguments
        # follow this one, and without it the generated script does not parse.
        embedder_line = (
            'embedder_config=EmbedderConfig(embedding_provider="'
            + embedding + '"),')
    else:
        embedder_line = '# Embedding is disabled'

    # The config placeholders sit at column 0 because their lines already
    # carry the argument indentation added above.
    code = f'''
import os
from unstructured_ingest.v2.pipeline.pipeline import Pipeline
from unstructured_ingest.v2.interfaces import ProcessorConfig
from unstructured_ingest.v2.processes.partitioner import PartitionerConfig
{source_connector['imports']}
{destination_connector['imports']}
from unstructured_ingest.v2.processes.chunker import ChunkerConfig
from unstructured_ingest.v2.processes.embedder import EmbedderConfig

if __name__ == "__main__":
    Pipeline.from_configs(
        context=ProcessorConfig(),
{indented_source_configs}
        partitioner_config=PartitionerConfig(
            partition_by_api=True,
            api_key=os.getenv("UNSTRUCTURED_API_KEY"),
            partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
            strategy="hi_res",
        ),
        {chunker_line}
        {embedder_line}
{indented_destination_configs}
    ).run()
'''
    doc_link = generate_documentation_link(source_connector, destination_connector)
    return code, doc_link
54
+
55
# Input widgets, listed in the positional order of generate_code's parameters:
# source, destination, chunking, embedding.
source_picker = gr.Dropdown(
    choices=list(source_connectors),
    label="Get unstructured documents from:",
)
destination_picker = gr.Dropdown(
    choices=list(destination_connectors),
    label="Upload RAG-ready documents to:",
)
chunking_toggle = gr.Checkbox(label="Check to enable chunking")
embedding_picker = gr.Dropdown(
    choices=["langchain-openai", "langchain-huggingface"],
    label="Embedding provider:",
)

# Output widgets, matching the (code, doc_link) tuple returned by generate_code.
generated_code_view = gr.Code(language="python", label="Generated Code")
documentation_view = gr.Markdown(label="Documentation Links")

demo = gr.Interface(
    fn=generate_code,
    inputs=[
        source_picker,
        destination_picker,
        chunking_toggle,
        embedding_picker,
    ],
    outputs=[generated_code_view, documentation_view],
    title="Unstructured-Ingest Code Generator",
    description="Generate code for the unstructured-ingest library based on your inputs.",
)


demo.launch()