mkaramb committed on
Commit
58e491e
1 Parent(s): b9d7b09

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -1
app.py CHANGED
@@ -55,6 +55,53 @@ def unzip_and_find_jpgs(file_path):
55
  jpg_files.append(full_path)
56
  return jpg_files
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def process_images(uploaded_file):
59
  global results_df
60
  results_df = results_df.iloc[0:0] # Clear the DataFrame if re-running this cell
@@ -75,11 +122,58 @@ def process_images(uploaded_file):
75
  "Translated Text": translated_text
76
  }])
77
  results_df = pd.concat([results_df, new_row], ignore_index=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  except Exception as e:
79
  return f"An error occurred: {str(e)}"
80
 
81
  return results_df.to_html()
82
 
 
 
 
 
 
 
83
  interface = gr.Interface(
84
  fn=process_images,
85
  inputs="file",
@@ -89,4 +183,4 @@ interface = gr.Interface(
89
  )
90
 
91
  if __name__ == "__main__":
92
- interface.launch(debug=True)
 
55
  jpg_files.append(full_path)
56
  return jpg_files
57
 
58
def get_random_pairs_list(shots, num_pairs=2):
    """Pick ``num_pairs`` distinct few-shot examples at random.

    Args:
        shots: Mapping of example input text to the record expected for it.
        num_pairs: Number of distinct examples to draw.

    Returns:
        A list of ``(input_text, expected_record)`` tuples in random order.

    Raises:
        ValueError: If ``num_pairs`` is negative or larger than ``len(shots)``.
    """
    keys = list(shots)
    # Validate up front so the caller gets a message that names the actual
    # problem instead of random.sample's generic "sample larger than
    # population" error.
    if not 0 <= num_pairs <= len(keys):
        raise ValueError(
            f"num_pairs must be between 0 and {len(keys)} "
            f"(the number of available examples), got {num_pairs}"
        )
    return [(key, shots[key]) for key in random.sample(keys, num_pairs)]
61
+
62
def construct_prompt(input_text, random_pairs):
    """Build the few-shot extraction prompt for one piece of label text.

    The prompt consists of a fixed instruction paragraph, one numbered
    ``Input N`` / ``Output N`` example per entry of ``random_pairs``, and a
    final numbered ``Input`` / ``Output`` stanza holding ``input_text`` with
    the output left blank for the model to complete.

    Args:
        input_text: The text to extract fields from.
        random_pairs: Sequence of ``(example_text, record)`` tuples, where
            ``record`` is a mapping with the keys ``Collector``,
            ``Location``, ``Taxon`` and ``Date``.

    Returns:
        The complete prompt as a single string.

    Raises:
        KeyError: If an example record lacks one of the four fields.
    """
    # Local import keeps this function self-contained.
    import json

    field_names = ("Collector", "Location", "Taxon", "Date")

    lines = [
        "",
        "Follow the examples below. Your response should contain only JSON. If you",
        "encounter two dates in an input, prefer the earliest. If the answer is not",
        "exact, try your best, but do not use excess wording. If you are completely",
        "unsure or there is no answer, insert UNKNOWN.",
        "",
    ]

    for number, (example_text, record) in enumerate(random_pairs, start=1):
        # json.dumps (rather than hand-assembled braces and quotes) guarantees
        # the example output is valid JSON even when a value contains quotes
        # or backslashes. Compact separators keep the one-line layout.
        example_json = json.dumps(
            {name: record[name] for name in field_names},
            ensure_ascii=False,
            separators=(",", ":"),
        )
        lines += [
            f"Input {number}:",
            str(example_text),
            "",
            f"Output {number}:",
            example_json,
            "",
        ]

    final_number = len(random_pairs) + 1
    # The trailing empty element yields the final newline after "Output N:".
    lines += [
        f"Input {final_number}:",
        str(input_text),
        f"Output {final_number}:",
        "",
    ]
    return "\n".join(lines)
88
+
89
def process_responses(responses):
    """Turn raw model responses into dictionaries of extracted fields.

    Each response's ``.text`` is parsed as JSON. A response yields the
    placeholder record (all four fields set to ``"UNKNOWN"``) when its text
    cannot be read, is not valid JSON, or is valid JSON that is not an
    object. The result therefore always has one dict per input response.

    Args:
        responses: Iterable of objects exposing a ``text`` attribute.

    Returns:
        A list of dicts, one per response, in the same order.
    """
    field_names = ("Collector", "Location", "Taxon", "Date")
    structured_responses = []
    for response in responses:
        try:
            # Reading ``.text`` stays inside the try: the accessor itself may
            # fail (missing/None text, or a response with no usable content).
            raw_text = response.text.strip()
            # Models often wrap JSON in a Markdown fence (```json ... ```);
            # peel it off so otherwise-valid answers are not discarded.
            if raw_text.startswith("```"):
                raw_text = raw_text.strip("`").strip()
                if raw_text[:4].lower() == "json":
                    raw_text = raw_text[4:]
            parsed_json = json.loads(raw_text)
        except (ValueError, TypeError, AttributeError):
            # json.JSONDecodeError is a ValueError subclass.
            parsed_json = None

        # Only JSON objects are usable as table rows; lists, strings, numbers
        # and failures all fall back to the placeholder record.
        if not isinstance(parsed_json, dict):
            parsed_json = dict.fromkeys(field_names, "UNKNOWN")
        structured_responses.append(parsed_json)
    return structured_responses
104
+
105
  def process_images(uploaded_file):
106
  global results_df
107
  results_df = results_df.iloc[0:0] # Clear the DataFrame if re-running this cell
 
122
  "Translated Text": translated_text
123
  }])
124
  results_df = pd.concat([results_df, new_row], ignore_index=True)
125
+
126
# Configure the generative AI model.
# The API key is read from the environment instead of being hard-coded:
# a key committed to source control is readable by anyone with access to
# the repository and should be treated as compromised (revoke and rotate).
import os

gemini_api_key = os.environ.get("GOOGLE_API_KEY")
if not gemini_api_key:
    raise RuntimeError("GOOGLE_API_KEY environment variable is not set")
genai.configure(api_key=gemini_api_key)
model = genai.GenerativeModel('gemini-pro')
129
+
130
# Prepare data for few-shot learning.
# Each key is an example input text and each value is the record expected for
# it (Collector / Location / Taxon / Date). The double quotes inside the keys
# (arc-second marks in the coordinates) are escaped so they do not terminate
# the string literal.
shots = {
    "Chinese National Herbarium (PE) Plants of Xizang CHINA, Xizang, Lhoka City, Lhozhag County, Lhakang Town, Kharchhu Gompa vicinity 28°5'37.15\"N, 91°7'24.74\"E; 3934 m Herbs. Slopes near roadsides. PE-Xizang Expedition #PE6663 NCIL 14 September 2017 N° 2581259 TIBET PE CHINESE NATIONAL HERBARIUM (PE) 02334125 #PE6663 COMPOSITAE Aster albescens (DC.) Hand.-Mazz. A: it (Guo-Jin ZHANG) 01 April 2018": {
        "Collector": "Guo-Jin, Zhang",
        "Location": "Xizang, Tibet, China, Lhoka City, Lhozhag County, Lhakang Town, near Kharchhu Gompa",
        "Taxon": "Aster albescens (DC.) Hand.-Mazz., Compositae (Asteraceae) family",
        "Date": "14 September 2017",
    },
    "PE-Xizang Expedition #PE6673 9 NSIT Chinese National Herbarium (PE) Plants of Xizang CHINA, Xizang, Lhoka City, Lhozhag County, Lhakang Town, Kharchhu Gompa vicinity 28°5'37.15\"N, 91°7'24.74\"E; 3934 m Herbs. Slopes near roadsides. PE-Xizang Expedition #PE6673 9 NSIT Chinese National Herbarium (PE) Plants of Xizang CHINA, Xizang, Lhoka City, Lhozhag County, Lhakang Town, Kharchhu Gompa vicinity 28°5'37.15\"N, 91°7'24.74\"E; 3934 m Herbs. Slopes near roadsides. PE-Xizang Expedition #PE6673 9 NSIT Chinese National Herbarium (PE) Plants of Xizang Spiral Leaf Green 17 May 2018": {
        "Collector": "UNKNOWN",
        "Location": "Xizang, Tibet, China, Lhoka City, Lhozhag County, Lhakang Town, near Kharchhu Gompa",
        "Taxon": "Spiral Leaf Green",
        "Date": "17 May 2018",
    },
    "Honey Plants Research Institute of the Chinese Academy of Agricultural Sciences Collection No.: 13687. May 7, 1993 Habitat Roadside Altitude: 1600 * Characters Shrub No. Herbarium of the Institute of Botany, Chinese Academy of Sciences Collector 3687 Scientific Name Height: m (cm) Diameter at breast height m (cm) Flower: White Fruit: Notes Blooming period: from January to July Honey: Scientific Name: Rosa Sericea Lindl. Appendix: Collector: cm 1 2 3 4 25 CHINESE NATIONAL HERBARUM ( 01833954 No 1479566 * Herbarium of the Institute of Botany, Chinese Academy of Sciences Sichuan SZECHUAN DET. Rosa sercea Lindl. var. Various Zhi 2009-02-16": {
        "Collector": "UNKNOWN",
        "Location": "Sichuan, China",
        "Taxon": "Rosa sericea Lindl., with possible variant identification as 'var. Various Zhi'",
        "Date": "7 May 1993",
    },
}
154
+
155
+ responses = []
156
+ for input_text in results_df["Translated Text"]:
157
+ random_pairs = get_random_pairs_list(shots)
158
+ prompt = construct_prompt(input_text, random_pairs)
159
+ response = model.generate_content(prompt)
160
+ responses.append(response)
161
+
162
+ # Processing responses
163
+ json_responses = process_responses(responses)
164
+ results_df = pd.concat([results_df, pd.DataFrame(json_responses)], axis=1)
165
+
166
  except Exception as e:
167
  return f"An error occurred: {str(e)}"
168
 
169
  return results_df.to_html()
170
 
171
+ css = """
172
+ body { font-family: Arial, sans-serif; }
173
+ .input-container { width: 95%; margin: auto; }
174
+ .output-container { width: 95%; margin: auto; }
175
+ """
176
+
177
  interface = gr.Interface(
178
  fn=process_images,
179
  inputs="file",
 
183
  )
184
 
185
  if __name__ == "__main__":
186
+ interface.launch(share=True)